In [1]:
from sklearn import datasets
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
# для оценки качества решения задачи регрессии
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# для оценки качества решения задачи классификации
from sklearn.metrics import confusion_matrix, classification_report

**Регресия

In [2]:
data_regression = pd.read_csv("../data/kc_house_data.csv")
data_regression["date"]=data_regression["date"].str[:4]
data_regression["date"]=pd.to_numeric(data_regression["date"])
y = data_regression["price"]
X = data_regression.drop(["price"], axis = 1)
data_regression

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,2014,221900.0,3,1.00,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,2014,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.7210,-122.319,1690,7639
2,5631500400,2015,180000.0,2,1.00,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,2014,604000.0,4,3.00,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,2015,510000.0,3,2.00,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21608,263000018,2014,360000.0,3,2.50,1530,1131,3.0,0,0,...,8,1530,0,2009,0,98103,47.6993,-122.346,1530,1509
21609,6600060120,2015,400000.0,4,2.50,2310,5813,2.0,0,0,...,8,2310,0,2014,0,98146,47.5107,-122.362,1830,7200
21610,1523300141,2014,402101.0,2,0.75,1020,1350,2.0,0,0,...,7,1020,0,2009,0,98144,47.5944,-122.299,1020,2007
21611,291310100,2015,400000.0,3,2.50,1600,2388,2.0,0,0,...,8,1600,0,2004,0,98027,47.5345,-122.069,1410,1287


In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((18371, 20), (18371,), (3242, 20), (3242,))

In [4]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train)
X_reg_train = scaler.transform(X_train)
X_reg_test = scaler.transform(X_test)

In [5]:
from imblearn.under_sampling import RandomUnderSampler
undersampler = RandomUnderSampler()
X_reg_train, y_reg_train = undersampler.fit_resample(X_reg_train, y_train)

In [87]:
model_regression = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(20,)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(8, activation="relu"),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Dense(1, activation="linear"),
    ]
)

In [88]:
model_regression.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                1344      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense_3 (Dense)             (None, 8)                 136       
                                                                 
 dropout_2 (Dropout)         (None, 8)                 0

In [95]:
model_regression.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005), loss="mse")

Другие оптимизаторы, доступные в TensorFlow Keras, это: SGD, RMSprop, Adagrad, Adadelta, Adamax, Nadam, Ftrl.

В данном случае, функция ошибки ('loss') установлена на 'mse', что означает mean squared error (среднеквадратичную ошибку). Также в TensorFlow Keras доступны и другие функции ошибки, такие как mean absolute error ('mae'), mean absolute percentage error ('mape'), mean squared logarithmic error ('msle') и другие. Они используются в зависимости от типа задачи и цели обучения модели.

In [96]:
model_regression.fit(X_reg_train, y_reg_train, epochs=80)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<keras.callbacks.History at 0x26d4410ab60>

In [97]:
print(mean_absolute_error(y_test, model_regression.predict(X_reg_test)))
print(mean_squared_error(y_test, model_regression.predict(X_reg_test)))
print(r2_score(y_test, model_regression.predict(X_reg_test)))

107339.22433104565
30739188311.36287
0.7575981264971131


In [106]:
model_regression.save('model_regression_neuro.h5')

**КЛАССИФИКАЦИЯ

In [3]:
data_classification = pd.read_csv("../data/card_transdata.csv")
x = data_classification.drop(["fraud"], axis = 1)
Y = data_classification["fraud"]
data_classification

Unnamed: 0,distance_from_home,distance_from_last_transaction,ratio_to_median_purchase_price,repeat_retailer,used_chip,used_pin_number,online_order,fraud
0,57.877857,0.311140,1.945940,1.0,1.0,0.0,0.0,0.0
1,10.829943,0.175592,1.294219,1.0,0.0,0.0,0.0,0.0
2,5.091079,0.805153,0.427715,1.0,0.0,0.0,1.0,0.0
3,2.247564,5.600044,0.362663,1.0,1.0,0.0,1.0,0.0
4,44.190936,0.566486,2.222767,1.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...
999995,2.207101,0.112651,1.626798,1.0,1.0,0.0,0.0,0.0
999996,19.872726,2.683904,2.778303,1.0,1.0,0.0,0.0,0.0
999997,2.914857,1.472687,0.218075,1.0,1.0,0.0,1.0,0.0
999998,4.258729,0.242023,0.475822,1.0,0.0,0.0,1.0,0.0


In [4]:
X_clf_train, X_clf_test, y_clf_train, y_clf_test = train_test_split(x, Y, test_size=0.15)
X_clf_train.shape, y_clf_train.shape, X_clf_test.shape, y_clf_test.shape

((850000, 7), (850000,), (150000, 7), (150000,))

In [5]:
from imblearn.under_sampling import RandomUnderSampler
undersample = RandomUnderSampler()
x_under, y_under = undersample.fit_resample(X_clf_train, y_clf_train)
x_under.shape, y_under.shape, X_clf_test.shape, y_clf_test.shape

((148686, 7), (148686,), (150000, 7), (150000,))

In [96]:
model_classification = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(7,)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

In [97]:
model_classification.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="binary_crossentropy")
model_classification.fit(x_under, y_under, epochs=25, verbose=None)

<keras.callbacks.History at 0x1b101932560>

In [98]:
y_pred = np.around(model_classification.predict(X_clf_test, verbose=None))

In [99]:
print(classification_report(y_clf_test, y_pred))
print(confusion_matrix(y_clf_test, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      0.99      0.99    136922
         1.0       0.90      1.00      0.95     13078

    accuracy                           0.99    150000
   macro avg       0.95      0.99      0.97    150000
weighted avg       0.99      0.99      0.99    150000

[[135475   1447]
 [     8  13070]]


In [105]:
model_classification.save('model_classification_neuro.h5')

**Собственная реализация

In [19]:
pip install sympy

Collecting sympyNote: you may need to restart the kernel to use updated packages.

  Downloading sympy-1.12-py3-none-any.whl (5.7 MB)
     ---------------------------------------- 5.7/5.7 MB 5.1 MB/s eta 0:00:00
Collecting mpmath>=0.19
  Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)
     -------------------------------------- 536.2/536.2 KB 8.5 MB/s eta 0:00:00
Installing collected packages: mpmath, sympy
Successfully installed mpmath-1.3.0 sympy-1.12


You should consider upgrading via the 'C:\Users\Aron\Desktop\Учеба\4 семестр\Машинное обучение\Jupiter nouts\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [51]:
import numdifftools as nd
class NeuroLayer:

    def __init__(self, numOfNeurons: int, activation: str) -> None:
        allowedActivation = ['sigmoid', 'tanh', 'relu', 'linear']
        self.n = numOfNeurons
        self.connectedToPrevious = np.array([])
        if activation in allowedActivation:
            if activation == 'sigmoid':
                self.af = self.sigmoid
            elif activation == 'tanh':
                self.af = self.tanh
            elif activation == 'relu':
                self.af = self.relu
            elif activation == 'linear':
                self.af = self.linear

    def sigmoid(self, x: np.ndarray) -> np.ndarray:
        return 1/(1+np.exp(-1 * x)).astype("float64")
    
    def tanh(self, x: np.ndarray) -> np.ndarray:
        return np.tanh(x)
    
    def relu(self, x: np.ndarray) -> np.ndarray:
        return np.maximum(x, 0)
    
    def linear(self, x: np.ndarray) -> np.ndarray:
        return x
    
    def forward(self, x: np.ndarray) -> np.ndarray:
        self.x = x
        temp = np.dot(self.connectedToPrevious, self.x) + self.b
        self.a = self.af(temp)
        self.ga = nd.Derivative(self.af)(temp)
        return self.a
    
    def fast_forward(self, x: np.ndarray) -> np.ndarray:
        return self.af(np.dot(self.connectedToPrevious, x) + self.b)
    
    def backward(self, gr: np.ndarray, m, lr) -> np.ndarray:
        buf = np.multiply(self.ga.T, np.dot(gr, m))
        self.b -= lr * buf.T
        self.connectedToPrevious -= lr*np.dot(self.x, buf).T
        return buf

In [52]:
class NeuroNet:
    def __init__(self, sequence: list, input_shape: tuple) -> None:
        self.layers = sequence
        buf = np.random.normal(0, 1, (self.layers[0].n, input_shape[0]))
        self.matrices = [buf]
        self.layers[0].connectedToPrevious = buf
        self.layers[0].b = np.random.normal(0, 1, (self.layers[0].n, 1))
        
    def __MSE(self, yp, yr):
        return np.mean((yr-yp)**2)
        
    def compile(self, loss: str) -> None:
        for i in range(len(self.layers) - 1):
            buf = np.random.normal(0, 1, (self.layers[i+1].n, self.layers[i].n))
            self.matrices.append(buf)
            self.layers[i+1].connectedToPrevious = buf
            self.layers[i+1].b = np.random.normal(0, 1, (self.layers[i+1].n, 1))
            
    def __max_batches(self, n: int) -> int:
        i = n // 2
        m = 20 if n < 100 else 250
        while i > m:
            if n % i == 0:
                return i
            i -= 1
        return i
            
    def fit(self, X:pd.DataFrame, y:pd.Series, e:int, rate: float = 0.01) -> np.ndarray:
        X = X.to_numpy()
        for i in range(e):
            print(f"Initializing epoch {i+1} of {e}")
            
            nbatches = self.__max_batches(len(X))
            
            batchSize = len(X) // nbatches
            
            start = 0
            
            totalp = []
            
            for i in range(batchSize, nbatches, batchSize):
                batch = X[start:i]
                
                for ind, ob in enumerate(batch):
                    ob = ob[np.newaxis, :].T
                    for layer in self.layers:
                        ob = layer.forward(ob)
                    
                    pred = ob
                    totalp.append(pred.flatten()[0])
                    
                    gr = nd.Gradient(self.__MSE)(pred, y.values[start:i][ind])
                    
                    m = 1
                    
                    for layer in self.layers[::-1]:
                        gr = layer.backward(gr, m, rate)
                        m = layer.connectedToPrevious
                
                start += batchSize
        self.matrices = [layer.connectedToPrevious for layer in self.layers]
                
    def predict(self, X: pd.DataFrame) -> np.ndarray:
        X = X.to_numpy()
        pred = []
        for ind, ob in enumerate(X):
            print(f"{ind}/{X.shape[0]}", end='\r')
            ob = ob[np.newaxis, :].T
            for layer in self.layers:
                ob = layer.fast_forward(ob)
            pred.append(ob.flatten()[0])
            
        return np.array(pred)

In [28]:
undersample1 = RandomUnderSampler()
x_under1, y_under1 = undersample1.fit_resample(x_under, y_under)
x_under1.shape, y_under1.shape, X_clf_test.shape, y_clf_test.shape

((148686, 7), (148686,), (150000, 7), (150000,))

In [79]:
layer1 = NeuroLayer(16, 'relu')
layer2 = NeuroLayer(8, 'relu')
layer3 = NeuroLayer(4, 'relu')
layer4 = NeuroLayer(1, 'linear')

layer1.connectedToPrevious = np.random.normal(0, 1, (16, 20))
layer2.connectedToPrevious = np.random.normal(0, 1, (8, 16))
layer3.connectedToPrevious = np.random.normal(0, 1, (4, 8))
layer4.connectedToPrevious = np.random.normal(0, 1, (1, 4))

net = NeuroNet([layer1, layer2, layer3, layer4], (20,))

net.compile('MSE')
net.fit(pd.DataFrame(X_reg_train), y_reg_train, 5, 0.0001)
pred = net.predict(X_test)

Initializing epoch 1 of 5
Initializing epoch 2 of 5
Initializing epoch 3 of 5
Initializing epoch 4 of 5
Initializing epoch 5 of 5
3241/3242

In [80]:
print(pred)

[1.44481656e+19 1.00607572e+19 9.71814152e+18 ... 1.15524974e+19
 1.31605643e+19 1.20492395e+19]


In [86]:
layer11 = NeuroLayer(64, 'relu')
layer22 = NeuroLayer(32, 'relu')
layer33 = NeuroLayer(16, 'relu')
layer44 = NeuroLayer(1, 'linear')

layer11.connectedToPrevious = np.random.normal(0, 1, (64, 20))
layer22.connectedToPrevious = np.random.normal(0, 1, (32, 64))
layer33.connectedToPrevious = np.random.normal(0, 1, (16, 32))
layer44.connectedToPrevious = np.random.normal(0, 1, (1, 16))

net1 = NeuroNet([layer11, layer22, layer33, layer44], (20,))

net1.compile('MSE')
net1.fit(pd.DataFrame(X_reg_train), y_reg_train, 40, 0.0001)
pred1 = net1.predict(X_test)

Initializing epoch 1 of 40
Initializing epoch 2 of 40
Initializing epoch 3 of 40
Initializing epoch 4 of 40
Initializing epoch 5 of 40
Initializing epoch 6 of 40
Initializing epoch 7 of 40
Initializing epoch 8 of 40
Initializing epoch 9 of 40
Initializing epoch 10 of 40
Initializing epoch 11 of 40
Initializing epoch 12 of 40
Initializing epoch 13 of 40
Initializing epoch 14 of 40
Initializing epoch 15 of 40
Initializing epoch 16 of 40
Initializing epoch 17 of 40
Initializing epoch 18 of 40
Initializing epoch 19 of 40
Initializing epoch 20 of 40
Initializing epoch 21 of 40
Initializing epoch 22 of 40
Initializing epoch 23 of 40
Initializing epoch 24 of 40
Initializing epoch 25 of 40
Initializing epoch 26 of 40
Initializing epoch 27 of 40
Initializing epoch 28 of 40
Initializing epoch 29 of 40
Initializing epoch 30 of 40
Initializing epoch 31 of 40
Initializing epoch 32 of 40
Initializing epoch 33 of 40
Initializing epoch 34 of 40
Initializing epoch 35 of 40
Initializing epoch 36 of 40
I

In [91]:
print(pred1)

[2.31816228e+23 2.35897122e+23 2.36221169e+23 ... 2.34488522e+23
 2.33004503e+23 2.34030104e+23]
