In [1]:
from pathlib import Path
import os
import sys

sys.path.append(str(Path(os.getcwd()).parent))

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.under_sampling import RandomUnderSampler
import tensorflow as tf

In [3]:
from mylib.neural_network.sequential import Sequential
from mylib.neural_network.dense import Dense
from mylib.neural_network.SGD import SGD

## 1. Предобработка данных для классификации

In [4]:
table_classification = pd.read_csv("../data/neo_task_compleated.csv")
table_classification

Unnamed: 0,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude,hazardous
0,0.016016,0.035813,56014.078517,1.024333e+06,26.10,False
1,0.030518,0.068240,7864.348060,3.268186e+07,24.70,False
2,0.055533,0.124177,55257.544508,6.538636e+07,23.40,False
3,0.019256,0.043057,41531.404722,1.260796e+07,25.70,False
4,0.139494,0.311918,67639.394481,7.130590e+07,21.40,False
...,...,...,...,...,...,...
90831,0.017561,0.039268,23264.740825,1.635007e+06,25.90,False
90832,0.110804,0.247765,24802.519406,3.351901e+07,21.90,False
90833,0.035039,0.078350,116288.999548,5.471396e+07,24.40,False
90834,0.044112,0.098637,45763.317060,2.694877e+07,23.90,False


In [5]:
X_classification = np.array(table_classification.drop(columns=['hazardous'], axis=1))
y_classification = np.array(table_classification['hazardous'])
columns = table_classification.drop(columns=['hazardous'], axis=1).columns

In [6]:
rus = RandomUnderSampler()
X, y = rus.fit_resample(X_classification, y_classification)
print(X.shape, y.shape)

(17680, 5) (17680,)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, shuffle=True)

In [8]:
# Масштабирование

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_my_train, X_my_test, y_my_train, y_my_test = X_train, X_test, y_train, y_test

X_train

array([[-0.3310222 , -0.33062452, -1.12061933, -1.15990015, -0.0763502 ],
       [-0.44767382, -0.44784079, -1.66274494, -0.50304093,  0.17635915],
       [-0.6442255 , -0.64534388, -0.89053958,  1.39453643,  0.99143581],
       ...,
       [-0.2927757 , -0.29219288, -0.5191361 ,  0.25617111, -0.14397665],
       [-0.46124876, -0.46148144,  1.05789047,  1.63431417,  0.21195202],
       [ 0.65499113,  0.66016163,  0.85344915,  0.26513737, -1.01244261]])

## 2. Построение моделей для классификации

In [9]:
from sklearn.metrics import classification_report

In [10]:
# Модель классификации с функцией активации "Софтмакс" на выходном слое

classification_model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(5,)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(2, activation="softmax"),
    ]
)

classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.05), loss="sparse_categorical_crossentropy")
classification_model.fit(X_train, y_train, epochs=25, verbose=None)

y_pred = [np.argmax(pred) for pred in classification_model.predict(X_test)]
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       False       1.00      0.71      0.83      2652
        True       0.77      1.00      0.87      2652

    accuracy                           0.85      5304
   macro avg       0.89      0.85      0.85      5304
weighted avg       0.89      0.85      0.85      5304



In [11]:
# Модель классификации с функцией активации "Сигмоида" на выходном слое

classification_model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(5,)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.05), loss="mse")
classification_model.fit(X_train, y_train, epochs=25, verbose=None)

y_pred = np.around(classification_model.predict(X_test))
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       False       0.97      0.76      0.85      2652
        True       0.80      0.97      0.88      2652

    accuracy                           0.87      5304
   macro avg       0.89      0.87      0.87      5304
weighted avg       0.89      0.87      0.87      5304



In [21]:
# Собственная реализация

classification_model = Sequential([
        Dense(8, activation="tanh", input_shape=5),
        Dense(11, activation="ReLU",),
        Dense(2, activation="softmax"),
])

classification_model.compile(optimizer=SGD(0.005, 100), loss="cross_entropy")
classification_model.fit(X_train, y_train, epochs=5)

y_pred = [np.argmax(pred) for pred in classification_model.predict(X_test)]
print(classification_report(y_test, y_pred))

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 28)

## 3. Предобработка данных для регрессии

In [13]:
table_regression = pd.read_csv("../data/energy_task_compleated.csv")
table_regression

Unnamed: 0.1,Unnamed: 0,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,month,day
0,0,60,30,19.890000,47.596667,19.200000,44.790000,19.790000,44.730000,19.000000,...,17.033333,45.5300,6.60,733.5,92.000000,7.000000,63.000000,5.3,11.0,1.0
1,1,60,30,19.890000,46.693333,19.200000,44.722500,19.790000,44.790000,19.000000,...,17.066667,45.5600,6.48,733.6,92.000000,6.666667,59.166667,5.2,11.0,1.0
2,2,50,30,19.890000,46.300000,19.200000,44.626667,19.790000,44.933333,18.926667,...,17.000000,45.5000,6.37,733.7,92.000000,6.333333,55.333333,5.1,11.0,1.0
3,3,50,40,19.890000,46.066667,19.200000,44.590000,19.790000,45.000000,18.890000,...,17.000000,45.4000,6.25,733.8,92.000000,6.000000,51.500000,5.0,11.0,1.0
4,4,60,40,19.890000,46.333333,19.200000,44.530000,19.790000,45.000000,18.890000,...,17.000000,45.4000,6.13,733.9,92.000000,5.666667,47.666667,4.9,11.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19730,19730,100,0,25.566667,46.560000,25.890000,42.025714,27.200000,41.163333,24.700000,...,23.200000,46.7900,22.70,755.2,55.666667,3.333333,23.666667,13.3,5.0,27.0
19731,19731,90,0,25.500000,46.500000,25.754000,42.080000,27.133333,41.223333,24.700000,...,23.200000,46.7900,22.60,755.2,56.000000,3.500000,24.500000,13.3,5.0,27.0
19732,19732,270,10,25.500000,46.596667,25.628571,42.768571,27.050000,41.690000,24.700000,...,23.200000,46.7900,22.50,755.2,56.333333,3.666667,25.333333,13.3,5.0,27.0
19733,19733,420,10,25.500000,46.990000,25.414000,43.036000,26.890000,41.290000,24.700000,...,23.200000,46.8175,22.30,755.2,56.666667,3.833333,26.166667,13.2,5.0,27.0


In [14]:
X = np.array(table_regression.drop(columns=["Appliances"], axis=1))
y = np.array(table_regression["Appliances"])

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)

In [16]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train

array([[ 1.34485831, -0.48216571,  2.39035127, ...,  1.96332608,
         2.60133947, -1.04033075],
       [-1.06296117, -0.48216571,  0.43875221, ...,  0.07876622,
         0.92053732, -1.3541562 ],
       [ 0.50311024, -0.48216571, -0.49255169, ..., -0.13593047,
         1.59285818, -1.14493923],
       ...,
       [ 1.19278366, -0.48216571,  1.45904737, ...,  0.88984262,
         0.58437689, -1.04033075],
       [ 1.44968307, -0.48216571,  1.12791709, ..., -0.2790616 ,
         0.24821646,  0.11036258],
       [ 1.12383382,  2.02513229,  0.94165631, ..., -0.69652739,
        -0.4241044 , -1.04033075]])

## 4. Построение моделей для регрессии

In [17]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from math import sqrt

In [18]:
def metrics(test, predict):
    print(f'MAE: {mean_absolute_error(test, predict)}')
    print(f'MSE: {mean_squared_error(test, predict)}')
    print(f'RMSE: {sqrt(mean_squared_error(test, predict))}')
    print(f'MAPE: {mean_absolute_percentage_error(test, predict)}')
    print(f'R^2: {r2_score(test, predict)}')

In [19]:
# Модель регрессии с линейной функцией активации на выходном слое

classification_model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(28,)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="linear"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(1, activation="linear"),
    ]
)

classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.05), loss="mse")
classification_model.fit(X_train, y_train, epochs=25, verbose=None)

y_pred = classification_model.predict(X_test)
metrics(y_test, y_pred)

MAE: 49.81472315093267
MSE: 8530.114898701628
RMSE: 92.35862113902323
MAPE: 0.5649235407851465
R^2: 0.22172620112652186


In [25]:
# Собственная реализация

regression_model = Sequential([
        Dense(5, activation="sigmoid", input_shape=28),
        Dense(10, activation="ReLU",),
        Dense(1, activation="linear"),
])

regression_model.compile(optimizer=SGD(0.005, 12000), loss="mse")
regression_model.fit(X_train, y_train, epochs=20)

y_pred = regression_model.predict(X_test)
metrics(y_test, y_pred)
# y_pred

[333.83861 182.90551 267.33645 268.49823 214.99848]
[ 79.41613 244.7291  106.07581  84.42491 129.5935   95.66051 164.42993
 159.59944 154.31641 196.58763 147.35457 242.04536 172.42541 130.11595
 152.46834 166.21317 218.68842 124.44924 140.8047  160.18887 149.42341
 109.94885 167.86225 146.18622 238.60918 125.86726 187.50599 171.05644]
[-118.68199 -121.72651 -188.65009 -149.47372 -136.81457]
[ -47.69549 -145.77503  -67.64449  -45.16721  -78.68825  -63.23005
 -108.70152  -82.10707  -88.95387 -122.23763  -96.27472 -140.77318
  -92.64975  -64.77215  -89.42922 -110.68381 -127.3057   -75.54181
  -90.78552  -98.39702  -93.12977  -65.0685  -104.48267  -94.97213
 -138.7685   -60.77918 -105.68705  -93.94265]
[-614.01456 -483.4532  -719.11119 -624.04813 -544.84093]
[-195.14604 -588.88456 -263.53244 -191.66074 -315.35197 -245.82098
 -419.35143 -359.58234 -360.40096 -483.64342 -374.39386 -574.35359
 -389.94813 -285.8885  -364.73066 -425.54978 -519.05136 -303.17473
 -353.35242 -393.47005 -370.37559 

  dE_dx = np.around(np.around(dE_dt, 5) @ np.around(self.W, 5), 5)
  dE_dt = np.around(np.around(dE_dh, 5) * np.around(self.activation_functions[self.activation]["derivative"](self.t), 5), 5)
  dE_dx = np.around(dE_dh @ temp_layers[i].W, 5)


[nan nan nan nan nan]
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]
[nan nan nan nan nan]
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]
[nan nan nan nan nan]
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]
[nan nan nan nan nan]
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]
[nan nan nan nan nan]
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]
[nan nan nan nan nan]
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]
[nan nan nan nan nan]
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]
[nan nan nan nan nan]
[nan nan nan nan na

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
