In [1]:
from pathlib import Path
import os
import sys

# Put the parent of the notebook's working directory on the import path.
sys.path.append(str(Path.cwd().parent))

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.under_sampling import RandomUnderSampler
import tensorflow as tf

## 1. Предобработка данных для классификации

In [3]:
# Load the prepared classification table from CSV and preview it.
table_classification = pd.read_csv("../data/neo_task_compleated.csv")
table_classification

Unnamed: 0,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,absolute_magnitude,hazardous
0,0.016016,0.035813,56014.078517,1.024333e+06,26.10,False
1,0.030518,0.068240,7864.348060,3.268186e+07,24.70,False
2,0.055533,0.124177,55257.544508,6.538636e+07,23.40,False
3,0.019256,0.043057,41531.404722,1.260796e+07,25.70,False
4,0.139494,0.311918,67639.394481,7.130590e+07,21.40,False
...,...,...,...,...,...,...
90831,0.017561,0.039268,23264.740825,1.635007e+06,25.90,False
90832,0.110804,0.247765,24802.519406,3.351901e+07,21.90,False
90833,0.035039,0.078350,116288.999548,5.471396e+07,24.40,False
90834,0.044112,0.098637,45763.317060,2.694877e+07,23.90,False


In [4]:
# Every column except the target `hazardous` is a feature.
columns = table_classification.columns.drop('hazardous')

# Feature matrix and target vector as independent NumPy arrays.
X_classification = table_classification[columns].to_numpy(copy=True)
y_classification = table_classification['hazardous'].to_numpy(copy=True)

In [5]:
# Balance the classes by random under-sampling.
# A fixed random_state keeps the sampled subset (and therefore every metric
# reported further down) the same on each Restart & Run All.
rus = RandomUnderSampler(random_state=42)
X, y = rus.fit_resample(X_classification, y_classification)
print(X.shape, y.shape)

(17680, 5) (17680,)


In [6]:
# Stratified 70/30 split; random_state pins the shuffle so results are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, shuffle=True, random_state=42
)

In [7]:
# Scaling: the scaler is fitted on the training split only, then the same
# transformation is applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Keep the scaled classification split under separate names as well.
X_my_train, y_my_train = X_train, y_train
X_my_test, y_my_test = X_test, y_test

X_train

array([[-0.60779594, -0.60756474,  0.80214772,  1.59574064,  0.83945647],
       [-0.43428896, -0.43386772, -0.47083446, -0.27503631,  0.16482016],
       [ 0.34848384,  0.34976244, -1.055442  ,  0.71003653, -0.82938071],
       ...,
       [ 3.64894368,  3.65383723, -0.22213995,  0.97647486, -1.90879879],
       [ 5.6003158 ,  5.60734665,  1.33464906,  0.49383679, -2.19285618],
       [-0.69437433, -0.69423796, -0.87809553,  1.50936963,  1.76264299]])

## 2. Построение моделей для классификации

In [8]:
from sklearn.metrics import classification_report

In [9]:
# Classification model with a softmax activation on the output layer

classification_model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(5,)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        # Two output units: one probability per class.
        tf.keras.layers.Dense(2, activation="softmax"),
    ]
)

# The sparse loss variant takes class labels directly (no one-hot encoding).
classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.05), loss="sparse_categorical_crossentropy")
# verbose=0 is the documented value for silent training (None is not a documented option).
classification_model.fit(X_train, y_train, epochs=25, verbose=0)

# Predicted class = index of the largest probability in each row.
y_pred = np.argmax(classification_model.predict(X_test), axis=1)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       False       1.00      0.70      0.82      2652
        True       0.77      1.00      0.87      2652

    accuracy                           0.85      5304
   macro avg       0.88      0.85      0.85      5304
weighted avg       0.88      0.85      0.85      5304



In [None]:
# Classification model with a sigmoid activation on the output layer

classification_model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(5,)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        # Single output unit: probability of the positive class.
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

# Binary cross-entropy is the loss that matches a single sigmoid output;
# MSE on sigmoid probabilities gives weak gradients when the unit saturates.
classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.05), loss="binary_crossentropy")
# Labels are cast to float so they share a dtype with the predicted probabilities.
# verbose=0 is the documented value for silent training.
classification_model.fit(X_train, np.asarray(y_train, dtype="float32"), epochs=25, verbose=0)

# Threshold the probabilities at 0.5 and flatten (n, 1) -> (n,) so the
# predictions have the same shape and boolean dtype as y_test.
y_pred = classification_model.predict(X_test).ravel() > 0.5
print(classification_report(y_test, y_pred))

## 3. Предобработка данных для регрессии

In [11]:
# Load the prepared regression table from CSV and preview it.
table_regression = pd.read_csv("../data/energy_task_compleated.csv")
table_regression

Unnamed: 0.1,Unnamed: 0,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,month,day
0,0,60,30,19.890000,47.596667,19.200000,44.790000,19.790000,44.730000,19.000000,...,17.033333,45.5300,6.60,733.5,92.000000,7.000000,63.000000,5.3,11.0,1.0
1,1,60,30,19.890000,46.693333,19.200000,44.722500,19.790000,44.790000,19.000000,...,17.066667,45.5600,6.48,733.6,92.000000,6.666667,59.166667,5.2,11.0,1.0
2,2,50,30,19.890000,46.300000,19.200000,44.626667,19.790000,44.933333,18.926667,...,17.000000,45.5000,6.37,733.7,92.000000,6.333333,55.333333,5.1,11.0,1.0
3,3,50,40,19.890000,46.066667,19.200000,44.590000,19.790000,45.000000,18.890000,...,17.000000,45.4000,6.25,733.8,92.000000,6.000000,51.500000,5.0,11.0,1.0
4,4,60,40,19.890000,46.333333,19.200000,44.530000,19.790000,45.000000,18.890000,...,17.000000,45.4000,6.13,733.9,92.000000,5.666667,47.666667,4.9,11.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19730,19730,100,0,25.566667,46.560000,25.890000,42.025714,27.200000,41.163333,24.700000,...,23.200000,46.7900,22.70,755.2,55.666667,3.333333,23.666667,13.3,5.0,27.0
19731,19731,90,0,25.500000,46.500000,25.754000,42.080000,27.133333,41.223333,24.700000,...,23.200000,46.7900,22.60,755.2,56.000000,3.500000,24.500000,13.3,5.0,27.0
19732,19732,270,10,25.500000,46.596667,25.628571,42.768571,27.050000,41.690000,24.700000,...,23.200000,46.7900,22.50,755.2,56.333333,3.666667,25.333333,13.3,5.0,27.0
19733,19733,420,10,25.500000,46.990000,25.414000,43.036000,26.890000,41.290000,24.700000,...,23.200000,46.8175,22.30,755.2,56.666667,3.833333,26.166667,13.2,5.0,27.0


In [12]:
# Target is `Appliances`; every other column of the table becomes a feature.
# NOTE(review): the table preview shows an `Unnamed: 0` column that looks like
# a saved row index; it stays in X as a feature — confirm this is intended.
X = table_regression.drop(columns=["Appliances"]).to_numpy(copy=True)
y = table_regression["Appliances"].to_numpy(copy=True)

In [13]:
# 70/30 shuffled split; random_state pins the shuffle so results are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, random_state=42
)

In [14]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train

array([[-1.68395231, -0.47746427, -1.49775437, ..., -0.66078558,
        -1.09755363, -0.19711743],
       [-0.07595415, -0.47746427, -0.5003692 , ..., -0.88733481,
        -0.42455263,  0.22265115],
       [-0.86391007,  0.78005555, -1.26295328, ..., -0.9111821 ,
        -0.76105313,  0.01276686],
       ...,
       [-1.18635131,  4.55261503,  0.06065996, ...,  0.96082995,
        -0.76105313, -1.35148104],
       [-0.11312519,  3.29509521, -0.14297285, ..., -0.19099403,
        -0.42455263,  0.117709  ],
       [ 1.60515874, -0.47746427,  1.68141087, ...,  1.62855399,
         0.24844837,  0.85230403]])

## 4. Построение моделей для регрессии

In [15]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from math import sqrt

In [16]:
def metrics(test, predict):
    """Print regression quality metrics, one per line.

    Args:
        test: true target values.
        predict: predicted target values.

    Prints MAE, MSE, RMSE, MAPE and R^2 (in that order); returns nothing.
    """
    scorers = (
        ("MAE", mean_absolute_error),
        ("MSE", mean_squared_error),
        # RMSE is the square root of the MSE.
        ("RMSE", lambda true, pred: sqrt(mean_squared_error(true, pred))),
        ("MAPE", mean_absolute_percentage_error),
        ("R^2", r2_score),
    )
    for label, scorer in scorers:
        print(f'{label}: {scorer(test, predict)}')

In [17]:
# Regression model with a linear activation on the output layer
# NOTE(review): the variable is still called `classification_model` although
# it holds a regressor; the name is kept so other cells are unaffected.

classification_model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(28,)),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="linear"),
        tf.keras.layers.Dropout(0.1),
        # Single linear unit: unbounded real-valued prediction.
        tf.keras.layers.Dense(1, activation="linear"),
    ]
)

classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.05), loss="mse")
# verbose=0 is the documented value for silent training (None is not a documented option).
classification_model.fit(X_train, y_train, epochs=25, verbose=0)

y_pred = classification_model.predict(X_test)
metrics(y_test, y_pred)

MAE: 47.85583234779578
MSE: 8229.696912721402
RMSE: 90.71767695836023
MAPE: 0.5222340924835388
R^2: 0.22585801657325144


## 5. Собственная реализация перцептрона

In [34]:
from mylib.neural_network.sequential import Sequential
from mylib.neural_network.dence import Dense
from mylib.neural_network.SGD import SGD

# Hand-written perceptron applied to the classification task (5 input features).
classification_model = Sequential([
        Dense(6, activation="sigmoid", input_shape=5),
        Dense(6, activation="tanh"),
        Dense(16, activation="ReLU"),
        Dense(8, activation="ReLU"),
        Dense(1, activation="sigmoid"),
])

classification_model.compile(optimizer=SGD(0.001, 20), loss="mse")
# Use the classification split saved as X_my_* / y_my_*: by this point the
# plain X_train / y_train / X_test / y_test names hold the regression data.
classification_model.fit(X_my_train, y_my_train, epochs=16)

# Round the sigmoid outputs to hard 0/1 class predictions.
y_pred = np.around(classification_model.predict(X_my_test))
print(classification_report(y_my_test, y_pred))

              precision    recall  f1-score   support

       False       0.00      0.00      0.00      2652
        True       0.50      1.00      0.67      2652

    accuracy                           0.50      5304
   macro avg       0.25      0.50      0.33      5304
weighted avg       0.25      0.50      0.33      5304



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
from mylib.neural_network.SGD import SGD
from mylib.neural_network.dence import Dense

# Scratch check of the hand-written SGD optimizer on a small stack of layers.
# NOTE(review): the saved output of this cell is `KeyError: 'transform'`;
# presumably the loss dict passed to compile() needs a 'transform' entry —
# confirm against the mylib SGD implementation.
a = SGD(batch_size=2)
l = [
    Dense(10, "sigmoid", 4),
    Dense(6, "ReLU"),
    Dense(1, "linear"),
]

# A layer whose `n` is not set takes it from the previous layer's `m`;
# every layer then gets its weights initialised.
for i in range(len(l)):
    if not l[i].n:
        l[i].n = l[i - 1].m
    l[i].initialize_weights()

a.compile(
    l,
    {
        "function": lambda X_true, X_pred: -np.sum(X_true * np.log(X_pred)),
        "derivative": lambda X_true, X_pred: -X_true / X_pred,  # TODO: re-check the derivative
    },
)

h = a.optimize(
    np.array([[1, 2, 3, 4], [4, 8, -8, 11]]),
    np.array([55, 3]),
)
print(h)

KeyError: 'transform'