# ML классификатор на основе нейронной сети

In [303]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, BatchNormalization, Dropout
import pandas as pd

In [304]:
# Загружаем файл с разделителем ';' в DF
df = pd.read_csv(fr'nn_features_and_target.csv', delimiter=';')
# df.columns = df.columns.astype(str)
# df['29'] = df.apply(lambda x: 1.0 if x['28'] == 0.0 else 0.0, axis=1)
print(df.to_string(max_rows=8, max_cols=30))

             0         1         2         3         4         5         6         7         8         9        10        11        12        13        14        15        16        17        18        19        20   21   22   23   24   25   26   27    28
0     1.000000  0.949765  0.946863  0.890311  0.886746  0.810817  0.768762  0.702327  0.658250  0.519462  0.000000  0.071725  0.156965  0.237875  0.342755  0.385964  0.561642  0.618664  0.710416  0.721658  0.876098  1.0  0.0  0.0  0.0  0.0  0.0  0.0  down
1     1.000000  0.999613  0.949249  0.946274  0.888331  0.884700  0.808215  0.764843  0.699029  0.650257  0.510770  0.000000  0.073884  0.163346  0.246354  0.359640  0.402954  0.585638  0.643909  0.737880  0.749382  0.0  1.0  0.0  0.0  0.0  0.0  0.0    up
2     0.879807  0.815902  0.811820  0.731742  0.685201  0.616099  0.559507  0.401556  0.216736  0.112487  0.000000  0.101511  0.235431  0.280920  0.482243  0.544868  0.668686  0.686063  0.859401  0.888299  1.000000  0.0  0.0  0.0  1

In [305]:
# Разделяем датафрейм на признаки (X) и целевую переменную (y)
X = df.drop('28', axis=1).values
y = df['28'].values
print(y)

['down' 'up' 'down' ... 'up' 'down' 'down']


In [306]:
# Преобразуем категориальный таргет в числовой формат
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
print(y_encoded)

[0 1 0 ... 1 0 0]


In [307]:
# Преобразуем числовые метки в one-hot encoding
y_categorical = to_categorical(y_encoded)
print(y_categorical)

[[1. 0.]
 [0. 1.]
 [1. 0.]
 ...
 [0. 1.]
 [1. 0.]
 [1. 0.]]


In [308]:
# Разделяем данные на тренировочный и тестовый наборы
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)
print(y_test)

[[1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 

In [309]:
# Создаем модель нейронной сети

model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))  # Входной слой
for _ in range(4):
    model.add(Dense(1024, activation='relu', ))
    model.add(BatchNormalization())
    model.add(Dropout(0.20))
# model.add(Dense(y_train.shape[1], activation='softmax'))
model.add(Dense(y_categorical.shape[1], activation='softmax'))  # Выходной слой

In [310]:
# Компилируем модель по параметрам
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [311]:
# Обучаем модель
model.fit(X_train, y_train, epochs=500, batch_size=32, validation_split=0.2)

Epoch 1/500
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - accuracy: 0.5128 - loss: 1.8923 - val_accuracy: 0.4960 - val_loss: 0.7004
Epoch 2/500
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.4872 - loss: 1.0373 - val_accuracy: 0.4776 - val_loss: 0.7194
Epoch 3/500
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.4980 - loss: 0.8792 - val_accuracy: 0.4723 - val_loss: 0.8408
Epoch 4/500
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.5082 - loss: 0.8404 - val_accuracy: 0.5277 - val_loss: 0.7061
Epoch 5/500
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.4985 - loss: 0.8417 - val_accuracy: 0.5013 - val_loss: 0.7212
Epoch 6/500
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.5369 - loss: 0.7453 - val_accuracy: 0.5013 - val_loss: 0.7082
Epoch 7/500
[1m48/48[0m [

<keras.src.callbacks.history.History at 0x149a686d100>

In [312]:
# Оцениваем модель на тестовом наборе
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy}')

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5093 - loss: 1.6582 
Test accuracy: 0.49682876467704773


In [313]:
predictions = model.predict(X_test)

# Преобразование чисел в строки с фиксированным количеством десятичных знаков
formatted_arr = np.array([[f'{num:.6f}' for num in row] for row in predictions])

# Преобразование строк обратно в числа
formatted_num_arr = np.array([[float(num) for num in row] for row in formatted_arr])

# Преобразуем массив в DataFrame
df = pd.DataFrame(formatted_num_arr)

# Добавление столбца предсказанной категории
df['predict'] = df.apply(lambda x: 'down' if x[0] > x[1] else 'up', axis=1)

# Вывод вероятностей для примера из тестовой выборки
print(df)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
            0         1 predict
0    0.029042  0.970958      up
1    0.018760  0.981240      up
2    0.745602  0.254398    down
3    0.924239  0.075761    down
4    0.335212  0.664788      up
..        ...       ...     ...
468  0.138390  0.861610      up
469  0.705455  0.294545    down
470  0.818717  0.181283    down
471  0.993473  0.006527    down
472  0.590647  0.409353    down

[473 rows x 3 columns]


In [314]:
# Преобразуем числовые метки обратно в категориальные 
y_test_lst = [('down' if y[0] == 1.0 else 'up') for y in y_test]

# Добавление колонки с реальными напрвлениями баров
df['real'] = y_test_lst
print(df)

            0         1 predict  real
0    0.029042  0.970958      up  down
1    0.018760  0.981240      up    up
2    0.745602  0.254398    down    up
3    0.924239  0.075761    down    up
4    0.335212  0.664788      up  down
..        ...       ...     ...   ...
468  0.138390  0.861610      up  down
469  0.705455  0.294545    down    up
470  0.818717  0.181283    down    up
471  0.993473  0.006527    down  down
472  0.590647  0.409353    down    up

[473 rows x 4 columns]


In [315]:
# Подсчет количества совпадений
num_matches = (df['predict'] == df['real']).sum()
print(num_matches)

# Подсчет количества не совпадений
num_no_matches = (df['predict'] != df['real']).sum()
print(num_no_matches)

235
238
