## Подготовка

In [1]:
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.api.layers import LSTM, Bidirectional, Dense, Dropout, Input
from keras.api.models import Sequential
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so library deprecation and
# data-conversion warnings raised by later cells are hidden as well.
warnings.simplefilter('ignore')

In [3]:
# Load the daily dataset; the first CSV column is parsed as dates and
# becomes the index of the frame.
data = pd.read_csv(
    'datasets/APLE_PERIOD_D1.csv',
    index_col=[0],
    parse_dates=[0],
)

In [4]:
# Put the rows in ascending date order (oldest first).
# Sorting by the index instead of reversing with iloc[::-1] keeps this cell
# idempotent: re-running it cannot flip the frame back to newest-first, which
# would silently break the chronological train/test split done later with
# shuffle=False.
# NOTE(review): assumes the CSV is stored newest-first with unique dates, so
# that sorting and reversing give the same row order — confirm.
data = data.sort_index()
data

Unnamed: 0_level_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,...,A30,A31,A32,A33,A34,A35,A36,A37,A38,Close
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-11-30,0.035,-0.015,0.0875,0.0075,-0.0175,-0.0375,0.0875,0.0075,1.000000e-02,7.105427e-15,...,-0.06,0.000050,-0.029931,0.000000,0.000000,0.03,0.03,3.552714e-15,-0.037431,1
2017-12-01,-0.015,-0.045,0.0075,0.0225,-0.0375,-0.1125,0.0075,0.0225,7.105427e-15,-6.000000e-02,...,-0.15,-0.029931,0.000069,0.000000,0.000000,0.00,0.00,1.500000e-01,-0.022431,0
2017-12-04,-0.045,0.060,0.0225,0.1500,-0.1125,-0.0300,0.0225,0.1500,-6.000000e-02,7.500000e-02,...,-0.03,0.000069,-0.029912,0.000000,0.000000,0.03,0.03,3.750000e-02,-0.179912,1
2017-12-05,0.060,-0.060,0.1500,0.0300,-0.0300,-0.1500,0.1500,0.0300,7.500000e-02,-7.500000e-02,...,-0.09,-0.029912,0.000088,0.000000,0.000000,0.00,0.00,1.875000e-01,-0.029912,0
2017-12-06,-0.060,0.045,0.0300,0.1125,-0.1500,-0.0225,0.0300,0.1125,-7.500000e-02,4.500000e-02,...,0.00,0.000088,0.000088,0.000000,0.000000,0.00,0.00,2.250000e-02,-0.112412,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-03-15,0.075,0.165,0.1875,0.4125,-0.0375,-0.0825,0.3250,0.2375,1.350000e-01,1.100000e-01,...,-0.24,0.026316,0.056391,0.094411,0.014286,-0.09,0.32,6.928571e-02,-0.356109,0
2024-03-18,0.165,0.070,0.4125,0.1750,-0.0825,-0.0350,0.2375,0.1500,1.100000e-01,6.500000e-02,...,-0.12,0.056391,-0.033477,0.014286,-0.085777,0.02,0.31,-5.327694e-02,-0.208477,1
2024-03-19,0.070,-0.095,0.1750,0.0475,-0.0350,-0.2375,0.1500,0.0725,6.500000e-02,-5.000000e-02,...,-0.31,-0.033477,-0.183252,-0.085777,-0.145802,0.27,0.40,-2.080201e-02,-0.230752,1
2024-03-20,-0.095,0.010,0.0475,0.0250,-0.2375,-0.0050,0.0725,0.0100,-5.000000e-02,-1.050000e-01,...,-0.35,-0.183252,0.056823,-0.145802,-0.135833,0.09,0.17,1.266667e-01,0.031823,1


In [5]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1586 entries, 2017-11-30 to 2024-03-21
Data columns (total 39 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A1      1586 non-null   float64
 1   A2      1586 non-null   float64
 2   A3      1586 non-null   float64
 3   A4      1586 non-null   float64
 4   A5      1586 non-null   float64
 5   A6      1586 non-null   float64
 6   A7      1586 non-null   float64
 7   A8      1586 non-null   float64
 8   A9      1586 non-null   float64
 9   A10     1586 non-null   float64
 10  A11     1586 non-null   float64
 11  A12     1586 non-null   float64
 12  A13     1586 non-null   float64
 13  A14     1586 non-null   float64
 14  A15     1586 non-null   float64
 15  A16     1586 non-null   float64
 16  A17     1586 non-null   float64
 17  A18     1586 non-null   float64
 18  A19     1586 non-null   float64
 19  A20     1586 non-null   float64
 20  A21     1586 non-null   float64
 21  A22     1586 non-nu

## Обучение

In [6]:
# Discard every row that has a missing value in at least one column.
data = data.dropna(axis=0, how='any')

In [7]:
# Features: every column except the 'Close' label.
X = data.drop(columns='Close')
# Target: kept as a single-column DataFrame (not a Series).
y = data.loc[:, ['Close']]

In [8]:
# Chronological hold-out: with shuffle=False the leading 75% of the rows form
# the training set and the trailing 25% the test set, so the model is always
# evaluated on rows that come after the ones it was trained on.
# No random_state is passed: it only seeds the shuffling, which is disabled.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=False,
)

In [9]:
# Learn the per-feature min/max on the training split only, then apply the
# same rescaling to both splits so no test-set statistics leak into training.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Project the scaled features onto 7 principal components. The projection is
# fitted on the training split only and then applied unchanged to the test split.
# random_state pins the result when scikit-learn picks its randomized SVD
# solver; it has no effect when a deterministic solver is selected.
pca = PCA(n_components=7, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [11]:
# Share of the training-set variance captured by each retained component.
pca.explained_variance_ratio_

array([0.57315351, 0.09923983, 0.08368575, 0.07269912, 0.03499105,
       0.02666934, 0.02303646])

In [12]:
# Running total of the per-component variance shares; the last entry is the
# fraction of variance retained by all kept components together.
cumulative_variance = pca.explained_variance_ratio_.cumsum()
print("Суммарная доля объяснённой дисперсии:", cumulative_variance[-1])

Суммарная доля объяснённой дисперсии: 0.9134750655407532


### Модель Sequential

In [13]:
# Seed Python, NumPy and the backend RNGs so that weight initialisation and
# dropout masks repeat across Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact reproducibility is required.
tf.keras.utils.set_random_seed(42)

# Stacked recurrent binary classifier. Each sample is presented as a sequence
# whose length equals the number of PCA components, with one feature per step.
# The input shape is declared with an explicit Input layer instead of the
# `input_shape=` layer argument.
# NOTE(review): later cells pass 2-D arrays to fit/evaluate/predict; this
# appears to rely on Keras adding the trailing feature axis — confirm.
model = Sequential([
    Input(shape=(X_train_pca.shape[1], 1)),
    LSTM(64, return_sequences=True),
    Dropout(0.2),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dropout(0.2),
    # Last recurrent layer returns only its final state for the classifier head
    Bidirectional(LSTM(16, return_sequences=False)),
    Dropout(0.2),
    # Single sigmoid unit -> probability of class 1
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[tf.metrics.binary_accuracy],
)

In [14]:
# Train for 40 epochs in mini-batches of 32; shuffle=False keeps the samples
# in their original (row) order within every epoch.
model.fit(
    x=X_train_pca,
    y=y_train,
    batch_size=32,
    epochs=40,
    shuffle=False,
)

Epoch 1/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - binary_accuracy: 0.4820 - loss: 0.6934
Epoch 2/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.5182 - loss: 0.6921
Epoch 3/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.5313 - loss: 0.6900
Epoch 4/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.5630 - loss: 0.6778
Epoch 5/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.6378 - loss: 0.6530
Epoch 6/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.6430 - loss: 0.6423
Epoch 7/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.6319 - loss: 0.6457
Epoch 8/40
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - binary_accuracy: 0.6506 - loss: 0.6409


<keras.src.callbacks.history.History at 0x24a458e3f80>

In [15]:
# Loss and binary accuracy on the held-out split, returned as [loss, accuracy].
model.evaluate(x=X_test_pca, y=y_test)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - binary_accuracy: 0.6696 - loss: 0.61492


[0.6051186919212341, 0.6826196312904358]

In [16]:
# Sigmoid outputs for the test split ...
y_pred = model.predict(X_test_pca, verbose=1)
# ... turned into hard 0/1 labels with a 0.5 decision threshold.
y_pred = (y_pred > 0.5).astype(int)
print(classification_report(y_true=y_test, y_pred=y_pred))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
              precision    recall  f1-score   support

           0       0.72      0.61      0.66       200
           1       0.66      0.75      0.70       197

    accuracy                           0.68       397
   macro avg       0.69      0.68      0.68       397
weighted avg       0.69      0.68      0.68       397



### Модель Random Forest

In [17]:
# Baseline classifier with a fixed seed for reproducible trees.
# This rebinds `model`, which until now referred to the Keras network.
model = RandomForestClassifier(
    random_state=42,
)

In [18]:
# y_train is a single-column DataFrame, while scikit-learn classifiers expect
# a 1-D target; flatten it explicitly instead of relying on the library's
# implicit (and warned-about) column-vector conversion.
model.fit(X_train_pca, y_train.to_numpy().ravel())

In [19]:
# Hard class labels from the forest for the held-out split, followed by the
# per-class precision / recall / F1 summary.
y_pred = model.predict(X_test_pca)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.68      0.61      0.65       200
           1       0.65      0.71      0.68       197

    accuracy                           0.66       397
   macro avg       0.66      0.66      0.66       397
weighted avg       0.66      0.66      0.66       397

