In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


In [None]:
def calculate_indicators(df):
    """
    Add EMA, RSI and MACD columns computed from the 'Closed Price' column.

    The computation is done on a copy, so the frame passed in is never
    modified (the caller may pass a slice of a larger frame).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Closed Price' column. Rows are used in the order
        given; the function does not sort them.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'EMA_10', 'RSI_14' and 'MACD' added, every row that
        contains a NaN in any column removed, and the index reset to 0..n-1.
    """
    # Copy first: assigning columns below would otherwise write into the
    # caller's object (or into a slice, which makes pandas warn).
    df = df.copy()
    close = df['Closed Price']

    # Exponential moving average of the close, span 10.
    df['EMA_10'] = close.ewm(span=10, adjust=False).mean()

    # RSI over 14 periods, using plain rolling means of gains and losses.
    delta = close.diff()
    gain = delta.clip(lower=0).rolling(window=14).mean()
    loss = -delta.clip(upper=0).rolling(window=14).mean()
    # A window without any price change gives 0/0 = NaN here; such rows are
    # removed by the dropna() at the end together with the warm-up rows.
    rs = gain / loss
    df['RSI_14'] = 100 - (100 / (1 + rs))

    # MACD line: EMA(12) minus EMA(26) of the close.
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    df['MACD'] = ema12 - ema26

    # The first 14 rows have no RSI yet (rolling warm-up) and are dropped here.
    return df.dropna().reset_index(drop=True)

In [None]:
def preprocess_data(file_path):
    """
    Load one Excel workbook and turn it into a feature/label frame.

    Steps: parse and sort by 'Date', drop rows with missing values, derive
    calendar features, build the binary 'Target' label, then append the
    technical indicators from ``calculate_indicators``.

    Parameters
    ----------
    file_path : str or path-like
        Path to a workbook readable by ``pandas.read_excel``. It must provide
        at least the 'Date' and 'Closed Price' columns.

    Returns
    -------
    pandas.DataFrame
        The input columns plus 'Weekday', 'Month', 'Next_Close', 'Target'
        and the columns added by ``calculate_indicators``.
    """
    df = pd.read_excel(file_path)
    df['Date'] = pd.to_datetime(df['Date'])

    # Chronological order, no missing values, clean 0..n-1 index.
    df = df.sort_values('Date').dropna().reset_index(drop=True)

    # Calendar features taken from the date.
    df['Weekday'] = df['Date'].dt.dayofweek  # 0 = Monday, 6 = Sunday
    df['Month'] = df['Date'].dt.month

    # Label: 1 when the next row's close is above this row's close, else 0.
    df['Next_Close'] = df['Closed Price'].shift(-1)
    df['Target'] = (df['Next_Close'] > df['Closed Price']).astype(int)

    # The last row has no following close. Take an explicit copy so the
    # indicator step assigns columns to an independent frame, not to a slice.
    df = df.iloc[:-1].copy()

    # Append the technical indicators.
    return calculate_indicators(df)

In [None]:
def create_sequences(X, y, time_steps=40):
    """
    Build sliding windows for an LSTM.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X`` and is paired
    with ``y`` at position ``i + time_steps``.

    Parameters
    ----------
    X : array-like
        Feature rows, first axis is time. A DataFrame is accepted.
    y : array-like
        Labels, first axis aligned with ``X``. A Series is accepted and is
        always indexed by position, whatever its index labels are.
    time_steps : int, default 40
        Number of rows per window; must not be negative.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Windows of shape ``(n, time_steps, *X.shape[1:])`` and labels of shape
        ``(n, *y.shape[1:])`` with ``n = max(len(X) - time_steps, 0)``. When
        the input is too short both arrays are empty but keep these shapes.

    Raises
    ------
    ValueError
        If ``time_steps`` is negative, or ``y`` is shorter than ``X`` while at
        least one window exists.
    """
    if time_steps < 0:
        raise ValueError("time_steps must be non-negative")

    # Plain arrays make every lookup below positional.
    X_arr = np.asarray(X)
    y_arr = np.asarray(y)

    n_windows = max(len(X_arr) - time_steps, 0)
    if n_windows and len(y_arr) < len(X_arr):
        raise ValueError("y must be at least as long as X")

    # Row i of `window_idx` lists the positions i, i+1, ..., i+time_steps-1.
    window_idx = np.arange(n_windows)[:, None] + np.arange(time_steps)[None, :]
    Xs = X_arr[window_idx]
    ys = y_arr[time_steps:time_steps + n_windows]
    return Xs, ys

In [None]:
def build_and_train_lstm(units, dropout, batch_size, learning_rate):
    """
    Objective function for Bayesian hyper-parameter optimisation.

    Builds an LSTM -> Dropout -> Dense(25) -> Dropout -> Dense(1, sigmoid)
    classifier, trains it for 50 epochs and scores it on the validation set.

    The data are not passed in: the function reads the notebook-level
    variables ``X_train``, ``y_train``, ``X_val`` and ``y_val``, so those must
    exist (the training-loop cell defines them) before it is called.

    Parameters
    ----------
    units : float or int
        Number of LSTM units; cast to int.
    dropout : float
        Dropout rate used after the LSTM and after the hidden Dense layer.
    batch_size : float or int
        Mini-batch size; cast to int.
    learning_rate : float
        Learning rate of the Adam optimiser.

    Returns
    -------
    float
        The negative of the lowest validation loss seen during training
        (presumably negated because the optimiser maximises its objective).
    """
    model = Sequential([
        # Input shape is taken from the training data, as in the main loop.
        LSTM(int(units), input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False),
        Dropout(dropout),
        Dense(25, activation='relu'),
        Dropout(dropout),
        Dense(1, activation='sigmoid')
    ])

    # `Adam` comes from the notebook's import cell; the previous
    # `tf.keras.optimizers.Adam` referenced a module that was never imported.
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        epochs=50, batch_size=int(batch_size),
                        validation_data=(X_val, y_val),
                        verbose=0)

    val_loss = min(history.history['val_loss'])
    return -val_loss

In [None]:
# Folder that holds one .xlsx workbook per ticker.
data_folder = "extracted_data4"

# glob returns paths in arbitrary, OS-dependent order; sorting makes the
# processing order (and the order of the result lists) reproducible.
# The name `csv_files` is kept for the cells below although the files are .xlsx.
csv_files = sorted(glob.glob(os.path.join(data_folder, "*.xlsx")))

# Number of consecutive rows in one LSTM input window.
TIME_STEPS = 45

# Filled by the training loop: ticker -> model, history, loss and accuracy.
results = {}

In [None]:
# Test accuracy per ticker and the matching ticker names (parallel lists that
# the summary cells below read).
mas_accuracy = []
mas_name = []

# Model inputs. Opened and Closed Price are left out on purpose.
feature_columns = ['Change', 'Trading Volume (shr)', 'Trading Value (KHR)',
                   'High', 'Low', 'EMA_10', 'RSI_14', 'MACD', 'Weekday', 'Month']

for file in csv_files:
    ticker = os.path.splitext(os.path.basename(file))[0]
    print(f"\nОбработка данных для: {ticker}")

    # Load the workbook and derive features and labels.
    df = preprocess_data(file)
    features = df[feature_columns]
    target = df['Target']

    # Chronological split over the windows: first 70% train, next 15%
    # validation, remainder test. Sizes are computed before scaling so the
    # scaler can be restricted to the training rows.
    n_sequences = len(features) - TIME_STEPS
    train_size = int(0.7 * n_sequences)
    val_size = int(0.15 * n_sequences)
    test_size = n_sequences - train_size - val_size
    if min(train_size, val_size, test_size) < 1:
        # Too few rows to form a non-empty train/val/test split.
        print(f"{ticker}: недостаточно данных ({len(features)} строк при TIME_STEPS={TIME_STEPS}), пропуск")
        continue

    # Fit the scaler only on rows that training windows can see (the last
    # training window ends at row train_size + TIME_STEPS - 2). Fitting on all
    # rows would leak validation/test min/max into training.
    train_rows = train_size + TIME_STEPS - 1
    scaler = MinMaxScaler()
    scaler.fit(features.iloc[:train_rows])
    features_scaled = scaler.transform(features)

    # NOTE(review): 'Target' at row t already describes the move from t to
    # t+1, and create_sequences pairs rows i..i+TIME_STEPS-1 with the label at
    # row i+TIME_STEPS, so the newest row before the predicted move is not in
    # the window. Confirm this one-row gap is intended.
    X_seq, y_seq = create_sequences(features_scaled, target.values, time_steps=TIME_STEPS)

    X_train, y_train = X_seq[:train_size], y_seq[:train_size]
    X_val, y_val = X_seq[train_size:train_size+val_size], y_seq[train_size:train_size+val_size]
    X_test, y_test = X_seq[train_size+val_size:], y_seq[train_size+val_size:]

    print(f"Размеры выборок: Train {X_train.shape}, Val {X_val.shape}, Test {X_test.shape}")

    # LSTM binary classifier.
    model = Sequential()
    model.add(LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()

    # Train with early stopping on validation loss; the best weights are restored.
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(X_train, y_train,
                        epochs=100,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        callbacks=[early_stop],
                        verbose=1)

    # Evaluate on the held-out test windows.
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"{ticker}: Test Loss = {loss:.4f}, Test Accuracy = {accuracy:.4f}")
    mas_accuracy.append(accuracy)
    mas_name.append(ticker)

    # Training curves: loss on the left, accuracy on the right.
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

    ax_loss.plot(history.history['loss'], label='Train Loss')
    ax_loss.plot(history.history['val_loss'], label='Val Loss')
    ax_loss.set_title(f"{ticker} - Loss")
    ax_loss.set_xlabel("Epoch")
    ax_loss.set_ylabel("Loss")
    ax_loss.legend()

    ax_acc.plot(history.history['accuracy'], label='Train Accuracy')
    ax_acc.plot(history.history['val_accuracy'], label='Val Accuracy')
    ax_acc.set_title(f"{ticker} - Accuracy")
    ax_acc.set_xlabel("Epoch")
    ax_acc.set_ylabel("Accuracy")
    ax_acc.legend()

    plt.show()
    # Release the figure so memory does not grow with the number of tickers.
    plt.close(fig)

    results[ticker] = {'model': model, 'history': history.history, 'loss': loss, 'accuracy': accuracy}

In [None]:
# Cell output: the ticker names together with their test accuracies.
(mas_name, mas_accuracy)

In [None]:
# Bar chart of the test accuracy per ticker with the mean drawn as a dashed
# line. `go` (plotly) and `np` are imported in the notebook's import cell.

# Data
stocks = mas_name  # ticker names
accuracy = mas_accuracy
if not accuracy:
    # min()/max() below would otherwise fail with an unhelpful message.
    raise ValueError("No accuracy results to plot: run the training loop on at least one ticker first.")

# Mean accuracy over all tickers
mean_accuracy = np.mean(accuracy)

# Bars, one per ticker, labelled with the rounded accuracy
fig = go.Figure()
fig.add_trace(go.Bar(
    x=stocks,
    y=accuracy,
    text=[f'{val:.2f}' for val in accuracy],
    textposition='outside',
    marker_color='royalblue',
    name='Accuracy'
))

# Mean line. With mode='lines' the text is not drawn on the chart; it is
# only available as hover text.
fig.add_trace(go.Scatter(
    x=stocks,
    y=[mean_accuracy] * len(stocks),
    mode='lines',
    line=dict(color='red', width=2, dash='dash'),
    text=[f'{mean_accuracy:.2f}'] * len(stocks),
    textposition='top right',
    name='Mean Accuracy'
))

# Layout. The y-axis is zoomed to the observed range plus a 0.05 margin, so
# bar heights are not proportional to the accuracy values.
fig.update_layout(
    title='Stock Accuracy Comparison',
    xaxis_title='Stock',
    yaxis_title='Accuracy',
    yaxis=dict(range=[min(accuracy) - 0.05, max(accuracy) + 0.05]),
    bargap=0.3,
    template='plotly_white',
    width=700,
    height=500
)

fig.show()