In [10]:
import pandas as pd
import datetime as dt
import cufflinks as cf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.layers import LSTM, Dense,Dropout
import tensorflow as tf
from tensorflow.keras.layers import Layer
import pywt
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
data = pd.read_csv('prices-split-adjusted.csv', parse_dates=True)
data.head()

Unnamed: 0,date,symbol,open,close,low,high,volume
0,2016-01-05,WLTW,123.43,125.839996,122.309998,126.25,2163600.0
1,2016-01-06,WLTW,125.239998,119.980003,119.940002,125.540001,2386400.0
2,2016-01-07,WLTW,116.379997,114.949997,114.93,119.739998,2489500.0
3,2016-01-08,WLTW,115.480003,116.620003,113.5,117.440002,2006300.0
4,2016-01-11,WLTW,117.010002,114.970001,114.089996,117.330002,1408600.0


In [3]:
data_symbol = data[data['symbol'] == 'WLTW'].reset_index(drop=True)
data_symbol_open = data_symbol[['date', 'open']].rename(columns={'open': 'numb'})
data_symbol_close = data_symbol[['date', 'close']].rename(columns={'close': 'numb'})
data = pd.concat([data_symbol_open, data_symbol_close], axis=0, ignore_index=True)
data = data.sort_values(by='date')

data['time'] = data.groupby('date').cumcount().apply(lambda x: '08:00:00' if x == 0 else '16:00:00')
data['date'] = pd.to_datetime(data['date'].astype(str) + ' ' + data['time'])
data.set_index('date', inplace=True)
data.drop(columns='time', inplace=True)
if not isinstance(data.index, pd.DatetimeIndex):
    data.index = pd.to_datetime(data.index)

start_date = data.index.min().floor('d')
end_date = data.index.max().floor('d')
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
all_dates_df = pd.DataFrame({
    'date': np.tile(all_dates, 2),
    'time': np.repeat(['08:00:00', '16:00:00'], len(all_dates))
})
all_dates_df['date'] = pd.to_datetime(all_dates_df['date'].astype(str) + ' ' + all_dates_df['time'])
all_dates_df.drop(columns='time', inplace=True)
all_dates_df.set_index('date', inplace=True)
full_data = all_dates_df.join(data, how='left')
full_data['numb'] = full_data['numb'].fillna(method='bfill')
full_data = full_data.sort_index()


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



In [4]:
full_data.to_csv('own_data.csv')

In [53]:
full_data = full_data.dropna()

In [54]:
full_data.head()

Unnamed: 0_level_0,numb
date,Unnamed: 1_level_1
2016-01-05 08:00:00,123.43
2016-01-05 16:00:00,125.839996
2016-01-06 08:00:00,125.239998
2016-01-06 16:00:00,119.980003
2016-01-07 08:00:00,116.379997


In [55]:
start_date = '2016-12-01'
end_date = '2016-12-30'
train = full_data[full_data.index < start_date]
test = full_data[(full_data.index >= start_date) & (full_data.index < end_date)]

In [8]:
cf.go_offline()

full_data['numb'].iplot(title='Цена', xTitle='Дата', yTitle='Цена')


DatetimeIndex.format is deprecated and will be removed in a future version. Convert using index.astype(str) or index.map(formatter) instead.



In [12]:
result = seasonal_decompose(full_data, model='additive', period=26)

seasonal_component = result.seasonal
seasonal_df = pd.DataFrame(seasonal_component)
seasonal_df.columns = ['Seasonality']
seasonal_df.iplot(title='Сезонность')

In [13]:
result = seasonal_decompose(full_data, model='additive', period=26)

# Извлечение трендовой составляющей
trend_component = result.trend
trend_df = pd.DataFrame(trend_component)
trend_df.columns = ['Trend']

# Визуализация трендовой составляющей с использованием cufflinks
trend_df.iplot(title='Тренд')


DatetimeIndex.format is deprecated and will be removed in a future version. Convert using index.astype(str) or index.map(formatter) instead.



In [15]:
from statsmodels.tsa.stattools import acf, pacf
import numpy as np
# Очистка данных от пропусков
data_clean = data['numb'].dropna()

# Вычисление ACF и PACF
lags = 26  # Для 24 часа
acf_values = acf(data_clean, nlags=lags)
pacf_values = pacf(data_clean, nlags=lags, method='ols')

# Создание DataFrame для ACF и PACF
df_acf = pd.DataFrame({'lags': np.arange(0, lags+1), 'acf': acf_values})
df_pacf = pd.DataFrame({'lags': np.arange(0, lags+1), 'pacf': pacf_values})

# Визуализация ACF
df_acf.iplot(kind='bar', x='lags', y='acf', title='ACF', xTitle='Lags', yTitle='ACF Values')

# Визуализация PACF
df_pacf.iplot(kind='bar', x='lags', y='pacf', title='PACF', xTitle='Lags', yTitle='PACF Values')

## Вейвлет разложение

In [40]:
import plotly.express as px


def wavelet_decomposition(df, n_components, wavelet='db5'):
    data = df.iloc[:, 0].values
    coeffs = pywt.wavedec(data, wavelet, level=n_components)
    return coeffs

def wavelet_reconstruction(coeffs, wavelet='db5'):
    reconstructed_signal = pywt.waverec(coeffs, wavelet)
    return reconstructed_signal

coeffs = wavelet_decomposition(full_data, n_components=5)

for i, coeff in enumerate(coeffs):
    component = pd.DataFrame(coeff, columns=['Value'])
    fig = px.line(component, title=f"Wavelet Component {i}", labels={'Value': 'Value'})
    fig.show()


n_components = 5
coeffs = wavelet_decomposition(full_data, n_components)

coeffs_no_trend_season = [
    np.zeros_like(coeffs[0]),  # Удаляем тренд
    coeffs[1],  # Оставляем высокочастотные компоненты (шум)
    coeffs[2],  # Оставляем высокочастотные компоненты (шум)
    coeffs[3],  # Удаляем сезонность
    np.zeros_like(coeffs[4]),  # Оставляем высокочастотные компоненты (шум)
    coeffs[5]   # Оставляем высокочастотные компоненты (шум)
]
time_index = full_data.index

reconstructed_signal_no_trend_season = wavelet_reconstruction(coeffs_no_trend_season)

# Визуализация восстановленного сигнала
reconstructed_df = pd.DataFrame(reconstructed_signal_no_trend_season, columns=['Value'], index=time_index)
fig = px.line(reconstructed_df, title="Signal without Trend and Seasonal Components", labels={'Value': 'Value'})
fig.show()

In [41]:
reconstructed_df.head()

Unnamed: 0_level_0,Value
date,Unnamed: 1_level_1
2016-01-05 08:00:00,6.888356
2016-01-05 16:00:00,6.579827
2016-01-06 08:00:00,4.941094
2016-01-06 16:00:00,2.200479
2016-01-07 08:00:00,0.021782


In [42]:
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error


adata_test = adfuller(reconstructed_df)
print(f'p-value = {adata_test[1]}')
if adata_test[1] < 0.05:
    print('Ряд стационарен')
else:
    print('Ряд не стационарен')

p-value = 2.0077272360882786e-11
Ряд стационарен


In [43]:
start_date = '2016-12-01'
end_date = '2016-12-30'
train_f = reconstructed_df[reconstructed_df.index < start_date]
test_f = reconstructed_df[(reconstructed_df.index >= start_date) & (reconstructed_df.index < end_date)]

## Метод с извлечением признаков

In [30]:
import numpy as np
import pandas as pd
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
# 1. Подготовка данных
df_shift, y = make_forecasting_frame(full_data['numb'], kind="power", max_timeshift=10, rolling_direction=1)

# 2. Извлечение признаков
X = extract_features(df_shift, column_id="id", column_sort="time", column_value="value", impute_function=np.nanmean, show_warnings=False)


Your time stamps are not uniformly sampled, which makes rolling nonsensical in some domains.

Rolling: 100%|██████████| 20/20 [00:21<00:00,  1.06s/it]
Feature Extraction: 100%|██████████| 20/20 [01:49<00:00,  5.49s/it]


In [31]:
X.head()

Unnamed: 0,Unnamed: 1,value__variance_larger_than_standard_deviation,value__has_duplicate_max,value__has_duplicate_min,value__has_duplicate,value__sum_values,value__abs_energy,value__mean_abs_change,value__mean_change,value__mean_second_derivative_central,value__median,...,value__fourier_entropy__bins_5,value__fourier_entropy__bins_10,value__fourier_entropy__bins_100,value__permutation_entropy__dimension_3__tau_1,value__permutation_entropy__dimension_4__tau_1,value__permutation_entropy__dimension_5__tau_1,value__permutation_entropy__dimension_6__tau_1,value__permutation_entropy__dimension_7__tau_1,value__query_similarity_count__query_None__threshold_0.0,value__mean_n_absolute_max__number_of_maxima_7
id,2016-01-05 16:00:00,0.0,0.0,0.0,0.0,123.43,15234.9649,,,,123.43,...,,,,,,,,,,
id,2016-01-06 08:00:00,1.0,0.0,0.0,0.0,249.269996,31070.669493,2.409996,2.409996,,124.634998,...,-0.0,-0.0,-0.0,,,,,,,
id,2016-01-06 16:00:00,1.0,0.0,0.0,0.0,374.509994,46755.726592,1.504997,0.904999,-1.504997,125.239998,...,0.693147,0.693147,0.693147,-0.0,,,,,,
id,2016-01-07 08:00:00,1.0,0.0,0.0,0.0,494.489997,61150.927712,2.756663,-1.149999,-1.917498,124.334999,...,1.098612,1.098612,1.098612,0.693147,-0.0,,,,,
id,2016-01-07 16:00:00,1.0,0.0,0.0,0.0,610.869994,74695.231414,2.967499,-1.762501,-1.001667,123.43,...,1.098612,1.098612,1.098612,0.636514,0.693147,-0.0,,,,


In [32]:
# 3. Разделение данных на обучающую и тестовую выборку
X.dropna(axis='columns', inplace=True)
split_index = int(0.8 * len(y))
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

In [34]:
from sklearn.metrics import accuracy_score

def accuracy_behavior(series):
    return (series.diff() > 0).astype(int)


# 4. Моделирование
regressor = RandomForestRegressor()
regressor.fit(X_train_imputed, y_train)
y_pred = regressor.predict(X_test_imputed)

# 5. Оценка модели
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")

test_b = accuracy_behavior(y_test)
forc_b = accuracy_behavior(pd.Series(y_pred))
accuracy = accuracy_score(test_b, forc_b)
print(f'Accuracy of behavior prediction: {accuracy:.2f}')

Mean Absolute Error: 0.9706241143448355
Accuracy of behavior prediction: 0.44


In [35]:
y_test_df = pd.DataFrame({'y_test': y_test, 'y_pred': y_pred})
y_test_df.reset_index(drop=True,inplace=True)

y_test_df.iplot(title='Прогноз временных рядов', xTitle='Дата', yTitle='Цена')

## Сетка без преобразований ряда

In [56]:
train_values = train.values
test_values = test.values

def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back)]
        X.append(a)
        Y.append(dataset[i + look_back])
    return np.array(X), np.array(Y)

look_back = 10  # Примерное количество шагов во времени для обучения

# Создание данных для RNN
X_train, Y_train = create_dataset(train_values, look_back)
X_test, Y_test = create_dataset(test_values, look_back)

# Преобразование в форму, которую ожидает RNN
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [59]:
model = Sequential()

# Входной слой
model.add(SimpleRNN(65, activation='relu', input_shape=(X_train.shape[1], 1), return_sequences=True))

# Дополнительные слои
model.add(SimpleRNN(58, activation='relu', return_sequences=True))
model.add(SimpleRNN(81, activation='relu'))

# Выходной слой
model.add(Dense(1))

# Компиляция модели
model.compile(optimizer='adam', loss='mean_absolute_error')

In [70]:
model.fit(X_train, Y_train, epochs=50, batch_size=106, validation_data=(X_test, Y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1bb223e2500>

In [71]:
predictions = model.predict(X_test)



In [63]:
predictions.shape

(48, 1)

In [65]:
Y_test.shape

(48, 1)

In [72]:
df_test = pd.DataFrame({
    'Y_test': Y_test.ravel(),
    'Predictions': predictions.ravel()
})
# Отобразить на графике
df_test.iplot(title="Прогноз vs Реальные значения", xTitle="Время", yTitle="Значение", theme="solar")

In [73]:
from sklearn.metrics import mean_absolute_error
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
mape = mean_absolute_percentage_error(Y_test, predictions)
mae = mean_absolute_error(Y_test, predictions)

print(f"MAPE: {mape}%")
print(f"MAE: {mae}")

MAPE: 0.8078601992803037%
MAE: 1.001531767730713


## Обучение Optuna

In [69]:
import optuna
from tensorflow.keras.callbacks import EarlyStopping

def create_model(trial):
    model = Sequential()
    
    # Гиперпараметры
    n_units_1 = trial.suggest_int('n_units_1', 10, 100)
    n_units_2 = trial.suggest_int('n_units_2', 10, 100)
    n_units_3 = trial.suggest_int('n_units_3', 10, 100)
    activation = trial.suggest_categorical('activation', ['relu', 'tanh'])
    
    # Входной слой
    model.add(SimpleRNN(n_units_1, activation=activation, input_shape=(X_train.shape[1], 1), return_sequences=True))

    # Дополнительные слои
    model.add(SimpleRNN(n_units_2, activation=activation, return_sequences=True))
    model.add(SimpleRNN(n_units_3, activation=activation))
    
    # Выходной слой
    model.add(Dense(1))

    # Компиляция модели
    model.compile(optimizer='adam', loss='mean_absolute_error')
    
    return model

def objective(trial):
    # Создание модели
    model = create_model(trial)
    
    # Обучение модели
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)
    history = model.fit(
        X_train, Y_train,
        epochs=50,
        batch_size=trial.suggest_int('batch_size', 16, 128),
        validation_data=(X_test, Y_test),
        callbacks=[early_stopping],
        verbose=0
    )
    
    # Оценка модели
    Y_pred = model.predict(X_test)
    score = mean_absolute_error(Y_test, Y_pred)
    
    return score


study = optuna.create_study(direction='minimize')  # Минимизируем MAE
study.optimize(objective, n_trials=15)  # Количество попыток (trials)

# Вывод наилучших гиперпараметров
print("Best hyperparameters:", study.best_params)
print("Best score:", study.best_value)

[I 2024-08-16 15:35:45,220] A new study created in memory with name: no-name-027b703d-f909-4e48-95f8-eb8fb8cd7914




[I 2024-08-16 15:35:52,649] Trial 0 finished with value: 1.6647828617553708 and parameters: {'n_units_1': 85, 'n_units_2': 39, 'n_units_3': 12, 'activation': 'relu', 'batch_size': 107}. Best is trial 0 with value: 1.6647828617553708.




[I 2024-08-16 15:36:02,530] Trial 1 finished with value: 2.692412534556071 and parameters: {'n_units_1': 63, 'n_units_2': 13, 'n_units_3': 79, 'activation': 'relu', 'batch_size': 58}. Best is trial 0 with value: 1.6647828617553708.




[I 2024-08-16 15:36:11,011] Trial 2 finished with value: 1.0779124557393394 and parameters: {'n_units_1': 65, 'n_units_2': 58, 'n_units_3': 81, 'activation': 'relu', 'batch_size': 106}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:36:17,361] Trial 3 finished with value: 2.361126105784098 and parameters: {'n_units_1': 89, 'n_units_2': 64, 'n_units_3': 57, 'activation': 'relu', 'batch_size': 122}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:36:34,413] Trial 4 finished with value: 100.31748501187133 and parameters: {'n_units_1': 78, 'n_units_2': 74, 'n_units_3': 48, 'activation': 'tanh', 'batch_size': 127}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:36:41,730] Trial 5 finished with value: 2.815428413208007 and parameters: {'n_units_1': 94, 'n_units_2': 12, 'n_units_3': 89, 'activation': 'relu', 'batch_size': 41}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:37:17,011] Trial 6 finished with value: 104.9549159181671 and parameters: {'n_units_1': 20, 'n_units_2': 97, 'n_units_3': 16, 'activation': 'tanh', 'batch_size': 42}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:37:23,143] Trial 7 finished with value: 1.2413088691813146 and parameters: {'n_units_1': 34, 'n_units_2': 17, 'n_units_3': 79, 'activation': 'relu', 'batch_size': 99}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:37:36,885] Trial 8 finished with value: 1.2056149285176596 and parameters: {'n_units_1': 23, 'n_units_2': 47, 'n_units_3': 17, 'activation': 'relu', 'batch_size': 72}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:37:53,312] Trial 9 finished with value: 2.816281633829752 and parameters: {'n_units_1': 50, 'n_units_2': 93, 'n_units_3': 65, 'activation': 'relu', 'batch_size': 121}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:39:12,061] Trial 10 finished with value: 39.7639261854248 and parameters: {'n_units_1': 61, 'n_units_2': 36, 'n_units_3': 38, 'activation': 'tanh', 'batch_size': 17}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:39:20,294] Trial 11 finished with value: 1.2977738697204588 and parameters: {'n_units_1': 10, 'n_units_2': 48, 'n_units_3': 32, 'activation': 'relu', 'batch_size': 84}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:39:26,475] Trial 12 finished with value: 7.692510125295004 and parameters: {'n_units_1': 38, 'n_units_2': 63, 'n_units_3': 100, 'activation': 'relu', 'batch_size': 78}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:39:36,427] Trial 13 finished with value: 1.3126336825459797 and parameters: {'n_units_1': 72, 'n_units_2': 79, 'n_units_3': 69, 'activation': 'relu', 'batch_size': 92}. Best is trial 2 with value: 1.0779124557393394.




[I 2024-08-16 15:40:15,662] Trial 14 finished with value: 97.81179610615538 and parameters: {'n_units_1': 47, 'n_units_2': 46, 'n_units_3': 28, 'activation': 'tanh', 'batch_size': 57}. Best is trial 2 with value: 1.0779124557393394.


Best hyperparameters: {'n_units_1': 65, 'n_units_2': 58, 'n_units_3': 81, 'activation': 'relu', 'batch_size': 106}
Best score: 1.0779124557393394


## Сеть LSTM  с преобразованными данными

In [74]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(full_data[['numb']])

def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

look_back = 10  # Например, используем последние 10 значений для прогнозирования следующего
X, Y = create_dataset(scaled_data, look_back)

X_dif = X[:-1] - X[1:]
X_new = np.hstack([X[:-1],X_dif])
Y = Y[:-1]

X = np.reshape(X_new, (X_new.shape[0], X_new.shape[1], 1))
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)

In [75]:
from tensorflow.keras.layers import LSTM, Dense

model = Sequential()

# Входной слой LSTM
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)))

# Дополнительные слои (если необходимо)
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50))

# Выходной слой
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_absolute_error')

model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_data=(X_test, Y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1bb21599f30>

In [76]:
predictions = model.predict(X_test)
predicted_power = scaler.inverse_transform(predictions) 

y_test_inv = scaler.inverse_transform(Y_test.reshape(-1, 1))
predictions_inv = scaler.inverse_transform(predictions)
df_test = pd.DataFrame({
    'Y_test': y_test_inv.ravel(),
    'Predictions': predictions_inv.ravel()
})
# Отобразить на графике
df_test.iplot(title="Прогноз vs Реальные значения", xTitle="Время", yTitle="Значение", theme="solar")



In [77]:
from sklearn.metrics import mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
mape = mean_absolute_percentage_error(y_test_inv, predictions_inv)
mae = mean_absolute_error(y_test_inv, predictions_inv)

print(f"MAPE: {mape}%")
print(f"MAE: {mae}")

MAPE: 0.4227327439565435%
MAE: 0.5219136255634031


In [79]:
test_b = accuracy_behavior(df_test.Y_test)
forc_b = accuracy_behavior(df_test.Predictions)
accuracy = accuracy_score(test_b, forc_b)
print(f'Accuracy of behavior prediction: {accuracy:.2f}')

Accuracy of behavior prediction: 0.97


## GRU с преобразованными данными

In [80]:
from tensorflow.keras.layers import GRU, Dense

model = Sequential()

# Входной слой GRU
model.add(GRU(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)))

# Дополнительные слои (если необходимо)
model.add(GRU(50, return_sequences=True))
model.add(GRU(50))

# Выходной слой
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_data=(X_test, Y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1bb2010d960>

In [81]:
predictions = model.predict(X_test)
predicted_power = scaler.inverse_transform(predictions) 

y_test_inv = scaler.inverse_transform(Y_test.reshape(-1, 1))
predictions_inv = scaler.inverse_transform(predictions)
df_test = pd.DataFrame({
    'Y_test': y_test_inv.ravel(),
    'Predictions': predictions_inv.ravel()
})
# Отобразить на графике
df_test.iplot(title="Прогноз vs Реальные значения", xTitle="Время", yTitle="Значение", theme="solar")



In [82]:
from sklearn.metrics import mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
mape = mean_absolute_percentage_error(y_test_inv, predictions_inv)
mae = mean_absolute_error(y_test_inv, predictions_inv)

print(f"MAPE: {mape}%")
print(f"MAE: {mae}")

MAPE: 0.07606403316683717%
MAE: 0.09318618200939606


In [83]:
test_b = accuracy_behavior(df_test.Y_test)
forc_b = accuracy_behavior(df_test.Predictions)
accuracy = accuracy_score(test_b, forc_b)
print(f'Accuracy of behavior prediction: {accuracy:.2f}')

Accuracy of behavior prediction: 0.99


## AttentionMechanism

In [84]:
class AttentionMechanism(Layer):
    def __init__(self, return_sequences=True):
        super(AttentionMechanism, self).__init__()
        self.return_sequences = return_sequences

    def build(self, input_shape):
        self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
                               initializer="normal")
        self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
                               initializer="zeros")

    def call(self, x):
        e = tf.nn.tanh(tf.matmul(x,self.W)+self.b)
        a = tf.nn.softmax(e, axis=1)
        output = x*a

        if self.return_sequences:
            return output

        return tf.reduce_sum(output, axis=1)

In [85]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential()

model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(AttentionMechanism(return_sequences=True))  # Внимание после LSTM
model.add(LSTM(50, return_sequences=False))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_absolute_error')

model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_data=(X_test, Y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1bb209b2830>

In [86]:
predictions = model.predict(X_test)
predicted_power = scaler.inverse_transform(predictions) 

y_test_inv = scaler.inverse_transform(Y_test.reshape(-1, 1))
predictions_inv = scaler.inverse_transform(predictions)
df_test = pd.DataFrame({
    'Y_test': y_test_inv.ravel(),
    'Predictions': predictions_inv.ravel()
})
# Отобразить на графике
df_test.iplot(title="Прогноз vs Реальные значения", xTitle="Время", yTitle="Значение", theme="solar")



In [87]:
from sklearn.metrics import mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
mape = mean_absolute_percentage_error(y_test_inv, predictions_inv)
mae = mean_absolute_error(y_test_inv, predictions_inv)

print(f"MAPE: {mape}%")
print(f"MAE: {mae}")

MAPE: 0.9107821233510035%
MAE: 1.1144676652644225


In [88]:
test_b = accuracy_behavior(df_test.Y_test)
forc_b = accuracy_behavior(df_test.Predictions)
accuracy = accuracy_score(test_b, forc_b)
print(f'Accuracy of behavior prediction: {accuracy:.2f}')

Accuracy of behavior prediction: 0.38


## Лучше всего справились модели GRU и LSTM