# Импорт данных

На примере прогноза ИПЦ в процентах к соответствующему периоду предыдущего года

In [1]:
import pandas as pd
from datetime import date

In [67]:
df = pd.read_csv('Индекс потребительских цен, Россия — Диаграмма.csv', sep=';')

df.columns = ['uuid', 'date', 'goal']
df.drop(columns=['uuid'], inplace=True)
df.date = pd.to_datetime(df.date, format='%d.%m.%Y').dt.date
df = df.sort_values(by='date', ascending=False)

df.head(3)

Unnamed: 0,date,goal
302,2024-04-30,107.8
301,2024-03-31,107.7
300,2024-02-29,107.7


# Предобработка

Оставим только значения с 2014 года, т.к. до этого времени у ЦБ были другой таргет, санкций на РФ было меньше, экономика была другой и прочее

In [68]:
df = df[df.date > date(year=2015, month=1, day=1)]

Приведем к значениям от 0 до 1, чтобы улучшить качество нейросети. Выберем для этого один из двух нормализаторов

In [69]:
from sklearn.preprocessing import StandardScaler

In [70]:
z_transformer = StandardScaler().fit(df[['goal']])
df['z_goal'] = z_transformer.transform(df[['goal']])

df.head(3)

Unnamed: 0,date,goal,z_goal
302,2024-04-30,107.8,0.161025
301,2024-03-31,107.7,0.138869
300,2024-02-29,107.7,0.138869


и обратно переводим

In [71]:
z_transformer.inverse_transform(df[['z_goal']])[:3]

array([[107.8],
       [107.7],
       [107.7]])

# Создание модели

In [72]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanAbsolutePercentageError
from tensorflow.keras.layers import GRU, LSTM, BatchNormalization, Dropout, Dense, TimeDistributed

## Гиперпараметры модели

In [73]:
from dataclasses import dataclass, field

In [153]:
@dataclass
class ModelConfig:
    lookback         = 6   # сколько берем данных для прогноза
    forecast_horizon = 3   # на сколько делаем прогноз
    units            = 2
    batch_size       = 64
    dropout_rate     = 0.2
    epochs           = 250
    
    input_shape: tuple = (lookback, 1)

## Сама модель

In [154]:
tf.compat.v2.random.set_seed(1)
model = Sequential()

model.add(LSTM(units=ModelConfig.units, return_sequences=True, input_shape=ModelConfig.input_shape))
model.add(LSTM(units=ModelConfig.units, return_sequences=False, input_shape=ModelConfig.input_shape))
model.add(Dense(units=ModelConfig.forecast_horizon, activation='linear'))

model.summary()

  super().__init__(**kwargs)


In [156]:
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mean_squared_error',
    metrics=[MeanAbsolutePercentageError()]
)

# Обучение модели

In [77]:
import numpy as np
from typing import Iterable
from sklearn.model_selection import train_test_split

In [78]:
def prepare_rnn_data(data: Iterable, lookback: int, horizon: int) -> tuple[np.array, np.array]:
    """
    Создает батчи по lookback месяцев для иксов и по horizon месяца для игреков
    
    Также приводит к формату данных для RNN
    """
    
    data_range = range(lookback, len(data) - horizon + 1)
    
    x = np.array([data[i - lookback:i] for i in data_range])
    x = np.reshape(x, (x.shape[0], lookback, 1))

    y = np.array([data[i:i + horizon] for i in data_range])
    
    return x, y

In [157]:
X, y = prepare_rnn_data(df['z_goal'].values, ModelConfig.lookback, ModelConfig.forecast_horizon)

print(f'{X.shape = }, {y.shape = }')

X.shape = (104, 6, 1), y.shape = (104, 3)


In [158]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
x_val, y_val = x_test[-12:], y_test[-12:]

In [159]:
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=ModelConfig.batch_size,
    epochs=ModelConfig.epochs
)

Epoch 1/250
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 231ms/step - loss: 0.9127 - mean_absolute_percentage_error: 100.8253 - val_loss: 1.6737 - val_mean_absolute_percentage_error: 98.8765
Epoch 2/250
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.9115 - mean_absolute_percentage_error: 100.7719 - val_loss: 1.6717 - val_mean_absolute_percentage_error: 98.7543
Epoch 3/250
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.9109 - mean_absolute_percentage_error: 100.6872 - val_loss: 1.6696 - val_mean_absolute_percentage_error: 98.6110
Epoch 4/250
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.9103 - mean_absolute_percentage_error: 100.6020 - val_loss: 1.6675 - val_mean_absolute_percentage_error: 98.4595
Epoch 5/250
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.9098 - mean_absolute_percentage_error: 100.5188 - val_loss: 1.6653 - val_mean

<keras.src.callbacks.history.History at 0x7f6170a12d90>

# Качество модели

возьмем предпоследние 6 месяцев

In [149]:
results = model.evaluate(x_test, y_test, batch_size=128)

print(f'test loss: {results[0]:0.2f}, test MAPE: {results[1]:0.2f}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 1.2551 - mean_absolute_percentage_error: 105.0614
test loss: 1.26, test MAPE: 105.06


In [150]:
pred_last_x = df.iloc[6:12].z_goal.values
pred_last_x

array([-0.0826886 , -0.23777918, -0.41502556, -0.61442774, -0.83598572,
       -1.0132321 ])

тогда предсказать надо будет

In [151]:
df[['date', 'goal']].iloc[3:6]

Unnamed: 0,date,goal
299,2024-01-31,107.4
298,2023-12-31,107.4
297,2023-11-30,107.5


In [152]:
x = np.array([pred_last_x])
x = np.reshape(x, (x.shape[0], ModelConfig.lookback, 1))

z_transformer.inverse_transform(model.predict(x))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step


array([[107.42727 , 107.303024, 107.37627 ]], dtype=float32)