# Импорт данных

На примере прогноза ИПЦ в процентах к соответствующему периоду предыдущего года

In [1]:
import pandas as pd
from datetime import date

In [67]:
df = pd.read_csv('Индекс потребительских цен, Россия — Диаграмма.csv', sep=';')

df.columns = ['uuid', 'date', 'goal']
df.drop(columns=['uuid'], inplace=True)
df.date = pd.to_datetime(df.date, format='%d.%m.%Y').dt.date
df = df.sort_values(by='date', ascending=False)

df.head(3)

Unnamed: 0,date,goal
302,2024-04-30,107.8
301,2024-03-31,107.7
300,2024-02-29,107.7


# Предобработка

Оставим только значения с 2014 года, т.к. до этого времени у ЦБ были другой таргет, санкций на РФ было меньше, экономика была другой и прочее

In [68]:
df = df[df.date > date(year=2015, month=1, day=1)]

Приведем к значениям от 0 до 1, чтобы улучшить качество нейросети. Выберем для этого один из двух нормализаторов

In [69]:
from sklearn.preprocessing import StandardScaler

In [70]:
z_transformer = StandardScaler().fit(df[['goal']])
df['z_goal'] = z_transformer.transform(df[['goal']])

df.head(3)

Unnamed: 0,date,goal,z_goal
302,2024-04-30,107.8,0.161025
301,2024-03-31,107.7,0.138869
300,2024-02-29,107.7,0.138869


и обратно переводим

In [71]:
z_transformer.inverse_transform(df[['z_goal']])[:3]

array([[107.8],
       [107.7],
       [107.7]])

# Создание модели

In [72]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanAbsolutePercentageError
from tensorflow.keras.layers import GRU, LSTM, BatchNormalization, Dropout, Dense, TimeDistributed

## Гиперпараметры модели

In [73]:
from dataclasses import dataclass, field

In [209]:
@dataclass
class ModelConfig:
    lookback         = 6   # Сколько берем данных для прогноза
    forecast_horizon = 3   # На сколько делаем прогноз
    n_layers         = 1   # Количество слоев
    units            = 1   # Количество нейронов на каждом слое
    batch_size       = 64
    dropout_rate     = 0.2
    epochs           = 100
    
    input_shape: tuple = (lookback, 1)

## Сама модель

In [210]:
tf.compat.v2.random.set_seed(1)
model = Sequential()

for i in range(ModelConfig.n_layers):    
    model.add(
        LSTM(
            units=ModelConfig.units,
            return_sequences=True if (i != ModelConfig.n_layers - 1) else False,
            input_shape=ModelConfig.input_shape
        )
    )

model.add(Dense(units=ModelConfig.forecast_horizon, activation='linear'))

model.summary()

  super().__init__(**kwargs)


In [211]:
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mean_squared_error',
    metrics=[MeanAbsolutePercentageError()]
)

# Обучение модели

In [77]:
import numpy as np
from typing import Iterable
from sklearn.model_selection import train_test_split

In [78]:
def prepare_rnn_data(data: Iterable, lookback: int, horizon: int) -> tuple[np.array, np.array]:
    """
    Создает батчи по lookback месяцев для иксов и по horizon месяца для игреков
    
    Также приводит к формату данных для RNN
    """
    
    data_range = range(lookback, len(data) - horizon + 1)
    
    x = np.array([data[i - lookback:i] for i in data_range])
    x = np.reshape(x, (x.shape[0], lookback, 1))

    y = np.array([data[i:i + horizon] for i in data_range])
    
    return x, y

In [212]:
X, y = prepare_rnn_data(df['z_goal'].values, ModelConfig.lookback, ModelConfig.forecast_horizon)

print(f'{X.shape = }, {y.shape = }')

X.shape = (104, 6, 1), y.shape = (104, 3)


In [213]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
x_val, y_val = x_test[-12:], y_test[-12:]

In [214]:
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=ModelConfig.batch_size,
    epochs=ModelConfig.epochs
)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 158ms/step - loss: 0.7722 - mean_absolute_percentage_error: 93.0236 - val_loss: 1.4446 - val_mean_absolute_percentage_error: 92.7510
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.7708 - mean_absolute_percentage_error: 93.0483 - val_loss: 1.4420 - val_mean_absolute_percentage_error: 92.6100
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.7696 - mean_absolute_percentage_error: 93.0442 - val_loss: 1.4393 - val_mean_absolute_percentage_error: 92.4431
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.7685 - mean_absolute_percentage_error: 93.0373 - val_loss: 1.4365 - val_mean_absolute_percentage_error: 92.2660
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.7674 - mean_absolute_percentage_error: 93.0297 - val_loss: 1.4338 - val_mean_abso

<keras.src.callbacks.history.History at 0x7f6169367e10>

# Качество модели

возьмем предпоследние 6 месяцев

In [215]:
results = model.evaluate(x_test, y_test, batch_size=128)

print(f'test loss: {results[0]:0.2f}, test MAPE: {results[1]:0.2f}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.8658 - mean_absolute_percentage_error: 87.5163
test loss: 0.87, test MAPE: 87.52


In [216]:
pred_last_x = df.iloc[6:12].z_goal.values
pred_last_x

array([-0.0826886 , -0.23777918, -0.41502556, -0.61442774, -0.83598572,
       -1.0132321 ])

тогда предсказать надо будет

In [217]:
df[['date', 'goal']].iloc[3:6]

Unnamed: 0,date,goal
299,2024-01-31,107.4
298,2023-12-31,107.4
297,2023-11-30,107.5


In [218]:
x = np.array([pred_last_x])
x = np.reshape(x, (x.shape[0], ModelConfig.lookback, 1))

z_transformer.inverse_transform(model.predict(x))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step


array([[105.77394, 105.64407, 107.02989]], dtype=float32)