# Импорт данных

На примере прогноза ИПЦ в процентах к соответствующему периоду предыдущего года

In [1]:
import pandas as pd
from datetime import date

In [31]:
df = pd.read_csv('Индекс потребительских цен, Россия — Диаграмма.csv', sep=';')

df.columns = ['uuid', 'date', 'goal']
df.drop(columns=['uuid'], inplace=True)
df.date = pd.to_datetime(df.date, format='%d.%m.%Y').dt.date
df = df.sort_values(by='date', ascending=False)

df.head(3)

Unnamed: 0,date,goal
302,2024-04-30,107.8
301,2024-03-31,107.7
300,2024-02-29,107.7


# Предобработка

Оставим только значения с 2014 года, т.к. до этого времени у ЦБ были другой таргет, санкций на РФ было меньше, экономика была другой и прочее

In [48]:
df = df[df.date > date(year=2015, month=1, day=1)]

Приведем к значениям от 0 до 1, чтобы улучшить качество нейросети. Выберем для этого один из двух нормализаторов

In [35]:
from sklearn.preprocessing import StandardScaler

In [49]:
z_transformer = StandardScaler().fit(df[['goal']])
df['z_goal'] = z_transformer.transform(df[['goal']])

df.head(3)

Unnamed: 0,date,goal,z_goal
302,2024-04-30,107.8,0.161025
301,2024-03-31,107.7,0.138869
300,2024-02-29,107.7,0.138869


и обратно переводим

In [50]:
z_transformer.inverse_transform(df[['z_goal']])[:3]

array([[107.8],
       [107.7],
       [107.7]])

# Создание модели

In [58]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.metrics import MeanAbsolutePercentageError
from tensorflow.keras.layers import GRU, BatchNormalization, Dropout, Dense, TimeDistributed

## Гиперпараметры модели

In [59]:
from dataclasses import dataclass, field

In [76]:
@dataclass
class ModelConfig:
    lookback         = 6   # сколько берем данных для прогноза
    forecast_horizon = 3   # на сколько делаем прогноз
    batch_size       = 256
    n_layers         = 2
    dropout_rate     = 0.2
    epochs           = 100
    
    input_shape: tuple = (lookback, 1)

## Сама модель

In [77]:
model = Sequential()

for _ in range(ModelConfig.n_layers):
    model.add(GRU(units=ModelConfig.batch_size, return_sequences=True, input_shape=ModelConfig.input_shape))
    model.add(BatchNormalization())
    model.add(Dropout(ModelConfig.dropout_rate))

model.add(GRU(units=ModelConfig.batch_size, return_sequences=False))
model.add(BatchNormalization())
model.add(Dropout(ModelConfig.dropout_rate))

# model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=ModelConfig.forecast_horizon, activation='relu'))

model.summary()

  super().__init__(**kwargs)


In [78]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[MeanAbsolutePercentageError()])

# Обучение модели

In [79]:
import numpy as np
from typing import Iterable
from sklearn.model_selection import train_test_split

In [80]:
def prepare_rnn_data(data: Iterable, lookback: int, horizon: int) -> tuple[np.array, np.array]:
    """
    Создает батчи по lookback месяцев для иксов и по horizon месяца для игреков
    
    Также приводит к формату данных для RNN
    """
    
    data_range = range(lookback, len(data) - horizon + 1)
    
    x = np.array([data[i - lookback:i] for i in data_range])
    x = np.reshape(x, (x.shape[0], lookback, 1))

    y = np.array([data[i:i + horizon] for i in data_range])
    
    return x, y

In [81]:
X, y = prepare_rnn_data(df['z_goal'].values, ModelConfig.lookback, ModelConfig.forecast_horizon)

print(f'{X.shape = }, {y.shape = }')

X.shape = (104, 6, 1), y.shape = (104, 3)


In [84]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
x_val, y_val = x_test[10:], y_test[10:]

In [85]:
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=ModelConfig.batch_size,
    epochs=ModelConfig.epochs
)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - loss: 0.5463 - mean_absolute_percentage_error: 108.2552 - val_loss: 1.8067 - val_mean_absolute_percentage_error: 100.0000
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - loss: 0.5483 - mean_absolute_percentage_error: 89.5840 - val_loss: 1.8037 - val_mean_absolute_percentage_error: 99.9501
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 0.6178 - mean_absolute_percentage_error: 107.1767 - val_loss: 1.7846 - val_mean_absolute_percentage_error: 99.7016
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - loss: 0.5649 - mean_absolute_percentage_error: 95.6777 - val_loss: 1.7524 - val_mean_absolute_percentage_error: 99.2648
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - loss: 0.6467 - mean_absolute_percentage_error: 103.9692 - val_loss: 1.7419 - val_mean_

<keras.src.callbacks.history.History at 0x7fa2b6e4a750>

# Качество модели

возьмем предпоследние 6 месяцев

In [86]:
pred_last_x = df.iloc[6:12].z_goal.values
pred_last_x

array([-0.0826886 , -0.23777918, -0.41502556, -0.61442774, -0.83598572,
       -1.0132321 ])

тогда последними будут

In [87]:
df.iloc[3:6]

Unnamed: 0,date,goal,z_goal
299,2024-01-31,107.4,0.072402
298,2023-12-31,107.4,0.072402
297,2023-11-30,107.5,0.094558


In [88]:
x = np.array([pred_last_x])
x = np.reshape(x, (x.shape[0], ModelConfig.lookback, 1))
x

array([[[-0.0826886 ],
        [-0.23777918],
        [-0.41502556],
        [-0.61442774],
        [-0.83598572],
        [-1.0132321 ]]])

In [89]:
z_transformer.inverse_transform(model.predict(x))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step


array([[107.07321, 107.07321, 107.07321]], dtype=float32)