# Time Series

---
Author: Anatoliy Durkin

Updated: 18.05.2025

---
В данном ноутбуке рассмотрены методы работы с временными рядами.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Load the training and test parts of the data set from their CSV files.
train_path, test_path = 'time_series_train.csv', 'time_series_test.csv'
df_train, df_test = pd.read_csv(train_path), pd.read_csv(test_path)

In [None]:
# Display the training frame exactly as it was read from CSV.
df_train

In [None]:
# Display the test frame exactly as it was read from CSV.
df_test

In [None]:
# Print column dtypes and non-null counts (this runs before the Date conversion below).
df_train.info()

## Работа с датой

In [None]:
# Parse a date-time string into a Timestamp using an explicit format string.
pd.to_datetime('2025-02-15T12:30:45', format='%Y-%m-%dT%H:%M:%S')

In [None]:
# Timestamp arithmetic: adding a Timedelta moves the moment three days forward.
pd.to_datetime('2025-02-15T12:30:45', format='%Y-%m-%dT%H:%M:%S') + pd.Timedelta(days=3)

In [None]:
# Subtracting two timestamps yields a Timedelta (the elapsed time between them).
pd.to_datetime('2025-02-15 12:30:45') - pd.to_datetime('2025-02-11 22:10:15')

In [None]:
# Preview the parsed Date column; nothing is assigned here, so df_train is unchanged.
pd.to_datetime(df_train['Date'], format='%Y-%m-%d')

In [None]:
# Replace the Date column with its parsed datetime version (format: YYYY-MM-DD).
parsed_dates = pd.to_datetime(df_train['Date'], format='%Y-%m-%d')
df_train['Date'] = parsed_dates

In [None]:
# Re-check the dtypes now that the Date column has been converted.
df_train.info()

In [None]:
# Use the Date column as the row index of the training frame.
df_train = df_train.set_index('Date')

In [None]:
# Show the first rows now that Date is the index.
df_train.head()

In [None]:
# Apply the same preparation to the test frame: parse Date, then index by it.
test_dates = pd.to_datetime(df_test['Date'], format='%Y-%m-%d')
df_test = df_test.assign(Date=test_dates).set_index('Date')

In [None]:
# True when the Date index never decreases, i.e. the rows are in chronological order.
df_train.index.is_monotonic_increasing

In [None]:
# Weekly totals of units sold.
# Only `number_sold` is aggregated: adding up the `store` and `product`
# identifier columns would produce numbers with no meaning.
# 'W' is the documented weekly offset alias; the lowercase spelling is not.
df_train[['number_sold']].resample('1W').sum()

In [None]:
# Hold-out split without shuffling: df1 gets the first 80% of the rows and
# df2 the last 20%, so the original row order is preserved in both parts.
df1, df2 = train_test_split(df_train, test_size=0.2, shuffle=False)

In [None]:
# Display the first (larger) part of the split.
df1

In [None]:
# Display the second (held-out) part of the split.
df2

## Данные

Выберем магазин и товар, которые в дальнейшем будем рассматривать.

In [None]:
# Plot the sales history of store 0 / product 0.
is_store0_product0 = (df_train['store'] == 0) & (df_train['product'] == 0)
df_train.loc[is_store0_product0, 'number_sold'].plot()

In [None]:
# Keep only the rows of store 0 / product 0 and drop the identifier columns,
# leaving the remaining columns indexed by Date.
id_columns = ['store', 'product']
train_mask = (df_train['store'] == 0) & (df_train['product'] == 0)
test_mask = (df_test['store'] == 0) & (df_test['product'] == 0)
train = df_train[train_mask].drop(columns=id_columns)
test = df_test[test_mask].drop(columns=id_columns)

Выберите также второй набор, с которым в дальнейшем будете выполнять манипуляции самостоятельно. Выберите интересный вариант.

In [None]:
# Your code: select a second (store, product) series to work on independently.
...

In [None]:
# Draw the selected train and test series on the same axes.
plt.plot(train)
plt.plot(test)

## Сглаживание

In [None]:
# Smooth the series with a 7-observation moving average and plot it.
weekly_mean = train.rolling(window=7).mean()
weekly_mean.plot()

In [None]:
# Overlay the raw series with its 7- and 30-observation moving averages.
plt.figure(figsize=(12, 5))
rolling_week = train.rolling(window=7).mean()
rolling_month = train.rolling(window=30).mean()
plt.plot(train)
plt.plot(rolling_week)
plt.plot(rolling_month)

In [None]:
# Smooth the series with a 30-observation moving average and plot it.
monthly_mean = train.rolling(window=30).mean()
monthly_mean.plot()

In [None]:
# Smooth the series with a 365-observation moving average and plot it.
yearly_mean = train.rolling(window=365).mean()
yearly_mean.plot()

## Сезонность

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Additive decomposition with a period of 365 observations; plot its components.
decomposition = seasonal_decompose(train, model='additive', period=365)
decomposition.plot()
plt.show()

## Лаги

In [None]:
# Reference view of the first rows, to compare with the shifted versions below.
train.head()

In [None]:
# shift(1) moves values one row down: each row shows the previous observation,
# and the first row becomes NaN.
train.shift(1).head()

In [None]:
# shift(-1) moves values one row up: each row shows the next observation.
train.shift(-1).head()

In [None]:
# Add columns lag1..lag6, each holding `number_sold` shifted by that many rows.
for lag in range(1, 7):
    train[f'lag{lag}'] = train['number_sold'].shift(lag)
train.head(10)

In [None]:
# Correlation matrix of all current columns, drawn as an annotated heatmap.
lag_correlations = train.corr()
sns.heatmap(lag_correlations, annot=True)

In [None]:
# Remove every column except the first one, discarding the lag features.
train.drop(columns=train.columns[1:], inplace=True)

In [None]:
# Add lag features for 1, 7, 30 and 365 rows back, then show how strongly
# each of them correlates with the current value.
for lag in (1, 7, 30, 365):
    train[f'lag{lag}'] = train['number_sold'].shift(lag, axis=0)

sns.heatmap(train.corr(), annot=True)

In [None]:
# Discard the lag features again, keeping only the first column.
lag_columns = list(train.columns[1:])
train = train.drop(columns=lag_columns)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Autocorrelation function for lags 0..30, with a grid to make values easier to read.
plot_acf(train, lags = 30)
plt.grid(True)
plt.show()

In [None]:
# Partial autocorrelation with the default number of lags.
plot_pacf(train)
# Ticks every 7 lags so that weekly multiples are easy to spot.
plt.xticks(range(0,40,7))
plt.grid(True)
plt.show()

In [None]:
# Autocorrelation function for lags 0..365.
plot_acf(train, lags = 365)
# Ticks every 28 lags (four weeks).
plt.xticks(range(0,366, 28))

plt.grid(True)
plt.show()

## Прогнозирование

In [None]:
# Uncomment the next line to install skforecast if it is missing from the environment.
# !pip install skforecast

In [None]:
from skforecast.recursive import ForecasterRecursive
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster

from sklearn.linear_model import LinearRegression

In [None]:
# Give the Date index an explicit daily frequency.
# asfreq returns a new object, so the result has to be assigned back;
# calling it without assignment leaves `train` (and its freq=None index) untouched.
# Days missing from the data, if any, appear as NaN rows after this step.
# NOTE(review): skforecast is expected to want a DatetimeIndex with a frequency
# in order to date its predictions - confirm against its input-data docs.
train = train.asfreq('D')
train

In [None]:
# Recursive forecaster: a linear regression over the previous 365 observations.
linear_model = LinearRegression()
forecaster = ForecasterRecursive(regressor=linear_model, lags=365)

# Train on the sales series, then echo the fitted forecaster's summary.
forecaster.fit(train['number_sold'])
forecaster

In [None]:
# Forecast as many steps ahead as there are rows in the test series.
steps = len(test)

y_pred = forecaster.predict(steps = steps)
y_pred.head()

In [None]:
# Re-label the forecast with the test dates so it lines up with `test` on a plot.
# NOTE(review): this overwrites whatever index predict() returned; it assumes the
# test period starts right after the training data - confirm.
y_pred.index=test.index

In [None]:
# Compare the training history, the actual test values and the forecast.
# Each line is labelled and a legend is drawn; without them the three
# curves cannot be told apart on the chart.
plt.plot(train, label='train')
plt.plot(test, label='test')
plt.plot(y_pred, label='forecast')
plt.legend()