In [1]:
# Импорт необходимых библиотек
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
import xgboost as xgb

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor

In [21]:
# 1. Загрузка очищенных данных
df = pd.read_csv('processed_output.csv', parse_dates=['pickup_date'])
df['ds'] = df['pickup_date'] + pd.to_timedelta(df['pickup_hour'], unit='h')
df = df.set_index('ds')
# Предполагаем, что ваш исходный DataFrame называется df

# Список колонок, которые нужно удалить
cols_to_drop = [
    'trip_time',
    'sales_tax',
    'congestion_surcharge',
    'driver_pay',
    'cbd_congestion_fee',
    'fare_amount',
    'extra',
    'total_amount',
    'passenger_count',
    'trip_distance',
    'RatecodeID',
    'payment_type',
    'dropoff_location_id',
    'WT03'
]

# Создаём новую версию DataFrame без этих колонок
df = df.drop(columns=cols_to_drop, errors='ignore')

# Если нужно перезаписать исходный df:
# df.drop(columns=cols_to_drop, inplace=True, errors='ignore')

# Для проверки
print("Оставшиеся колонки:", df.columns.tolist())
df.info()


Оставшиеся колонки: ['pickup_location_id', 'pickup_date', 'pickup_hour', 'ride_count', 'AWND', 'PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN', 'WT01']
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 326843 entries, 2022-01-07 08:00:00 to 2025-04-30 15:00:00
Data columns (total 11 columns):
 #   Column              Non-Null Count   Dtype         
---  ------              --------------   -----         
 0   pickup_location_id  326843 non-null  float64       
 1   pickup_date         326843 non-null  datetime64[ns]
 2   pickup_hour         326843 non-null  int64         
 3   ride_count          326843 non-null  int64         
 4   AWND                260358 non-null  float64       
 5   PRCP                260358 non-null  float64       
 6   SNOW                260358 non-null  float64       
 7   SNWD                260358 non-null  float64       
 8   TMAX                260358 non-null  float64       
 9   TMIN                260358 non-null  float64       
 10  WT01                469

In [5]:
# Метрики вручную
def r2_score_np(y_true, y_pred):
    ss_res = np.sum((y_true - y_pred)**2)
    ss_tot = np.sum((y_true - np.mean(y_true))**2)
    return 1 - ss_res/ss_tot

def rmse_np(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred)**2))

In [22]:
# После того, как вы прочитали df и установили индекс ds:

# 1. Правильный таргет: агрегируем ride_count по ds
y = df['ride_count'].groupby(df.index).sum().sort_index()

# 2. Экзогенные числовые тоже агрегируем по ds
exog = df.drop(columns=['pickup_date','pickup_hour','ride_count'])
num_cols = exog.select_dtypes(include=[np.number]).columns
exog_num = exog[num_cols].groupby(exog.index).mean()

# 3. Категориальные one-hot + mean по ds (если есть)
cat_cols = exog.select_dtypes(exclude=[np.number]).columns.tolist()
if cat_cols:
    dummies = pd.get_dummies(exog[cat_cols], drop_first=True)
    exog_cat = dummies.groupby(dummies.index).mean()
    exog_agg = pd.concat([exog_num, exog_cat], axis=1).sort_index()
else:
    exog_agg = exog_num.sort_index()
    
# 5. Очистка inf и NaN
exog_agg = exog_agg.replace([np.inf, -np.inf], np.nan)
exog_agg = exog_agg.fillna(method='ffill').fillna(method='bfill').fillna(0)

# 4. Синхронизируем и смотрим
data = pd.concat([y, exog_agg], axis=1).dropna()
print("Индекс таргета:", len(y),      "->", y.index.min(), "–", y.index.max())
print("Индекс экзогенных:", len(exog_agg), "->", exog_agg.index.min(), "–", exog_agg.index.max())
print("Общих точек после dropna:", len(data))

y_sync    = data['ride_count']
exog_sync = data.drop(columns='ride_count')

Индекс таргета: 3419 -> 2008-12-31 22:00:00 – 2025-05-01 22:00:00
Индекс экзогенных: 3419 -> 2008-12-31 22:00:00 – 2025-05-01 22:00:00
Общих точек после dropna: 3419


  exog_agg = exog_agg.fillna(method='ffill').fillna(method='bfill').fillna(0)


In [7]:
exog_sync.isna().sum()

pickup_location_id      0
trip_time               0
sales_tax               0
congestion_surcharge    0
driver_pay              0
cbd_congestion_fee      0
passenger_count         0
trip_distance           0
RatecodeID              0
payment_type            0
fare_amount             0
extra                   0
total_amount            0
dropoff_location_id     0
AWND                    0
PRCP                    0
SNOW                    0
SNWD                    0
TMAX                    0
TMIN                    0
WT01                    0
WT03                    0
dtype: int64

In [23]:
# 4) Разбиение на train/test (80/20)
n = len(y)
split = int(0.8 * n)
train_y, test_y       = y.iloc[:split], y.iloc[split:]
train_exog, test_exog = exog_agg.iloc[:split], exog_agg.iloc[split:]


results = {}

In [9]:
# 5) Seasonal Naïve (исправленный)
season = 24 * 7

# full_series — ваш полный синхронный ряд y_sync или y
full_naive = y.shift(season)

# берём прогноз ровно на тестовые метки
naive_pred = full_naive.loc[test_y.index]

# убираем первые season точек, где прогноз отсутствует
mask_valid = ~naive_pred.isna()

# считаем метрики только по валидным точкам
results['SeasonalNaive'] = (
    r2_score_np(test_y[mask_valid].values, naive_pred[mask_valid].values),
    rmse_np(test_y[mask_valid].values, naive_pred[mask_valid].values)
)

In [10]:
# 6) SARIMAX с экзогенными переменными
sar = SARIMAX(
    train_y, 
    exog=train_exog,
    order=(1,0,1),
    seasonal_order=(1,1,1,24),
    enforce_stationarity=False,
    enforce_invertibility=False
).fit(disp=False)
sar_pred = sar.get_forecast(steps=len(test_y), exog=test_exog).predicted_mean
results['SARIMAX'] = (
    r2_score_np(test_y.values, sar_pred.values),
    rmse_np(test_y.values, sar_pred.values)
)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(


In [11]:
# 7) XGBoost
xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=42, n_jobs=-1)
xgb_model.fit(train_exog, train_y.values)
xgb_pred = xgb_model.predict(test_exog)
results['XGBoost'] = (
    r2_score_np(test_y.values, xgb_pred),
    rmse_np(test_y.values, xgb_pred)
)  # ← вот эта скобка закрывает кортеж

In [12]:
# 8) RandomForest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(train_exog, train_y.values)
rf_pred = rf_model.predict(test_exog)
results['RandomForest'] = (
    r2_score_np(test_y.values, rf_pred),
    rmse_np(test_y.values, rf_pred)
)


In [13]:
# 9) MLPRegressor
mlp_model = MLPRegressor(hidden_layer_sizes=(50,), max_iter=300, random_state=42)
mlp_model.fit(train_exog, train_y.values)
mlp_pred = mlp_model.predict(test_exog)
results['MLPRegressor'] = (
    r2_score_np(test_y.values, mlp_pred),
    rmse_np(test_y.values, mlp_pred)
)



In [14]:
# 10) Вывод результатов
print("\nСравнение моделей:")
print("Модель            \tR2\t\tRMSE")
for name,(r2,rmse) in results.items():
    print(f"{name:15s}\t{r2:.3f}\t\t{rmse:.3f}")


Сравнение моделей:
Модель            	R2		RMSE
SeasonalNaive  	0.704		1598.825
SARIMAX        	0.784		1366.914
XGBoost        	0.603		1852.270
RandomForest   	0.651		1736.285
MLPRegressor   	-2.269		5315.530


In [15]:
import joblib
from sklearn.experimental import enable_iterative_imputer  
from sklearn.impute import IterativeImputer
from sklearn.metrics import r2_score, mean_squared_error
# Пусть ваша лучшая модель SARIMAX называется `sar`
# Сохраним её на диск:
joblib.dump(sar, 'best_sarimax_model.joblib')

# Проверим, что файл создался и что мы можем загрузить модель:
loaded_sar = joblib.load('best_sarimax_model.joblib')

# И сделаем быстрый прогноз, чтобы убедиться, что всё работает:
pred = loaded_sar.get_forecast(steps=len(test_y), exog=test_exog).predicted_mean
print(pred.iloc[:5])

2735    10285.944651
2736    10147.706272
2737    10686.322741
2738     9411.133131
2739    10581.577896
Name: predicted_mean, dtype: float64


  return get_prediction_index(


In [24]:
n = len(y)
split = int(0.8 * n)
y_train, y_test       = y.iloc[:split], y.iloc[split:]
X_train, X_test = exog_agg.iloc[:split], exog_agg.iloc[split:]
miss_ratio = X_train.isna().mean()
drop_feats = miss_ratio[miss_ratio > 0.6].index.tolist()
X_train_red = X_train.drop(columns=drop_feats)
X_test_red  = X_test.drop(columns=drop_feats)

# 4) Mean-импутация (для сравнения)
X_mean_train = X_train_red.fillna(X_train_red.mean())
X_mean_test  = X_test_red.fillna(X_train_red.mean())

# 5) Iterative Imputer
iter_imp = IterativeImputer(
    max_iter=10,
    random_state=0,
    initial_strategy='mean'
)
# обучаем и трансформируем
X_iter_train = pd.DataFrame(
    iter_imp.fit_transform(X_train_red),
    columns=X_train_red.columns,
    index=X_train_red.index
)
X_iter_test  = pd.DataFrame(
    iter_imp.transform(X_test_red),
    columns=X_test_red.columns,
    index=X_test_red.index
)

In [17]:
import itertools   # ← не забудьте!
from tqdm import tqdm

def sarimax_grid_search(y_train, X_train, y_val, X_val,
                        pdq_range, seasonal_pdq_range,
                        enforce_stationarity=False,
                        enforce_invertibility=False):
    results = []
    n_train = len(y_train)
    n_val   = len(y_val)

    for order in tqdm(pdq_range, desc="ARIMA orders"):
        for seasonal_order in seasonal_pdq_range:
            try:
                model = SARIMAX(
                    endog=y_train,
                    exog=X_train,
                    order=order,
                    seasonal_order=seasonal_order,
                    enforce_stationarity=enforce_stationarity,
                    enforce_invertibility=enforce_invertibility
                )
                res = model.fit(disp=False)

                # прогноз по позициям, а не по времени:
                start = n_train
                end   = n_train + n_val - 1
                y_pred = res.predict(start=start, end=end, exog=X_val)

                r2   = r2_score(y_val, y_pred)
                rmse = np.sqrt(mean_squared_error(y_val, y_pred))
                results.append({
                    'order': order,
                    'seasonal_order': seasonal_order,
                    'r2': r2,
                    'rmse': rmse
                })
            except Exception:
                continue

    return pd.DataFrame(results)

# 1) Задаём диапазоны гиперпараметров
p = [0, 1, 2]
d = [0, 1]
q = [0, 1, 2]
P = [0, 1]
D = [0, 1]
Q = [0, 1]
s = [24]  # сезонность 24 для почасовых данных

pdq_range = list(itertools.product(p, d, q))
seasonal_pdq_range = [(P_, D_, Q_, s_) for P_, D_, Q_, s_ in itertools.product(P, D, Q, s)]

# 2) Перебор для Mean-Imputation
df_mean_search = sarimax_grid_search(
    y_train, X_mean_train,
    y_test,  X_mean_test,
    pdq_range, seasonal_pdq_range
)
# Сортируем по RMSE и выводим топ-5
print("Mean imputation — топ 5 по RMSE")
print(df_mean_search.sort_values('rmse').head(5))


# 3) Перебор для Iterative-Imputation
df_iter_search = sarimax_grid_search(
    y_train, X_iter_train,
    y_test,  X_iter_test,
    pdq_range, seasonal_pdq_range
)
print("\nIterative imputation — топ 5 по RMSE")
print(df_iter_search.sort_values('rmse').head(5))


# 4) Если нужно, можно объединить результаты и сравнить лучшую комбинацию
best_mean = df_mean_search.sort_values('rmse').iloc[0]
best_iter = df_iter_search.sort_values('rmse').iloc[0]

print("\nЛучшие гиперпараметры:")
print(f"  Mean:      order={best_mean['order']}, seasonal_order={best_mean['seasonal_order']}, RMSE={best_mean['rmse']:.4f}, R2={best_mean['r2']:.4f}")
print(f"  Iterative: order={best_iter['order']}, seasonal_order={best_iter['seasonal_order']}, RMSE={best_iter['rmse']:.4f}, R2={best_iter['r2']:.4f}")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
ARIMA orders: 100%|█████████████████████████| 18/18 [5:00:04<00:00, 1000.26s/it]


Mean imputation — топ 5 по RMSE
         order seasonal_order        r2         rmse
71   (1, 0, 2)  (1, 1, 1, 24)  0.789134  1350.038149
103  (2, 0, 0)  (1, 1, 1, 24)  0.785712  1360.950336
55   (1, 0, 0)  (1, 1, 1, 24)  0.783121  1369.152742
63   (1, 0, 1)  (1, 1, 1, 24)  0.782591  1370.824772
59   (1, 0, 1)  (0, 1, 1, 24)  0.779114  1381.742776


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  return get_prediction_index(
ARIMA orders: 100%|█████████████████████████| 18/18 [5:17:51<00:00, 1059.54s/it]


Iterative imputation — топ 5 по RMSE
         order seasonal_order        r2         rmse
71   (1, 0, 2)  (1, 1, 1, 24)  0.789134  1350.038149
103  (2, 0, 0)  (1, 1, 1, 24)  0.785712  1360.950336
55   (1, 0, 0)  (1, 1, 1, 24)  0.783121  1369.152742
63   (1, 0, 1)  (1, 1, 1, 24)  0.782591  1370.824772
59   (1, 0, 1)  (0, 1, 1, 24)  0.779114  1381.742776

Лучшие гиперпараметры:
  Mean:      order=(1, 0, 2), seasonal_order=(1, 1, 1, 24), RMSE=1350.0381, R2=0.7891
  Iterative: order=(1, 0, 2), seasonal_order=(1, 1, 1, 24), RMSE=1350.0381, R2=0.7891





In [None]:
import joblib
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Предполагаем, что у вас уже есть:
# - best_mean  = df_mean_search.sort_values('rmse').iloc[0]
# - best_iter  = df_iter_search.sort_values('rmse').iloc[0]
# - y_train, y_test, X_mean_train, X_mean_test, X_iter_train, X_iter_test

# 1) Выбираем лучший кандидат по RMSE
best = best_mean if best_mean['rmse'] <= best_iter['rmse'] else best_iter
best_type = 'Mean Imputation' if best is best_mean else 'Iterative Imputation'

order = tuple(best['order'])
seasonal_order = tuple(best['seasonal_order'])

print(f"\nTraining final SARIMAX with {best_type}:")
print(f"  order={order}, seasonal_order={seasonal_order}")

# 2) Готовим полный тренировочный набор и тестовый по лучшему типу импутации
if best_type == 'Mean Imputation':
    X_train_full, X_test_full = X_mean_train, X_mean_test
else:
    X_train_full, X_test_full = X_iter_train, X_iter_test

y_train_full, y_test_full = y_train, y_test

# 3) Обучаем финальную модель
model_final = SARIMAX(
    endog=y_train_full,
    exog=X_train_full,
    order=order,
    seasonal_order=seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False
)
res_final = model_final.fit(disp=False)

# 4) Делаем прогноз на тесте по позициям (чтобы избежать проблем с индексом)
n_train = len(y_train_full)
n_test  = len(y_test_full)
start   = n_train
end     = n_train + n_test - 1

y_pred = res_final.predict(start=start, end=end, exog=X_test_full)

# 5) Считаем метрики и печатаем
r2   = r2_score(y_test_full, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_full, y_pred))
print(f"\nFinal model metrics on test set:")
print(f"  R²:   {r2:.4f}")
print(f"  RMSE: {rmse:.4f}")

# 6) Сохраняем модель
joblib.dump(res_final, 'best_sarimax_model.joblib')
print("\nBest model saved to 'best_sarimax_model.joblib'")


Training final SARIMAX with Mean Imputation:
  order=(1, 0, 2), seasonal_order=(1, 1, 1, 24)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
