In [1]:
import numpy as np
import pandas as pd

In [38]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# === LOAD DATA ===
df_train = pd.read_excel("train.xlsx", engine="openpyxl")
df_test = pd.read_excel("test.xlsx", engine="openpyxl")

# === PREPARE THE TIME SERIES ===
# Univariate target series indexed by date.
ts = df_train.set_index("dt")["Цена на арматуру"]
ts.index = pd.to_datetime(ts.index)
# Conform the index to a weekly, Monday-anchored frequency so the model
# knows the step size and can generate dated forecasts.
ts = ts.asfreq("W-MON")

# === FIT THE SARIMA MODEL ===
# Seasonal period of 52 steps, i.e. one year of weekly observations.
model = SARIMAX(ts,
                order=(1, 1, 1),
                seasonal_order=(1, 1, 1, 52),
                enforce_stationarity=False,
                enforce_invertibility=False)

sarima_result = model.fit(disp=False)

# === FORECAST OVER THE TEST PERIOD ===
# One forecast step per row of the test set.
n_periods = len(df_test)
forecast_obj = sarima_result.get_forecast(steps=n_periods)
forecast_mean = forecast_obj.predicted_mean

# === ATTACH DATES AND MATCH WITH ACTUALS ===
df_test["dt"] = pd.to_datetime(df_test["dt"])
df_test = df_test.set_index("dt")
# Column assignment aligns on the datetime index: any test date that is
# missing from the forecast index ends up as NaN in this column.
df_test["SARIMA_Прогноз"] = forecast_mean

# === EVALUATE QUALITY ===
y_true = df_test["Цена на арматуру"]
y_pred = df_test["SARIMA_Прогноз"]

mae = mean_absolute_error(y_true, y_pred)
# RMSE is computed as sqrt(MSE) explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so this form works on both old and new releases.
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
r2 = r2_score(y_true, y_pred)

# === PRINT RESULTS ===
print("📊 Метрики качества модели SARIMA:")
print(f"MAE:  {mae:.2f} ₽")
print(f"RMSE: {rmse:.2f} ₽")
print(f"R²:   {r2:.4f}")

📊 Метрики качества модели SARIMA:
MAE:  5200.16 ₽
RMSE: 6244.77 ₽
R²:   -2.0423




In [34]:
# Display the current metric values as a (MAE, RMSE, R²) tuple.
(mae, rmse, r2)

(5200.161099904093, 6244.767309466797, -2.042320707093102)

In [40]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# === LOAD DATA ===
df_train = pd.read_excel("train.xlsx", engine="openpyxl")
df_test = pd.read_excel("test.xlsx", engine="openpyxl")

# === COMBINE FOR FEATURE GENERATION ===
# Flag the origin of each row so the frame can be split back after the
# features are computed on the full, date-sorted timeline.
df_train["is_train"] = 1
df_test["is_train"] = 0
df_all = pd.concat([df_train, df_test], ignore_index=True)
df_all["dt"] = pd.to_datetime(df_all["dt"])
df_all = df_all.sort_values("dt")

# === CALENDAR FEATURES ===
df_all["week"] = df_all["dt"].dt.isocalendar().week
df_all["month"] = df_all["dt"].dt.month
df_all["quarter"] = df_all["dt"].dt.quarter
df_all["year"] = df_all["dt"].dt.year

# === LAGS AND ROLLING MEANS ===
# NOTE(review): these are built from the target over train+test combined, so
# test rows use the actual prices of preceding test weeks. The resulting score
# is a one-step-ahead evaluation and is not directly comparable with a
# multi-step forecast made from the end of the training period.
# NOTE(review): rolling_mean_1 is identical to lag_1 (window of one over the
# series shifted by one).
for lag in [1, 2, 4, 12]:
    df_all[f"lag_{lag}"] = df_all["Цена на арматуру"].shift(lag)
    # shift(1) keeps the current row's own target out of its rolling window.
    df_all[f"rolling_mean_{lag}"] = df_all["Цена на арматуру"].shift(1).rolling(window=lag).mean()

# === DROP NaN ROWS ===
# Removes the leading rows whose lags/rolling windows are incomplete; note
# that dropna() also drops rows with NaN in any other column.
df_all = df_all.dropna().reset_index(drop=True)

# === SPLIT BACK ===
df_train_feat = df_all[df_all["is_train"] == 1].copy()
df_test_feat = df_all[df_all["is_train"] == 0].copy()

# === FEATURES AND TARGET ===
# Every column except the date, the target and the split flag is a feature.
features = [col for col in df_train_feat.columns if col not in ["dt", "Цена на арматуру", "is_train"]]
target = "Цена на арматуру"
X_train = df_train_feat[features]
y_train = df_train_feat[target]
X_test = df_test_feat[features]

# === SCALING ===
# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === FIT THE MODEL ===
model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train_scaled, y_train)

# === PREDICT ===
predictions = model.predict(X_test_scaled)

# === STORE AND EVALUATE ===
df_test_feat["RF_Прогноз"] = predictions
df_test_feat = df_test_feat.set_index("dt")

y_true = df_test_feat["Цена на арматуру"]
y_pred = df_test_feat["RF_Прогноз"]

mae = mean_absolute_error(y_true, y_pred)
# RMSE is computed as sqrt(MSE) explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so this form works on both old and new releases.
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
r2 = r2_score(y_true, y_pred)

# === PRINT RESULTS ===
print("📊 Метрики Random Forest:")
print(f"MAE:  {mae:.2f} ₽")
print(f"RMSE: {rmse:.2f} ₽")
print(f"R²:   {r2:.4f}")


📊 Метрики Random Forest:
MAE:  1221.16 ₽
RMSE: 1655.87 ₽
R²:   0.7861


