In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.nonparametric.smoothers_lowess import lowess
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# Read the training and hold-out spreadsheets from the working directory.
train, test = (pd.read_excel(path) for path in ("train.xlsx", "test.xlsx"))

In [None]:
# Additive decomposition of the training price series, plotted as one figure.
# NOTE(review): period=52 presumably means a yearly cycle on weekly data — confirm the sampling frequency.
result = seasonal_decompose(
    x=train['Цена на арматуру'],
    model='additive',
    period=52,
)
result.plot()
plt.show()

In [None]:
# Add a LOWESS-smoothed copy of the price to each frame, using the row position
# as the x-axis. Column 1 of the lowess result holds the fitted values.
for frame in (train, test):
    row_positions = np.arange(len(frame))
    fitted = lowess(frame['Цена на арматуру'], row_positions, frac=0.1)
    frame['smoothed_price'] = fitted[:, 1]


In [None]:
# Calendar features derived from the `dt` timestamp column of each frame.
for frame in (train, test):
    frame['month'] = frame['dt'].dt.month
    # isocalendar() returns the nullable UInt32 extension dtype; cast to a plain
    # NumPy integer so the feature matrix built later contains only ordinary
    # numeric columns.
    frame['week_of_year'] = frame['dt'].dt.isocalendar().week.astype('int64')

dt                  0
Цена на арматуру    0
dtype: int64
dt                  0
Цена на арматуру    0
dtype: int64


In [None]:
# Autoregressive features built from *past* prices only.
# Assumes rows are in chronological order — TODO confirm the spreadsheets are sorted by `dt`.
for frame in (train, test):
    price = frame['Цена на арматуру']

    # Mean of the four previous observations. The series is shifted by one row
    # first so the current price (the regression target) is not part of its own
    # feature; without the shift the target is exactly recoverable from
    # rolling_mean_4 and lag_1..lag_3.
    frame['rolling_mean_4'] = price.shift(1).rolling(window=4).mean()

    # Prices observed 1, 2 and 3 rows earlier.
    for lag in range(1, 4):
        frame[f'lag_{lag}'] = price.shift(lag)

# The first rows have no complete history (NaN lags / rolling mean): drop them.
train.dropna(inplace=True)
test.dropna(inplace=True)

In [None]:
# Model inputs: three price lags, two calendar features and the trailing mean.
feature_cols = ['lag_1', 'lag_2', 'lag_3', 'month', 'week_of_year', 'rolling_mean_4']

X = train[feature_cols]
y = train['Цена на арматуру']

# Chronological hold-out: the last 20% of rows form the validation set.
# A shuffled split would put rows that come after validation rows into training,
# which makes the early-stopping signal optimistic for a time series.
# Assumes `train` is ordered by date — TODO confirm.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)

X_test = test[feature_cols]
y_test = test['Цена на арматуру']

In [None]:
# Gradient-boosted regressor; verbose=100 sets the logging period to 100 iterations.
model = CatBoostRegressor(verbose=100)

# Fit on the training part and monitor the validation part; training stops once
# the validation metric has not improved for 50 consecutive iterations.
model.fit(
    X_train,
    y_train,
    eval_set=(X_val, y_val),
    early_stopping_rounds=50,
)

In [None]:
# Predict the hold-out period and keep the result both as a standalone array
# and as a column next to the actual prices.
test['predicted_price'] = predictions = model.predict(X_test)


In [None]:
# Actual vs. CatBoost-predicted prices over the hold-out dates.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test['dt'], test['Цена на арматуру'], label='Фактические значения', color='blue')
ax.plot(
    test['dt'],
    test['predicted_price'],
    label='Прогнозируемые значения',
    color='red',
    linestyle='--',
)
ax.set(
    title='Фактические и прогнозируемые цены на арматуру',
    xlabel='Дата',
    ylabel='Цена',
)
ax.legend()
ax.grid()
plt.show()

In [None]:
def calculate_metrics(y_true, y_pred, model_name):
    """Print MAE, MSE, RMSE and R² for a set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned element-wise with ``y_true``.
        model_name: Label inserted into the report header.

    Returns:
        None. The report is written to stdout with every metric formatted to
        two decimal places, followed by a 30-character dashed separator.
    """
    abs_error = mean_absolute_error(y_true, y_pred)
    squared_error = mean_squared_error(y_true, y_pred)
    report = {
        "MAE": abs_error,
        "MSE": squared_error,
        # RMSE is derived from the MSE rather than recomputed from the data.
        "RMSE": np.sqrt(squared_error),
        "R²": r2_score(y_true, y_pred),
    }

    print(f"Метрики для модели {model_name}:")
    for label, value in report.items():
        print(f"{label}: {value:.2f}")
    print("-" * 30)


In [None]:
# Hold-out error report for the CatBoost model.
calculate_metrics(y_true=y_test, y_pred=predictions, model_name="CatBoost")

In [None]:
# Ordinary least-squares baseline trained on the same features as CatBoost.
lr_model = LinearRegression().fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)

# Same hold-out report as for CatBoost, for a like-for-like comparison.
calculate_metrics(y_true=y_test, y_pred=lr_predictions, model_name="Линейная регрессия")

In [None]:
# Overlay the actual prices and both models' forecasts on the hold-out dates.
fig, ax = plt.subplots(figsize=(12, 6))

# (values, line style) for each curve, drawn in this order.
curves = [
    (y_test, dict(label='Фактические значения', color='blue')),
    (predictions, dict(label='CatBoost Прогноз', color='red', linestyle='--')),
    (lr_predictions, dict(label='Линейная регрессия Прогноз', color='green', linestyle='-.')),
]
for values, line_style in curves:
    ax.plot(test['dt'], values, **line_style)

ax.set(
    title='Сравнение моделей на тестовых данных',
    xlabel='Дата',
    ylabel='Цена',
)
ax.legend()
ax.grid()
plt.show()

plt.ylabel('Цена', fontsize=12)