In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Train a depth-limited decision-tree regressor on the processed training set,
# report hold-out metrics, and persist the fitted model to disk.

# Load the data; strip stray whitespace from header names so that the column
# lookups below match exactly.
data = pd.read_csv("../data/processed/train.csv")
data.columns = data.columns.str.strip()

# Fail fast with a clear message if the expected schema is not present.
required_cols = ['area', 'total_floors', 'price', 'rooms_1', 'rooms_2', 'rooms_3', 'first_floor', 'last_floor']
missing_cols = [col for col in required_cols if col not in data.columns]
if missing_cols:
    raise KeyError(f"Отсутствуют необходимые столбцы: {missing_cols}")

# Derive the feature list from the validated schema instead of repeating it,
# so the check above and the feature matrix below can never drift apart.
target_col = 'price'
feature_cols = [col for col in required_cols if col != target_col]

X = data[feature_cols].astype(float)
y = data[target_col]

# One seed for both the split and the model so the whole run is reproducible.
random_seed = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_seed)

# scikit-learn permutes the features at every split, so without a fixed
# random_state ties between equally good splits may be resolved differently
# from run to run, changing the fitted tree and the metrics below.
model = DecisionTreeRegressor(max_depth=5, random_state=random_seed)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Hold-out metrics on the 20% test split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = np.mean(np.abs(y_test - y_pred))

print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.6f}")
print(f"MAE: {mae:.2f} рублей")

# Create the model directory if it does not exist yet, then save the model.
os.makedirs('../models', exist_ok=True)
model_path = '../models/decision_tree_reg_1.pkl'
joblib.dump(model, model_path)
print(f"Модель сохранена в файл {model_path}")


MSE: 44843804165715.62
RMSE: 6696551.66
R²: 0.825887
MAE: 3307156.39 рублей
Модель сохранена в файл ../models/decision_tree_reg_1.pkl
