In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from xgboost import XGBClassifier, XGBRegressor
from sklearn.ensemble import RandomForestRegressor

In [3]:
# Feature matrices for the train / validation / test splits.
# na_filter=False makes pandas skip missing-value detection while parsing.
feature_paths = (
    './data/processed/X_train_ohe.csv',
    './data/processed/X_val_ohe.csv',
    './data/processed/X_test_ohe.csv',
)
X_train, X_val, X_test = (
    pd.read_csv(csv_path, na_filter=False) for csv_path in feature_paths
)

# Target files for the same three splits, read with identical options.
target_paths = (
    './data/processed/y_train_ohe.csv',
    './data/processed/y_val_ohe.csv',
    './data/processed/y_test_ohe.csv',
)
y_train, y_val, y_test = (
    pd.read_csv(csv_path, na_filter=False) for csv_path in target_paths
)

In [4]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to all three splits.
scaler = StandardScaler().fit(X_train)
X_train_scaler, X_val_scaler, X_test_scaler = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

## L1-регуляризация

In [5]:
# Baseline Lasso (L1-regularized) regression with the estimator's default settings.
# NOTE(review): the truncated "positive)" fragment in this cell's saved output
# looks like the tail of a scikit-learn ConvergenceWarning — confirm, and
# consider raising max_iter if the solver did not converge.
lasso_model = Lasso()
lasso_model.fit(X_train_scaler, y_train)
# mean_absolute_error's signature is (y_true, y_pred): ground truth goes first.
# MAE is symmetric, so the printed value is the same as with the swapped order.
print(mean_absolute_error(y_val, lasso_model.predict(X_val_scaler)))

22829.418863135536


  positive)


Настроим параметр регуляризации

In [6]:
# Lasso refit with a hand-picked regularization strength; the search that led
# to alpha=77 is not shown in this notebook. Rebinds `lasso_model`.
lasso_model = Lasso(alpha=77)
lasso_model.fit(X_train_scaler, y_train)
# Validation MAE; arguments follow the documented (y_true, y_pred) order.
print(mean_absolute_error(y_val, lasso_model.predict(X_val_scaler)))

22817.474633029353


In [7]:
# MAE of the current Lasso model on the test split, in (y_true, y_pred) order.
print('Качество на тесте: ', mean_absolute_error(y_test, lasso_model.predict(X_test_scaler)))

Качество на тесте:  22685.995494193452


In [17]:
import pickle

# Serialize the current Lasso model to disk in binary mode; the context
# manager closes the file even if pickling fails.
with open('linear_model.pickle', 'wb') as model_file:
    pickle.dump(lasso_model, model_file)

## L2-регуляризация

In [7]:
# Baseline Ridge (L2-regularized) regression with the estimator's default settings.
ridge_model = Ridge()
ridge_model.fit(X_train_scaler, y_train)
# mean_absolute_error's signature is (y_true, y_pred): ground truth goes first.
# MAE is symmetric, so the printed value is the same as with the swapped order.
print(mean_absolute_error(y_val, ridge_model.predict(X_val_scaler)))

22830.846491116776


Настроим параметр регуляризации

In [8]:
# Ridge refit with a hand-picked regularization strength; the search that led
# to alpha=8 is not shown in this notebook. Rebinds `ridge_model`.
ridge_model = Ridge(alpha=8)
ridge_model.fit(X_train_scaler, y_train)
# Validation MAE; arguments follow the documented (y_true, y_pred) order.
print(mean_absolute_error(y_val, ridge_model.predict(X_val_scaler)))

22829.358576611365


In [9]:
# MAE of the current Ridge model on the test split, in (y_true, y_pred) order.
print('Качество на тесте: ', mean_absolute_error(y_test, ridge_model.predict(X_test_scaler)))

Качество на тесте:  22682.347082970748


## ElasticNet

In [10]:
# Baseline ElasticNet (combined L1 + L2 penalty) with the estimator's default settings.
e_model = ElasticNet()
e_model.fit(X_train_scaler, y_train)
# mean_absolute_error's signature is (y_true, y_pred): ground truth goes first.
# MAE is symmetric, so the printed value is the same as with the swapped order.
print(mean_absolute_error(y_val, e_model.predict(X_val_scaler)))

23644.867012977353


Настроим параметры регуляризации

In [11]:
# ElasticNet refit with hand-picked penalty settings (alpha = overall strength,
# l1_ratio = share of the L1 term); the search is not shown in this notebook.
# NOTE(review): the truncated "positive)" fragment in this cell's saved output
# looks like the tail of a scikit-learn ConvergenceWarning — confirm, and
# consider raising max_iter, since a very small alpha can slow convergence.
e_model = ElasticNet(alpha=0.001, l1_ratio=0.7)
e_model.fit(X_train_scaler, y_train)
# Validation MAE; arguments follow the documented (y_true, y_pred) order.
print(mean_absolute_error(y_val, e_model.predict(X_val_scaler)))

22829.421455370433


  positive)


In [12]:
# MAE of the current ElasticNet model on the test split, in (y_true, y_pred) order.
print('Качество на тесте: ', mean_absolute_error(y_test, e_model.predict(X_test_scaler)))

Качество на тесте:  22682.47170120155


# XGBoost

In [13]:
# Baseline gradient-boosted regressor with XGBoost's default settings,
# trained on the same standardized features as the linear models.
xgb_model = XGBRegressor()
xgb_model.fit(X_train_scaler, y_train)
# mean_absolute_error's signature is (y_true, y_pred): ground truth goes first.
# MAE is symmetric, so the printed value is the same as with the swapped order.
print(mean_absolute_error(y_val, xgb_model.predict(X_val_scaler)))

22081.591375709002


Настроим параметры

In [14]:
# XGBoost refit with hand-picked hyperparameters (the search is not shown in
# this notebook); n_jobs=-1 requests all available cores. Rebinds `xgb_model`.
xgb_model = XGBRegressor(n_estimators=500, max_depth=4, learning_rate=0.1, n_jobs=-1)
xgb_model.fit(X_train_scaler, y_train)
# Validation MAE; arguments follow the documented (y_true, y_pred) order.
print(mean_absolute_error(y_val, xgb_model.predict(X_val_scaler)))

21622.793629531683


In [15]:
# MAE of the current XGBoost model on the test split, in (y_true, y_pred) order.
print('Качество на тесте: ', mean_absolute_error(y_test, xgb_model.predict(X_test_scaler)))

Качество на тесте:  21237.12340899344
