In [43]:
import warnings
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, Ridge, RidgeCV, ElasticNet, ElasticNetCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
warnings.filterwarnings('ignore')

# Подготовка данных

# Подготовим функцию для вывода на печать метрик модели:

In [44]:
from sklearn.tree import DecisionTreeRegressor
def print_model_metrics(estimator, y_train, y_pred):
    print(f"Коэффициент детерминации: {estimator.score(X,y)}")
    print(f'MSE: {mean_squared_error(y_train, y_pred)}')
    print(f'RMSE: {mean_squared_error(y_train, y_pred, squared=False)}')
    print(f'MAE: {mean_absolute_error(y_train, y_pred)}')

In [45]:
dataset = pd.read_csv('../data/forestfires_preprocessed.csv')
dataset.head()



Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,...,month_nov,month_oct,month_sep,day_fri,day_mon,day_sat,day_sun,day_thu,day_tue,day_wed
0,7,5,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,...,0,0,0,1,0,0,0,0,0,0
1,7,4,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,...,0,1,0,0,0,0,0,0,1,0
2,7,4,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,...,0,1,0,0,0,1,0,0,0,0
3,8,6,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,...,0,0,0,1,0,0,0,0,0,0
4,8,6,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,...,0,0,0,0,0,0,1,0,0,0


# Выделим вектор исследуемого признака и матрицу объект-признак

In [46]:
y = dataset["DC"]
y

0       94.3
1      669.1
2      686.9
3       77.5
4      102.2
       ...  
512    665.6
513    665.6
514    665.6
515    614.7
516    106.7
Name: DC, Length: 517, dtype: float64

In [47]:
X = dataset[dataset.columns[1:]]
X

Unnamed: 0,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,...,month_nov,month_oct,month_sep,day_fri,day_mon,day_sat,day_sun,day_thu,day_tue,day_wed
0,5,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00,...,0,0,0,1,0,0,0,0,0,0
1,4,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00,...,0,1,0,0,0,0,0,0,1,0
2,4,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00,...,0,1,0,0,0,1,0,0,0,0
3,6,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00,...,0,0,0,1,0,0,0,0,0,0
4,6,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,3,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44,...,0,0,0,0,0,0,1,0,0,0
513,4,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29,...,0,0,0,0,0,0,1,0,0,0
514,4,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16,...,0,0,0,0,0,0,1,0,0,0
515,4,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00,...,0,0,0,0,0,1,0,0,0,0


In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=0)

# Модель LinearRegression
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [49]:
lr = LinearRegression().fit(X_train, y_train)
print_model_metrics(lr, y_train, lr.predict(X_train))

Коэффициент детерминации: 1.0
MSE: 1.5915200299051479e-25
RMSE: 3.989385955137893e-13
MAE: 3.245857139128706e-13


# Модель DecisionTreeRegressor (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

In [50]:
parameters = {'max_depth': np.arange(1,15,1),
              'max_features': np.arange(5,X.shape[1]-1,1)}

In [51]:
model = GridSearchCV(DecisionTreeRegressor(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'max_depth': 11, 'max_features': 25}

In [52]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 0.9998937631216844
MSE: 0.0008243166287019308
RMSE: 0.028710914800854585
MAE: 0.005723234624153262


# Модель Lasso (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso

In [53]:
parameters = {'alpha': np.arange(0.1,2,0.05)}

In [54]:
model = GridSearchCV(Lasso(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'alpha': 0.1}

In [55]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 0.9999999999974093
MSE: 1.6091148907572194e-07
RMSE: 0.000401137743269967
MAE: 0.00033314595158356365


# Модель LassoCV
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoCV.html

In [56]:
lasso_cv = LassoCV(cv=5, random_state=0).fit(X_train, y_train)
print_model_metrics(lasso_cv, y_train, lasso_cv.predict(X_train))

Коэффициент детерминации: 0.9999989994438212
MSE: 0.06214596644850607
RMSE: 0.24929092732890634
MAE: 0.2070367712911528


# Модель Ridge (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge

In [57]:
parameters = {'alpha': np.arange(0.1,2,0.05)}
model = GridSearchCV(Ridge(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'alpha': 0.1}

In [58]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 0.9999999999999997
MSE: 1.982668177839729e-11
RMSE: 4.452716224777556e-06
MAE: 3.2054933897036054e-06


# Модель RidgeCV
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeCV.html#sklearn.linear_model.RidgeCV

In [59]:
ridge_cv = RidgeCV(cv=5).fit(X_train, y_train)
print_model_metrics(ridge_cv, y_train,ridge_cv.predict(X_train))

Коэффициент детерминации: 0.9999999999999997
MSE: 1.982668177839729e-11
RMSE: 4.452716224777556e-06
MAE: 3.2054933897036054e-06


# Модель ElasticNet (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html

In [60]:
parameters = {'alpha': np.arange(0.1,2,0.05), 'l1_ratio': np.arange(0.1, 1.1, 0.1)}
model = GridSearchCV(ElasticNet(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'alpha': 0.20000000000000004, 'l1_ratio': 0.7000000000000001}

In [61]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 0.9999999935375274
MSE: 0.00039427800391764585
RMSE: 0.019856434823946768
MAE: 0.014930812798865713


# Модель ElasticNetCV
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNetCV.html#sklearn.linear_model.ElasticNetCV

In [62]:
elastic_net_cv = ElasticNetCV(cv=5, random_state=0).fit(X_train, y_train)
print_model_metrics(elastic_net_cv, y_train,elastic_net_cv.predict(X_train))

Коэффициент детерминации: 0.9999960057677434
MSE: 0.24808744282092637
RMSE: 0.4980837708869125
MAE: 0.41365988269961185
