In [3]:
import warnings
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, Ridge, RidgeCV, ElasticNet, ElasticNetCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
warnings.filterwarnings('ignore')

# Подготовим функцию для вывода на печать метрик модели:

In [4]:
from sklearn.tree import DecisionTreeRegressor
def print_model_metrics(estimator, y_train, y_pred):
    print(f"Коэффициент детерминации: {estimator.score(X,y)}")
    print(f'MSE: {mean_squared_error(y_train, y_pred)}')
    print(f'RMSE: {mean_squared_error(y_train, y_pred, squared=False)}')
    print(f'MAE: {mean_absolute_error(y_train, y_pred)}')


In [5]:
phone_df = pd.read_csv("../data/Smartphone_chosse_preprocessed.csv")
phone_df



Unnamed: 0.1,Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,0,842,0,2.2,0,1,0,7,0.6,188,...,20,756,2549,9,7,19,0,0,1,1
1,1,1021,1,0.5,1,0,1,53,0.7,136,...,905,1988,2631,17,3,7,1,1,0,2
2,2,563,1,0.5,1,2,1,41,0.9,145,...,1263,1716,2603,11,2,9,1,1,0,2
3,3,615,1,2.5,0,0,0,10,0.8,131,...,1216,1786,2769,16,8,11,1,0,0,2
4,4,1821,1,1.2,0,13,1,44,0.6,141,...,1208,1212,1411,8,2,15,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1995,794,1,0.5,1,0,1,2,0.8,106,...,1222,1890,668,13,4,19,1,1,0,0
1996,1996,1965,1,2.6,1,0,0,39,0.2,187,...,915,1965,2032,11,10,16,1,1,1,2
1997,1997,1911,0,0.9,1,1,1,36,0.7,108,...,868,1632,3057,9,1,5,1,1,0,3
1998,1998,1512,0,0.9,0,4,1,46,0.1,145,...,336,670,869,18,10,19,1,1,1,0


# Выделим вектор исследуемого признака и матрицу объект-признак

In [6]:
y = phone_df["price_range"]
y

0       1
1       2
2       2
3       2
4       1
       ..
1995    0
1996    2
1997    3
1998    0
1999    3
Name: price_range, Length: 2000, dtype: int64

In [7]:
X = phone_df[phone_df.columns[1:]]
X

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,1,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,2,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,4,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,5,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,1,...,1208,1212,1411,8,2,15,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,794,1,0.5,1,0,1,2,0.8,106,5,...,1222,1890,668,13,4,19,1,1,0,0
1996,1965,1,2.6,1,0,0,39,0.2,187,3,...,915,1965,2032,11,10,16,1,1,1,2
1997,1911,0,0.9,1,1,1,36,0.7,108,7,...,868,1632,3057,9,1,5,1,1,0,3
1998,1512,0,0.9,0,4,1,46,0.1,145,4,...,336,670,869,18,10,19,1,1,1,0


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=0)

# Модель LinearRegression
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [9]:
lr = LinearRegression().fit(X_train, y_train)
print_model_metrics(lr, y_train, lr.predict(X_train))

Коэффициент детерминации: 1.0
MSE: 1.934055717319691e-27
RMSE: 4.3977900328684303e-14
MAE: 3.73688012320859e-14


# Модель DecisionTreeRegressor (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

In [10]:
parameters = {'max_depth': np.arange(1,15,1),
              'max_features': np.arange(5,X.shape[1]-1,1)}

In [11]:
model = GridSearchCV(DecisionTreeRegressor(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'max_depth': 2, 'max_features': 9}

In [12]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 1.0
MSE: 0.0
RMSE: 0.0
MAE: 0.0


# Модель Lasso (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html#sklearn.linear_model.Lasso

In [13]:
parameters = {'alpha': np.arange(0.1,2,0.05)}

In [14]:
model = GridSearchCV(Lasso(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'alpha': 0.1}

In [15]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 0.923942755087263
MSE: 0.09606101899581318
RMSE: 0.3099371210355629
MAE: 0.25830039477157724


# Модель LassoCV
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoCV.html

In [16]:
lasso_cv = LassoCV(cv=5, random_state=0).fit(X_train, y_train)
print_model_metrics(lasso_cv, y_train, lasso_cv.predict(X_train))

Коэффициент детерминации: 0.9171578090874551
MSE: 0.10457202104614784
RMSE: 0.3233759747509821
MAE: 0.26883121167748514


# Модель Ridge (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge

In [17]:
parameters = {'alpha': np.arange(0.1,2,0.05)}
model = GridSearchCV(Ridge(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'alpha': 0.1}

In [18]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 0.9999999733328597
MSE: 3.3623107864550846e-08
RMSE: 0.00018336604883279468
MAE: 0.00015268590275154678


# Модель RidgeCV
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeCV.html#sklearn.linear_model.RidgeCV

In [19]:
ridge_cv = RidgeCV(cv=5).fit(X_train, y_train)
print_model_metrics(ridge_cv, y_train,ridge_cv.predict(X_train))

Коэффициент детерминации: 0.9999999733328597
MSE: 3.3623107864550846e-08
RMSE: 0.00018336604883279468
MAE: 0.00015268590275154678


# Модель ElasticNet (+ гиперпараметры)
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html

In [20]:
parameters = {'alpha': np.arange(0.1,2,0.05), 'l1_ratio': np.arange(0.1, 1.1, 0.1)}
model = GridSearchCV(ElasticNet(random_state=0), parameters).fit(X_train, y_train)
model.best_params_

{'alpha': 0.1, 'l1_ratio': 0.1}

In [21]:
print_model_metrics(model, y_train, model.predict(X_train))

Коэффициент детерминации: 0.9781202306812166
MSE: 0.027634318972800664
RMSE: 0.16623573314062373
MAE: 0.1385898863936613


# Модель ElasticNetCV
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNetCV.html#sklearn.linear_model.ElasticNetCV

In [22]:
elastic_net_cv = ElasticNetCV(cv=5, random_state=0).fit(X_train, y_train)
print_model_metrics(elastic_net_cv, y_train,elastic_net_cv.predict(X_train))

Коэффициент детерминации: 0.9171578066641899
MSE: 0.10457203393369699
RMSE: 0.3233759946775533
MAE: 0.26883123310072826
