In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from sklearn.metrics import *
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
from sklearn.datasets import fetch_california_housing
# Load the California housing dataset. The returned object is read below via
# its `.data`, `.feature_names` and `.target` attributes.
california = fetch_california_housing()

In [3]:
# Assemble a single table: the feature matrix under its feature names, with
# the regression target appended as the last column, 'Price'.
data = pd.DataFrame(
    california.data,
    columns=california.feature_names,
).assign(Price=california.target)

# Preview the first three rows.
data.head(3)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Price
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521


In [4]:
# Features are every column except 'Price'; the target is the 'Price' column.
X = data.drop(columns='Price')
y = data['Price']

# Show both shapes (target first, then features).
(y.shape, X.shape)

((20640,), (20640, 8))

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Shapes of the four pieces, in the order they were returned.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((15480, 8), (5160, 8), (15480,), (5160,))

In [6]:
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least-squares regression on the raw features.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# Display the fitted estimator as the cell output.
model

In [7]:
# Print every feature name next to its fitted linear-regression coefficient.
# A plain loop replaces the original list comprehension, which was run only
# for its side effects and stored its throwaway result in `_`. Rebinding `_`
# in a notebook shadows IPython's automatic "last output" variable.
for feature_name, coefficient in zip(X.columns, model.coef_):
    print(feature_name, coefficient)

MedInc 0.44760006851694656
HouseAge 0.009567525956130706
AveRooms -0.12475595615841213
AveBedrms 0.7944712539835435
Population -1.4390259614454476e-06
AveOccup -0.0034430799260360325
Latitude -0.41855525668859733
Longitude -0.4334051354775118


In [17]:
# Predict prices for the held-out rows with the baseline linear model.
y_pred = model.predict(X_test)

# Put the true and predicted values side by side, indexed like y_test.
look_pred = y_test.to_frame(name='y_test').assign(y_pred=y_pred)

# Preview the first three comparisons.
look_pred.head(3)

Unnamed: 0,y_test,y_pred
20046,0.477,0.724128
3024,0.458,1.766778
15663,5.00001,2.711516


Какую еще информацию можно вывести для обученной модели? Попробуйте изменить аргументы при создании модели и посмотрите, как это влияет на качество предсказания.

In [18]:
# Degree-2 polynomial expansion of the features.
# The transformer is fitted once, on the training split only, and the same
# fitted object is then applied to the test split. The original fitted a
# second, independent transformer on the test data. The resulting numbers
# are the same here, but fitting preprocessing on test data is the pattern
# to avoid, and reusing one object guarantees both splits share a layout.
poly_features = PolynomialFeatures(2)
poly_train = poly_features.fit_transform(X_train)
poly_test = poly_features.transform(X_test)

In [20]:
# Ordinary least squares on the degree-2 polynomial features.
polynomial = LinearRegression()
polynomial.fit(poly_train, y_train)
y_pred_poly = polynomial.predict(poly_test)

# R^2 on the test split. y_test is passed directly, the same way
# get_metrics() passes it; the former `.tolist()` round-trip converted the
# whole Series to a Python list for no benefit.
r2_score(y_test, y_pred_poly)

0.6563005894194118

In [21]:
# Dump the fitted attributes of the polynomial regression, one per line, in
# this order: coef_, rank_, singular_, intercept_, n_features_in_.
fitted_report = f'\n {polynomial.coef_} \n {polynomial.rank_} \n {polynomial.singular_} \n {polynomial.intercept_} \n {polynomial.n_features_in_}'
print(fitted_report)


 [-4.07904609e-08 -1.18149889e+01 -8.49101202e-01  7.93341507e+00
 -3.90263704e+01 -4.90165007e-04  1.03727716e+00  8.27990514e+00
  5.74137596e+00 -3.05936096e-02  1.59801803e-03  3.86922839e-02
 -1.38726408e-01  5.11408705e-05 -3.84501146e-03 -1.61733233e-01
 -1.51724214e-01  2.11029781e-04 -5.98080380e-04  1.09081916e-02
  2.56573553e-06 -1.97181492e-03 -1.03455043e-02 -1.00661914e-02
  1.16865965e-02 -1.16437968e-01 -5.51721919e-05  2.13589483e-02
  1.06985141e-01  1.00602280e-01  2.88810788e-01  4.88926266e-04
 -8.65448374e-02 -5.40042730e-01 -4.96189958e-01  1.42020440e-09
  2.44088253e-05  1.05046721e-05  3.64069361e-06  5.25946150e-05
  2.03788796e-02  1.58781028e-02  5.80472939e-02  1.01661358e-01
  3.81657320e-02] 
 44 
 [1.80738364e+09 1.26545939e+07 1.02765755e+07 2.40189404e+06
 4.35275741e+05 3.31771380e+05 3.17720985e+05 1.68920211e+05
 1.32741728e+05 4.80305525e+04 3.81804977e+04 3.33632869e+04
 2.02410750e+04 1.97787906e+04 1.72696657e+04 1.06897549e+04
 8.79698094e+0

Попробуйте применить к той же задаче другие модели регрессии. Для каждой из них выведите визуализацию регрессии и оценку точности.

In [22]:
def get_metrics(y_test, y_pred):
    """Print MSE, MAPE and R2 for a set of predictions.

    Args:
        y_test: True target values.
        y_pred: Predicted target values, aligned with ``y_test``.

    Returns:
        None. The three scores are written to stdout, one per line.
    """
    # Each score is computed and printed before the next one is evaluated.
    mse = mean_squared_error(y_test, y_pred)
    print(f'MSE: {mse}')
    mape = mean_absolute_percentage_error(y_test, y_pred)
    print(f'MAPE: {mape}')
    r2 = r2_score(y_test, y_pred)
    print(f'R2: {r2}')
# Model A: degree-2 polynomial features -> standardisation -> linear SVR.
regr_a = make_pipeline(
    PolynomialFeatures(degree=2),
    StandardScaler(),
    LinearSVR(C=0.8, epsilon=0.2, max_iter=10000, random_state=42),
)

# Fit on the training split, predict the held-out rows, then report scores.
y_pred = regr_a.fit(X_train, y_train).predict(X_test)
get_metrics(y_test, y_pred)

MSE: 0.46319029355254887
MAPE: 0.2640193121484273
R2: 0.6499516656908483


In [23]:
# Model B: standardisation -> SVR with a degree-2 polynomial kernel.
regr_b = make_pipeline(
    StandardScaler(),
    SVR(kernel='poly', degree=2, coef0=0.5),
)

# Fit on the training split, predict the held-out rows, then report scores.
y_pred = regr_b.fit(X_train, y_train).predict(X_test)
get_metrics(y_test, y_pred)

MSE: 0.4727480300435908
MAPE: 0.24675266633144724
R2: 0.6427285658439266


In [None]:
# Model C: degree-2 polynomial features -> standardisation -> RBF-kernel SVR.
# NOTE(review): this cell has no execution count or output recorded in the
# notebook; presumably it was never run to completion. Confirm before
# relying on it.
regr_c = make_pipeline(
    PolynomialFeatures(degree=2),
    StandardScaler(),
    SVR(kernel='rbf', C=100, gamma=0.1),
)

# Fit on the training split, predict the held-out rows, then report scores.
y_pred = regr_c.fit(X_train, y_train).predict(X_test)
get_metrics(y_test, y_pred)