In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.datasets import load_boston
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [2]:
boston = load_boston()
data_boston = pd.DataFrame(boston.data, columns=boston.feature_names)
data_boston['PRICE'] = boston.target
           
lr_multi = LinearRegression()

x_column_list_for_multi = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
y_column_list_for_multi = ['PRICE']

lr_multi.fit(data_boston[x_column_list_for_multi], data_boston[y_column_list_for_multi])

print(lr_multi.coef_)
print(lr_multi.intercept_)

[[-1.08011358e-01  4.64204584e-02  2.05586264e-02  2.68673382e+00
  -1.77666112e+01  3.80986521e+00  6.92224640e-04 -1.47556685e+00
   3.06049479e-01 -1.23345939e-02 -9.52747232e-01  9.31168327e-03
  -5.24758378e-01]]
[36.45948839]


In [7]:
X_train, X_test, y_train, y_test = train_test_split(data_boston[x_column_list_for_multi], data_boston[y_column_list_for_multi], test_size=0.3)
lr_multi2 = LinearRegression()

lr_multi2.fit(X_train, y_train)
print(lr_multi2.coef_)
print(lr_multi2.intercept_)

y_pred = lr_multi2.predict(X_test)

# 残差
print(y_pred - y_test)

# MAE
print(mean_absolute_error(y_pred, y_test))

[[-1.07701204e-01  4.70016795e-02 -2.98818105e-02  2.70602033e+00
  -1.81983080e+01  3.82473373e+00 -1.68864644e-02 -1.65866057e+00
   2.90633195e-01 -1.26239339e-02 -9.38922050e-01  9.31514949e-03
  -4.23365245e-01]]
[37.73410466]
        PRICE
171  4.387537
87   3.948650
473 -4.771905
347  1.880963
251  0.058942
..        ...
267 -9.038326
80   0.445640
305  2.797473
303 -0.200050
467 -2.198942

[152 rows x 1 columns]
3.3663335721361207


In [17]:
lasso = Lasso(alpha=0.001, normalize=True)
lasso.fit(X_train, y_train)
print(lasso.coef_)
print(lasso.intercept_)

[-1.01711306e-01  4.31735959e-02 -3.64752333e-02  2.69003031e+00
 -1.74070886e+01  3.85859302e+00 -1.56205422e-02 -1.57622409e+00
  2.58387178e-01 -1.11048516e-02 -9.30669857e-01  9.10146919e-03
 -4.24417413e-01]
[36.39696819]


In [18]:
y_pred_lasso = lasso.predict(X_test)

# 残差
print(y_pred_lasso.reshape(-1, 1) - y_test)

# MAE
print(mean_absolute_error(y_pred_lasso, y_test))

        PRICE
171  4.418253
87   3.863515
473 -4.871853
347  1.951316
251  0.129102
..        ...
267 -9.197320
80   0.397752
305  2.438699
303 -0.334394
467 -2.323282

[152 rows x 1 columns]
3.344867230720637


In [19]:
ridge = Ridge(alpha=0.01, normalize=True)
ridge.fit(X_train, y_train)
print(ridge.coef_)
print(ridge.intercept_)

[[-1.02940649e-01  4.30487221e-02 -4.63885086e-02  2.76548423e+00
  -1.69759719e+01  3.86759922e+00 -1.70042724e-02 -1.57190554e+00
   2.53584548e-01 -1.07546144e-02 -9.23025348e-01  9.23515387e-03
  -4.17921896e-01]]
[35.92187799]


In [21]:
y_pred_ridge = ridge.predict(X_test)

# 残差
print(y_pred_ridge.reshape(-1, 1) - y_test)

# MAE
print(mean_absolute_error(y_pred_ridge, y_test))

        PRICE
171  4.289865
87   3.851285
473 -4.888878
347  2.000946
251  0.158028
..        ...
267 -9.207048
80   0.404392
305  2.430425
303 -0.336997
467 -2.342008

[152 rows x 1 columns]
3.350227427345092
