In [1]:
# data analysis and wrangling
import pandas as pd
import numpy as np

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# machine learning
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

# data load
from sklearn.datasets import load_boston
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures



In [2]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned scikit-learn version before re-running this notebook.
boston_dataset = load_boston()

In [3]:
# List the fields of the loaded dataset object (data, target, feature_names, ...).
boston_dataset.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

In [4]:
# Wrap the raw feature matrix in a DataFrame labelled with the dataset's feature names.
boston_data_df = pd.DataFrame(
    data=boston_dataset.data,
    columns=boston_dataset.feature_names,
)

In [5]:
# Preview the first five rows of the feature table.
boston_data_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [6]:
# Check column dtypes and non-null counts.
boston_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 13 columns):
CRIM       506 non-null float64
ZN         506 non-null float64
INDUS      506 non-null float64
CHAS       506 non-null float64
NOX        506 non-null float64
RM         506 non-null float64
AGE        506 non-null float64
DIS        506 non-null float64
RAD        506 non-null float64
TAX        506 non-null float64
PTRATIO    506 non-null float64
B          506 non-null float64
LSTAT      506 non-null float64
dtypes: float64(13)
memory usage: 51.5 KB


In [7]:
# Summary statistics per feature; the value ranges differ widely between columns
# (e.g. NOX vs TAX), which motivates the scaling step that follows.
boston_data_df.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97


## BostonのデータをTrainとTestにSplit、その後スケール変換(MinMaxScaler)

In [8]:
# Hold out a test split; random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    boston_dataset.data,
    boston_dataset.target,
    random_state=0,
)

In [9]:
# Per-feature minimum of the unscaled training split (axis=0 reduces over rows).
X_train.min(axis=0)

array([6.3200e-03, 0.0000e+00, 4.6000e-01, 0.0000e+00, 3.8500e-01,
       3.5610e+00, 2.9000e+00, 1.1742e+00, 1.0000e+00, 1.8700e+02,
       1.2600e+01, 3.2000e-01, 1.7300e+00])

In [10]:
# Per-feature maximum of the unscaled training split (axis=0 reduces over rows).
X_train.max(axis=0)

array([ 88.9762, 100.    ,  27.74  ,   1.    ,   0.871 ,   8.78  ,
       100.    ,  12.1265,  24.    , 711.    ,  22.    , 396.9   ,
        36.98  ])

In [11]:
# Min-max scaler with default settings (feature_range=(0, 1) per the fitted repr).
scaler = MinMaxScaler()

In [12]:
# Learn the scaling from the training split only, so that no information
# from the test split leaks into the preprocessing.
scaler.fit(X_train)

MinMaxScaler(copy=True, feature_range=(0, 1))

In [13]:
# Scale the training features with the fitted scaler.
X_train_scaled = scaler.transform(X_train)

In [14]:
# Sanity check: every training feature's minimum is 0 after scaling.
X_train_scaled.min(axis=0)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [15]:
# Sanity check: every training feature's maximum is 1 after scaling.
X_train_scaled.max(axis=0)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [16]:
# Apply the scaler that was fitted on the training split to the test split (no re-fitting).
X_test_scaled = scaler.transform(X_test)

In [17]:
# Test minima are not all exactly 0 (one is slightly negative) because the
# scaler was fitted on the training split only.
X_test_scaled.min(axis=0)

array([ 8.18254447e-05,  0.00000000e+00,  1.02639296e-02,  0.00000000e+00,
        1.44032922e-02,  5.78654915e-02,  5.66426365e-02, -4.07220401e-03,
        0.00000000e+00,  1.90839695e-03,  0.00000000e+00,  5.74915528e-03,
        5.39007092e-03])

In [18]:
# Test maxima are not all exactly 1 (the last column slightly exceeds 1) because
# the scaler was fitted on the training split only.
X_test_scaled.max(axis=0)

array([0.82643452, 0.9       , 0.9233871 , 1.        , 1.        ,
       0.98946158, 1.        , 0.87069383, 1.        , 0.91412214,
       0.91489362, 1.        , 1.02808511])

In [19]:
# Shape of the unscaled training split (rows, features).
X_train.shape

(379, 13)

In [20]:
# Scaling must not change the shape of the training split.
X_train_scaled.shape

(379, 13)

In [21]:
# Shape of the unscaled test split (rows, features).
X_test.shape

(127, 13)

In [22]:
# Scaling must not change the shape of the test split.
X_test_scaled.shape

(127, 13)

## SplitしたBostonのデータをもとに多項式特徴量を導入

In [23]:
# Degree-2 expansion: a constant column, the 13 original features, their squares
# and all pairwise products (105 columns in total for 13 inputs).
poly = PolynomialFeatures(degree=2)

In [24]:
# Fit the expansion on the scaled training features.
poly.fit(X_train_scaled)

PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)

In [25]:
# Expand the scaled training features into polynomial features.
X_train_poly = poly.transform(X_train_scaled)

In [26]:
# Expand the scaled test features with the same fitted transformer.
X_test_poly = poly.transform(X_test_scaled)

In [27]:
# The expansion keeps the row count and grows the feature count from 13 to 105.
X_train_poly.shape

(379, 105)

In [28]:
# The test split gets the same 105 polynomial columns as the training split.
X_test_poly.shape

(127, 105)

In [29]:
# List the generated feature names ('1', 'x0', ..., 'x12^2').
# PolynomialFeatures.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so use get_feature_names_out() when the installed version has
# it and fall back to the old method otherwise. list() keeps the display a plain list.
list(
    poly.get_feature_names_out()
    if hasattr(poly, "get_feature_names_out")
    else poly.get_feature_names()
)

['1',
 'x0',
 'x1',
 'x2',
 'x3',
 'x4',
 'x5',
 'x6',
 'x7',
 'x8',
 'x9',
 'x10',
 'x11',
 'x12',
 'x0^2',
 'x0 x1',
 'x0 x2',
 'x0 x3',
 'x0 x4',
 'x0 x5',
 'x0 x6',
 'x0 x7',
 'x0 x8',
 'x0 x9',
 'x0 x10',
 'x0 x11',
 'x0 x12',
 'x1^2',
 'x1 x2',
 'x1 x3',
 'x1 x4',
 'x1 x5',
 'x1 x6',
 'x1 x7',
 'x1 x8',
 'x1 x9',
 'x1 x10',
 'x1 x11',
 'x1 x12',
 'x2^2',
 'x2 x3',
 'x2 x4',
 'x2 x5',
 'x2 x6',
 'x2 x7',
 'x2 x8',
 'x2 x9',
 'x2 x10',
 'x2 x11',
 'x2 x12',
 'x3^2',
 'x3 x4',
 'x3 x5',
 'x3 x6',
 'x3 x7',
 'x3 x8',
 'x3 x9',
 'x3 x10',
 'x3 x11',
 'x3 x12',
 'x4^2',
 'x4 x5',
 'x4 x6',
 'x4 x7',
 'x4 x8',
 'x4 x9',
 'x4 x10',
 'x4 x11',
 'x4 x12',
 'x5^2',
 'x5 x6',
 'x5 x7',
 'x5 x8',
 'x5 x9',
 'x5 x10',
 'x5 x11',
 'x5 x12',
 'x6^2',
 'x6 x7',
 'x6 x8',
 'x6 x9',
 'x6 x10',
 'x6 x11',
 'x6 x12',
 'x7^2',
 'x7 x8',
 'x7 x9',
 'x7 x10',
 'x7 x11',
 'x7 x12',
 'x8^2',
 'x8 x9',
 'x8 x10',
 'x8 x11',
 'x8 x12',
 'x9^2',
 'x9 x10',
 'x9 x11',
 'x9 x12',
 'x10^2',
 'x10 x11',
 'x10 x12',
 'x11^2',
 'x11 x12',
 'x12^2']

In [30]:
# Label the polynomial feature matrix with the generated feature names.
# PolynomialFeatures.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so prefer get_feature_names_out() when available and fall back
# to the old method on older versions.
poly_feature_names = (
    poly.get_feature_names_out()
    if hasattr(poly, "get_feature_names_out")
    else poly.get_feature_names()
)
X_train_poly_df = pd.DataFrame(X_train_poly, columns=list(poly_feature_names))

In [31]:
# Preview the first five rows of the polynomial feature table.
X_train_poly_df.head()

Unnamed: 0,1,x0,x1,x2,x3,x4,x5,x6,x7,x8,...,x9^2,x9 x10,x9 x11,x9 x12,x10^2,x10 x11,x10 x12,x11^2,x11 x12,x12^2
0,1.0,0.002079,0.22,0.197947,0.0,0.09465,0.391646,0.6931,0.619112,0.26087,...,0.074475,0.188708,0.267554,0.129521,0.478158,0.677941,0.328188,0.961199,0.465311,0.225255
1,1.0,0.00109,0.25,0.171188,0.0,0.139918,0.453344,0.456231,0.525716,0.304348,...,0.034267,0.139821,0.185115,0.039334,0.570507,0.755319,0.160492,1.0,0.212482,0.045149
2,1.0,0.001106,0.4,0.218109,1.0,0.127572,0.710098,0.474768,0.329885,0.130435,...,0.016349,0.068012,0.125396,0.01567,0.282933,0.521654,0.065188,0.961792,0.120189,0.015019
3,1.0,0.097903,0.0,0.646628,0.0,0.633745,0.557578,0.987642,0.050355,1.0,...,0.835619,0.739077,0.902781,0.399102,0.653689,0.79848,0.352992,0.975342,0.431179,0.190616
4,1.0,0.121703,0.0,0.646628,0.0,0.604938,0.617168,0.905252,0.058919,1.0,...,0.835619,0.739077,0.048982,0.623937,0.653689,0.043323,0.551852,0.002871,0.036573,0.465879


In [32]:
# Check the column count and dtypes of the polynomial feature table.
X_train_poly_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 379 entries, 0 to 378
Columns: 105 entries, 1 to x12^2
dtypes: float64(105)
memory usage: 311.0 KB


In [33]:
# Summary statistics of the polynomial features; the constant column '1' has zero variance.
X_train_poly_df.describe()

Unnamed: 0,1,x0,x1,x2,x3,x4,x5,x6,x7,x8,...,x9^2,x9 x10,x9 x11,x9 x12,x10^2,x10 x11,x10 x12,x11^2,x11 x12,x12^2
count,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,...,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0
mean,1.0,0.038287,0.118615,0.389425,0.065963,0.352182,0.527243,0.678019,0.238099,0.365722,...,0.276889,0.292734,0.341656,0.162663,0.446817,0.558172,0.210462,0.867547,0.259588,0.135796
std,0.0,0.09282,0.238101,0.257386,0.248546,0.239051,0.132859,0.290573,0.191278,0.377221,...,0.349707,0.286788,0.291177,0.198394,0.251577,0.255188,0.175503,0.257476,0.183354,0.168939
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.000774,0.0,0.167889,0.0,0.131687,0.449032,0.436148,0.084562,0.130435,...,0.028848,0.075219,0.143697,0.023288,0.260751,0.40551,0.067916,0.897973,0.118493,0.019879
50%,1.0,0.002892,0.0,0.296921,0.0,0.314815,0.511209,0.774459,0.186399,0.173913,...,0.073437,0.139821,0.222661,0.060309,0.478158,0.620379,0.156451,0.975093,0.222459,0.072479
75%,1.0,0.038118,0.2,0.646628,0.0,0.506173,0.596858,0.936148,0.35997,1.0,...,0.835619,0.739077,0.459564,0.275404,0.653689,0.767777,0.334643,0.995265,0.358104,0.188397
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,0.797872,1.0,0.914122,1.0,0.964446,0.848191,1.0,0.937305,1.0


## 線形回帰(Bostonのデータについて多項式特徴量導入前後のスコア比較)

In [34]:
# Before introducing polynomial features

In [35]:
# Baseline: ordinary least squares on the original 13 features.
lr_org = LinearRegression()

In [36]:
# Fit the baseline on the raw (unscaled) training split.
lr_org.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [37]:
# Baseline score (R^2) on the training split.
lr_org.score(X_train,y_train)

0.7697699488741149

In [38]:
# Baseline score (R^2) on the held-out test split.
lr_org.score(X_test,y_test)

0.6354638433202129

In [39]:
# After introducing polynomial features

In [40]:
# Ordinary least squares again, this time for the 105 polynomial features.
lr_poly = LinearRegression()

In [41]:
# Fit on the scaled, polynomial-expanded training features.
lr_poly.fit(X_train_poly , y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [42]:
# Training score (R^2) with polynomial features; much higher than the baseline's training score.
lr_poly.score(X_train_poly,y_train)

0.952051960903273

In [43]:
# Test score (R^2) with polynomial features; it is lower than the baseline's test
# score despite the higher training score, i.e. the unregularized model overfits.
lr_poly.score(X_test_poly,y_test)

0.6074721959666032

## リッジ回帰(アルファがゼロの場合 ※つまり線形回帰とイコールになるはず)

In [44]:
# Ridge with no penalty (alpha=0), used as a sanity check against LinearRegression.
# NOTE(review): scikit-learn's Ridge documentation advises against alpha=0 for
# numerical reasons — acceptable here only as a demonstration; confirm.
ridge_alpha0 = Ridge(alpha=0)

In [45]:
# Fit on the same raw training split as the linear-regression baseline.
ridge_alpha0.fit(X_train,y_train)

Ridge(alpha=0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [46]:
# Training score (R^2) of the unpenalized ridge model.
ridge_alpha0.score(X_train,y_train)

0.7697699488741149

In [47]:
# Test score (R^2) of the unpenalized ridge model.
ridge_alpha0.score(X_test,y_test)

0.635463843320212

In [48]:
# As expected, the scores match plain linear regression (up to floating-point rounding).

## リッジ回帰(アルファが1の場合、Bostonのデータについて多項式特徴量導入前後のスコア比較)

In [49]:
# Before introducing polynomial features

In [50]:
# Ridge with the default penalty (alpha=1.0 per the fitted repr) for the original features.
ridge_alpha1_org = Ridge()

In [51]:
# Fit on the raw (unscaled) training split.
ridge_alpha1_org.fit(X_train,y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [52]:
# Training score (R^2) of ridge (alpha=1) on the original features.
ridge_alpha1_org.score(X_train,y_train)

0.7678858330771392

In [53]:
# Test score (R^2) of ridge (alpha=1) on the original features.
ridge_alpha1_org.score(X_test,y_test)

0.6266182204613855

In [54]:
# After introducing polynomial features

In [55]:
# Ridge with the default penalty (alpha=1.0 per the fitted repr) for the polynomial features.
ridge_alpha1_poly = Ridge()

In [56]:
# Fit on the scaled, polynomial-expanded training features.
ridge_alpha1_poly.fit(X_train_poly,y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [57]:
# Training score (R^2) of ridge (alpha=1) on the polynomial features.
ridge_alpha1_poly.score(X_train_poly,y_train)

0.8865631765824034

In [58]:
# Test score (R^2) of ridge (alpha=1) on the polynomial features; the penalty
# lifts it well above the unregularized polynomial fit's test score.
ridge_alpha1_poly.score(X_test_poly,y_test)

0.7534782020980575

## リッジ回帰(アルファが0.1の場合)

In [59]:
# Ridge with a weaker penalty (alpha=0.1) for the polynomial features.
ridge_alpha01_poly = Ridge(alpha=0.1)

In [60]:
# Fit on the scaled, polynomial-expanded training features.
ridge_alpha01_poly.fit(X_train_poly,y_train)

Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [61]:
# Training score (R^2) of ridge (alpha=0.1) on the polynomial features.
ridge_alpha01_poly.score(X_train_poly,y_train)

0.9283910578910985

In [62]:
# Test score (R^2) of ridge (alpha=0.1) on the polynomial features.
ridge_alpha01_poly.score(X_test_poly,y_test)

0.7722110138118866

In [63]:
# Inspect the fitted coefficients; the first entry belongs to the constant '1'
# column and is 0.
ridge_alpha01_poly.coef_

array([ 0.00000000e+00, -2.32938993e+00, -5.40726150e+00, -6.09486857e+00,
       -7.92183750e-01,  1.22940752e+00,  1.90910335e+01,  4.39316995e+00,
       -1.11583652e+01,  9.55238587e+00, -7.23820083e-01, -6.12575965e-01,
        6.22862150e+00, -2.92058909e+00,  5.80512898e+00,  2.30341644e-02,
       -8.97499768e-01,  4.66020763e+00, -3.39956005e+00,  1.32783437e+00,
       -3.48639558e+00,  4.55188827e-01, -3.48855743e+00, -2.50119580e+00,
       -2.68700480e+00,  3.44971401e-01, -1.15634230e+00,  3.03260968e+00,
       -2.67771677e+00,  4.22708429e+00, -2.95266682e+00,  8.70739533e+00,
       -3.10585558e+00, -3.09525253e-01,  2.12431235e+00,  3.62455259e+00,
        2.26517355e+00, -2.82520858e+00, -6.15632758e+00,  8.04911011e+00,
        1.61811200e+00,  8.69802439e+00,  1.00365251e+00,  5.81969094e+00,
       -4.63827548e+00,  5.05977930e+00,  7.98418938e+00, -8.36893020e+00,
       -7.35621218e-01, -1.43090828e+01, -7.92183750e-01, -1.37842473e+01,
       -1.59572404e+01,  

In [64]:
# One coefficient per polynomial feature column.
ridge_alpha01_poly.coef_.shape

(105,)

In [65]:
# Count the non-zero ridge coefficients. np.count_nonzero states the intent
# directly and returns a plain int.
np.count_nonzero(ridge_alpha01_poly.coef_)

104

## Lasso回帰(アルファが1の場合)

In [66]:
# Lasso with default settings (alpha=1.0, max_iter=1000 per the fitted repr).
lasso_alpha1_poly = Lasso()

In [67]:
# Fit on the scaled, polynomial-expanded training features.
lasso_alpha1_poly.fit(X_train_poly,y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [68]:
# Training score (R^2) of lasso (alpha=1); a low value here indicates underfitting.
lasso_alpha1_poly.score(X_train_poly,y_train)

0.3083322652688746

In [69]:
# Test score (R^2) of lasso (alpha=1).
lasso_alpha1_poly.score(X_test_poly,y_test)

0.22923289362997012

In [70]:
# Count how many coefficients the L1 penalty left non-zero. np.count_nonzero
# states the intent directly and returns a plain int.
np.count_nonzero(lasso_alpha1_poly.coef_)

4

## Lasso回帰(アルファが0.01、繰り返し数が100000の場合)

In [71]:
# Lasso with a much weaker penalty (alpha=0.01). max_iter is raised far above the
# default of 1000 — presumably so the solver converges at this small alpha; verify.
lasso_alpha001_poly = Lasso(alpha=0.01,max_iter=100000)

In [72]:
# Fit on the scaled, polynomial-expanded training features.
lasso_alpha001_poly.fit(X_train_poly,y_train)

Lasso(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=100000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [73]:
# Training score (R^2) of lasso (alpha=0.01).
lasso_alpha001_poly.score(X_train_poly,y_train)

0.8964421720767356

In [74]:
# Test score (R^2) of lasso (alpha=0.01).
lasso_alpha001_poly.score(X_test_poly,y_test)

0.7659973757586753

In [75]:
# Count how many coefficients the L1 penalty left non-zero. np.count_nonzero
# states the intent directly and returns a plain int.
np.count_nonzero(lasso_alpha001_poly.coef_)

33