### 규제
* 학습이 과대적합되는 것을 방지하고자 하는 기법
* 라쏘(Lasso)
    - L1규제를 추가한 모형
    - 영향력이 크지 않은 회귀계수 값을 0으로 만드는 특성이 있다.
        * 회귀계수 : 독립변수의 값이 변화함에 따라 종속변수에 미치는 영향력 크기
    - alpha를 이용하여 규제 강도(가중치 크기)를 제어. alpha가 너무 작으면 과대적합, 너무 크면 과소적합될 우려가 있다.
    - 영향력이 작은 회귀계수를 0으로 만듦으로써 모델에서 가장 중요한 특성이 무엇인지 알 수 있다
* 릿지(Ridge)
    - L2규제를 추가한 모형
    - 계수값을 0으로 만들지는 않고, 0에 가깝게 작게 만드는 특성이 있다.
    - alpha를 이용하여 규제 강도(가중치 크기)를 제어. alpha가 너무 작으면 과대적합, 너무 크면 과소적합될 우려가 있다.
* 엘라스틱넷(ElasticNet)
    - L1, L2를 함께 결합한 모형
    - 피처가 많은 데이터세트에 적용
    - L1 규제로 feature의 수를 줄이고 L2규제로 계수값의 크기를 조정
    - 파라미터
        * alpha : L1규제의 alpha(a) + L2규제의 alpha(b). L1과 L2의 alpha를 합쳐 놓은 것이다.
        * l1_ratio = 0 : L2규제와 동일 (0에 가까워질수록 L2규제에 가까워진다)
        * l1_ratio = 1 : L1규제와 동일 (1에 가까워질수록 L1규제에 가까워진다)
        * 0 < l1_ratio < 1 : L1과 L2규제를 적절히 적용
* 계수(coefficient) : 학습을 통해 계산해서 얻은 값으로, 각 독립변수에 곱해지는 가중치

In [1]:
import pandas as pd
import numpy as np
import warnings

# Ignore every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by the libraries
# used below are hidden as well — consider a narrower filter.
warnings.filterwarnings("ignore")

In [2]:
# Read the housing table from the CSV file and preview its first five rows.
csv_path = "data/boston.csv"
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
# Display the column labels of the loaded frame.
df.columns

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT', 'PRICE'],
      dtype='object')

In [5]:
# Name of the target column to predict.
label = 'PRICE'

# Columns used as model inputs.
f = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]

# Feature matrix and target vector taken from the loaded frame.
X = df[f]
y = df[label]

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The seed is fixed so that the split,
# and therefore every score computed from it, is the same on each run;
# without random_state each execution draws a different random partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Display the (rows, columns) shape of each partition.
X_train.shape, X_test.shape

((354, 13), (152, 13))

In [7]:
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso

# alpha is the regularization strength: the higher the value, the stronger
# the penalty, and a strong penalty drives even influential coefficients to 0.
lasso = Lasso(alpha=0.07)
lasso.fit(X_train, y_train)

train_pred = lasso.predict(X_train)
test_pred = lasso.predict(X_test)

# The first line reports the training split, the second the test split.
# mean_squared_error expects (y_true, y_pred), so the ground truth goes first.
print("score : ", lasso.score(X_train, y_train),
      "mse : ", mean_squared_error(y_train, train_pred))
print("score : ", lasso.score(X_test, y_test),
      "mse : ", mean_squared_error(y_test, test_pred))

score :  0.7097355517449534 mse :  24.70431150628309
score :  0.7551814914684486 mse :  20.25509988745104


In [8]:
# Regularization strengths to compare.
alphas = [0.07, 0.1, 0.5, 1.3, 2]

# Refit a Lasso model per alpha and print its score and MSE on both splits.
for a in alphas:
    lasso = Lasso(alpha=a)
    lasso.fit(X_train, y_train)

    train_pred = lasso.predict(X_train)
    test_pred = lasso.predict(X_test)

    print("alpha : ", a)
    # Training split first, then test split. mean_squared_error expects
    # (y_true, y_pred), so the ground truth goes first.
    print("score : ", lasso.score(X_train, y_train),
          "mse : ", mean_squared_error(y_train, train_pred))
    print("score : ", lasso.score(X_test, y_test),
          "mse : ", mean_squared_error(y_test, test_pred))
    print("-" * 50)

alpha :  0.07
score :  0.7097355517449534 mse :  24.70431150628309
score :  0.7551814914684486 mse :  20.25509988745104
--------------------------------------------------
alpha :  0.1
score :  0.7085326063458954 mse :  24.806693792649984
score :  0.7550819613291534 mse :  20.263334530024952
--------------------------------------------------
alpha :  0.5
score :  0.690117045639873 mse :  26.374036093711606
score :  0.7253270240263718 mse :  22.725114200310827
--------------------------------------------------
alpha :  1.3
score :  0.6514427030100698 mse :  29.665596646066543
score :  0.6650288546969045 mse :  27.71389323554212
--------------------------------------------------
alpha :  2
score :  0.621569458971402 mse :  32.20809859858263
score :  0.642929202377295 mse :  29.542311633712128
--------------------------------------------------


In [9]:
from sklearn.model_selection import GridSearchCV

# Candidate alpha values for the search.
params = dict(alpha=[0.07, 0.1, 0.5, 1.3, 2])

# Search the candidates with cv=5 cross-validation on the training split.
lasso = Lasso()
grid_cv = GridSearchCV(estimator=lasso, param_grid=params, cv=5)
grid_cv.fit(X_train, y_train)

# Report the selected setting, then the search object's score on each split.
print("최적의 하이퍼 파라미터 : ", grid_cv.best_params_)
for split_label, features, target in (
    ("train : ", X_train, y_train),
    ("test : ", X_test, y_test),
):
    print(split_label, grid_cv.score(features, target))

최적의 하이퍼 파라미터 :  {'alpha': 0.07}
train :  0.7097355517449534
test :  0.7551814914684486


In [11]:
# Fit a Lasso with alpha=3, print the feature names, then display the
# fitted coefficient array.
lasso = Lasso(alpha=3).fit(X_train, y_train)
print(X_train.columns)
lasso.coef_

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')


array([-0.01542137,  0.04796733, -0.        ,  0.        ,  0.        ,
        0.        ,  0.05718535, -0.        ,  0.0199944 , -0.00524226,
       -0.30256142,  0.00706153, -0.79798671])

In [12]:
# Display the alpha values currently held in `alphas`.
alphas

[0.07, 0.1, 0.5, 1.3, 2]

In [16]:
# Table with one row per feature; a column of fitted Lasso coefficients is
# appended for every alpha.
coeff_df = pd.DataFrame(index=X_train.columns)

for idx, alpha in enumerate(alphas):
    print(idx, ":", alpha)
    lasso = Lasso(alpha=alpha).fit(X_train, y_train)
    column_label = f"alpha : {alpha}"
    coeff_df[column_label] = lasso.coef_

# Display the assembled table.
coeff_df

0 : 0.07
1 : 0.1
2 : 0.5
3 : 1.3
4 : 2


Unnamed: 0,alpha : 0.07,alpha : 0.1,alpha : 0.5,alpha : 1.3,alpha : 2
CRIM,-0.106645,-0.106446,-0.095474,-0.067241,-0.042636
ZN,0.06016,0.060384,0.062598,0.058445,0.047019
INDUS,-0.04053,-0.035452,-0.0,-0.0,0.0
CHAS,2.622554,2.117655,0.0,0.0,0.0
NOX,-0.0,-0.0,-0.0,-0.0,-0.0
RM,2.916672,2.810614,1.481244,0.0,0.0
AGE,0.001647,0.003264,0.020304,0.044069,0.056902
DIS,-1.265648,-1.254672,-1.048596,-0.579102,-0.115025
RAD,0.30083,0.304378,0.307778,0.238325,0.146172
TAX,-0.014343,-0.014583,-0.015489,-0.01279,-0.008896


In [17]:
from sklearn.linear_model import Ridge

# Regularization strengths to compare for Ridge; this rebinds `alphas`.
alphas = [ 0.01 , 0.1 , 1 , 10 , 100 ]

In [19]:
# Refit a Ridge model per alpha and print its score and MSE on both splits.
for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train, y_train)

    train_pred = ridge.predict(X_train)
    test_pred = ridge.predict(X_test)

    train_score = ridge.score(X_train, y_train)
    test_score = ridge.score(X_test, y_test)

    # mean_squared_error expects (y_true, y_pred), so the ground truth goes first.
    train_mse = mean_squared_error(y_train, train_pred)
    test_mse = mean_squared_error(y_test, test_pred)

    print("alpha : ", alpha)
    print("train : ", train_score, "mse : ", train_mse)
    print("test : ", test_score, "mse : ", test_mse)
    print("-" * 50)

alpha :  0.01
train :  0.7269772644705177 mse :  23.23687501988321
test :  0.7523650471698196 mse :  20.488118873386167
--------------------------------------------------
alpha :  0.1
train :  0.726852844715702 mse :  23.24746434420102
test :  0.7538723820160216 mse :  20.36340927501149
--------------------------------------------------
alpha :  1
train :  0.7230554244078314 mse :  23.570661534796987
test :  0.7579149064496089 mse :  20.028950345860626
--------------------------------------------------
alpha :  10
train :  0.7124027855817248 mse :  24.47730411368019
test :  0.7545026988117784 mse :  20.311259910425566
--------------------------------------------------
alpha :  100
train :  0.6962988319652581 mse :  25.847906297363256
test :  0.7256140931201485 mse :  22.701363491249158
--------------------------------------------------


In [20]:
# Display the alpha values currently held in `alphas`.
alphas

[0.01, 0.1, 1, 10, 100]

In [22]:
# Table with one row per feature; a column of fitted Ridge coefficients is
# appended for every alpha.
coeff_df = pd.DataFrame(index=X_train.columns)

for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(X_train, y_train)
    column_label = f"alpha : {alpha}"
    coeff_df[column_label] = ridge.coef_

# Display the assembled table.
coeff_df

Unnamed: 0,alpha : 0.01,alpha : 0.1,alpha : 1,alpha : 10,alpha : 100
CRIM,-0.12153,-0.120409,-0.114784,-0.110369,-0.109011
ZN,0.058498,0.058602,0.05921,0.060954,0.064977
INDUS,0.027454,0.021192,-0.010587,-0.039586,-0.038155
CHAS,3.82557,3.806645,3.635813,2.549949,0.657917
NOX,-21.460572,-19.753703,-11.001874,-2.021321,-0.205769
RM,2.941903,2.956291,3.013522,2.807783,1.517343
AGE,0.016296,0.014876,0.007802,0.003696,0.018833
DIS,-1.652969,-1.624291,-1.477934,-1.332523,-1.186216
RAD,0.354559,0.349838,0.326593,0.315481,0.342118
TAX,-0.012499,-0.012611,-0.013232,-0.014461,-0.016045


In [23]:
from sklearn.linear_model import ElasticNet

# l1_ratio closer to 0: closer to the L2 (Ridge) penalty
# l1_ratio closer to 1: closer to the L1 (Lasso) penalty
ratios = [ 0.2 , 0.5 , 0.8 ] 
alphas = [0.1 , 0.7 , 1.5] # regularization strength

In [24]:
# Fit one ElasticNet configuration and print its score on each split.
el = ElasticNet(l1_ratio=0.2, alpha=0.7)
el.fit(X_train, y_train)

for split_label, features, target in (
    ("train : ", X_train, y_train),
    ("test : ", X_test, y_test),
):
    print(split_label, el.score(features, target))

train :  0.6830307698046927
test :  0.7067891208852564


In [25]:
# Search every (alpha, l1_ratio) combination from `alphas` and `ratios`.
params = dict(alpha=alphas, l1_ratio=ratios)

el = ElasticNet()
grid_cv = GridSearchCV(estimator=el, param_grid=params, cv=5)
grid_cv.fit(X_train, y_train)

# Report the selected setting, then the search object's score on each split.
print("최적의 하이퍼 파라미터 : ", grid_cv.best_params_)
for split_label, features, target in (
    ("train : ", X_train, y_train),
    ("test : ", X_test, y_test),
):
    print(split_label, grid_cv.score(features, target))

최적의 하이퍼 파라미터 :  {'alpha': 0.1, 'l1_ratio': 0.2}
train :  0.7060082114121442
test :  0.745933395325818
