In [1]:
import numpy as np
import pandas as pd

from sklearn import linear_model
from sklearn.metrics import mean_squared_error

from sklearn.model_selection import KFold

In [2]:
# Ridge regression
model = linear_model.Ridge(alpha=0.1) # alpha : tuning parameter (lambda 상수), 값을 바꾸면 결과가 달라짐.

# 훈련하기
model.fit([[0, 0], [0, 0], [1, 1]], [0, 0.1, 1])

# 예측하기
pred = model.predict([[0, 1]])
pred

array([0.51395349])

In [3]:
print('coefficient :', model.coef_)

coefficient : [0.44186047 0.44186047]


In [4]:
# Lasso regression
model = linear_model.Lasso(alpha=0.1) # alpha : tuning parameter (lambda 상수), 값을 바꾸면 결과가 달라짐.

# 훈련하기
model.fit([[0, 0], [0, 0], [1, 1]], [0, 0.1, 1])

# 예측하기
pred = model.predict([[0, 1]])
pred

array([0.2])

In [5]:
print('coefficient :', model.coef_)

coefficient : [0.5 0. ]


* 실데이터

In [8]:
path = '../datasets/ML_data/'

raw = pd.read_csv(path+'Advertising.csv', index_col=0)
ad = raw.copy()
print(ad.shape)
ad.head(2)

(200, 4)


Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4


In [15]:
# ad 데이터에 Lasso 적용하기 --> 최적의 lambda 선정
n_fold = 5

for t_param in (0.001, 1, 1000):
    print('Lambda :', t_param)

    # 모델 생성
    kfold = KFold(n_splits=n_fold)
    idx = 1
    
    sum_val_mse = 0
    for train, val in kfold.split(ad):
        print('='*10, 'Fold #', idx, '='*10)
    
        # train data의 feature, label
        X_train = ad.iloc[train][['TV', 'Radio', 'Newspaper']]
        y_train = ad.iloc[train][['Sales']]

        # validation data의 feature, label
        X_val = ad.iloc[val][['TV', 'Radio', 'Newspaper']]
        y_val = ad.iloc[val][['Sales']]

        # Lasso 적용하기
        model = linear_model.Lasso(alpha=t_param)

        # 훈련
        model.fit(X_train, y_train)

        # coefficient
        print('coefficients :', model.coef_)

        # validation set 예측
        y_val_pred = model.predict(X_val)

        # validation MSE
        val_MSE = mean_squared_error(y_val, y_val_pred)
        sum_val_mse = val_MSE

        print('-'*100)
        idx += 1

    print('Average Validation MSE :', round(sum_val_mse/n_fold, 3))
    print('='*100)
    print()

Lambda : 0.001
coefficients : [0.04585753 0.18790125 0.00361145]
----------------------------------------------------------------------------------------------------
coefficients : [0.04513111 0.1879391  0.00140999]
----------------------------------------------------------------------------------------------------
coefficients : [ 0.04698029  0.18872464 -0.00235756]
----------------------------------------------------------------------------------------------------
coefficients : [ 0.04315907  0.20013028 -0.00758214]
----------------------------------------------------------------------------------------------------
coefficients : [ 0.047252    0.17991513 -0.00094029]
----------------------------------------------------------------------------------------------------
Average Validation MSE : 0.558

Lambda : 1
coefficients : [0.04582866 0.18396308 0.00216856]
----------------------------------------------------------------------------------------------------
coefficients : [0.04501995 