# Perch 데이터로 규제 값에 따른 계수 크기 변화 관찰 (Ridge / Lasso)



In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.pipeline import Pipeline

# Load the perch feature table; every column of the CSV becomes a feature.
df = pd.read_csv('https://bit.ly/perch_csv_data')
X = df.to_numpy()

# Target values, hard-coded in the same row order as the CSV.
perch_weight = np.asarray(
    [
        5.9, 32.0, 40.0, 51.5, 70.0, 100.0, 78.0, 80.0,
        85.0, 85.0, 110.0, 115.0, 125.0, 130.0, 120.0, 120.0,
        130.0, 135.0, 110.0, 130.0, 150.0, 145.0, 150.0, 170.0,
        225.0, 145.0, 188.0, 180.0, 197.0, 218.0, 300.0, 260.0,
        265.0, 250.0, 250.0, 300.0, 320.0, 514.0, 556.0, 840.0,
        685.0, 700.0, 700.0, 690.0, 900.0, 650.0, 820.0, 850.0,
        900.0, 1015.0, 820.0, 1100.0, 1000.0, 1100.0, 1000.0, 1000.0,
    ],
    dtype=np.float64,
)

y = perch_weight

# Sanity-check that features and targets have matching sample counts.
print('X shape:', X.shape)
print('y shape:', y.shape)
df.head()


X shape: (56, 3)
y shape: (56,)


Unnamed: 0,length,height,width
0,8.4,2.11,1.41
1,13.7,3.53,2.0
2,15.0,3.82,2.43
3,16.2,4.59,2.63
4,17.4,4.59,2.94


In [2]:
# Split the data into train and test sets (fixed random_state so the split
# is the same on every run).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Polynomial degree handed to PolynomialFeatures inside run_regularization.
degree = 5
# Regularization strengths swept by run_regularization, smallest to largest.
alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]

def run_regularization(model_name, model_class):
    """Fit one pipeline per alpha and tabulate coefficient size and R^2.

    For every value in the module-level ``alphas`` list this builds a
    ``PolynomialFeatures(degree) -> StandardScaler -> model_class(alpha)``
    pipeline, fits it on the module-level ``X_train``/``y_train``, prints a
    one-line summary, and records the L2 and L1 norms of the fitted
    coefficients together with the train and test scores.

    Args:
        model_name: Name of the estimator step in the pipeline. The exact
            string ``'lasso'`` additionally enables ``max_iter=10000``, the
            zero-coefficient count and the ``[Lasso ]`` log label; any other
            value is logged under the ``[Ridge ]`` label.
        model_class: Estimator class accepting ``alpha`` and
            ``random_state`` keyword arguments (and ``max_iter`` when
            ``model_name`` is ``'lasso'``).

    Returns:
        A ``pandas.DataFrame`` with one row per alpha and the columns
        ``alpha``, ``||w||2``, ``||w||1``, ``#zeros (lasso)``, ``R2_train``
        and ``R2_test``. ``#zeros (lasso)`` is ``None`` unless
        ``model_name`` is ``'lasso'``.
    """
    # Decide once which variant is running instead of re-comparing the
    # string at every use inside the loop.
    is_lasso = model_name == 'lasso'
    label = 'Lasso ' if is_lasso else 'Ridge '

    est_kwargs = {'random_state': 42}
    if is_lasso:
        # NOTE(review): the recorded output suggests the solver still warns
        # at the smallest alphas with this cap; raising it would change the
        # fitted coefficients, so the value is kept as-is.
        est_kwargs['max_iter'] = 10000

    rows = []
    for a in alphas:
        pipe = Pipeline([
            ('poly', PolynomialFeatures(degree=degree, include_bias=False)),
            ('scaler', StandardScaler()),
            (model_name, model_class(alpha=a, **est_kwargs)),
        ])
        pipe.fit(X_train, y_train)

        coef = pipe.named_steps[model_name].coef_
        l2 = np.linalg.norm(coef, 2)
        l1 = np.linalg.norm(coef, 1)
        # Count of coefficients that are exactly zero; only reported for lasso.
        zeros = int(np.sum(coef == 0)) if is_lasso else None
        r2_train = pipe.score(X_train, y_train)
        r2_test = pipe.score(X_test, y_test)

        rows.append({
            'alpha': a,
            '||w||2': l2,
            '||w||1': l1,
            '#zeros (lasso)': zeros,
            'R2_train': r2_train,
            'R2_test': r2_test,
        })

        # Single log template; the zero-count field is spliced in only for
        # lasso so both variants stay formatted identically otherwise.
        zeros_part = f", zeros={zeros:2d}" if is_lasso else ""
        print(
            f"[{label}] alpha={a:>7} | ||w||2={l2:.6f}, ||w||1={l1:.6f}"
            f"{zeros_part}, R2(train)={r2_train:.4f}, R2(test)={r2_test:.4f}"
        )

    # Uses the module-level pandas import; no function-local re-import needed.
    return pd.DataFrame(rows)

# Run the alpha sweep once per model; each call prints one summary line per
# alpha and returns the results as a DataFrame.
ridge_df = run_regularization('ridge', Ridge)
lasso_df = run_regularization('lasso', Lasso)

# Last expression of the cell: display the Ridge results table.
ridge_df


[Ridge ] alpha= 0.0001 | ||w||2=5953.934595, ||w||1=34106.663120, R2(train)=0.9946, R2(test)=0.9665
[Ridge ] alpha=  0.001 | ||w||2=1642.969714, ||w||1=9756.596407, R2(train)=0.9930, R2(test)=0.9569
[Ridge ] alpha=   0.01 | ||w||2=575.452433, ||w||1=3256.730841, R2(train)=0.9918, R2(test)=0.9800
[Ridge ] alpha=    0.1 | ||w||2=146.026429, ||w||1=860.810562, R2(train)=0.9904, R2(test)=0.9828
[Ridge ] alpha=      1 | ||w||2=64.638032, ||w||1=410.818780, R2(train)=0.9896, R2(test)=0.9791
[Ridge ] alpha=     10 | ||w||2=52.994005, ||w||1=351.686371, R2(train)=0.9887, R2(test)=0.9725
[Ridge ] alpha=    100 | ||w||2=45.905432, ||w||1=337.942437, R2(train)=0.9842, R2(test)=0.9627


  model = cd_fast.enet_coordinate_descent(


[Lasso ] alpha= 0.0001 | ||w||2=1459.177328, ||w||1=8355.142177, zeros= 0, R2(train)=0.9925, R2(test)=0.9729


  model = cd_fast.enet_coordinate_descent(


[Lasso ] alpha=  0.001 | ||w||2=1419.131239, ||w||1=8055.854300, zeros= 0, R2(train)=0.9925, R2(test)=0.9732


  model = cd_fast.enet_coordinate_descent(


[Lasso ] alpha=   0.01 | ||w||2=1055.820298, ||w||1=5462.177588, zeros=11, R2(train)=0.9923, R2(test)=0.9762
[Lasso ] alpha=    0.1 | ||w||2=193.289616, ||w||1=538.317738, zeros=42, R2(train)=0.9902, R2(test)=0.9824
[Lasso ] alpha=      1 | ||w||2=159.087843, ||w||1=352.862415, zeros=47, R2(train)=0.9898, R2(test)=0.9802
[Lasso ] alpha=     10 | ||w||2=175.877000, ||w||1=341.493879, zeros=48, R2(train)=0.9888, R2(test)=0.9830
[Lasso ] alpha=    100 | ||w||2=186.310249, ||w||1=249.466783, zeros=52, R2(train)=0.9079, R2(test)=0.9091


Unnamed: 0,alpha,||w||2,||w||1,#zeros (lasso),R2_train,R2_test
0,0.0001,5953.934595,34106.66312,,0.994577,0.966455
1,0.001,1642.969714,9756.596407,,0.993046,0.956939
2,0.01,575.452433,3256.730841,,0.991781,0.980019
3,0.1,146.026429,860.810562,,0.990382,0.982798
4,1.0,64.638032,410.81878,,0.98961,0.979069
5,10.0,52.994005,351.686371,,0.988728,0.972533
6,100.0,45.905432,337.942437,,0.984184,0.962704


In [3]:
# Display the Lasso results table.
lasso_df


Unnamed: 0,alpha,||w||2,||w||1,#zeros (lasso),R2_train,R2_test
0,0.0001,1459.177328,8355.142177,0,0.992498,0.972888
1,0.001,1419.131239,8055.8543,0,0.992496,0.973182
2,0.01,1055.820298,5462.177588,11,0.992299,0.976155
3,0.1,193.289616,538.317738,42,0.990208,0.982438
4,1.0,159.087843,352.862415,47,0.989812,0.980184
5,10.0,175.877,341.493879,48,0.988762,0.983031
6,100.0,186.310249,249.466783,52,0.907862,0.909077


## 결과

Lasso에서 α를 키울수록 계수의 L1 노름은 8355→249로 단조 감소하고, L2 노름은 α=0.1까지 급격히 감소한 뒤(1459→193) α≥1에서는 159–186 범위에서 소폭 등락함. 0이 된 계수 수는 0→52로 늘어 희소화가 뚜렷해짐.
훈련 R²는 대체로 0.99 내외를 유지하다가 α=100에서 0.91 수준으로 하락함. 테스트 R²는 α=0.0001–0.01에서 0.973–0.976이었고, α=0.1–10 구간에서 0.980–0.983으로 가장 높았으며, α=100에서는 0.909로 하락함.
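이 데이터에서 Lasso는 α=0.1–10이 적정 범위이고, α가 너무 크면(α=100) 과소적합이 발생함. Ridge는 α가 커질수록 L2/L1 노름이 모두 단조 감소하며(5954→46, 34107→338), 테스트 R²는 α=0.1에서 0.983으로 가장 높음.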