In [1]:
# Chapter 03 과제
# 규제 값을 바꿔보면서 모델의 계수(coefficient) 크기가 어떻게 변하는지 출력하기
# Ridge(L2) 및 Lasso(L1) 회귀 사용

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Ridge, Lasso

# 1. Load the data
# Download the perch measurement table and expose it as a plain NumPy matrix.
# NOTE(review): the original comment described this as perch "length" data;
# the actual columns are not visible here — confirm against the CSV.
PERCH_CSV_URL = 'https://bit.ly/perch_csv_data'
df = pd.read_csv(PERCH_CSV_URL)
perch_full = df.to_numpy()

# Perch weight data: the regression target, one value per sample (56 in total).
_PERCH_WEIGHT_VALUES = (
    5.9, 32.0, 40.0, 51.5, 70.0, 100.0, 78.0, 80.0,
    85.0, 85.0, 110.0, 115.0, 125.0, 130.0, 120.0, 120.0,
    130.0, 135.0, 110.0, 130.0, 150.0, 145.0, 150.0, 170.0,
    225.0, 145.0, 188.0, 180.0, 197.0, 218.0, 300.0, 260.0,
    265.0, 250.0, 250.0, 300.0, 320.0, 514.0, 556.0, 840.0,
    685.0, 700.0, 700.0, 690.0, 900.0, 650.0, 820.0, 850.0,
    900.0, 1015.0, 820.0, 1100.0, 1000.0, 1100.0, 1000.0, 1000.0,
)
perch_weight = np.array(_PERCH_WEIGHT_VALUES, dtype=float)

# 2. Split into a training set and a test set
# random_state is fixed so the same partition is produced on every run.
_split_parts = train_test_split(perch_full, perch_weight, random_state=42)
train_input, test_input, train_target, test_target = _split_parts

# 3. Polynomial feature expansion
# Expand the inputs with terms up to degree 5 so a linear model can capture
# non-linear relationships. The transformer is fitted on the training inputs
# only and then applied to both splits.
poly = PolynomialFeatures(degree=5, include_bias=False).fit(train_input)
train_poly, test_poly = poly.transform(train_input), poly.transform(test_input)

# 4. Feature standardization
# Ridge and Lasso penalize coefficient size, so they are sensitive to feature
# scale; the scaler is fitted on the training features only and then applied
# to both splits.
ss = StandardScaler().fit(train_poly)
train_poly, test_poly = ss.transform(train_poly), ss.transform(test_poly)

# 5. Print how the Ridge and Lasso coefficients change with the
#    regularization strength (alpha)
alphas = [0.01, 0.1, 1, 10, 100]


def _fit_and_report(model, alpha):
    """Fit *model* on the scaled training features, print its coefficients, and return it."""
    model.fit(train_poly, train_target)
    print(f"alpha={alpha}, coefficients={model.coef_}")
    return model


# Ridge Regression (L2 penalty)
print("=== Ridge Regression ===")
for alpha in alphas:
    ridge = _fit_and_report(Ridge(alpha=alpha), alpha)

# Lasso Regression (L1 penalty)
# NOTE(review): the recorded notebook output ends with a line from the
# coordinate-descent solver, which looks like a convergence warning —
# consider revisiting max_iter/tol for the smallest alphas; confirm.
print("\n=== Lasso Regression ===")
for alpha in alphas:
    lasso = _fit_and_report(Lasso(alpha=alpha, max_iter=10000), alpha)


=== Ridge Regression ===
alpha=0.01, coefficients=[  27.8677573    63.06022968 -136.52883831   52.96410765   33.12948023
   67.56237681    1.13574116  -13.52605804   34.98952583   -0.97795585
    7.09170617   75.08423377   -9.30376241   14.38673458   94.08962926
  -60.57186192  -71.76749076   -6.952476    125.2440166   -71.25085649
   -3.59471865   19.83077257   32.51396253   14.26736039   49.53158842
   27.98329663  -25.07038895   -8.39028396   75.14481255  -24.20556919
 -104.723856    -99.23122038  -10.90112814  154.39485159 -132.47538652
    8.11018173  -52.68915034  114.07244483   12.48803274  -36.64856475
  176.44598804   39.54385175  -31.2771495   -35.37452556  187.68391987
   21.4546774   -64.74538278  -70.16298083    3.69221895  141.91992018
  -47.08385738 -141.80741176 -141.60027828  -48.80478588  131.38148311]
alpha=0.1, coefficients=[ 27.61933479  -1.64022156 -19.73256036  30.03061951  16.99800255
  17.83247301   4.46928273   3.87258547  22.8351822   19.50417483
  17.7598531

  model = cd_fast.enet_coordinate_descent(
