In [4]:
# 03b_modeling_ridge.ipynb

import pickle
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

# 데이터 불러오기 (예: pkl 또는 npy)
import pickle

with open("X_test.pkl", "rb") as f:
    X_train = pickle.load(f)
with open("y_test.pkl", "rb") as f:
    y_train = pickle.load(f)


# Ridge 모델 + 하이퍼파라미터 튜닝
ridge = Ridge()
params = {'alpha': [0.01, 0.1, 1, 10, 100]}
grid = GridSearchCV(ridge, params, cv=5, scoring='neg_mean_squared_error')
grid.fit(X_train, y_train)

# 최고 모델 저장
best_ridge = grid.best_estimator_

with open("ridge_model.pkl", "wb") as f:
    pickle.dump(best_ridge, f)

# 예측
y_pred = best_ridge.predict(X_test)

# 평가
import numpy as np
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"Best alpha: {grid.best_params_['alpha']}")
print(f"RMSE: {rmse:.2f}")
print(f"R^2: {r2:.2f}")

# 결과 저장
pd.DataFrame({
    "actual": y_test,
    "predicted": y_pred
}).to_csv("predictions_ridge.csv", index=False)


Best alpha: 100
RMSE: 4.46
R^2: 0.73


In [5]:
# 계수 벡터 (weights)
print(best_ridge.coef_)

# 절편 (bias term)
print(best_ridge.intercept_)

print(f"Best alpha: {grid.best_params_['alpha']}")


[-6.54861536e-02  1.14079410e-01  3.19932016e-02  1.20804204e-01
 -5.61685934e-02  3.05963556e-01  1.18527111e-02 -7.66255841e-01
  3.40365461e-01 -1.94361280e-02 -8.01985415e-01 -3.34769681e-04
 -6.44929609e-01]
49.10506493391819
Best alpha: 100


In [6]:
import pandas as pd

coefs = pd.Series(best_ridge.coef_, index=X_test.columns)
coefs = coefs.sort_values()

print(coefs)  # 표 형태로 계수 보기


PTRATIO   -0.801985
DIS       -0.766256
LSTAT     -0.644930
CRIM      -0.065486
NOX       -0.056169
TAX       -0.019436
B         -0.000335
AGE        0.011853
INDUS      0.031993
ZN         0.114079
CHAS       0.120804
RM         0.305964
RAD        0.340365
dtype: float64
