In [116]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, ElasticNet

# Load the scikit-learn diabetes dataset and build a DataFrame of its feature
# columns, with the regression target appended as the "degree" column.
diabetes = load_diabetes()
Dia_df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names).assign(
    degree=diabetes.target
)
# Last expression of the cell: show the first five rows.
Dia_df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,degree
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [138]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

# Hold out 30% of the samples as a test set; the fixed random_state makes the
# split reproducible between runs.
features, target = diabetes.data, diabetes.target
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.3, random_state=42
)

from sklearn.linear_model import Ridge, Lasso

# Regularisation strengths (alpha) shared by the Ridge, Lasso and ElasticNet
# searches below.
param_grid = np.linspace(0.0001, 0.01, 50)


def _fit_grid_search(estimator, grid):
    """Grid-search ``estimator`` over ``grid`` on the training split.

    Uses 5-fold cross-validation scored with R^2 and returns the fitted
    ``GridSearchCV`` object.
    """
    search = GridSearchCV(estimator, param_grid=grid, scoring="r2", cv=5)
    search.fit(X_train, y_train)
    return search


# Degree-2 polynomial regression. The expanded features are standardised
# because scaling helps performance when polynomial terms are used.
PolyModel = make_pipeline(
    PolynomialFeatures(degree=2),
    StandardScaler(),
    LinearRegression(),
)
PolyModel.fit(X_train, y_train)

# Ridge model
ridge_grid = _fit_grid_search(Ridge(), {"alpha": param_grid})

# Lasso model
lasso_grid = _fit_grid_search(Lasso(max_iter=10000), {"alpha": param_grid})

# ElasticNet model: alpha is searched jointly with the L1/L2 mixing ratio.
elastic_grid = _fit_grid_search(
    ElasticNet(max_iter=10000),
    {"alpha": param_grid, "l1_ratio": [0.1, 0.5, 0.9]},
)

# Plain least-squares baseline without regularisation.
Rinear = LinearRegression()
Rinear.fit(X_train, y_train)

# Print the train and test score of every fitted model. Grid-searched models
# additionally report the selected hyper-parameters and the best
# cross-validation score (labelled "Accuracy", but the searches were created
# with scoring="r2", so the number is an R^2 value).
for label, fitted in (
    ("ridge", ridge_grid),
    ("lasso", lasso_grid),
    ("Rinear", Rinear),
    ("elastic", elastic_grid),
    ("PolyModel", PolyModel),
):
    print(f"{label} training Data evalution : {fitted.score(X_train, y_train)}")
    print(f"{label} test Data evaluation : {fitted.score(X_test, y_test)}")

    # Only the GridSearchCV objects expose best_params_ / best_score_.
    if isinstance(fitted, GridSearchCV):
        print(f"{label} Best Hyperparameters:", fitted.best_params_)
        print(f"{label} Best Accuracy:", fitted.best_score_)

ridge training Data evalution : 0.5239650845063213
ridge test Data evaluation : 0.47763342949961396
ridge Best Hyperparameters: {'alpha': np.float64(0.002928571428571429)}
ridge Best Accuracy: 0.45313116819621946
lasso training Data evalution : 0.5238422161184147
lasso test Data evaluation : 0.4784049182542792
lasso Best Hyperparameters: {'alpha': np.float64(0.006161224489795919)}
lasso Best Accuracy: 0.4531469951461958
Rinear training Data evalution : 0.5244124363545944
Rinear test Data evaluation : 0.4772897164322617
elastic training Data evalution : 0.523915892173388
elastic test Data evaluation : 0.47764650917863316
elastic Best Hyperparameters: {'alpha': np.float64(0.0001), 'l1_ratio': 0.9}
elastic Best Accuracy: 0.45313098146321584
PolyModel training Data evalution : 0.6081125571650736
PolyModel test Data evaluation : 0.41297706916206667


In [139]:
from sklearn.metrics import mean_squared_error, r2_score

# Report RMSE and R^2 on the held-out test set for each fitted model.
# The labels are the exact prefixes used in the printed report.
for label, fitted in (
    ("Ridge", ridge_grid),
    ("LassoRegre", lasso_grid),
    ("Rinear", Rinear),
    ("Elastic", elastic_grid),
    ("PolyModel", PolyModel),
):
    y_test_predict = fitted.predict(X_test)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_test, y_test_predict))
    r2 = r2_score(y_test, y_test_predict)

    print(f"{label} 테스트 데이터 예측 rmse : {rmse}")
    print(f"{label} 테스트 데이터 예측 R2 score : {r2}")

Ridge 테스트 데이터 예측 rmse : 53.10268836962929
Ridge 테스트 데이터 예측 R2 score : 0.47763342949961396
LassoRegre 테스트 데이터 예측 rmse : 53.06345991687165
LassoRegre 테스트 데이터 예측 R2 score : 0.4784049182542792
Rinear 테스트 데이터 예측 rmse : 53.1201560709427
Rinear 테스트 데이터 예측 R2 score : 0.4772897164322617
Elastic 테스트 데이터 예측 rmse : 53.102023539119386
Elastic 테스트 데이터 예측 R2 score : 0.47764650917863316
PolyModel 테스트 데이터 예측 rmse : 56.293253982581575
PolyModel 테스트 데이터 예측 R2 score : 0.41297706916206667


# 파라미터 변환해보기

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Load the data and hold out 30% of it as a test set (fixed seed so the split
# is reproducible).
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Candidate regularisation strengths (alpha).
param_grid = np.linspace(0.0001, 0.01, 10)

# Every model to evaluate, as (name, unfitted estimator, alpha, l1_ratio).
# A hyper-parameter that does not apply to a model is recorded as NaN instead
# of "" so the "alpha" and "l1_ratio" columns stay numeric and all rows use the
# same missing-value marker; to_csv writes NaN as an empty cell by default.
candidates = []

# Ridge and Lasso: one model per alpha.
for alpha in param_grid:
    for model_name, model_class in [("Ridge", Ridge), ("Lasso", Lasso)]:
        candidates.append(
            (model_name, model_class(alpha=alpha, max_iter=10000), alpha, np.nan)
        )

# ElasticNet: every alpha is combined with every l1_ratio.
for alpha in param_grid:
    for l1_ratio in [0.1, 0.5, 0.9]:
        candidates.append(
            (
                "ElasticNet",
                ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=10000),
                alpha,
                l1_ratio,
            )
        )

# LinearRegression (baseline, no hyper-parameters).
linear = LinearRegression()
candidates.append(("LinearRegression", linear, np.nan, np.nan))

# Polynomial regression (degree=2) on standardised expanded features.
poly_model = make_pipeline(
    PolynomialFeatures(degree=2), StandardScaler(), LinearRegression()
)
candidates.append(("Polynomial", poly_model, np.nan, np.nan))

# Fit each candidate once and collect one result row per model. The whole
# experiment used to be pasted twice in this cell, which refitted every model
# and rewrote the CSV a second time; it now runs a single time.
results = []
for model_name, model, model_alpha, model_l1_ratio in candidates:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results.append(
        {
            "Model": model_name,
            "alpha": model_alpha,
            "l1_ratio": model_l1_ratio,
            "R2 of X train": model.score(X_train, y_train),
            "R2 of X test": model.score(X_test, y_test),
            "예측 데이터 rmse": np.sqrt(mean_squared_error(y_test, y_pred)),
            "예측 데이터 R2 score": r2_score(y_test, y_pred),
        }
    )

# Build the results table with a fixed column order.
columns_order = [
    "Model",
    "alpha",
    "l1_ratio",
    "R2 of X train",
    "R2 of X test",
    "예측 데이터 rmse",
    "예측 데이터 R2 score",
]
df_results = pd.DataFrame(results)[columns_order]

# Persist the comparison table next to the notebook.
df_results.to_csv("model_comparison_results.csv", index=False)
print("결과가 'model_comparison_results.csv' 파일로 저장되었습니다.")

결과가 'model_comparison_results.csv' 파일로 저장되었습니다.
