In [4]:
# Upgrade scikit-learn inside the kernel's environment. The version is not
# pinned, so the installed release depends on when this cell is run; the
# kernel may need a restart before the upgraded package is picked up.
%pip install -U scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from metrics import print_regression_metrcis
from sklearn.svm import SVR
# Load the source table; the CSV file is read with the cp949 encoding.
df = pd.read_csv(
    "mlb최종.csv",
    encoding="cp949",
)


In [8]:
# Target: age at retirement, derived from the retirement year and birth year.
df['retire_age'] = df['retire_year'] - df['birth_year']
# Drop every row that has a missing value in ANY column of the table.
df_clean = df.dropna()
if df_clean.empty:
    # Fail early with a clear message instead of a less obvious error from
    # scikit-learn further down.
    raise ValueError("No rows left after dropna(); cannot train the SVR model.")

# Features and target
feature_cols = ['G', 'AB', 'H', '2B', '3B', 'HR', 'BB', 'SO', 'RBI', 'R', 'SB', 'CS', 'AVG', 'OBP', 'SLG', 'OPS']
X = df_clean[feature_cols]
y = df_clean['retire_age']

# Split BEFORE scaling. Fitting the scaler on the full matrix would let the
# validation rows influence the mean/std used for standardisation (data
# leakage) and make the validation metrics optimistic. The row partition is
# the same as splitting the scaled matrix, because it depends only on the
# number of samples and random_state.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using statistics learned from the training rows only.
# NOTE: the CV folds inside the grid search still share these training-split
# statistics; wrapping scaler + SVR in a Pipeline would remove that as well,
# at the cost of renaming the hyperparameter keys (e.g. "svr__C").
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
# Scaled copy of the full feature matrix, kept under its original name for
# any later cell that still refers to it.
X_scaled = scaler.transform(X)

# SVR hyperparameter candidates (3 x 3 x 3 = 27 combinations)
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['rbf', 'linear', 'poly'],
    'epsilon': [0.1, 0.2, 0.5]
}

# Grid search: 3-fold CV, scored by negative MSE (higher is better),
# using all available CPU cores.
grid_search = GridSearchCV(
    estimator=SVR(),
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=2
)

# Fit on the training split only
grid_search.fit(X_train, y_train)

# Predict on the held-out validation split with the best estimator
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_val)

# Report the selected hyperparameters and the validation metrics
print("Best Params:", grid_search.best_params_)
print_regression_metrcis(y_val, y_pred, "최적 SVR 모델 성능")

# Compute the validation-set error metrics in one pass; each scorer takes
# (y_true, y_pred).
mse, mae, r2 = (
    scorer(y_val, y_pred)
    for scorer in (mean_squared_error, mean_absolute_error, r2_score)
)

# Bundle the selected hyperparameters and the metrics for saving.
# The key order determines the column order of the exported CSV.
metrics_result = dict(
    zip(
        ("Best Params", "MAE", "MSE", "R2"),
        (grid_search.best_params_, mae, mse, r2),
    )
)

# Tabulate the grid-search results. The scorer is negative MSE, so flip the
# sign to get a plain MSE, and cast the hyperparameter columns to strings so
# they are treated as categorical axes in the plot.
results = pd.DataFrame(grid_search.cv_results_)
results = results.assign(
    mean_MSE=-results['mean_test_score'],
    param_C=results['param_C'].astype(str),
    param_kernel=results['param_kernel'].astype(str),
)

# Visualisation
# Font chosen so the Korean labels render (Malgun Gothic is a Windows font;
# other platforms may need a different family).
plt.rcParams['font.family'] = 'Malgun Gothic'
fig, ax = plt.subplots(figsize=(10, 6))
# One bar per (C, kernel) pair; rows that differ only in epsilon are
# aggregated by seaborn's default estimator (the mean), without error bars.
sns.barplot(
    data=results,
    x='param_C',
    y='mean_MSE',
    hue='param_kernel',
    palette="Set2",
    errorbar=None,
    ax=ax,
)

# Annotate every bar with its value
for bars in ax.containers:
    ax.bar_label(bars, fmt='%.2f', label_type='edge', fontsize=9)

ax.set_title("SVR 하이퍼파라미터 성능 비교 (MSE ↓)")
ax.set_ylabel("평균 제곱 오차 (MSE)")
ax.set_xlabel("패널티 계수 (C)")
ax.legend(title="커널 (kernel)")
ax.grid(axis='y')
fig.tight_layout()
fig.savefig("svr_hyperparameter_mse.png", dpi=300)
plt.show()

# Persist the metrics dictionary as a pickle ...
with open("metrics_svr.pkl", "wb") as pkl_file:
    pkl_file.write(pickle.dumps(metrics_result))

# ... and as a one-row CSV (utf-8 with BOM).
df_metrics = pd.DataFrame([metrics_result])
df_metrics.to_csv(
    "metrics_svr.csv",
    index=False,
    encoding='utf-8-sig',
)
print("✅ metrics_svr.csv 저장 완료")


NameError: name 'SVR' is not defined

In [12]:
# Export the SVR metrics as a one-row DataFrame / CSV.
# The target file is metrics_svr.csv: metrics_result holds the SVR results
# built in the training cell, so writing it to "metrics_rf.csv" would
# overwrite a random-forest metrics file with SVR numbers.
df_metrics = pd.DataFrame([metrics_result])  # single-row DataFrame
df_metrics.to_csv("metrics_svr.csv", index=False, encoding='utf-8-sig')
print("✅ metrics_svr.csv 저장 완료")

✅ metrics_rf.csv 저장 완료


In [None]:
# Reload the metrics pickle written by the training cell of this notebook
# ("metrics_svr.pkl"). This notebook never writes "metrics_rf.pkl", so
# reading that name would either fail or show stale metrics from another model.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# a trusted run.
with open("metrics_svr.pkl", "rb") as f:
    loaded_metrics = pickle.load(f)

# Print every stored metric
print("\n📂 저장된 평가 지표 불러오기:")
for key, value in loaded_metrics.items():
    print(f"{key}: {value}")

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
Note: you may need to restart the kernel to use updated packages.
