# optuna 설치

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


In [None]:
!pip install optuna.integration

Collecting optuna.integration
  Downloading optuna_integration-4.6.0-py3-none-any.whl.metadata (12 kB)
Downloading optuna_integration-4.6.0-py3-none-any.whl (99 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.1/99.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: optuna.integration
Successfully installed optuna.integration-4.6.0


# 라이브러리 import

In [None]:
import os, numpy as np
import pandas as pd
import optuna
from sklearn.svm import SVC
from sklearn.metrics import (
    f1_score, classification_report, confusion_matrix,
    accuracy_score, roc_auc_score, average_precision_score
)

# 데이터 불러오기

In [None]:
# Load the train/test CSV files and separate the 'label' target column
# from the remaining feature columns.
train = pd.read_csv("closedworld_train.csv")
test = pd.read_csv("closedworld_test.csv")

X_train, y_train = train.drop(columns=['label']), train['label']
X_test, y_test = test.drop(columns=['label']), test['label']

# Print the shape of every split as a quick sanity check.
for _caption, _split in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(_caption, _split.shape)

X_train shape: (13300, 26)
y_train shape: (13300,)
X_test shape: (5700, 26)
y_test shape: (5700,)


# train/validation 분리

In [None]:
# Hold out a stratified 20% validation set for hyperparameter tuning.
from sklearn.model_selection import train_test_split

# Split from the untouched `train` frame instead of from X_train / y_train
# themselves: the cell is then idempotent, so re-running it reproduces the same
# split rather than carving another 20% out of an already-reduced training set.
X_train, X_val, y_train, y_val = train_test_split(
    train.drop(columns=['label']),
    train['label'],
    test_size=0.2,
    random_state=42,
    stratify=train['label'],
)

# optuna objective 정의

In [None]:
NUM_CLASSES = len(np.unique(y_train))
RANDOM_STATE = 42

def objective(trial: optuna.Trial) -> float:
    """Optuna objective: fit an RBF-kernel SVC and score it on the validation set.

    Parameters
    ----------
    trial : optuna.Trial
        Trial used to sample ``C`` (log-uniform in [1e-3, 1e3]) and
        ``gamma`` (log-uniform in [1e-4, 1e1]).

    Returns
    -------
    float
        ``1 - macro-F1`` of the predictions on ``(X_val, y_val)``. Lower is
        better, so the study must be created with ``direction="minimize"``.
    """
    # Probability estimates are deliberately left disabled here: the objective
    # only needs hard predictions, and enabling them makes every fit run an
    # additional internal cross-validation without affecting `predict`.
    params = {
        "kernel": "rbf",
        "C": trial.suggest_float("C", 1e-3, 1e3, log=True),
        "gamma": trial.suggest_float("gamma", 1e-4, 1e1, log=True),
        "random_state": RANDOM_STATE
    }

    model = SVC(**params)

    # Train on the training split only; the validation split stays unseen.
    model.fit(X_train, y_train)

    # Hard class predictions on the validation split.
    val_pred = model.predict(X_val)

    # Optimise for macro-F1 so that every class contributes equally.
    f1_macro = f1_score(y_val, val_pred, average='macro')

    return 1 - f1_macro

# optuna 실행

In [None]:
# The objective returns 1 - macro-F1, hence direction="minimize".
# The sampler is seeded so that the sequence of suggested hyperparameters (and
# therefore the best trial) is reproducible across kernel restarts.
study = optuna.create_study(
    direction="minimize",
    study_name="svm_multiclass_f1macro",
    sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
)
study.optimize(objective, n_trials=30, show_progress_bar=True)

[I 2025-11-30 14:49:56,006] A new study created in memory with name: svm_multiclass_f1macro


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-11-30 14:50:48,671] Trial 0 finished with value: 0.6298485752133351 and parameters: {'C': 0.9714035949917724, 'gamma': 0.015183615180613811}. Best is trial 0 with value: 0.6298485752133351.
[I 2025-11-30 14:52:12,687] Trial 1 finished with value: 0.8153281457413889 and parameters: {'C': 0.041291184867469646, 'gamma': 0.033439806588401434}. Best is trial 0 with value: 0.6298485752133351.
[I 2025-11-30 14:53:36,980] Trial 2 finished with value: 0.8417397840505064 and parameters: {'C': 0.0011110501912258558, 'gamma': 0.01361898792987644}. Best is trial 0 with value: 0.6298485752133351.
[I 2025-11-30 14:55:01,104] Trial 3 finished with value: 0.9349914493792537 and parameters: {'C': 0.006358548818602746, 'gamma': 0.0003525078982906262}. Best is trial 0 with value: 0.6298485752133351.
[I 2025-11-30 14:56:25,309] Trial 4 finished with value: 0.8183479738483866 and parameters: {'C': 0.005526595058889818, 'gamma': 0.02944467723375494}. Best is trial 0 with value: 0.6298485752133351.
[I

# Best Trial 확인 / 최종 모델 재학습

In [None]:
# Hyperparameters of the best trial found by the study.
best_params = study.best_params
print("Best Params:", best_params)

# probability=True is required here because the evaluation uses predict_proba.
final_model = SVC(
    kernel="rbf",
    C=best_params["C"],
    gamma=best_params["gamma"],
    probability=True,
    random_state=RANDOM_STATE
)

# Refit on training + validation data. Fitting on X_train alone would merely
# reproduce the model already trained inside the best trial and leave the
# validation rows unused now that tuning is finished.
X_train_full = pd.concat([X_train, X_val])
y_train_full = pd.concat([y_train, y_val])

final_model.fit(X_train_full, y_train_full)

Best Params: {'C': 278.88259957929523, 'gamma': 0.04499206820318379}


# 최종 모델 평가

In [None]:
# Hard predictions and per-class probabilities on the held-out test set.
y_pred = final_model.predict(X_test)
y_prob = final_model.predict_proba(X_test)

# Label-based metrics: accuracy plus three F1 averaging schemes.
accuracy = accuracy_score(y_test, y_pred)
f1_macro, f1_micro, f1_weighted = (
    f1_score(y_test, y_pred, average=_scheme)
    for _scheme in ('macro', 'micro', 'weighted')
)

# Probability-based metrics: one-vs-rest ROC-AUC and PR-AUC, macro-averaged.
roc_auc = roc_auc_score(y_test, y_prob, multi_class='ovr', average='macro')
pr_auc = average_precision_score(y_test, y_prob, average='macro')

# Per-class precision / recall / F1 table.
report = classification_report(y_test, y_pred)

for _caption, _score in (
    ("Accuracy:", accuracy),
    ("F1-macro:", f1_macro),
    ("F1-micro:", f1_micro),
    ("F1-weighted:", f1_weighted),
    ("ROC-AUC:", roc_auc),
    ("PR-AUC:", pr_auc),
):
    print(_caption, _score)
print(report)

Accuracy: 0.6171929824561404
F1-macro: 0.6146179403437404
F1-micro: 0.6171929824561404
F1-weighted: 0.6146179403437404
ROC-AUC: 0.9697731429637922
PR-AUC: 0.6536487905387943
              precision    recall  f1-score   support

           0       0.60      0.63      0.62        60
           1       0.66      0.77      0.71        60
           2       0.63      0.73      0.68        60
           3       0.68      0.60      0.64        60
           4       0.61      0.47      0.53        60
           5       0.54      0.43      0.48        60
           6       0.62      0.75      0.68        60
           7       0.51      0.58      0.54        60
           8       0.52      0.52      0.52        60
           9       0.41      0.42      0.41        60
          10       0.59      0.55      0.57        60
          11       0.65      0.65      0.65        60
          12       0.66      0.75      0.70        60
          13       0.33      0.23      0.27        60
          14   