# optuna 설치

In [1]:
!pip install optuna



# 라이브러리 import

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import (
    f1_score, accuracy_score, roc_auc_score, average_precision_score
)
import optuna

# 데이터 불러오기

In [3]:
# Read the train and test CSVs from the working directory.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('openworld_train.csv', 'openworld_test.csv')
)

# Separate the "label" column (target) from the remaining columns (features),
# converting both to NumPy arrays.
y = train_df["label"].values
X = train_df.drop(columns=["label"]).values

y_test = test_df["label"].values
X_test = test_df.drop(columns=["label"]).values

# Hold out 20% of the training data for validation; stratify on the label and
# fix the seed so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the shape of each feature matrix: train, validation, test.
for feature_matrix in (X_train, X_val, X_test):
    print(feature_matrix.shape)

(16240, 26)
(4060, 26)
(8700, 26)


# Optuna objective

In [6]:
def objective(trial):
    """Optuna objective: validation macro-F1 of an RBF-kernel SVC.

    Samples ``C`` from [1e-2, 1e3] and ``gamma`` from [1e-4, 1e1], both on a
    log scale, fits an ``SVC`` on the notebook-level ``X_train`` / ``y_train``
    and scores its predictions on ``X_val`` / ``y_val``.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        The macro-averaged F1 score on the validation split.
    """
    # Sample C first, then gamma, so the sampler sees the parameters in a
    # fixed order.
    sampled = {
        "C": trial.suggest_float("C", 1e-2, 1e3, log=True),
        "gamma": trial.suggest_float("gamma", 1e-4, 1e1, log=True),
    }

    # Probability estimates are not needed for F1, so they stay disabled here.
    classifier = SVC(
        kernel="rbf",
        probability=False,
        random_state=42,
        **sampled,
    )
    classifier.fit(X_train, y_train)

    # Score hard predictions on the held-out validation split.
    return f1_score(y_val, classifier.predict(X_val), average='macro')


# Optuna 실행

In [7]:
# Seeded TPE sampler so the sequence of suggested hyperparameters is repeatable.
tpe_sampler = optuna.samplers.TPESampler(seed=42)

# The objective returns macro-F1, so the study maximizes it.
study = optuna.create_study(sampler=tpe_sampler, direction="maximize")

# Run 30 trials with a progress bar.
study.optimize(
    objective,
    n_trials=30,
    show_progress_bar=True,
)

# Summarize the best trial.
print(f"Best Macro-F1: {study.best_value:.4f}")
print("Parameters:", study.best_params)

[I 2025-11-30 18:47:13,572] A new study created in memory with name: no-name-bcc445b4-29de-435a-b444-aa0fce922c5c


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-11-30 18:52:04,783] Trial 0 finished with value: 0.0259202910516243 and parameters: {'C': 0.7459343285726545, 'gamma': 5.669849511478847}. Best is trial 0 with value: 0.0259202910516243.
[I 2025-11-30 18:52:40,604] Trial 1 finished with value: 0.5627474760986168 and parameters: {'C': 45.70563099801453, 'gamma': 0.09846738873614563}. Best is trial 1 with value: 0.5627474760986168.
[I 2025-11-30 18:53:17,466] Trial 2 finished with value: 0.005341880341880341 and parameters: {'C': 0.06026889128682508, 'gamma': 0.000602521573620386}. Best is trial 1 with value: 0.5627474760986168.
[I 2025-11-30 18:55:54,096] Trial 3 finished with value: 0.005341880341880341 and parameters: {'C': 0.0195172246414495, 'gamma': 2.1423021757741068}. Best is trial 1 with value: 0.5627474760986168.
[I 2025-11-30 18:57:05,325] Trial 4 finished with value: 0.5382127118216331 and parameters: {'C': 10.129197956845726, 'gamma': 0.3470266988650412}. Best is trial 1 with value: 0.5627474760986168.
[I 2025-11-30 

# 최종 모델 재학습

In [9]:
# Rebuild the RBF SVM with the best hyperparameters found by the study.
best_params = study.best_params
best_c, best_gamma = best_params["C"], best_params["gamma"]

# probability=True enables predict_proba, which the evaluation cell uses for
# the ROC-AUC / PR-AUC metrics.
best_model = SVC(
    kernel="rbf",
    C=best_c,
    gamma=best_gamma,
    probability=True,
    random_state=42,
)

# NOTE(review): the refit uses X_train only; the validation split is not
# merged back in. Confirm this is intended for the final model.
best_model.fit(X_train, y_train)

In [10]:
# Hard labels and per-class probabilities on the test set.
test_pred = best_model.predict(X_test)
test_proba = best_model.predict_proba(X_test)

# Label-based metrics: accuracy plus F1 under three averaging schemes.
test_acc = accuracy_score(y_test, test_pred)
test_f1_macro, test_f1_micro, test_f1_weighted = (
    f1_score(y_test, test_pred, average=averaging)
    for averaging in ('macro', 'micro', 'weighted')
)

# Probability-based metrics, both macro-averaged:
# one-vs-rest ROC-AUC and average precision (PR-AUC).
test_roc_auc = roc_auc_score(
    y_test,
    test_proba,
    average='macro',
    multi_class='ovr',
)
test_pr_auc = average_precision_score(y_test, test_proba, average='macro')

# Final report.
print(f"Accuracy        : {test_acc:.4f}")
print(f"F1 (macro)      : {test_f1_macro:.4f}")
print(f"F1 (micro)      : {test_f1_micro:.4f}")
print(f"F1 (weighted)   : {test_f1_weighted:.4f}")
print(f"ROC-AUC (macro) : {test_roc_auc:.4f}")
print(f"PR-AUC (macro)  : {test_pr_auc:.4f}")

Accuracy        : 0.6549
F1 (macro)      : 0.5785
F1 (micro)      : 0.6549
F1 (weighted)   : 0.6496
ROC-AUC (macro) : 0.9664
PR-AUC (macro)  : 0.5838
