<a href="https://colab.research.google.com/github/KevinCY-Kim/dacon_ML_Smart-shipping-and-logistics/blob/main/%EB%B9%84%EC%A0%95%EC%83%81%EC%9E%91%EB%8F%99_%EB%B6%84%EB%A5%98_with_Optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install optuna



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, accuracy_score
import optuna

In [None]:
# =======================
# 1. Load data
# =======================
# Read the train / test tables from comma-separated CSV files.
train = pd.read_csv("train.csv", sep=",")
test = pd.read_csv("test.csv", sep=",")

# The label is the "target" column; the features are everything except
# the row identifier (and, for the training table, the label itself).
y = train["target"]
X = train.drop(["ID", "target"], axis="columns")

X_test = test.drop(["ID"], axis="columns")

# Hold out 20% of the training rows for validation, stratified on the label
# and with a fixed seed so the split is repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [None]:
from sklearn.metrics import f1_score
# =======================
# 2. Optuna 목적 함수 정의
# =======================
def DecisionTreeobjective(trial):
    """Optuna objective: macro-F1 of a decision tree on the hold-out split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``max_depth`` and ``min_samples_split``
        (both integers in the inclusive range 2..20).

    Returns
    -------
    float
        Macro-averaged F1 score of the fitted tree on ``X_valid`` / ``y_valid``.
    """
    params = {
        "max_depth": trial.suggest_int("max_depth", 2, 20),
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 20),
    }
    # Use seed 42, the same seed the rest of the notebook uses (data split,
    # random forest, and the final tuned tree). The previous value of 4 meant
    # the score being optimised belonged to a differently-seeded tree than
    # the one that is eventually built from the best parameters.
    model = DecisionTreeClassifier(**params, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_valid)
    return f1_score(y_valid, y_pred, average="macro")

def RandomForestobjective(trial):
    """Optuna objective: macro-F1 of a random forest on the hold-out split.

    Samples ``n_estimators`` (integer in 50..300) and ``max_depth``
    (integer in 2..20) from ``trial``, fits a forest with ``random_state=42``
    on ``X_train`` / ``y_train`` and returns the macro-averaged F1 score of
    its predictions on ``X_valid`` against ``y_valid``.
    """
    n_trees = trial.suggest_int("n_estimators", 50, 300)
    depth_cap = trial.suggest_int("max_depth", 2, 20)

    forest = RandomForestClassifier(
        random_state=42,
        n_estimators=n_trees,
        max_depth=depth_cap,
    )
    forest.fit(X_train, y_train)

    return f1_score(y_valid, forest.predict(X_valid), average="macro")

def Logisticobjective(trial):
    """Optuna objective: macro-F1 of a logistic regression on the hold-out split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the inverse regularisation strength ``C``
        on a log scale between 1e-3 and 10.

    Returns
    -------
    float
        Macro-averaged F1 score of the fitted model on ``X_valid`` / ``y_valid``.
    """
    # `suggest_loguniform` is deprecated in Optuna 3.x; `suggest_float` with
    # log=True samples from the same log-uniform range.
    C = trial.suggest_float("C", 1e-3, 10, log=True)
    model = LogisticRegression(C=C, max_iter=1000, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_valid)
    return f1_score(y_valid, y_pred, average="macro")

In [None]:
# =======================
# 3. Run Optuna
# =======================
def _run_study(label, tag, objective, n_trials=50, seed=42):
    """Tune one model with Optuna and return the finished study.

    Parameters
    ----------
    label : str
        Human-readable model name used in the "Tuning ..." progress line.
    tag : str
        Short model tag used in the "Best params (...)" line.
    objective : callable
        Optuna objective taking a trial and returning the score to maximise.
    n_trials : int
        Number of trials to run.
    seed : int
        Seed for the TPE sampler, so repeated runs explore the same
        parameter sequence and the notebook is reproducible.

    Returns
    -------
    optuna.study.Study
        The optimised study; ``best_params`` holds the winning configuration.
    """
    print(f"Tuning {label}...")
    study = optuna.create_study(
        direction="maximize",
        # TPE is Optuna's default sampler; only the seed is pinned here.
        sampler=optuna.samplers.TPESampler(seed=seed),
    )
    study.optimize(objective, n_trials=n_trials)
    print(f"Best params ({tag}):", study.best_params)
    return study


study_dt = _run_study("Decision Tree", "DT", DecisionTreeobjective)
study_rf = _run_study("Random Forest", "RF", RandomForestobjective)
study_lr = _run_study("Logistic Regression", "LR", Logisticobjective)

Tuning Decision Tree...


NameError: name 'optuna' is not defined

In [None]:
# =======================
# 4. Build the tuned models
# =======================
# Re-instantiate each estimator from the best parameters found by its study,
# with a fixed seed; the logistic regression keeps max_iter=1000.
dt_best = DecisionTreeClassifier(random_state=42, **study_dt.best_params)
rf_best = RandomForestClassifier(random_state=42, **study_rf.best_params)
lr_best = LogisticRegression(max_iter=1000, random_state=42, **study_lr.best_params)

In [None]:
# =======================
# 5. Stacking
# =======================
# Base learners: the tuned decision tree and random forest.
estimators = [
    ('dt', dt_best),
    ('rf', rf_best),
]

stack_model = StackingClassifier(
    estimators=estimators,
    final_estimator=lr_best,
    # passthrough=True feeds the original input features to the final
    # estimator in addition to the base models' predictions.
    passthrough=True,
)

stack_model.fit(X_train, y_train)
y_pred = stack_model.predict(X_valid)

print("Accuracy:", accuracy_score(y_valid, y_pred))
# Report macro-F1, the same metric the Optuna objectives optimise. The default
# average="binary" raises ValueError when the target has more than two classes.
print("F1 Score:", f1_score(y_valid, y_pred, average="macro"))


In [None]:
# =======================
# 6. Final prediction
# =======================
# Test features are every column except the row identifier.
X_test = test.drop(["ID"], axis="columns")
test_pred = stack_model.predict(X_test)

# Submission table: the test IDs alongside the stacked model's predictions.
submission = test[["ID"]].assign(target=test_pred)

submission.to_csv("submission.csv", index=False)
print("submission.csv 파일 생성 완료!")