In [None]:
import numpy as np
import pandas as pd
import random

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    log_loss,
    roc_auc_score
)

from gwo import GreyWolfOptimizer

In [None]:
# Read the pre-split train+validation file and the held-out test file.
trainValData = pd.read_csv("../dataset/train_val.data")
testData = pd.read_csv("../dataset/test.data")

# Separate the feature columns from the "Diagnosis" target for both sets.
X_trainVal, y_trainVal = trainValData.drop("Diagnosis", axis=1), trainValData["Diagnosis"]
X_test, y_test = testData.drop("Diagnosis", axis=1), testData["Diagnosis"]

# Hold out 20% of train+val as a validation set, stratified on the target.
# The validation part is what the hyperparameter search scores against.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainVal, y_trainVal, test_size=0.2, random_state=1, stratify=y_trainVal
)

In [None]:
# Baseline: a decision tree with fixed, hand-picked hyperparameters.
baselineModel = DecisionTreeClassifier(
    max_depth=5, min_samples_split=2, min_samples_leaf=1, random_state=1
)

# Train on the full train+validation data and predict the held-out test set.
# `fit` returns the estimator itself, so the call can be chained.
y_pred_baseline = baselineModel.fit(X_trainVal, y_trainVal).predict(X_test)

# Test-set metrics, keyed by the column names used in the comparison table.
baselineResults = {
    metricName: metricFn(y_test, y_pred_baseline)
    for metricName, metricFn in (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1", f1_score),
    )
}

baselineResults

In [None]:
def decisionTreeObjectiveFunction(position):
    """Score one candidate hyperparameter position on the validation split.

    The first four entries of ``position`` are truncated with ``int`` and
    used, in order, as ``max_depth``, ``min_samples_split``,
    ``min_samples_leaf`` and ``max_features`` of a ``DecisionTreeClassifier``.
    The tree is fitted on the module-level ``X_train`` / ``y_train`` and
    evaluated on ``X_val`` / ``y_val``.

    Args:
        position: Indexable sequence with at least four numeric entries.

    Returns:
        Validation log loss plus ``0.1 * (1 - ROC AUC)``, both computed from
        the predicted probability of the class in column 1.
    """
    # Truncate each coordinate to an integer hyperparameter value.
    depth, splitSize, leafSize, featureCount = (int(position[i]) for i in range(4))

    tree = DecisionTreeClassifier(
        max_depth=depth,
        min_samples_split=splitSize,
        min_samples_leaf=leafSize,
        max_features=featureCount,
        random_state=1,
    ).fit(X_train, y_train)

    # Probability of the second class (column index 1) for each validation row.
    positiveProba = tree.predict_proba(X_val)[:, 1]

    validationLoss = log_loss(y_val, positiveProba)
    aucPenalty = (1 - roc_auc_score(y_val, positiveProba)) * 0.1
    return validationLoss + aucPenalty

In [None]:
# Seed the global random generators so the stochastic search gives the same
# best position on every Restart & Run All; the value matches the
# random_state=1 used elsewhere in this notebook.
# NOTE(review): GreyWolfOptimizer's source of randomness is not visible from
# this notebook. This assumes it draws from the stdlib `random` module and/or
# NumPy's legacy global state; confirm against the gwo module (if it accepts a
# seed argument, pass it there instead).
random.seed(1)
np.random.seed(1)

# Search space, one entry per hyperparameter, in the order the objective
# function reads them from the position vector.
gwo = GreyWolfOptimizer(
    objectiveFunction=decisionTreeObjectiveFunction,
    lowerBound=[2, 2, 1, 1],  # max_depth, min_samples_split, min_samples_leaf, max_features
    upperBound=[20, 10, 5, X_train.shape[1]],  # max_features is capped at the number of feature columns
    dimension=4
)

gwoResult = gwo.optimize()
bestParams = gwoResult["bestPosition"]

# Report the best position truncated to integers, the same way the objective
# function interprets it.
print("\n=== BEST PARAMETER FROM GWO ===")
print("max_depth        :", int(bestParams[0]))
print("min_samples_split:", int(bestParams[1]))
print("min_samples_leaf :", int(bestParams[2]))
print("max_features     :", int(bestParams[3]))

In [None]:
# Map the optimizer's best position onto named tree hyperparameters,
# truncating each coordinate to an integer.
bestParamsDict = {
    paramName: int(bestParams[slot])
    for slot, paramName in enumerate(
        ("max_depth", "min_samples_split", "min_samples_leaf", "max_features")
    )
}

# Build the tuned tree from those hyperparameters.
finalModel = DecisionTreeClassifier(**bestParamsDict, random_state=1)

# Retrain on the full train+validation data and predict the held-out test set.
# `fit` returns the estimator itself, so the call can be chained.
y_pred_gwo = finalModel.fit(X_trainVal, y_trainVal).predict(X_test)

# Test-set metrics, using the same keys as the baseline results.
gwoResults = {
    metricName: metricFn(y_test, y_pred_gwo)
    for metricName, metricFn in (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1", f1_score),
    )
}

gwoResults

In [None]:
# Side-by-side test-set metrics: one row per model, one column per metric.
comparison = pd.DataFrame(
    data=[baselineResults, gwoResults],  # row order matches the labels below
    index=["Baseline DT", "DT + AGWO"],
)

comparison