# Import Libraries

In [1]:
# libraries models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
import lightgbm as lgb
from sklearn.ensemble import AdaBoostClassifier


# libraries feng and evaluation
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, recall_score, precision_score, classification_report, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import xgboost as xgb
import lightgbm as lgb

# Other libraries
import optuna
import json
import src.util as utils
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


# Load Config and Data

In [2]:
config = utils.load_config()

In [3]:
def load_train_feng(params: dict) -> pd.DataFrame:
    # Load train set
    X_train = utils.pickle_load(params['train_processed_set_path'][0])
    y_train = utils.pickle_load(params['train_processed_set_path'][1])
    
    # Load train set transforming with log
    X_train_log = utils.pickle_load(params['train_processed_log_set_path'][0])
    y_train_log = utils.pickle_load(params['train_processed_log_set_path'][1])
    
    # Load train set SMOTE
    X_train_SMOTE = utils.pickle_load(params['train_processed_SMOTE_set_path'][0])
    y_train_SMOTE = utils.pickle_load(params['train_processed_SMOTE_set_path'][1])
    
    # Load train set wiht log and SMOTE
    X_train_log_SMOTE = utils.pickle_load(params['train_processed_log_SMOTE_set_path'][0])
    y_train_log_SMOTE = utils.pickle_load(params['train_processed_log_SMOTE_set_path'][1])

    return X_train, y_train, X_train_log, y_train_log, X_train_SMOTE, y_train_SMOTE, X_train_log_SMOTE, y_train_log_SMOTE

def load_valid(params: dict) -> pd.DataFrame:
    # Load valid set
    X_valid = utils.pickle_load(params['valid_processed_set_path'][0])
    y_valid = utils.pickle_load(params['valid_processed_set_path'][1])
    
    # Load valid set with transforming log
    X_valid_log = utils.pickle_load(params['valid_processed_log_set_path'][0])
    y_valid_log = utils.pickle_load(params['valid_processed_log_set_path'][1])

    return X_valid, y_valid, X_valid_log, y_valid_log

def load_test(params: dict) -> pd.DataFrame:
    # Load test set
    X_test = utils.pickle_load(params['test_processed_set_path'][0])
    y_test = utils.pickle_load(params['test_processed_set_path'][1])
    
    # Load test set with transforming log
    X_test_log = utils.pickle_load(params['test_processed_log_set_path'][0])
    y_test_log = utils.pickle_load(params['test_processed_log_set_path'][1])

    return X_test, y_test, X_test_log, y_test_log

In [4]:
# Load data train
X_train, y_train, X_train_log, y_train_log, X_train_SMOTE, y_train_SMOTE, X_train_log_SMOTE, y_train_log_SMOTE = load_train_feng(config)

# laod data valid
X_valid, y_valid, X_valid_log, y_valid_log = load_valid(config)

# Load data test
X_test, y_test, X_test_log, y_test_log = load_test(config)

In [5]:
# checpoint/sanity check
print('------------Set Train---------------')
print((X_train.shape, y_train.shape), '\n')
print((X_train_log.shape, y_train_log.shape), '\n')
print((X_train_SMOTE.shape, y_train_SMOTE.shape), '\n')
print((X_train_log_SMOTE.shape, y_train_log_SMOTE.shape), '\n')

print('------------Set Valid---------------')
print((X_valid.shape, y_valid.shape), '\n')
print((X_valid_log.shape, y_valid_log.shape), '\n')

print('------------Set Test---------------')
print((X_test.shape, y_test.shape), '\n')
print((X_test_log.shape, y_test_log.shape), '\n')

------------Set Train---------------
((700, 7), (700,)) 

((700, 7), (700,)) 

((1330, 7), (1330,)) 

((1330, 7), (1330,)) 

------------Set Valid---------------
((150, 7), (150,)) 

((150, 7), (150,)) 

------------Set Test---------------
((150, 7), (150,)) 

((150, 7), (150,)) 



For the context, the transformation log only in features (X variables). So, the naming of the log in target/label is for the sake of differentiation, the value doesn't change. Here's the proof.

In [6]:
y_train.value_counts()

label
0    665
1     35
Name: count, dtype: int64

In [7]:
y_train_log.value_counts()

label
0    665
1     35
Name: count, dtype: int64

In [8]:
y_train_log_SMOTE.value_counts()

label
0    665
1    665
Name: count, dtype: int64

# Compare Multiple Models

In [9]:
# intialize model
models = {
    'gbc': GradientBoostingClassifier(random_state=42),
    'lightgbm': lgb.LGBMClassifier(random_state=42, verbose=-1),
    'xgb': xgb.XGBClassifier(random_state=42), 
    'rf': RandomForestClassifier(random_state=42),
    'et': ExtraTreesClassifier(random_state=42),
    'dt': DecisionTreeClassifier(random_state=42),
    'knn': KNeighborsClassifier(),
    'ada': AdaBoostClassifier(random_state=42),
    'lr': LogisticRegression(random_state=42, solver='liblinear'), 
    'svm': SVC(random_state=42, probability=True),
}

# create function
def get_best_models_cv(X: pd.DataFrame, y: pd.Series, models: dict, sort_by: str, n_splits: int = 5):
    """
    Performs cross-validation for each model and returns a DataFrame of the results.

    Args:
        X (pd.DataFrame): The training features DataFrame.
        y (pd.Series): The training target Series.
        models (dict): A dictionary containing model names and model objects.
        sort_by (str): The metric to sort the results by.
        n_splits (int): The number of splits for StratifiedKFold.

    Returns:
        pd.DataFrame: A DataFrame containing the average metrics for each model, sorted.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    results = pd.DataFrame(columns=['Model', 'Accuracy', 'AUC', 'Recall', 'Prec.', 'F1'])
    
    for name, model in models.items():
        accuracy_scores = []
        auc_scores = []
        recall_scores = []
        precision_scores = []
        f1_scores = []
        
        for train_index, val_index in skf.split(X, y):
            X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[val_index]
            y_train_fold, y_val_fold = y.iloc[train_index], y.iloc[val_index]
            
            model.fit(X_train_fold, y_train_fold)
            y_pred = model.predict(X_val_fold)
            
            # to handle cases where the model fails predict_proba
            if hasattr(model, 'predict_proba'):
                try: 
                    y_proba = model.predict_proba(X_val_fold)[:, 1]
                    auc_scores.append(roc_auc_score(y_val_fold, y_proba))
                except (ValueError, AttributeError):
                    auc_scores.append(0.0) 
            else:
                auc_scores.append(0.0)

            accuracy_scores.append(accuracy_score(y_val_fold, y_pred))
            recall_scores.append(recall_score(y_val_fold, y_pred))
            precision_scores.append(precision_score(y_val_fold, y_pred))
            f1_scores.append(f1_score(y_val_fold, y_pred))
        
        results.loc[len(results)] = [
            name,
            pd.Series(accuracy_scores).mean(),
            pd.Series(auc_scores).mean(),
            pd.Series(recall_scores).mean(),
            pd.Series(precision_scores).mean(),
            pd.Series(f1_scores).mean()
        ]
        
    return results.sort_values(by=sort_by, ascending=False).reset_index(drop=True)

## Data No Transforming

1. The data named into X_train and y_train
2. No Transfomring means the `amount` features is left to follow the original data
3. The unbalanced label is not handling 

In [10]:
# try experiment with data Not Transforming
experiment_in_data_not_transforming = get_best_models_cv(
    X_train, 
    y_train,
    models,
    sort_by='F1'
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [11]:
experiment_in_data_not_transforming

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1
0,dt,0.908571,0.532331,0.114286,0.097222,0.105
1,et,0.901429,0.504619,0.057143,0.048571,0.052101
2,rf,0.917143,0.478733,0.028571,0.05,0.036364
3,xgb,0.932857,0.539527,0.028571,0.028571,0.028571
4,lightgbm,0.925714,0.602578,0.0,0.0,0.0
5,gbc,0.938571,0.574651,0.0,0.0,0.0
6,knn,0.95,0.537164,0.0,0.0,0.0
7,ada,0.95,0.52696,0.0,0.0,0.0
8,lr,0.95,0.479699,0.0,0.0,0.0
9,svm,0.95,0.435016,0.0,0.0,0.0


## Data SMOTE

In [12]:
# try experiment with data Not Transforming
experiment_in_data_smote = get_best_models_cv(
    X_train_SMOTE, 
    y_train_SMOTE,
    models,
    sort_by='Accuracy'
)

In [13]:
experiment_in_data_smote

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1
0,gbc,0.939098,0.976635,0.909774,0.967452,0.937254
1,xgb,0.93609,0.979637,0.92782,0.944148,0.935592
2,lightgbm,0.935338,0.977059,0.924812,0.945885,0.934827
3,rf,0.927068,0.977918,0.921805,0.932259,0.926731
4,et,0.927068,0.962112,0.921805,0.932467,0.926761
5,dt,0.912782,0.912782,0.912782,0.913442,0.912757
6,ada,0.893233,0.953372,0.821053,0.966667,0.885573
7,knn,0.727068,0.814342,0.881203,0.673666,0.763496
8,lr,0.567669,0.583538,0.631579,0.560404,0.593234
9,svm,0.556391,0.578439,0.639098,0.550693,0.581447


## Data Transforming Log

In [14]:
experiment_in_data_transforming = get_best_models_cv(
    X_train_log,
    y_train_log,
    models,
    sort_by='F1'
)

experiment_in_data_transforming

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1
0,dt,0.907143,0.518045,0.085714,0.079365,0.082143
1,et,0.898571,0.501396,0.057143,0.050794,0.053571
2,rf,0.91,0.496885,0.028571,0.04,0.033333
3,xgb,0.932857,0.539527,0.028571,0.028571,0.028571
4,lightgbm,0.925714,0.602578,0.0,0.0,0.0
5,gbc,0.938571,0.574221,0.0,0.0,0.0
6,knn,0.95,0.472718,0.0,0.0,0.0
7,ada,0.95,0.52696,0.0,0.0,0.0
8,lr,0.95,0.491085,0.0,0.0,0.0
9,svm,0.95,0.512997,0.0,0.0,0.0


## Data Trasnforming log and SMOTE

In [15]:
experiment_in_data_transforming_smote = get_best_models_cv(
    X_train_log_SMOTE,
    y_train_log_SMOTE,
    models,
    sort_by='Accuracy'
)

experiment_in_data_transforming_smote

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1
0,lightgbm,0.873684,0.952383,0.885714,0.864988,0.875193
1,xgb,0.87218,0.953474,0.867669,0.875613,0.871611
2,gbc,0.870677,0.953214,0.87218,0.86966,0.870688
3,rf,0.863158,0.942716,0.869173,0.858691,0.863812
4,et,0.861654,0.896314,0.866165,0.858274,0.862113
5,dt,0.847368,0.847368,0.855639,0.841654,0.848503
6,knn,0.83985,0.917542,0.897744,0.805072,0.848635
7,ada,0.743609,0.82778,0.831579,0.709621,0.764001
8,svm,0.655639,0.737928,0.806015,0.619621,0.700544
9,lr,0.591729,0.605619,0.742857,0.570968,0.64553


## Comparing The Result

1. For the experiment on the data without log transformation, the accuracy metrics appear to be good. However, accuracy is not a suitable metric for imbalanced data. A more appropriate metric, such as the F1-Score, shows very low values, only around 10%.

2. The same pattern is observed with the data where the amount feature was log-transformed. The accuracy is still high, but once again, the F1-score remains very poor. In fact, the F1-score after the log transformation is even worse compared to the data without the log transformation.

3. **In the SMOTE data experiment, the scores across all metrics seem good. However, it is important to note that these high scores are likely due to overfitting, as the evaluation was based on synthetic data created by the SMOTE algorithm.**

4. Regarding the SMOTE data, with or without log transformation, the performance is consistently better with the non-transformed data. The SMOTE-only data shows a slightly better performance compared to the SMOTE with log transformation data.

5. Therefore, further validation and evaluation will be conducted to test the model's performance on non-synthetic data (the original data that the model has not seen) to get a realistic assessment.

# Validation of The Result

Notes:
* The original data has only 3 features and 1 label. These three features are very general, describing only the merchant type, device type, and the total amount spent.
* Due to the limited data quality, the models may not be able to learn meaningful patterns effectively.
* It is highly probable that predictions are based on chance or a lack of data context. This increases the likelihood of the models overfitting and performing poorly on the validation and test sets.
* **The models we chose to use are based on the best performance, ranging from complex models like Gradient Boosting to a simpler model like Decision Tree.**
* Tree-based models are favored over linear or distance-based models due to their better empirical performance. This, however, does not change our belief that there is still a significant potential for overfitting because of the data's limited quality.

In [16]:
# define model (initliaze)
gbc_model = GradientBoostingClassifier(random_state=42)
xgb_model = xgb.XGBClassifier(random_state=42)
lgb_model = lgb.LGBMClassifier(random_state=42, verbose=-1)
dt_model = DecisionTreeClassifier(random_state=42)

In [17]:
def single_model_cv_report(model, X_train, y_train, n_splits=5):
    """
    Performs cross-validation on a single model and returns a DataFrame
    containing per-fold metrics and their average.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    results = pd.DataFrame(columns=['Fold', 'Accuracy', 'AUC', 'Recall', 'Prec.', 'F1'])
    
    accuracy_scores = []
    auc_scores = []
    recall_scores = []
    precision_scores = []
    f1_scores = []

    for fold, (train_index, val_index) in enumerate(skf.split(X_train, y_train)):
        X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
        y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]
        
        # Fit the model on each fold
        model.fit(X_train_fold, y_train_fold)
        
        # Predict on the validation fold
        y_pred = model.predict(X_val_fold)
        
        # Handle models without predict_proba if necessary, though it's assumed for AUC
        if hasattr(model, 'predict_proba'):
            try: 
                y_proba = model.predict_proba(X_val_fold)[:, 1]
                auc = roc_auc_score(y_val_fold, y_proba)
            except (ValueError, AttributeError):
                auc = 0.0
        else:
            auc = 0.0
        
        # Calculate metrics
        acc = accuracy_score(y_val_fold, y_pred)
        rec = recall_score(y_val_fold, y_pred, zero_division=0)
        prec = precision_score(y_val_fold, y_pred, zero_division=0)
        f1 = f1_score(y_val_fold, y_pred, zero_division=0)

        # Store metrics for calculating the average
        accuracy_scores.append(acc)
        auc_scores.append(auc)
        recall_scores.append(rec)
        precision_scores.append(prec)
        f1_scores.append(f1)
        
        # Store per-fold results
        results.loc[len(results)] = [f"Fold {fold}", acc, auc, rec, prec, f1]

    # Add the mean row
    results.loc[len(results)] = ['Mean', pd.Series(accuracy_scores).mean(), pd.Series(auc_scores).mean(), pd.Series(recall_scores).mean(), pd.Series(precision_scores).mean(), pd.Series(f1_scores).mean()]
    
    return results

## model gbc and xgb

In [18]:
# try to fitting in mutlitple fold
single_model_cv_report(gbc_model, X_train_SMOTE, y_train_SMOTE)

Unnamed: 0,Fold,Accuracy,AUC,Recall,Prec.,F1
0,Fold 0,0.93609,0.975748,0.894737,0.97541,0.933333
1,Fold 1,0.954887,0.992114,0.947368,0.961832,0.954545
2,Fold 2,0.932331,0.965487,0.887218,0.975207,0.929134
3,Fold 3,0.947368,0.983832,0.894737,1.0,0.944444
4,Fold 4,0.924812,0.965996,0.924812,0.924812,0.924812
5,Mean,0.939098,0.976635,0.909774,0.967452,0.937254


In [19]:
def get_final_metrics_df(model_name, y_true, y_pred, y_proba):
    """
     Calculates the evaluation metrics and returns them as a DataFrame.
    """
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    auc = roc_auc_score(y_true, y_proba)
    
    metrics = {
        'Model': [model_name],
        'Accuracy': [accuracy],
        'Recall': [recall],
        'Prec.': [precision],
        'F1': [f1],
        'AUC': [auc]
    }
    
    return pd.DataFrame(metrics)

In [20]:
# validate the result on data that the model has never seen before
# predict on set validation (X_valid)
fit_model_gbc = gbc_model.fit(X_train_SMOTE, y_train_SMOTE)

y_pred_gbc = fit_model_gbc.predict(X_valid)
y_proba_gbc = fit_model_gbc.predict_proba(X_valid)[:, 1] 

gbc_metrics_eval = get_final_metrics_df(
    'gradient boosting classifier', 
    y_valid,   
    y_pred_gbc,      
    y_proba_gbc       
)

gbc_metrics_eval

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,gradient boosting classifier,0.913333,0.0,0.0,0.0,0.371039


Based on these results, the model's accuracy appears to be quite good. However, other metrics like Recall, Precision, and F1-score show very poor performance in detecting the minority class (fraud). A score of 0.0 on these metrics indicates that the model is unable to recognize patterns to predict y = 1.

The reasons:
* The limited features available were not strong enough to find patterns that distinguish fraud.
* Model Overfitting on Synthetic Data. Trained with SMOTE data, the model failed to generalize patterns from the synthetic data to the original validation data.
* The model is overly biased towards the majority class due to not getting enough relevant information from the minority class, causing it to consistently predict y = 0.

**The model is still overfitting** because it is only able to achieve a high score on this specific metric and is only good on data training. Meanwhile, a high recall is needed to show how well the model can reduce false negatives (detect all actual fraud cases), while a high precision is needed to show how accurate the model's fraud predictions are.

In [21]:
# try to fitting in mutlitple fold (xgb)
single_model_cv_report(xgb_model, X_train_SMOTE, y_train_SMOTE)

Unnamed: 0,Fold,Accuracy,AUC,Recall,Prec.,F1
0,Fold 0,0.93985,0.982475,0.932331,0.946565,0.939394
1,Fold 1,0.93985,0.99005,0.954887,0.927007,0.940741
2,Fold 2,0.928571,0.974334,0.894737,0.959677,0.92607
3,Fold 3,0.954887,0.985075,0.932331,0.976378,0.953846
4,Fold 4,0.917293,0.96625,0.924812,0.911111,0.91791
5,Mean,0.93609,0.979637,0.92782,0.944148,0.935592


In [22]:
# validate the result on data that the model has never seen before
# predict on set validation (X_valid)
fit_model_xgb = xgb_model.fit(X_train_SMOTE, y_train_SMOTE)

y_pred_xgb = fit_model_xgb.predict(X_valid)
y_proba_xgb = fit_model_xgb.predict_proba(X_valid)[:, 1] 

xgb_metrics_eval = get_final_metrics_df(
    'XGBoost Classifier', 
    y_valid,   
    y_pred_xgb,      
    y_proba_xgb       
)

xgb_metrics_eval

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,XGBoost Classifier,0.913333,0.0,0.0,0.0,0.383803


The results will be more or less the same because both XGBoost and GradientBoost are ensemble tree techniques, with differences mainly in parameter naming and other implementation details. XGBoost is a Gradient Boosting implementation from the XGBoost library, while GradientBoost is the implementation from the scikit-learn library. The purpose of using both libraries is to find the best candidate for further tuning.

**Therefore, the main problem remains the data, not the algorithm. Garbage in garbage out**

## Model lgb & dt

In [23]:
# try to fitting in mutlitple fold
single_model_cv_report(lgb_model, X_train_SMOTE, y_train_SMOTE)

Unnamed: 0,Fold,Accuracy,AUC,Recall,Prec.,F1
0,Fold 0,0.93985,0.980044,0.909774,0.968,0.937984
1,Fold 1,0.93985,0.98892,0.954887,0.927007,0.940741
2,Fold 2,0.93985,0.964356,0.924812,0.953488,0.938931
3,Fold 3,0.951128,0.98793,0.917293,0.983871,0.949416
4,Fold 4,0.906015,0.964045,0.917293,0.897059,0.907063
5,Mean,0.935338,0.977059,0.924812,0.945885,0.934827


In [24]:
# validate the result on data that the model has never seen before
# predict on set validation (X_valid)
fit_model_lgb = lgb_model.fit(X_train_SMOTE, y_train_SMOTE)

y_pred_lgb = fit_model_lgb.predict(X_valid)
y_proba_lgb = fit_model_lgb.predict_proba(X_valid)[:, 1] 

lgb_metrics_eval = get_final_metrics_df(
    'light gradient boosting', 
    y_valid,   
    y_pred_lgb,      
    y_proba_lgb       
)

lgb_metrics_eval

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,light gradient boosting,0.886667,0.0,0.0,0.0,0.360035


In [25]:
# try to fitting in mutlitple fold (decision tree)
single_model_cv_report(dt_model, X_train_SMOTE, y_train_SMOTE)

Unnamed: 0,Fold,Accuracy,AUC,Recall,Prec.,F1
0,Fold 0,0.902256,0.902256,0.909774,0.896296,0.902985
1,Fold 1,0.921053,0.921053,0.932331,0.911765,0.921933
2,Fold 2,0.917293,0.917293,0.879699,0.95122,0.914062
3,Fold 3,0.924812,0.924812,0.93985,0.912409,0.925926
4,Fold 4,0.898496,0.898496,0.902256,0.895522,0.898876
5,Mean,0.912782,0.912782,0.912782,0.913442,0.912757


In [26]:
# validate the result on data that the model has never seen before
# predict on set validation (X_valid)
fit_model_dt = dt_model.fit(X_train_SMOTE, y_train_SMOTE)

y_pred_dt = fit_model_dt.predict(X_valid)
y_proba_dt = fit_model_dt.predict_proba(X_valid)[:, 1] 

dt_metrics_eval = get_final_metrics_df(
    'decision tree', 
    y_valid,   
    y_pred_dt,      
    y_proba_dt       
)

dt_metrics_eval

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,decision tree,0.88,0.0,0.0,0.0,0.464789


Based on these results, the Light Gradient Boosting and Decision Tree models yielded lower and not better accuracy scores compared to Gradient Boosting. **All models are overfitting**, and the next step is to attempt tuning. However, it is important to realize that tuning may not lead to significant changes, perhaps only a 1-5% improvement, because the main problem lies in the unrepresentative features that prevent the models from learning meaningful patterns. In some cases, a performance increase might not occur at all if the untuned model has already reached its "best" or peak performance.

# Tuning

The model that will be tuned is the best-performing one, which is `gradient boosting classifier` dari XGBoost.

We'll using optuna to tune the model, ref:
1. https://xgboosting.com/xgboost-hyperparameter-optimization-with-optuna/
2. https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html

## First Tuned

In [27]:
def objective(trial):
    # define hyperparameter
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 0.1, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 100.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 100.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'random_state': 42,
    }
    
    # create and train XGBoost Classifier
    xgb_classifier = xgb.XGBClassifier(**params)
    xgb_classifier.fit(X_train_SMOTE, y_train_SMOTE)
    
    # Evaluate model on validation set
    y_pred = xgb_classifier.predict(X_valid)
    f1 = f1_score(y_valid, y_pred, zero_division=0)
    
    return f1


# Optimize parameters using Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# Train final model with best hyperparameters
best_params = study.best_params
best_model = xgb.XGBClassifier(**best_params)
best_model.fit(X_train_SMOTE, y_train_SMOTE)

# Evaluate best model on test set
y_pred_test = best_model.predict(X_valid)
test_f1_score = f1_score(y_valid, y_pred_test, zero_division=0)

print(f"Best test F1-score: {test_f1_score:.4f}")
print(f"Best parameters: {best_params}")

[I 2025-07-29 20:37:14,239] A new study created in memory with name: no-name-ef66d854-ae70-4e30-b2c0-1f1ee5e09c13
[I 2025-07-29 20:37:14,352] Trial 0 finished with value: 0.0 and parameters: {'n_estimators': 228, 'learning_rate': 0.06390424682492317, 'max_depth': 2, 'subsample': 0.7091112074112309, 'colsample_bytree': 0.5717205606987503, 'gamma': 0.8450855304164736, 'reg_alpha': 1.6969912354410773e-05, 'reg_lambda': 0.021934384461797043, 'min_child_weight': 5}. Best is trial 0 with value: 0.0.
[I 2025-07-29 20:37:14,622] Trial 1 finished with value: 0.0 and parameters: {'n_estimators': 383, 'learning_rate': 0.006692039075731563, 'max_depth': 7, 'subsample': 0.8335984035520136, 'colsample_bytree': 0.7375767995756026, 'gamma': 0.7118507795136032, 'reg_alpha': 0.0002558921187864774, 'reg_lambda': 0.34562781151435096, 'min_child_weight': 9}. Best is trial 0 with value: 0.0.
[I 2025-07-29 20:37:14,767] Trial 2 finished with value: 0.0 and parameters: {'n_estimators': 240, 'learning_rate': 0

Best test F1-score: 0.0980
Best parameters: {'n_estimators': 332, 'learning_rate': 0.0009466622773768325, 'max_depth': 1, 'subsample': 0.9991616634431045, 'colsample_bytree': 0.9390961259791742, 'gamma': 0.38309728009590377, 'reg_alpha': 0.00014433845779669914, 'reg_lambda': 8.475790046311447e-05, 'min_child_weight': 8}


### Evaluation on X_test

In [31]:
# fit with best_model
model_xgb_tuned = best_model.fit(X_train_SMOTE, y_train_SMOTE)

# evaluation on set testing
y_pred_xgb_tuned = model_xgb_tuned.predict(X_test)
y_proba_xgb_tuned = model_xgb_tuned.predict_proba(X_test)[:, 1]

xgb_tuned_metrics_eval = get_final_metrics_df(
    'XGBoost Tuned',
    y_test,
    y_pred_xgb_tuned,
    y_proba_xgb_tuned
)

xgb_tuned_metrics_eval

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,XGBoost Tuned,0.46,0.714286,0.059524,0.10989,0.547952


Successfully increased the F1-score by around 10% and Recall to 71%. However, there is a trade-off in reduced accuracy, so tuning with multiple metrics will be tested.

In [28]:
# # create dictionary of result first tuned
# results = {
# "best_f1_score": test_f1_score,
# "best_parameters": best_params
# }

# # name file
# output_filepath = 'first_tuned_params.json'

# # dump into file
# with open(output_filepath, 'w') as json_file:
#     json.dump(results, json_file, indent=4)

## Second Tuned

1. Melibatkan data SMOTE dan log SMOTE pada eksperimen
2. Coba optimize dua metrik akurasi dan F1 score
3. Coba menggunakan NSGAIISampler untuk multiv-objective https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.NSGAIISampler.html

### SMOTE

In [45]:
def objective_multi_1(trial):
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 100.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 100.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'random_state': 42,
    }
    
    xgb_classifier = xgb.XGBClassifier(**params)
    xgb_classifier.fit(X_train_SMOTE, y_train_SMOTE)
    
    y_pred = xgb_classifier.predict(X_valid)
    f1 = f1_score(y_valid, y_pred, zero_division=0)
    accuracy = accuracy_score(y_valid, y_pred)
    
    return f1, accuracy

# maximize two metrics
study_multi_objective_1 = optuna.create_study(directions=['maximize', 'maximize'])
study_multi_objective_1.optimize(objective_multi_1, n_trials=200)

# save the experiment
pareto_multiple_solutions_1 = study_multi_objective_1.best_trials

print("\n--- Multi-objective Tuning ---")
print("Pareto Optimal Solutions:")
for trial in pareto_multiple_solutions_1:
    print(f"  Trial {trial.number}: F1={trial.values[0]:.4f}, Accuracy={trial.values[1]:.4f}, Params={trial.params}")

[I 2025-07-29 23:44:30,239] A new study created in memory with name: no-name-89f2eefc-af30-405e-9026-d7ec51653321
[I 2025-07-29 23:44:30,579] Trial 0 finished with values: [0.0, 0.84] and parameters: {'n_estimators': 119, 'learning_rate': 0.0004877909682366891, 'max_depth': 3, 'subsample': 0.6436401346368643, 'colsample_bytree': 0.8918469470410036, 'gamma': 0.746791388577106, 'reg_alpha': 0.0042937029568614745, 'reg_lambda': 0.11151457039657507, 'min_child_weight': 8}.
[I 2025-07-29 23:44:30,844] Trial 1 finished with values: [0.0, 0.9266666666666666] and parameters: {'n_estimators': 319, 'learning_rate': 0.0014110783282131334, 'max_depth': 5, 'subsample': 0.9639453145606405, 'colsample_bytree': 0.9879108307488977, 'gamma': 0.4597661046132322, 'reg_alpha': 0.0007908146882983003, 'reg_lambda': 0.004710883759364585, 'min_child_weight': 2}.
[I 2025-07-29 23:44:30,991] Trial 2 finished with values: [0.0, 0.94] and parameters: {'n_estimators': 153, 'learning_rate': 0.00010256551164479332, '


--- Multi-objective Tuning ---
Pareto Optimal Solutions:
  Trial 5: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 226, 'learning_rate': 0.00011567135309269058, 'max_depth': 4, 'subsample': 0.8848346245081853, 'colsample_bytree': 0.9688626485080318, 'gamma': 0.6785192122664382, 'reg_alpha': 3.57617539695936, 'reg_lambda': 2.4068530286401994e-06, 'min_child_weight': 5}
  Trial 7: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 451, 'learning_rate': 0.0385021188440168, 'max_depth': 4, 'subsample': 0.6119177816563899, 'colsample_bytree': 0.8743965869670625, 'gamma': 0.5096020934352232, 'reg_alpha': 48.86457499955536, 'reg_lambda': 0.013893210721923942, 'min_child_weight': 1}
  Trial 8: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 277, 'learning_rate': 0.009220141467878409, 'max_depth': 2, 'subsample': 0.8604652026736055, 'colsample_bytree': 0.608075970495102, 'gamma': 0.8239228211964786, 'reg_alpha': 4.2426856357665353e-08, 'reg_lambda': 89.47398746098166, 'min_child_weig

Hasil trial ke 72 paling "seimbang" dengan akurasi yang sedikit lebih baik namun F1 score juga masih ada peningkatan dari 0 ke 3%. Sementara akurasi lebih baik

```teks
Trial 72: F1=0.0488, Accuracy=0.7400, Params={'n_estimators': 366, 'learning_rate': 0.0385021188440168, 'max_depth': 1, 'subsample': 0.6119177816563899, 'colsample_bytree': 0.787692050075387, 'gamma': 0.8198001646999291, 'reg_alpha': 42.04445033447535, 'reg_lambda': 0.013893210721923942, 'min_child_weight': 7}
```

In [49]:
# check params in trial 72
if 72 < len(study_multi_objective_1.trials):
    selected_trial_72 = study_multi_objective_1.trials[72]
    
    # store best params
    best_params_trial72 = selected_trial_72.params
    best_f1_trial72 = selected_trial_72.values[0]
    best_accuracy_trial72 = selected_trial_72.values[1]
    
    # print the result
    print(f"Try to use params from Trial {selected_trial_72.number}")
    print(f'F1: {best_f1_trial72}, Accuracy: {best_accuracy_trial72}')
    print(f"Params: {best_params_trial72}")

Try to use params from Trial 72
F1: 0.04878048780487805, Accuracy: 0.74
Params: {'n_estimators': 366, 'learning_rate': 0.0385021188440168, 'max_depth': 1, 'subsample': 0.6119177816563899, 'colsample_bytree': 0.787692050075387, 'gamma': 0.8198001646999291, 'reg_alpha': 42.04445033447535, 'reg_lambda': 0.013893210721923942, 'min_child_weight': 7}


In [54]:
# evaluation on test set
model_tuned_params72 = xgb.XGBClassifier(**best_params_trial72)
model_tuned_params72.fit(X_train_SMOTE, y_train_SMOTE)

# predict on set test
y_pred_test_params72 = model_tuned_params72.predict(X_test)
y_proba_test_params72 = model_tuned_params72.predict_proba(X_test)[:, 1]

# get the mterics from tuned params trial on no.72
metrics_params72_test = get_final_metrics_df(
    f'XGBoost Tuned (Params Trial 72)',
    y_test,
    y_pred_test_params72,
    y_proba_test_params72)

# print the result
metrics_params72_test

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,XGBoost Tuned (Params Trial 72),0.873333,0.0,0.0,0.0,0.624875


Hasilnya masih overfitting juga

## SMOTE and Log Set

In [55]:
def objective_multi_2(trial):
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 100.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 100.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'random_state': 42,
    }
    
    xgb_classifier = xgb.XGBClassifier(**params)
    xgb_classifier.fit(X_train_log_SMOTE, y_train_log_SMOTE)
    
    y_pred = xgb_classifier.predict(X_valid_log)
    f1 = f1_score(y_valid_log, y_pred, zero_division=0)
    accuracy = accuracy_score(y_valid_log, y_pred)
    
    return f1, accuracy

# maximize two metrics
study_multi_objective_2 = optuna.create_study(directions=['maximize', 'maximize'])
study_multi_objective_2.optimize(objective_multi_2, n_trials=200)

# save the experiment
pareto_multiple_solutions_2 = study_multi_objective_2.best_trials

print("\n--- Multi-objective Tuning 2 ---")
print("Pareto Optimal Solutions 2:")
for trial in pareto_multiple_solutions_2:
    print(f"  Trial {trial.number}: F1={trial.values[0]:.4f}, Accuracy={trial.values[1]:.4f}, Params={trial.params}")

[I 2025-07-30 00:10:30,242] A new study created in memory with name: no-name-50c79928-9a70-45d7-ac7b-b38ebb73f7e5
[I 2025-07-30 00:10:31,056] Trial 0 finished with values: [0.0, 0.7066666666666667] and parameters: {'n_estimators': 403, 'learning_rate': 0.00011213926235389065, 'max_depth': 8, 'subsample': 0.6345241504934453, 'colsample_bytree': 0.8332782930294047, 'gamma': 0.8353396504505979, 'reg_alpha': 4.254544562255791, 'reg_lambda': 0.034593589590182064, 'min_child_weight': 7}.
[I 2025-07-30 00:10:31,483] Trial 1 finished with values: [0.0, 0.8266666666666667] and parameters: {'n_estimators': 434, 'learning_rate': 0.01068187837975927, 'max_depth': 7, 'subsample': 0.819886656533354, 'colsample_bytree': 0.5267545159673019, 'gamma': 0.21666623407686347, 'reg_alpha': 0.941895609062445, 'reg_lambda': 3.533954102467036, 'min_child_weight': 3}.
[I 2025-07-30 00:10:31,624] Trial 2 finished with values: [0.0, 0.68] and parameters: {'n_estimators': 147, 'learning_rate': 0.0001063354444850028


--- Multi-objective Tuning 2 ---
Pareto Optimal Solutions 2:
  Trial 10: F1=0.0952, Accuracy=0.4933, Params={'n_estimators': 245, 'learning_rate': 0.011365503504037245, 'max_depth': 1, 'subsample': 0.5783254900992628, 'colsample_bytree': 0.5231123578421033, 'gamma': 0.23092889605263178, 'reg_alpha': 2.707802830479951e-05, 'reg_lambda': 9.743593310683012e-07, 'min_child_weight': 3}
  Trial 23: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 80, 'learning_rate': 0.0003347971683690768, 'max_depth': 1, 'subsample': 0.8402709393448053, 'colsample_bytree': 0.8426535259399206, 'gamma': 0.21797515093478725, 'reg_alpha': 60.026032151613684, 'reg_lambda': 4.121514835856523, 'min_child_weight': 10}
  Trial 46: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 311, 'learning_rate': 0.0011541012965980415, 'max_depth': 5, 'subsample': 0.8914061678288178, 'colsample_bytree': 0.5137744012553398, 'gamma': 0.08601685236947276, 'reg_alpha': 67.76163000744212, 'reg_lambda': 1.8823302942641073e-08, 

In [56]:
# check params in trial 186
if 186 < len(study_multi_objective_2.trials):
    selected_trial_186 = study_multi_objective_2.trials[186]
    
    # store best params
    best_params_trial186 = selected_trial_186.params
    best_f1_trial186 = selected_trial_186.values[0]
    best_accuracy_trial186 = selected_trial_186.values[1]
    
    # print the result
    print(f"Try to use params from Trial {selected_trial_186.number}")
    print(f'F1: {best_f1_trial186}, Accuracy: {best_accuracy_trial186}')
    print(f"Params: {best_params_trial186}")

Try to use params from Trial 186
F1: 0.044444444444444446, Accuracy: 0.7133333333333334
Params: {'n_estimators': 373, 'learning_rate': 0.08899399543870935, 'max_depth': 1, 'subsample': 0.8994777486990299, 'colsample_bytree': 0.5291020078785107, 'gamma': 0.37509399734917503, 'reg_alpha': 0.001802473005290577, 'reg_lambda': 3.0342199997285854e-08, 'min_child_weight': 3}


In [57]:
# evaluation on test set
model_tuned_params186 = xgb.XGBClassifier(**best_params_trial186)
model_tuned_params186.fit(X_train_log_SMOTE, y_train_log_SMOTE)

# predict on set test
y_pred_test_params186 = model_tuned_params186.predict(X_test_log)
y_proba_test_params186 = model_tuned_params186.predict_proba(X_test_log)[:, 1]

# get the mterics from tuned params trial on no.186
metrics_params186_test = get_final_metrics_df(
    f'XGBoost Tuned (Params Trial 186)',
    y_test_log,
    y_pred_test_params186,
    y_proba_test_params186)

# print the result
metrics_params186_test

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,XGBoost Tuned (Params Trial 186),0.793333,0.142857,0.038462,0.060606,0.6004


## SMOTE (NSGAIISampler)

In [58]:
from optuna.samplers import NSGAIISampler

def objective_multi_3(trial):
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 100.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 100.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'random_state': 42,
    }
    
    xgb_classifier = xgb.XGBClassifier(**params)
    xgb_classifier.fit(X_train_SMOTE, y_train_SMOTE)
    
    y_pred = xgb_classifier.predict(X_valid)
    f1 = f1_score(y_valid, y_pred, zero_division=0)
    accuracy = accuracy_score(y_valid, y_pred)
    
    return f1, accuracy

study_multi_objective_nsgaii_1 = optuna.create_study(
    directions=['maximize', 'maximize'],
    sampler=NSGAIISampler(seed=42)
)
study_multi_objective_nsgaii_1.optimize(objective_multi_3, n_trials=200)

pareto_multiple_solutions_nsgaii_1 = study_multi_objective_nsgaii_1.best_trials

# get the result
print("\n--- Multi-objective Tuning with NSGAII (1) ---")
print("Pareto Optimal Solutions NSGAII 1:")
for trial in pareto_multiple_solutions_nsgaii_1:
    print(f"  Trial {trial.number}: F1={trial.values[0]:.4f}, Accuracy={trial.values[1]:.4f}, Params={trial.params}")

[I 2025-07-30 00:26:05,984] A new study created in memory with name: no-name-63a08f2d-ee52-4bbe-b5ee-147e23980077
[I 2025-07-30 00:26:06,526] Trial 0 finished with values: [0.0, 0.92] and parameters: {'n_estimators': 218, 'learning_rate': 0.13751068413926434, 'max_depth': 8, 'subsample': 0.7993292420985183, 'colsample_bytree': 0.5780093202212182, 'gamma': 0.15599452033620265, 'reg_alpha': 3.809220577048033e-08, 'reg_lambda': 4.589458612326471, 'min_child_weight': 7}.
[I 2025-07-30 00:26:07,080] Trial 1 finished with values: [0.0, 0.9333333333333333] and parameters: {'n_estimators': 369, 'learning_rate': 0.00011693648443440106, 'max_depth': 10, 'subsample': 0.9162213204002109, 'colsample_bytree': 0.6061695553391381, 'gamma': 0.18182496720710062, 'reg_alpha': 6.8240955406304e-07, 'reg_lambda': 1.1026112761509996e-05, 'min_child_weight': 6}.
[I 2025-07-30 00:26:07,286] Trial 2 finished with values: [0.0, 0.9266666666666666] and parameters: {'n_estimators': 244, 'learning_rate': 0.00091486


--- Multi-objective Tuning with NSGAII (1) ---
Pareto Optimal Solutions NSGAII 1:
  Trial 4: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 187, 'learning_rate': 0.00021009638415064324, 'max_depth': 7, 'subsample': 0.7200762468698007, 'colsample_bytree': 0.5610191174223894, 'gamma': 0.4951769101112702, 'reg_alpha': 2.2074212100007785e-08, 'reg_lambda': 12.39378679190814, 'min_child_weight': 3}
  Trial 7: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 210, 'learning_rate': 0.0008460056107201562, 'max_depth': 6, 'subsample': 0.5704621124873813, 'colsample_bytree': 0.9010984903770198, 'gamma': 0.07455064367977082, 'reg_alpha': 73.9382838287635, 'reg_lambda': 0.5277736371601186, 'min_child_weight': 2}
  Trial 14: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 418, 'learning_rate': 0.06939009363289807, 'max_depth': 1, 'subsample': 0.7553736512887829, 'colsample_bytree': 0.7087055015743895, 'gamma': 0.22210781047073025, 'reg_alpha': 1.579987581019726e-07, 'reg_lambda': 2.3776

In [59]:
# check params in trial 33 == 121
# choose the closest one
if 33 < len(study_multi_objective_nsgaii_1.trials):
    selected_trial_33 = study_multi_objective_nsgaii_1.trials[33]
    
    # store best params
    best_params_trial33 = selected_trial_33.params
    best_f1_trial33 = selected_trial_33.values[0]
    best_accuracy_trial33 = selected_trial_33.values[1]
    
    # print the result
    print(f"Try to use params from NGSAII Trial {selected_trial_33.number}")
    print(f'F1: {best_f1_trial33}, Accuracy: {best_accuracy_trial33}')
    print(f"Params: {best_params_trial33}")

Try to use params from NGSAII Trial 33
F1: 0.03636363636363636, Accuracy: 0.6466666666666666
Params: {'n_estimators': 147, 'learning_rate': 0.011380992697772686, 'max_depth': 1, 'subsample': 0.5258408605843039, 'colsample_bytree': 0.7656773157840739, 'gamma': 0.5406351216101065, 'reg_alpha': 0.023675492117025566, 'reg_lambda': 0.18235317831429848, 'min_child_weight': 10}


In [60]:
# evaluation on test set
model_tuned_ngsaii_params33 = xgb.XGBClassifier(**best_params_trial33)
model_tuned_ngsaii_params33.fit(X_train_SMOTE, y_train_SMOTE)

# predict on set test
y_pred_test_params33 = model_tuned_ngsaii_params33.predict(X_test)
y_proba_test_params33 = model_tuned_ngsaii_params33.predict_proba(X_test)[:, 1]

# get the mterics from tuned params trial on no.186
metrics_params33_test = get_final_metrics_df(
    f'XGBoost Tuned NGSAII (Params Trial 33)',
    y_test,
    y_pred_test_params33,
    y_proba_test_params33)

# print the result
metrics_params33_test

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,XGBoost Tuned NGSAII (Params Trial 33),0.793333,0.0,0.0,0.0,0.529471


## SMOTE and Log Set (NSGAII)

In [61]:
# try in set log and SMOTE
def objective_multi_4(trial):
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 100.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 100.0, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'random_state': 42,
    }
    
    xgb_classifier = xgb.XGBClassifier(**params)
    xgb_classifier.fit(X_train_log_SMOTE, y_train_log_SMOTE)
    
    y_pred = xgb_classifier.predict(X_valid_log)
    f1 = f1_score(y_valid_log, y_pred, zero_division=0)
    accuracy = accuracy_score(y_valid_log, y_pred)
    
    return f1, accuracy

study_multi_objective_nsgaii_2 = optuna.create_study(
    directions=['maximize', 'maximize'],
    sampler=NSGAIISampler(seed=42)
)
study_multi_objective_nsgaii_2.optimize(objective_multi_4, n_trials=200)

pareto_multiple_solutions_nsgaii_2 = study_multi_objective_nsgaii_2.best_trials

# get the result
print("\n--- Multi-objective Tuning (Log and SMOTE) with NSGAII 2---")
print("Pareto Optimal Solutions 2 :")
for trial in pareto_multiple_solutions_nsgaii_2:
    print(f"  Trial {trial.number}: F1={trial.values[0]:.4f}, Accuracy={trial.values[1]:.4f}, Params={trial.params}")

[I 2025-07-30 00:38:09,690] A new study created in memory with name: no-name-86cdfed8-3ab8-4028-9ea7-be60e8709634
[I 2025-07-30 00:38:10,173] Trial 0 finished with values: [0.0, 0.84] and parameters: {'n_estimators': 218, 'learning_rate': 0.13751068413926434, 'max_depth': 8, 'subsample': 0.7993292420985183, 'colsample_bytree': 0.5780093202212182, 'gamma': 0.15599452033620265, 'reg_alpha': 3.809220577048033e-08, 'reg_lambda': 4.589458612326471, 'min_child_weight': 7}.
[I 2025-07-30 00:38:10,527] Trial 1 finished with values: [0.0, 0.8533333333333334] and parameters: {'n_estimators': 369, 'learning_rate': 0.00011693648443440106, 'max_depth': 10, 'subsample': 0.9162213204002109, 'colsample_bytree': 0.6061695553391381, 'gamma': 0.18182496720710062, 'reg_alpha': 6.8240955406304e-07, 'reg_lambda': 1.1026112761509996e-05, 'min_child_weight': 6}.
[I 2025-07-30 00:38:10,760] Trial 2 finished with values: [0.0, 0.7866666666666666] and parameters: {'n_estimators': 244, 'learning_rate': 0.00091486


--- Multi-objective Tuning (Log and SMOTE) with NSGAII 2---
Pareto Optimal Solutions 2 :
  Trial 7: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 210, 'learning_rate': 0.0008460056107201562, 'max_depth': 6, 'subsample': 0.5704621124873813, 'colsample_bytree': 0.9010984903770198, 'gamma': 0.07455064367977082, 'reg_alpha': 73.9382838287635, 'reg_lambda': 0.5277736371601186, 'min_child_weight': 2}
  Trial 104: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 210, 'learning_rate': 0.18009897850557288, 'max_depth': 4, 'subsample': 0.8490808570098726, 'colsample_bytree': 0.9010984903770198, 'gamma': 0.696029796674973, 'reg_alpha': 73.9382838287635, 'reg_lambda': 0.5277736371601186, 'min_child_weight': 7}
  Trial 153: F1=0.0000, Accuracy=0.9467, Params={'n_estimators': 418, 'learning_rate': 0.0008460056107201562, 'max_depth': 1, 'subsample': 0.5704621124873813, 'colsample_bytree': 0.9010984903770198, 'gamma': 0.07455064367977082, 'reg_alpha': 73.9382838287635, 'reg_lambda': 0.527773

In [62]:
# check params in trial 178
if 178 < len(study_multi_objective_nsgaii_2.trials):
    selected_trial_178 = study_multi_objective_nsgaii_2.trials[178]
    
    # store best params
    best_params_trial178 = selected_trial_178.params
    best_f1_trial178 = selected_trial_178.values[0]
    best_accuracy_trial178 = selected_trial_178.values[1]
    
    # print the result
    print(f"Try to use params from NGSAII 2 Trial {selected_trial_178.number}")
    print(f'F1: {best_f1_trial178}, Accuracy: {best_accuracy_trial178}')
    print(f"Params: {best_params_trial178}")

Try to use params from NGSAII 2 Trial 178
F1: 0.05405405405405406, Accuracy: 0.7666666666666667
Params: {'n_estimators': 438, 'learning_rate': 0.1362686835004624, 'max_depth': 1, 'subsample': 0.6236740508715988, 'colsample_bytree': 0.5363815031820968, 'gamma': 0.8218600592903562, 'reg_alpha': 1.864683494051024e-07, 'reg_lambda': 0.001668908784195831, 'min_child_weight': 1}


In [63]:
# evaluation on test set
model_tuned_ngsaii_params178 = xgb.XGBClassifier(**best_params_trial178)
model_tuned_ngsaii_params178.fit(X_train_log_SMOTE, y_train_log_SMOTE)

# predict on set test
y_pred_test_params178 = model_tuned_ngsaii_params178.predict(X_test_log)
y_proba_test_params178 = model_tuned_ngsaii_params178.predict_proba(X_test_log)[:, 1]

# get the mterics from tuned params trial on no.186
metrics_params178_test = get_final_metrics_df(
    f'XGBoost Tuned NGSAII (Params Trial 178)',
    y_test_log,
    y_pred_test_params178,
    y_proba_test_params178)

# print the result
metrics_params178_test

Unnamed: 0,Model,Accuracy,Recall,Prec.,F1,AUC
0,XGBoost Tuned NGSAII (Params Trial 178),0.86,0.0,0.0,0.0,0.565934


# Save Model