<a href="https://colab.research.google.com/github/NvdSuni/Thesis-code-complete/blob/main/XGboost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

!pip install optuna
!pip install imbalanced-learn

import optuna
from imblearn.over_sampling import SMOTE
import xgboost as xgb
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import joblib

In [None]:
from google.colab import drive
from imblearn.over_sampling import SMOTE
import optuna
import xgboost as xgb
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn import __version__ as sklearn_version
import joblib


import imblearn
imblearn_version = imblearn.__version__

# Report the version of every library the notebook relies on, one per line,
# so a run can be tied to a concrete environment.
version_lines = [
    f"optuna: {optuna.__version__}",
    f"imbalanced-learn: {imblearn_version}",
    f"xgboost: {xgb.__version__}",
    f"numpy: {np.__version__}",
    f"matplotlib: {plt.matplotlib.__version__}",
    f"scikit-learn: {sklearn_version}",
    f"joblib: {joblib.__version__}",
]
print("\n".join(version_lines))


In [None]:
# Load the combined feature matrices and label arrays (train / validation) from Google Drive.
# The files are read in the same order as the names on the left-hand side.
X_train_combined, X_val_combined, y_train_combined, y_val_combined = (
    np.load(npy_path)
    for npy_path in (
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_train_combined.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_val_combined.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_train_combined.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_val_combined.npy",
    )
)

# Untrained Model

In [None]:
# Baseline: XGBoost on the combined data with no tuned hyperparameters
# (only objective, class count, metric and seed are set).
xgb_classifier = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=7,
    eval_metric='mlogloss',
    random_state=42,
)
xgb_classifier.fit(X_train_combined, y_train_combined)

# Class probabilities on the validation split; the predicted label is the most probable class.
y_proba_xgb = xgb_classifier.predict_proba(X_val_combined)
y_pred_xgb = y_proba_xgb.argmax(axis=1)

# One-vs-rest ROC-AUC is computed from the probabilities.
roc_auc_xgb = roc_auc_score(y_val_combined, y_proba_xgb, multi_class='ovr')
print("XGBoost ROC-AUC:", roc_auc_xgb)

# Per-class precision / recall / F1 is computed from the hard labels.
report_xgb = classification_report(y_val_combined, y_pred_xgb)
print(report_xgb)

# Persist the fitted baseline model to Drive.
joblib.dump(
    xgb_classifier,
    '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost.joblib',
)

# Tuned Model

In [None]:
def objective(trial):
    """Optuna objective for XGBoost on the combined dataset.

    Draws one hyperparameter configuration from ``trial``, fits an
    ``XGBClassifier`` on ``X_train_combined`` / ``y_train_combined`` with early
    stopping monitored on the validation split, and scores the fitted model
    on that same validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameter values.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    # Settings that stay the same in every trial.
    fixed_settings = {
        'objective': 'multi:softmax',
        'num_class': 7,
        'random_state': 42,
        'early_stopping_rounds': 15,
    }
    # Search space explored by the study.
    sampled_settings = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'alpha': trial.suggest_float('alpha', 0.0, 1.0),
        'lambda': trial.suggest_float('lambda', 0.0, 1.0),
    }

    model = xgb.XGBClassifier(**fixed_settings, **sampled_settings)
    model.fit(
        X_train_combined,
        y_train_combined,
        eval_set=[(X_val_combined, y_val_combined)],
    )

    val_probabilities = model.predict_proba(X_val_combined)
    return roc_auc_score(
        y_val_combined,
        val_probabilities,
        multi_class='ovr',
        average='macro',
    )

# Search for the hyperparameters that maximise the validation ROC-AUC returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# `best_params` only holds the values drawn through `trial.suggest_*`.
best_params = study.best_params

# Refit the winning configuration. The settings that `objective` keeps fixed
# (objective, seed, early stopping) are not part of `best_params`, so they are
# passed again here; otherwise the saved model would be trained differently
# from the trial that was actually scored (different seed, no early stopping).
best_xgb_model = xgb.XGBClassifier(
    **best_params,
    objective='multi:softmax',
    random_state=42,
    early_stopping_rounds=15,
)
best_xgb_model.fit(
    X_train_combined,
    y_train_combined,
    eval_set=[(X_val_combined, y_val_combined)],  # required for early stopping
    verbose=False,  # keep the refit as quiet as it was before the eval_set was added
)

# Hard label predictions of the tuned model on the validation split.
y_pred_xgb_tuned = best_xgb_model.predict(X_val_combined)

accuracy_xgb_tuned = accuracy_score(y_val_combined, y_pred_xgb_tuned)
print("Tuned XGBoost Accuracy:", accuracy_xgb_tuned)

report_xgb_tuned = classification_report(y_val_combined, y_pred_xgb_tuned)
print(report_xgb_tuned)

# Persist the tuned model to Drive.
joblib.dump(best_xgb_model, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_tuned.joblib')

# Class imbalance treated + Tuned

In [None]:
# Oversample the combined training split with SMOTE (fixed seed).
# Only the training data is resampled; the validation split is left as loaded.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(
    X_train_combined,
    y_train_combined,
)

In [None]:
def objective(trial):
    """Optuna objective for XGBoost trained on the SMOTE-resampled combined data.

    Draws one hyperparameter configuration from ``trial``, fits an
    ``XGBClassifier`` on ``X_resampled`` / ``y_resampled`` with early stopping
    monitored on the (non-resampled) validation split, and scores the model
    on that validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameter values.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    # Settings that stay the same in every trial.
    fixed_settings = {
        'objective': 'multi:softmax',
        'num_class': 7,
        'random_state': 42,
        'early_stopping_rounds': 15,
    }
    # Search space explored by the study.
    sampled_settings = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'alpha': trial.suggest_float('alpha', 0.0, 1.0),
        'lambda': trial.suggest_float('lambda', 0.0, 1.0),
    }

    model = xgb.XGBClassifier(**fixed_settings, **sampled_settings)
    model.fit(
        X_resampled,
        y_resampled,
        eval_set=[(X_val_combined, y_val_combined)],
    )

    val_probabilities = model.predict_proba(X_val_combined)
    return roc_auc_score(
        y_val_combined,
        val_probabilities,
        multi_class='ovr',
        average='macro',
    )

# Run a fresh Optuna study for the model trained on the SMOTE-resampled data.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# `best_params` only holds the values drawn through `trial.suggest_*`.
best_params = study.best_params

# Refit the winning configuration. The settings that `objective` keeps fixed
# (objective, seed, early stopping) are not part of `best_params`, so they are
# passed again here to train the saved model the same way as the scored trial.
best_xgb_model_smote = xgb.XGBClassifier(
    **best_params,
    objective='multi:softmax',
    random_state=42,
    early_stopping_rounds=15,
)
best_xgb_model_smote.fit(
    X_resampled,
    y_resampled,
    eval_set=[(X_val_combined, y_val_combined)],  # required for early stopping
    verbose=False,  # keep the refit as quiet as it was before the eval_set was added
)

y_pred_xgb_tuned_smote = best_xgb_model_smote.predict(X_val_combined)

# Evaluate the SMOTE model's own predictions. The previous version scored
# `y_pred_xgb_tuned`, i.e. the predictions of the model trained without SMOTE,
# so the reported numbers did not belong to this model.
accuracy_xgb_tuned_smote = accuracy_score(y_val_combined, y_pred_xgb_tuned_smote)
print("Tuned XGBoost Accuracy:", accuracy_xgb_tuned_smote)

report_xgb_tuned_smote = classification_report(y_val_combined, y_pred_xgb_tuned_smote)
print(report_xgb_tuned_smote)
joblib.dump(best_xgb_model_smote, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_tuned_smote.joblib')

# X-ray Specific

In [None]:
# Load the X-ray feature matrices and label arrays (train / validation) from Google Drive.
# The files are read in the same order as the names on the left-hand side.
X_train_Xray, X_val_Xray, y_train_Xray, y_val_Xray = (
    np.load(npy_path)
    for npy_path in (
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_train_Xray_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_val_Xray_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/train_labels_complete_Xray.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/val_labels_complete_Xray.npy",
    )
)

In [None]:
# Collapse the label arrays to one class index per row (arg-max over axis 1).
# Presumably the labels are stored one-hot encoded — TODO confirm.
y_train_1d = y_train_Xray.argmax(axis=1)
y_val_1d = y_val_Xray.argmax(axis=1)

In [None]:
# Shuffle features and labels in unison with a fixed seed; this is applied to the
# training split and, in the same way, to the validation split.
X_train_shuffled_Xray, y_train_shuffled_Xray = shuffle(
    X_train_Xray,
    y_train_1d,
    random_state=42,
)
X_val_shuffled_Xray, y_val_shuffled_Xray = shuffle(
    X_val_Xray,
    y_val_1d,
    random_state=42,
)

# XGBoost: X-ray Untuned

In [None]:
# Baseline X-ray model: no tuned hyperparameters, 5-class objective.
xgb_classifier_Xray = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=5,
    random_state=42,
    eval_metric='mlogloss',
)

# The validation split is passed as eval_set so the per-round mlogloss is logged.
xgb_classifier_Xray.fit(
    X_train_shuffled_Xray,
    y_train_shuffled_Xray,
    eval_set=[(X_val_shuffled_Xray, y_val_shuffled_Xray)],
    verbose=True,
)

y_pred_xgb = xgb_classifier_Xray.predict(X_val_shuffled_Xray)

accuracy_xgb_Xray = accuracy_score(y_val_shuffled_Xray, y_pred_xgb)
print("XGBoost Accuracy:", accuracy_xgb_Xray)

# One-vs-rest ROC-AUC needs class probabilities rather than hard labels.
val_proba_Xray = xgb_classifier_Xray.predict_proba(X_val_shuffled_Xray)
roc_auc_Xray = roc_auc_score(y_val_shuffled_Xray, val_proba_Xray, multi_class='ovr')
print("XGBoost ROC-AUC:", roc_auc_Xray)

report_xgb_Xray = classification_report(y_val_shuffled_Xray, y_pred_xgb)
print(report_xgb_Xray)

# Persist the fitted baseline model to Drive.
joblib.dump(
    xgb_classifier_Xray,
    '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_Xray.joblib',
)

# X-ray specific: Tuned model

In [None]:
def objective(trial):
    """Optuna objective for XGBoost on the X-ray dataset.

    Draws one hyperparameter configuration from ``trial``, fits an
    ``XGBClassifier`` on the shuffled X-ray training split with early stopping
    monitored on the shuffled validation split, and scores the fitted model on
    that validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameter values.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    # Settings that stay the same in every trial.
    fixed_settings = {
        'objective': 'multi:softmax',
        'num_class': 5,
        'random_state': 42,
        'early_stopping_rounds': 15,
    }
    # Search space explored by the study.
    sampled_settings = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'alpha': trial.suggest_float('alpha', 0.0, 1.0),
        'lambda': trial.suggest_float('lambda', 0.0, 1.0),
    }

    model = xgb.XGBClassifier(**fixed_settings, **sampled_settings)
    model.fit(
        X_train_shuffled_Xray,
        y_train_shuffled_Xray,
        eval_set=[(X_val_shuffled_Xray, y_val_shuffled_Xray)],
        verbose=True,
    )

    val_probabilities = model.predict_proba(X_val_shuffled_Xray)
    return roc_auc_score(
        y_val_shuffled_Xray,
        val_probabilities,
        multi_class='ovr',
        average='macro',
    )

# Search for the hyperparameters that maximise the validation ROC-AUC returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# `best_params` only holds the values drawn through `trial.suggest_*`.
best_params = study.best_params
print(f"Best Hyperparameters: {best_params}")

# Refit the winning configuration. `eval_metric` is given to the constructor:
# passing it to `fit()` is deprecated and rejected by recent XGBoost releases.
# The settings that `objective` keeps fixed (objective, seed, early stopping)
# are not part of `best_params`, so they are passed again here to train the
# saved model the same way as the scored trial.
best_xgb_model_Xray = xgb.XGBClassifier(
    **best_params,
    objective='multi:softmax',
    random_state=42,
    early_stopping_rounds=15,
    eval_metric='mlogloss',
)
best_xgb_model_Xray.fit(
    X_train_shuffled_Xray,
    y_train_shuffled_Xray,
    eval_set=[(X_val_shuffled_Xray, y_val_shuffled_Xray)],  # required for early stopping
    verbose=False,  # keep the refit as quiet as it was before the eval_set was added
)

# Class probabilities on the validation split; the predicted label is the most probable class.
y_pred_proba_tuned_Xray = best_xgb_model_Xray.predict_proba(X_val_shuffled_Xray)

y_pred_tuned_Xray = np.argmax(y_pred_proba_tuned_Xray, axis=1)


roc_auc_tuned_Xray = roc_auc_score(y_val_shuffled_Xray, y_pred_proba_tuned_Xray, multi_class='ovr', average='macro')
print("Tuned XGBoost ROC-AUC:", roc_auc_tuned_Xray)

report_xgb_tuned_Xray = classification_report(y_val_shuffled_Xray, y_pred_tuned_Xray)
print(report_xgb_tuned_Xray)

# Save the tuned model. The previous version dumped `xgb_classifier_Xray`,
# the untuned baseline, under the tuned model's file name.
joblib.dump(best_xgb_model_Xray, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_tuned_Xray.joblib')

# X-ray specific: Class imbalance treated and Tuned


In [None]:
# Oversample the shuffled X-ray training split with SMOTE (fixed seed).
# Only the training data is resampled; the validation split is left untouched.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(
    X_train_shuffled_Xray,
    y_train_shuffled_Xray,
)

In [None]:
def objective(trial):
    """Optuna objective for XGBoost trained on the SMOTE-resampled X-ray data.

    Draws one hyperparameter configuration from ``trial``, fits an
    ``XGBClassifier`` on ``X_train_smote`` / ``y_train_smote`` with early
    stopping monitored on the shuffled X-ray validation split, and scores the
    fitted model on that validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameter values.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    params = {
        'objective': 'multi:softmax',
        # The X-ray task is configured with 5 classes everywhere else in this
        # notebook; the 7 used here before was carried over from the combined task.
        # NOTE(review): the scikit-learn wrapper appears to derive the class count
        # from the labels anyway, so this mainly keeps the configuration truthful.
        'num_class': 5,
        'random_state': 42,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'alpha': trial.suggest_float('alpha', 0.0, 1.0),
        'lambda': trial.suggest_float('lambda', 0.0, 1.0),
        'early_stopping_rounds': 15
    }

    xgb_classifier_Xray_tuned_smote = xgb.XGBClassifier(**params)

    # Early stopping is monitored on the original (non-resampled) validation split.
    evals = [(X_val_shuffled_Xray, y_val_shuffled_Xray)]
    xgb_classifier_Xray_tuned_smote.fit(
        X_train_smote,
        y_train_smote,
        eval_set=evals,
        verbose=True
    )

    y_proba = xgb_classifier_Xray_tuned_smote.predict_proba(X_val_shuffled_Xray)
    # average='macro' is the default; it is spelled out to match the other objectives.
    roc_auc = roc_auc_score(y_val_shuffled_Xray, y_proba, multi_class='ovr', average='macro')

    return roc_auc


# Run a fresh Optuna study for the X-ray model trained on the SMOTE-resampled data.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)


# `best_params` only holds the values drawn through `trial.suggest_*`.
best_params = study.best_params
print(f"Best Hyperparameters: {best_params}")


# Refit the winning configuration. `eval_metric` is given to the constructor:
# passing it to `fit()` is deprecated and rejected by recent XGBoost releases.
# The settings that `objective` keeps fixed (objective, seed, early stopping)
# are not part of `best_params`, so they are passed again here to train the
# saved model the same way as the scored trial.
best_xgb_classifier_Xray_tuned_smote = xgb.XGBClassifier(
    **best_params,
    objective='multi:softmax',
    random_state=42,
    early_stopping_rounds=15,
    eval_metric='mlogloss',
)
best_xgb_classifier_Xray_tuned_smote.fit(
    X_train_smote,
    y_train_smote,
    eval_set=[(X_val_shuffled_Xray, y_val_shuffled_Xray)],
    verbose=False,
)


y_pred_xgb = best_xgb_classifier_Xray_tuned_smote.predict(X_val_shuffled_Xray)
accuracy_xgb_Xray_tuned_smote = accuracy_score(y_val_shuffled_Xray, y_pred_xgb)
print("XGBoost Accuracy:", accuracy_xgb_Xray_tuned_smote)


# One-vs-rest ROC-AUC needs class probabilities rather than hard labels.
roc_auc_final = roc_auc_score(y_val_shuffled_Xray, best_xgb_classifier_Xray_tuned_smote.predict_proba(X_val_shuffled_Xray), multi_class='ovr')
print("XGBoost ROC-AUC:", roc_auc_final)


report_xgb_final_Xray_tuned_smote = classification_report(y_val_shuffled_Xray, y_pred_xgb)
print(report_xgb_final_Xray_tuned_smote)

# Persist the tuned SMOTE model to Drive.
joblib.dump(best_xgb_classifier_Xray_tuned_smote, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_tuned_smote_Xray.joblib')

# MRI Specific

In [None]:
# Load the MRNet feature matrices and label arrays (train / validation) from Google Drive.
# The files are read in the same order as the names on the left-hand side.
X_train_MRNet, X_val_MRNet, y_train_MRNet, y_val_MRNet = (
    np.load(npy_path)
    for npy_path in (
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_train_MRNet_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_val_MRNet_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_train_MRNet.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_val_MRNet.npy",
    )
)

In [None]:
# Baseline binary classifier for the MRNet features, with no tuned hyperparameters.
xgb_classifier_MRI = xgb.XGBClassifier(
    objective='binary:logistic',
    random_state=42,
    eval_metric='logloss',
)

# The validation split is passed as eval_set so the per-round logloss is logged.
xgb_classifier_MRI.fit(
    X_train_MRNet,
    y_train_MRNet,
    eval_set=[(X_val_MRNet, y_val_MRNet)],
    verbose=True,
)

y_pred_xgb_MRNet = xgb_classifier_MRI.predict(X_val_MRNet)

accuracy_xgb_MRNet = accuracy_score(y_val_MRNet, y_pred_xgb_MRNet)
print("XGBoost Accuracy:", accuracy_xgb_MRNet)

# For the binary task ROC-AUC is computed from the second probability column (class 1).
positive_class_proba = xgb_classifier_MRI.predict_proba(X_val_MRNet)[:, 1]
roc_auc = roc_auc_score(y_val_MRNet, positive_class_proba)
print("XGBoost ROC-AUC:", roc_auc)

report_xgb_MRNet = classification_report(y_val_MRNet, y_pred_xgb_MRNet)
print(report_xgb_MRNet)
joblib.dump(
    xgb_classifier_MRI,
    "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_MRI.joblib",
)

# MRI Specific: Tuned model

In [None]:
def objective(trial):
    """Optuna objective for the binary XGBoost model on the MRNet dataset.

    Draws one hyperparameter configuration from ``trial``, fits an
    ``XGBClassifier`` on ``X_train_MRNet`` / ``y_train_MRNet`` with early
    stopping monitored on the validation split, and scores the fitted model on
    that validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameter values.

    Returns:
        ROC-AUC of the class-1 probabilities on the validation split.
    """
    # Settings that stay the same in every trial.
    fixed_settings = {
        'objective': 'binary:logistic',
        'random_state': 42,
        'early_stopping_rounds': 15,
    }
    # Search space explored by the study.
    sampled_settings = {
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'alpha': trial.suggest_float('alpha', 0.0, 1.0),
        'lambda': trial.suggest_float('lambda', 0.0, 1.0),
    }

    model = xgb.XGBClassifier(
        **fixed_settings,
        **sampled_settings,
        eval_metric='logloss',
    )
    model.fit(
        X_train_MRNet,
        y_train_MRNet,
        eval_set=[(X_val_MRNet, y_val_MRNet)],
        verbose=True,
    )

    # Second probability column = class 1.
    positive_proba = model.predict_proba(X_val_MRNet)[:, 1]
    return roc_auc_score(y_val_MRNet, positive_proba)


# Search for the hyperparameters that maximise the validation ROC-AUC returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

# `best_params` only holds the values drawn through `trial.suggest_*`.
best_params = study.best_params


# Refit the winning configuration. The objective and the seed that `objective`
# keeps fixed are not part of `best_params`; they are passed again so the refit
# uses the same seed as the trial that was scored (the seed matters because
# `subsample` / `colsample_bytree` may be below 1).
best_xgb_model_MRNet = xgb.XGBClassifier(
    **best_params,
    objective='binary:logistic',
    random_state=42,
    early_stopping_rounds=15,
    eval_metric='logloss',
)
best_xgb_model_MRNet.fit(X_train_MRNet, y_train_MRNet, eval_set=[(X_val_MRNet, y_val_MRNet)], verbose=True)


# Second probability column = class 1.
y_pred_proba_tuned_MRNet = best_xgb_model_MRNet.predict_proba(X_val_MRNet)[:, 1]


# Hard labels are obtained by rounding the class-1 probability.
y_pred_tuned_MRNet = np.round(y_pred_proba_tuned_MRNet)

roc_auc_tuned_MRNet = roc_auc_score(y_val_MRNet, y_pred_proba_tuned_MRNet)
print("Tuned XGBoost ROC-AUC:", roc_auc_tuned_MRNet)

report_xgb_tuned_MRNet = classification_report(y_val_MRNet, y_pred_tuned_MRNet)
print(report_xgb_tuned_MRNet)

# Persist the tuned model to Drive.
joblib.dump(best_xgb_model_MRNet, "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_tuned_MRI.joblib")

# MRI Specific: Class imbalance treated and Tuned

In [None]:
# Oversample the MRNet training split with SMOTE (fixed seed).
# Only the training data is resampled; the validation split is left as loaded.
smote = SMOTE(random_state=42)
X_train_smote_MRNet, y_train_smote_MRNet = smote.fit_resample(
    X_train_MRNet,
    y_train_MRNet,
)
# Show the shapes of the resampled features and labels.
print(X_train_smote_MRNet.shape, y_train_smote_MRNet.shape)

In [None]:
def objective(trial):
    """Optuna objective for the binary XGBoost model on SMOTE-resampled MRNet data.

    Draws one hyperparameter configuration from ``trial``, fits an
    ``XGBClassifier`` on ``X_train_smote_MRNet`` / ``y_train_smote_MRNet`` with
    early stopping monitored on the (non-resampled) validation split, and
    scores the fitted model on that validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameter values.

    Returns:
        ROC-AUC of the class-1 probabilities on the validation split.
    """
    # Settings that stay the same in every trial.
    fixed_settings = {
        'objective': 'binary:logistic',
        'random_state': 42,
        'early_stopping_rounds': 15,
    }
    # Search space explored by the study.
    sampled_settings = {
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'gamma': trial.suggest_float('gamma', 0.0, 1.0),
        'alpha': trial.suggest_float('alpha', 0.0, 1.0),
        'lambda': trial.suggest_float('lambda', 0.0, 1.0),
    }

    model = xgb.XGBClassifier(**fixed_settings, **sampled_settings)
    model.fit(
        X_train_smote_MRNet,
        y_train_smote_MRNet,
        eval_set=[(X_val_MRNet, y_val_MRNet)],
        verbose=True,
    )

    # Second probability column = class 1.
    positive_proba = model.predict_proba(X_val_MRNet)[:, 1]
    return roc_auc_score(y_val_MRNet, positive_proba)


# Run a fresh Optuna study for the MRNet model trained on the SMOTE-resampled data.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)


# `best_params` only holds the values drawn through `trial.suggest_*`.
best_params = study.best_params

# Refit the winning configuration. The objective and the seed that `objective`
# keeps fixed are not part of `best_params`; they are passed again so the refit
# uses the same seed as the trial that was scored (the seed matters because
# `subsample` / `colsample_bytree` may be below 1).
best_xgb_model_MRNet_smote = xgb.XGBClassifier(
    **best_params,
    objective='binary:logistic',
    random_state=42,
    early_stopping_rounds=15,
    eval_metric='logloss',
)

best_xgb_model_MRNet_smote.fit(X_train_smote_MRNet, y_train_smote_MRNet, eval_set=[(X_val_MRNet, y_val_MRNet)], verbose=True)


# Second probability column = class 1.
y_pred_proba_tuned_MRNet_smote = best_xgb_model_MRNet_smote.predict_proba(X_val_MRNet)[:, 1]


# Hard labels are obtained by rounding the class-1 probability.
y_pred_tuned_MRNet_smote = np.round(y_pred_proba_tuned_MRNet_smote)


# Stored under a `_smote` name so the score of the non-SMOTE tuned model
# (`roc_auc_tuned_MRNet`) is no longer overwritten by this cell.
roc_auc_tuned_MRNet_smote = roc_auc_score(y_val_MRNet, y_pred_proba_tuned_MRNet_smote)
print("Tuned XGBoost ROC-AUC:", roc_auc_tuned_MRNet_smote)

report_xgb_tuned_MRNet_smote = classification_report(y_val_MRNet, y_pred_tuned_MRNet_smote)
print(report_xgb_tuned_MRNet_smote)

# Persist the tuned SMOTE model to Drive.
joblib.dump(best_xgb_model_MRNet_smote, "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/XGBoost/XGBoost_tuned_smote_MRI.joblib")