<a href="https://colab.research.google.com/github/NvdSuni/Thesis-code-complete/blob/main/Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; every dataset and model path used in
# this notebook lives underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

!pip install optuna
!pip install imbalanced-learn

import optuna
from imblearn.over_sampling import SMOTE
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import joblib
from sklearn.linear_model import LogisticRegression

In [None]:
import imblearn

# The imbalanced-learn version is kept in its own module-level variable.
imblearn_version = imblearn.__version__

# Collect one "<package>: <version>" line per library, then emit them together.
version_lines = [
    f"optuna: {optuna.__version__}",
    f"imbalanced-learn: {imblearn_version}",
    f"numpy: {np.__version__}",
    f"matplotlib: {plt.matplotlib.__version__}",
    f"joblib: {joblib.__version__}",
]
print("\n".join(version_lines))

In [None]:
# Read the combined train/validation features and labels from Drive,
# unpacking the four arrays in the same order as the paths are listed.
X_train_combined, X_val_combined, y_train_combined, y_val_combined = (
    np.load(npy_path)
    for npy_path in (
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_train_combined.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_val_combined.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_train_combined.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_val_combined.npy",
    )
)

In [None]:
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)

# Logistic Regression untuned

In [None]:
# Baseline: logistic regression with fixed settings (no hyperparameter search),
# fitted on the combined training set.
model = LogisticRegression(multi_class='ovr', random_state=42, max_iter=1000)
model.fit(X_train_combined, y_train_combined)

# Hard labels feed accuracy and the per-class report; class probabilities feed ROC-AUC.
y_pred = model.predict(X_val_combined)
val_probabilities = model.predict_proba(X_val_combined)

roc_auc = roc_auc_score(y_val_combined, val_probabilities, multi_class='ovr')
classification_report_str = classification_report(y_val_combined, y_pred)
accuracy = accuracy_score(y_val_combined, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_report_str)
print(f'ROC-AUC: {roc_auc}')

# Persist the fitted baseline model to Drive.
joblib.dump(model, "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression.joblib")

# Tuned model

In [None]:
def objective(trial):
    """Score one Optuna trial of logistic regression on the combined data.

    Draws a hyperparameter set from ``trial``, fits a ``LogisticRegression`` on
    the module-level ``X_train_combined`` / ``y_train_combined`` arrays and
    evaluates it on ``X_val_combined`` / ``y_val_combined``.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    # Suggestions are drawn in a fixed order; random_state is pinned, not tuned.
    inverse_reg = trial.suggest_float('C', 0.01, 10.0)
    penalty_kind = trial.suggest_categorical('penalty', ['l2'])
    iteration_cap = trial.suggest_int('max_iter', 100, 1000)
    solver_name = trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    weighting = trial.suggest_categorical('class_weight', [None, 'balanced'])
    tolerance = trial.suggest_float('tol', 1e-5, 1e-1)

    candidate = LogisticRegression(
        random_state=42,
        C=inverse_reg,
        penalty=penalty_kind,
        max_iter=iteration_cap,
        solver=solver_name,
        fit_intercept=use_intercept,
        class_weight=weighting,
        tol=tolerance,
    )
    candidate.fit(X_train_combined, y_train_combined)

    val_probabilities = candidate.predict_proba(X_val_combined)
    return roc_auc_score(y_val_combined, val_probabilities, multi_class='ovr', average='macro')


# Run 200 trials of `objective`, keeping the parameters with the highest
# validation ROC-AUC. The sampler is seeded so that re-running the cell
# explores the same sequence of trials.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=200)

best_params = study.best_params

# `study.best_params` holds only the values suggested through `trial`, so the
# random_state=42 that every trial used must be passed again here; without it
# the refit would be unseeded and could differ from the best trial for solvers
# that shuffle the data.
best_logistic_regression_model = LogisticRegression(random_state=42, **best_params)
best_logistic_regression_model.fit(X_train_combined, y_train_combined)

y_pred_logreg_tuned = best_logistic_regression_model.predict_proba(X_val_combined)

roc_auc_logreg_tuned = roc_auc_score(y_val_combined, y_pred_logreg_tuned, multi_class='ovr', average='macro')
print("Tuned Logistic Regression ROC-AUC:", roc_auc_logreg_tuned)

# The column index of the highest probability is used as the predicted label;
# this assumes the class labels are 0..K-1 in column order — TODO confirm.
report_logreg_tuned = classification_report(y_val_combined, y_pred_logreg_tuned.argmax(axis=1))
print(report_logreg_tuned)

# Persist the refitted best model to Drive.
joblib.dump(best_logistic_regression_model, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_tuned.joblib')


# Tuned + Class imbalance treated

In [None]:
# Oversample the combined training set with SMOTE (fixed seed); only the
# training split is resampled here.
smote = SMOTE(random_state=42)
resampled_combined = smote.fit_resample(X_train_combined, y_train_combined)
X_train_smote, y_train_smote = resampled_combined

In [None]:
def objective(trial):
    """Score one Optuna trial of logistic regression on the SMOTE-resampled combined data.

    Draws a hyperparameter set from ``trial``, fits a ``LogisticRegression`` on
    the module-level ``X_train_smote`` / ``y_train_smote`` arrays and evaluates
    it on the untouched ``X_val_combined`` / ``y_val_combined`` split.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    # Suggestions are drawn in a fixed order; random_state is pinned, not tuned.
    inverse_reg = trial.suggest_float('C', 0.01, 10.0)
    penalty_kind = trial.suggest_categorical('penalty', ['l2'])
    iteration_cap = trial.suggest_int('max_iter', 100, 1000)
    solver_name = trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    weighting = trial.suggest_categorical('class_weight', [None, 'balanced'])
    tolerance = trial.suggest_float('tol', 1e-5, 1e-1)

    candidate = LogisticRegression(
        random_state=42,
        C=inverse_reg,
        penalty=penalty_kind,
        max_iter=iteration_cap,
        solver=solver_name,
        fit_intercept=use_intercept,
        class_weight=weighting,
        tol=tolerance,
    )
    candidate.fit(X_train_smote, y_train_smote)

    val_probabilities = candidate.predict_proba(X_val_combined)
    return roc_auc_score(y_val_combined, val_probabilities, multi_class='ovr', average='macro')

# Run 200 trials of `objective` on the SMOTE-resampled training data, keeping
# the parameters with the highest validation ROC-AUC. The sampler is seeded so
# that re-running the cell explores the same sequence of trials.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=200)

best_params = study.best_params

# `study.best_params` holds only the values suggested through `trial`, so the
# random_state=42 that every trial used must be passed again here; without it
# the refit would be unseeded and could differ from the best trial for solvers
# that shuffle the data.
best_logistic_regression_model_smote = LogisticRegression(random_state=42, **best_params)
best_logistic_regression_model_smote.fit(X_train_smote, y_train_smote)

y_pred_logreg_tuned_smote = best_logistic_regression_model_smote.predict_proba(X_val_combined)

roc_auc_logreg_tuned_smote = roc_auc_score(y_val_combined, y_pred_logreg_tuned_smote, multi_class='ovr', average='macro')
print("Tuned Logistic Regression ROC-AUC:", roc_auc_logreg_tuned_smote)

# The column index of the highest probability is used as the predicted label;
# this assumes the class labels are 0..K-1 in column order — TODO confirm.
report_logreg_tuned_smote = classification_report(y_val_combined, y_pred_logreg_tuned_smote.argmax(axis=1))
print(report_logreg_tuned_smote)

# Persist the refitted best model to Drive.
joblib.dump(best_logistic_regression_model_smote, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_tuned_smote.joblib')


# Xray specific

In [None]:
# Read the X-ray train/validation features and labels from Drive, unpacking
# the four arrays in the same order as the paths are listed.
X_train_Xray, X_val_Xray, y_train_Xray, y_val_Xray = (
    np.load(npy_path)
    for npy_path in (
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_train_Xray_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_val_Xray_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/train_labels_complete_Xray.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/val_labels_complete_Xray.npy",
    )
)

In [None]:
# Collapse every label row to the index of its largest entry
# (presumably one-hot rows -> integer class ids; confirm against the label files).
y_val_1d = y_val_Xray.argmax(axis=1)
y_train_1d = y_train_Xray.argmax(axis=1)

# Shuffle features and labels together, with a fixed seed, so rows stay paired.
X_val_shuffled_Xray, y_val_shuffled_Xray = shuffle(X_val_Xray, y_val_1d, random_state=42)
X_train_shuffled_Xray, y_train_shuffled_Xray = shuffle(X_train_Xray, y_train_1d, random_state=42)

# Untuned Xray

In [None]:
# Baseline X-ray model: logistic regression with fixed settings (no search).
model = LogisticRegression(max_iter=1000, random_state = 42, multi_class='ovr')

# Fit on the *shuffled* features: `y_train_shuffled_Xray` was permuted together
# with `X_train_shuffled_Xray`, so pairing it with the unshuffled `X_train_Xray`
# would train on misaligned feature/label rows.
model.fit(X_train_shuffled_Xray, y_train_shuffled_Xray)

y_pred = model.predict(X_val_shuffled_Xray)

accuracy = accuracy_score(y_val_shuffled_Xray, y_pred)
classification_report_str = classification_report(y_val_shuffled_Xray, y_pred)
roc_auc = roc_auc_score(y_val_shuffled_Xray, model.predict_proba(X_val_shuffled_Xray), multi_class='ovr')

print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_report_str)
print(f'ROC-AUC: {roc_auc}')

# Persist the fitted baseline model to Drive.
joblib.dump(model, "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_Xray.joblib")

# Tuned Xray

In [None]:
def objective(trial):
    """Score one Optuna trial of logistic regression on the X-ray data.

    Draws a hyperparameter set from ``trial``, fits a ``LogisticRegression`` on
    the module-level ``X_train_shuffled_Xray`` / ``y_train_shuffled_Xray``
    arrays and evaluates it on ``X_val_shuffled_Xray`` / ``y_val_shuffled_Xray``.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    # Suggestions are drawn in a fixed order; random_state is pinned, not tuned.
    inverse_reg = trial.suggest_float('C', 0.01, 10.0)
    penalty_kind = trial.suggest_categorical('penalty', ['l2'])
    iteration_cap = trial.suggest_int('max_iter', 100, 1000)
    solver_name = trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    weighting = trial.suggest_categorical('class_weight', [None, 'balanced'])
    tolerance = trial.suggest_float('tol', 1e-5, 1e-1)

    candidate = LogisticRegression(
        random_state=42,
        C=inverse_reg,
        penalty=penalty_kind,
        max_iter=iteration_cap,
        solver=solver_name,
        fit_intercept=use_intercept,
        class_weight=weighting,
        tol=tolerance,
    )
    candidate.fit(X_train_shuffled_Xray, y_train_shuffled_Xray)

    val_probabilities = candidate.predict_proba(X_val_shuffled_Xray)
    return roc_auc_score(y_val_shuffled_Xray, val_probabilities, multi_class='ovr', average='macro')

# Run 200 trials of `objective` on the X-ray data, keeping the parameters with
# the highest validation ROC-AUC. The sampler is seeded so that re-running the
# cell explores the same sequence of trials.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=200)

best_params = study.best_params

# `study.best_params` holds only the values suggested through `trial`, so the
# random_state=42 that every trial used must be passed again here; without it
# the refit would be unseeded and could differ from the best trial for solvers
# that shuffle the data.
best_logistic_regression_model_Xray = LogisticRegression(random_state=42, **best_params)
best_logistic_regression_model_Xray.fit(X_train_shuffled_Xray, y_train_shuffled_Xray)

y_pred_logreg_tuned_Xray = best_logistic_regression_model_Xray.predict_proba(X_val_shuffled_Xray)

roc_auc_logreg_tuned_Xray = roc_auc_score(y_val_shuffled_Xray, y_pred_logreg_tuned_Xray, multi_class='ovr', average='macro')
print("Tuned Logistic Regression ROC-AUC:", roc_auc_logreg_tuned_Xray)

# The column index of the highest probability is used as the predicted label;
# this matches the argmax-derived integer labels only if the probability
# columns are ordered 0..K-1 — TODO confirm.
report_logreg_tuned_Xray = classification_report(y_val_shuffled_Xray, y_pred_logreg_tuned_Xray.argmax(axis=1))
print(report_logreg_tuned_Xray)

# Persist the refitted best model to Drive.
joblib.dump(best_logistic_regression_model_Xray, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_tuned_Xray.joblib')


# Tuned + Class imbalance treated Xray

In [None]:
# Oversample the shuffled X-ray training set with SMOTE (fixed seed); only the
# training split is resampled here.
smote = SMOTE(random_state=42)
resampled_Xray = smote.fit_resample(X_train_shuffled_Xray, y_train_shuffled_Xray)
X_train_smote_Xray, y_train_smote_Xray = resampled_Xray


In [None]:
def objective(trial):
    """Score one Optuna trial of logistic regression on the SMOTE-resampled X-ray data.

    Draws a hyperparameter set from ``trial``, fits a ``LogisticRegression`` on
    the module-level ``X_train_smote_Xray`` / ``y_train_smote_Xray`` arrays and
    evaluates it on the untouched ``X_val_shuffled_Xray`` /
    ``y_val_shuffled_Xray`` split.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Macro-averaged one-vs-rest ROC-AUC on the validation split.
    """
    # Suggestions are drawn in a fixed order; random_state is pinned, not tuned.
    inverse_reg = trial.suggest_float('C', 0.01, 10.0)
    penalty_kind = trial.suggest_categorical('penalty', ['l2'])
    iteration_cap = trial.suggest_int('max_iter', 100, 1000)
    solver_name = trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    weighting = trial.suggest_categorical('class_weight', [None, 'balanced'])
    tolerance = trial.suggest_float('tol', 1e-5, 1e-1)

    candidate = LogisticRegression(
        random_state=42,
        C=inverse_reg,
        penalty=penalty_kind,
        max_iter=iteration_cap,
        solver=solver_name,
        fit_intercept=use_intercept,
        class_weight=weighting,
        tol=tolerance,
    )
    candidate.fit(X_train_smote_Xray, y_train_smote_Xray)

    val_probabilities = candidate.predict_proba(X_val_shuffled_Xray)
    return roc_auc_score(y_val_shuffled_Xray, val_probabilities, multi_class='ovr', average='macro')

# Run 200 trials of `objective` on the SMOTE-resampled X-ray data, keeping the
# parameters with the highest validation ROC-AUC. The sampler is seeded so that
# re-running the cell explores the same sequence of trials.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=200)

best_params = study.best_params

# `study.best_params` holds only the values suggested through `trial`, so the
# random_state=42 that every trial used must be passed again here; without it
# the refit would be unseeded and could differ from the best trial for solvers
# that shuffle the data.
best_logistic_regression_model_smote_Xray = LogisticRegression(random_state=42, **best_params)
best_logistic_regression_model_smote_Xray.fit(X_train_smote_Xray, y_train_smote_Xray)

y_pred_logreg_tuned_smote_Xray = best_logistic_regression_model_smote_Xray.predict_proba(X_val_shuffled_Xray)

# Score the SMOTE model's own predictions and store the result under its own
# name, so the metrics describe this model and the non-SMOTE ROC-AUC variable
# is left untouched.
roc_auc_logreg_tuned_smote_Xray = roc_auc_score(y_val_shuffled_Xray, y_pred_logreg_tuned_smote_Xray, multi_class='ovr', average='macro')
print("Tuned Logistic Regression ROC-AUC:", roc_auc_logreg_tuned_smote_Xray)

# The column index of the highest probability is used as the predicted label;
# this matches the argmax-derived integer labels only if the probability
# columns are ordered 0..K-1 — TODO confirm.
report_logreg_tuned_smote_Xray = classification_report(y_val_shuffled_Xray, y_pred_logreg_tuned_smote_Xray.argmax(axis=1))
print(report_logreg_tuned_smote_Xray)

# Persist the refitted best model to Drive.
joblib.dump(best_logistic_regression_model_smote_Xray, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_tuned_smote_Xray.joblib')


# MRI Specific

In [None]:
# Read the MRNet train/validation features and labels from Drive, unpacking
# the four arrays in the same order as the paths are listed.
X_train_MRNet, X_val_MRNet, y_train_MRNet, y_val_MRNet = (
    np.load(npy_path)
    for npy_path in (
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_train_MRNet_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/X_val_MRNet_reduced.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_train_MRNet.npy",
        "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/y_val_MRNet.npy",
    )
)

In [None]:
# Baseline MRI model: logistic regression with fixed settings (no search),
# fitted on the MRNet training set.
model = LogisticRegression(multi_class='ovr', random_state=42, max_iter=1000)
model.fit(X_train_MRNet, y_train_MRNet)

# Hard labels feed accuracy and the report; ROC-AUC uses the second
# probability column only (presumably the positive class of a binary task —
# confirm against the labels).
y_pred = model.predict(X_val_MRNet)
second_column_scores = model.predict_proba(X_val_MRNet)[:, 1]

roc_auc = roc_auc_score(y_val_MRNet, second_column_scores, multi_class='ovr')
classification_report_str = classification_report(y_val_MRNet, y_pred)
accuracy = accuracy_score(y_val_MRNet, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_report_str)
print(f'ROC-AUC: {roc_auc}')

# Persist the fitted baseline model to Drive.
joblib.dump(model, "/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_MRI.joblib")

# Tuned MRI

In [None]:
def objective(trial):
    """Score one Optuna trial of logistic regression on the MRNet data.

    Draws a hyperparameter set from ``trial``, fits a ``LogisticRegression`` on
    the module-level ``X_train_MRNet`` / ``y_train_MRNet`` arrays and evaluates
    it on ``X_val_MRNet`` / ``y_val_MRNet``.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        ROC-AUC on the validation split, computed from the second probability
        column only (presumably the positive class of a binary task — confirm).
    """
    # Suggestions are drawn in a fixed order; random_state is pinned, not tuned.
    inverse_reg = trial.suggest_float('C', 0.01, 10.0)
    penalty_kind = trial.suggest_categorical('penalty', ['l2'])
    iteration_cap = trial.suggest_int('max_iter', 100, 1000)
    solver_name = trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    weighting = trial.suggest_categorical('class_weight', [None, 'balanced'])
    tolerance = trial.suggest_float('tol', 1e-5, 1e-1)

    candidate = LogisticRegression(
        random_state=42,
        C=inverse_reg,
        penalty=penalty_kind,
        max_iter=iteration_cap,
        solver=solver_name,
        fit_intercept=use_intercept,
        class_weight=weighting,
        tol=tolerance,
    )
    candidate.fit(X_train_MRNet, y_train_MRNet)

    val_probabilities = candidate.predict_proba(X_val_MRNet)
    return roc_auc_score(y_val_MRNet, val_probabilities[:, 1], multi_class='ovr', average='macro')

# Run 200 trials of `objective` on the MRNet data, keeping the parameters with
# the highest validation ROC-AUC. The sampler is seeded so that re-running the
# cell explores the same sequence of trials.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=200)

best_params = study.best_params

# `study.best_params` holds only the values suggested through `trial`, so the
# random_state=42 that every trial used must be passed again here; without it
# the refit would be unseeded and could differ from the best trial for solvers
# that shuffle the data.
best_logistic_regression_model_MRNet = LogisticRegression(random_state=42, **best_params)
best_logistic_regression_model_MRNet.fit(X_train_MRNet, y_train_MRNet)

y_pred_logreg_tuned_MRNet = best_logistic_regression_model_MRNet.predict_proba(X_val_MRNet)

# ROC-AUC is computed from the second probability column only.
roc_auc_logreg_tuned_MRNet = roc_auc_score(y_val_MRNet, y_pred_logreg_tuned_MRNet[:, 1], multi_class='ovr', average='macro')
print("Tuned Logistic Regression ROC-AUC:", roc_auc_logreg_tuned_MRNet)

# The column index of the highest probability is used as the predicted label;
# this assumes the class labels are 0..K-1 in column order — TODO confirm.
report_logreg_tuned_MRNet = classification_report(y_val_MRNet, y_pred_logreg_tuned_MRNet.argmax(axis=1))
print(report_logreg_tuned_MRNet)

# Persist the refitted best model to Drive.
joblib.dump(best_logistic_regression_model_MRNet, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_tuned_MRI.joblib')


# Tuned + Class imbalance treated MRI

In [None]:
# Oversample the MRNet training set with SMOTE (fixed seed); only the training
# split is resampled here.
smote = SMOTE(random_state=42)
resampled_MRNet = smote.fit_resample(X_train_MRNet, y_train_MRNet)
X_train_smote_MRNet, y_train_smote_MRNet = resampled_MRNet


In [None]:
def objective(trial):
    """Score one Optuna trial of logistic regression on the SMOTE-resampled MRNet data.

    Draws a hyperparameter set from ``trial``, fits a ``LogisticRegression`` on
    the module-level ``X_train_smote_MRNet`` / ``y_train_smote_MRNet`` arrays
    and evaluates it on the untouched ``X_val_MRNet`` / ``y_val_MRNet`` split.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        ROC-AUC on the validation split, computed from the second probability
        column only (presumably the positive class of a binary task — confirm).
    """
    # Suggestions are drawn in a fixed order; random_state is pinned, not tuned.
    inverse_reg = trial.suggest_float('C', 0.01, 10.0)
    penalty_kind = trial.suggest_categorical('penalty', ['l2'])
    iteration_cap = trial.suggest_int('max_iter', 100, 1000)
    solver_name = trial.suggest_categorical('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
    use_intercept = trial.suggest_categorical('fit_intercept', [True, False])
    weighting = trial.suggest_categorical('class_weight', [None, 'balanced'])
    tolerance = trial.suggest_float('tol', 1e-5, 1e-1)

    candidate = LogisticRegression(
        random_state=42,
        C=inverse_reg,
        penalty=penalty_kind,
        max_iter=iteration_cap,
        solver=solver_name,
        fit_intercept=use_intercept,
        class_weight=weighting,
        tol=tolerance,
    )
    candidate.fit(X_train_smote_MRNet, y_train_smote_MRNet)

    val_probabilities = candidate.predict_proba(X_val_MRNet)
    return roc_auc_score(y_val_MRNet, val_probabilities[:, 1], multi_class='ovr', average='macro')
# Run 200 trials of `objective` on the SMOTE-resampled MRNet data, keeping the
# parameters with the highest validation ROC-AUC. The sampler is seeded so that
# re-running the cell explores the same sequence of trials.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=200)

best_params = study.best_params

# `study.best_params` holds only the values suggested through `trial`, so the
# random_state=42 that every trial used must be passed again here; without it
# the refit would be unseeded and could differ from the best trial for solvers
# that shuffle the data.
best_logistic_regression_model_smote_MRNet = LogisticRegression(random_state=42, **best_params)
best_logistic_regression_model_smote_MRNet.fit(X_train_smote_MRNet, y_train_smote_MRNet)

y_pred_logreg_tuned_smote_MRNet = best_logistic_regression_model_smote_MRNet.predict_proba(X_val_MRNet)

# Score the SMOTE model's own predictions, so the metrics describe this model.
# ROC-AUC is computed from the second probability column only.
roc_auc_logreg_tuned_smote_MRNet = roc_auc_score(y_val_MRNet, y_pred_logreg_tuned_smote_MRNet[:, 1], multi_class='ovr', average='macro')
print("Tuned Logistic Regression ROC-AUC:", roc_auc_logreg_tuned_smote_MRNet)

# The column index of the highest probability is used as the predicted label;
# this assumes the class labels are 0..K-1 in column order — TODO confirm.
report_logreg_tuned_smote_MRNet = classification_report(y_val_MRNet, y_pred_logreg_tuned_smote_MRNet.argmax(axis=1))
print(report_logreg_tuned_smote_MRNet)

# Persist the refitted best model to Drive.
joblib.dump(best_logistic_regression_model_smote_MRNet, '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/Models/Logistic Regression/logistic_regression_tuned_smote_MRI.joblib')