# Credit Risk Resampling Techniques

In [1]:
import warnings
# Suppress every warning for the rest of the session so the cell outputs stay clean.
# NOTE(review): this also hides any deprecation or solver warnings the libraries
# below may raise — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

In [3]:
# Additional libraries/functions/models that will be used:

# train_test_split will be used to split the data
from sklearn.model_selection import train_test_split 

# StandardScaler will be used for the Data Pre-processing section
from sklearn.preprocessing import StandardScaler    

from imblearn.metrics import classification_report_imbalanced


# Read the CSV into DataFrame

In [4]:
# Read the lending dataset; the path is resolved relative to the working directory
file_path = Path('Resources/lending_data.csv')
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows
df.head()

Unnamed: 0,loan_size,interest_rate,homeowner,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,loan_status
0,10700.0,7.672,own,52800,0.431818,5,1,22800,low_risk
1,8400.0,6.692,own,43600,0.311927,3,0,13600,low_risk
2,9000.0,6.963,rent,46100,0.349241,3,0,16100,low_risk
3,10700.0,7.664,own,52700,0.43074,5,1,22700,low_risk
4,10800.0,7.698,mortgage,53000,0.433962,5,1,23000,low_risk


In [5]:
# The homeowner column holds text categories, so it must be turned into numbers
# before it can be fed to the models.
# pd.get_dummies() one-hot encodes it: one 0/1 indicator column is created per
# category (homeowner_mortgage, homeowner_own, homeowner_rent) and the original
# text column is removed.
# dtype=int keeps the indicators as integers on every pandas version; pandas >= 2.0
# returns booleans by default, which describe() would leave out of a numeric summary.
# NOTE: this cell overwrites `df`. Re-running it without re-running the load cell
# fails because the `homeowner` column no longer exists.

df = pd.get_dummies(df, columns=["homeowner"], dtype=int)
df.head()

Unnamed: 0,loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,loan_status,homeowner_mortgage,homeowner_own,homeowner_rent
0,10700.0,7.672,52800,0.431818,5,1,22800,low_risk,0,1,0
1,8400.0,6.692,43600,0.311927,3,0,13600,low_risk,0,1,0
2,9000.0,6.963,46100,0.349241,3,0,16100,low_risk,0,0,1
3,10700.0,7.664,52700,0.43074,5,1,22700,low_risk,0,1,0
4,10800.0,7.698,53000,0.433962,5,1,23000,low_risk,1,0,0


# Split the Data into Training and Testing

In [6]:
# Every column except the target is a feature.
# The name is compared with != on purpose: `in ("loan_status")` has no trailing
# comma, so it is a plain string and `in` would perform a substring match, silently
# dropping any column whose name happens to be a fragment of "loan_status".
x_cols = [col for col in df.columns if col != "loan_status"]

# Create our features
X = df[x_cols]

# Create our target
y = df["loan_status"]



In [7]:
# Summary statistics (count, mean, std, quartiles) of the feature columns
X.describe()

Unnamed: 0,loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,homeowner_mortgage,homeowner_own,homeowner_rent
count,77536.0,77536.0,77536.0,77536.0,77536.0,77536.0,77536.0,77536.0,77536.0,77536.0
mean,9805.562577,7.292333,49221.949804,0.377318,3.82661,0.392308,19221.949804,0.497472,0.398911,0.103616
std,2093.223153,0.889495,8371.635077,0.081519,1.904426,0.582086,8371.635077,0.499997,0.489678,0.304764
min,5000.0,5.25,30000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,8700.0,6.825,44800.0,0.330357,3.0,0.0,14800.0,0.0,0.0,0.0
50%,9500.0,7.172,48100.0,0.376299,4.0,0.0,18100.0,0.0,0.0,0.0
75%,10400.0,7.528,51400.0,0.416342,4.0,1.0,21400.0,1.0,1.0,0.0
max,23800.0,13.235,105200.0,0.714829,16.0,3.0,75200.0,1.0,1.0,1.0


In [8]:
# Check the balance of our target values: number of rows per loan_status class
y.value_counts()

low_risk     75036
high_risk     2500
Name: loan_status, dtype: int64

In [9]:
# Create X_train, X_test, y_train, y_test
# random_state=1 makes the shuffle reproducible. No test_size is passed, so
# scikit-learn's default split proportions are used.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


In [10]:
# Class counts in the training split
Counter(y_train)

Counter({'low_risk': 56271, 'high_risk': 1881})

In [11]:
# Class counts in the testing split
Counter(y_test)

Counter({'low_risk': 18765, 'high_risk': 619})

## Data Pre-Processing

Scale the training and testing data using the `StandardScaler` from `sklearn`. Remember that when scaling the data, you only scale the feature data (`X_train` and `X_test`).

In [12]:
# Create the StandardScaler instance; it holds no statistics until it is fitted
scaler = StandardScaler()

In [13]:
# Fit the Standard Scaler with the training data
# When fitting scaling functions, only train on the training dataset so that no
# information from the test split influences the scaling parameters.
# scaler.fit returns the fitted scaler, so X_scaler refers to the same object as scaler.

X_scaler = scaler.fit(X_train)

In [14]:
# Scale the training and testing data with the statistics learned from X_train
# NOTE(review): the resamplers and models in the rest of this notebook are fitted on
# the unscaled X_train / X_test; X_train_scaled and X_test_scaled are not referenced
# by any later cell visible here — confirm whether that is intended.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Simple Logistic Regression

In [15]:
from sklearn.linear_model import LogisticRegression

# Baseline: logistic regression fitted on the original (not resampled) training split
model = LogisticRegression(
    solver='lbfgs',
    max_iter=2000,
    random_state=1,
)
model.fit(X=X_train, y=y_train)

LogisticRegression(max_iter=2000, random_state=1)

In [16]:
from sklearn.metrics import balanced_accuracy_score

# Predict the test split with the baseline model, then compute balanced accuracy
y_pred = model.predict(X_test)
balanced_acc_score = balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the score
print(f"Balanced Accuracy Score Simple Logistic Regression : {balanced_acc_score}")

Balanced Accuracy Score Simple Logistic Regression : 0.9520479254722232


In [17]:
# Confusion matrix
from sklearn.metrics import confusion_matrix

# Pin the class order explicitly so that position 0 is always high_risk and
# position 1 is always low_risk, instead of relying on confusion_matrix's implicit
# sorted-label ordering.
cm = confusion_matrix(y_test, y_pred, labels=["high_risk", "low_risk"])
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Displaying results
print("Confusion Matrix Simple Logistic Regression")
display(cm_df)
# Row/column "0" is high_risk and "1" is low_risk (the order given in `labels` above).

Confusion Matrix Simple Logistic Regression


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,563,56
Actual 1,102,18663


In [18]:
# Build the imbalanced classification report for the baseline predictions
imb_classification_report_simple_log_reg = classification_report_imbalanced(
    y_true=y_test, y_pred=y_pred
)

# Show the report under its title
print("Imbalanced Classification Report Simple Logistic Regression Model")
print(imb_classification_report_simple_log_reg)

Imbalanced Classification Report Simple Logistic Regression Model
                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.85      0.91      0.99      0.88      0.95      0.90       619
   low_risk       1.00      0.99      0.91      1.00      0.95      0.91     18765

avg / total       0.99      0.99      0.91      0.99      0.95      0.91     19384



# Oversampling

In this section, you will compare two oversampling algorithms to determine which algorithm results in the best performance. You will oversample the data using the naive random oversampling algorithm and the SMOTE algorithm. For each algorithm, be sure to complete the following steps:

1. View the count of the target classes using `Counter` from the collections library.
2. Use the resampled data to train a logistic regression model.
3. Calculate the balanced accuracy score from sklearn.metrics.
4. Print the confusion matrix from sklearn.metrics.
5. Generate a classification report using `classification_report_imbalanced` from imbalanced-learn.

Note: Use a random state of 1 for each sampling algorithm to ensure consistency between tests

### Naive Random Oversampling

In [19]:
# Import RandomOverSampler from imblearn library
from imblearn.over_sampling import RandomOverSampler

# Seeded sampler so the resampled training set is reproducible
ros = RandomOverSampler(random_state=1)

# Resample the training split only; the test split is left untouched
X_resampled, y_resampled = ros.fit_resample(X=X_train, y=y_train)

# View the count of target classes with Counter
Counter(y_resampled)

Counter({'low_risk': 56271, 'high_risk': 56271})

In [20]:
# Fit a fresh logistic regression on the randomly oversampled training data
model = LogisticRegression(
    solver='lbfgs',
    max_iter=2000,
    random_state=1,
)
model.fit(X=X_resampled, y=y_resampled)

LogisticRegression(max_iter=2000, random_state=1)

In [21]:
# Predict the (unresampled) test split with the oversampled-data model
y_pred_random = model.predict(X_test)

# Balanced accuracy of those predictions
naive_oversampling_balanced_acc_score = balanced_accuracy_score(
    y_true=y_test, y_pred=y_pred_random
)

# Report the score
print(f"Balanced Accuracy Score Naive Random Oversampling : {naive_oversampling_balanced_acc_score}")


Balanced Accuracy Score Naive Random Oversampling : 0.9936781215845847


In [22]:
# Calculating the confusion matrix
# The class order is pinned explicitly so that position 0 is always high_risk and
# position 1 is always low_risk, instead of relying on the implicit sorted-label order.
cm = confusion_matrix(y_test, y_pred_random, labels=["high_risk", "low_risk"])
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)
# Displaying results
print("Confusion Matrix Naive Random Oversampling")
display(cm_df)
# Row/column "0" is high_risk and "1" is low_risk (the order given in `labels` above).

Confusion Matrix Naive Random Oversampling


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,615,4
Actual 1,116,18649


In [23]:
# Build the imbalanced classification report for the random-oversampling predictions
imb_classification_report_random = classification_report_imbalanced(
    y_true=y_test, y_pred=y_pred_random
)

# Show the report under its title
print("Imbalanced Classification Report Naive Random Oversampling")
print(imb_classification_report_random)

Imbalanced Classification Report Naive Random Oversampling
                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.84      0.99      0.99      0.91      0.99      0.99       619
   low_risk       1.00      0.99      0.99      1.00      0.99      0.99     18765

avg / total       0.99      0.99      0.99      0.99      0.99      0.99     19384



### SMOTE Oversampling

#### For Research Purposes two sampling strategies were used with SMOTE: 0.5 and 1 to analyze possible differences

In [24]:
# Import the SMOTE model from imblearn library
from imblearn.over_sampling import SMOTE

# Resample the training split with SMOTE using sampling_strategy=0.5 (seeded)
X_resampled, y_resampled = SMOTE(
    sampling_strategy=0.5,
    random_state=1,
).fit_resample(X=X_train, y=y_train)

# View the count of target classes with Counter
Counter(y_resampled)

Counter({'low_risk': 56271, 'high_risk': 28135})

In [25]:
# Fit a fresh logistic regression on the SMOTE (sampling_strategy=0.5) training data
model = LogisticRegression(
    solver='lbfgs',
    max_iter=2000,
    random_state=1,
)
model.fit(X=X_resampled, y=y_resampled)

LogisticRegression(max_iter=2000, random_state=1)

In [26]:
# Predict the test split with the SMOTE (0.5) model
y_pred_smote = model.predict(X_test)

# Balanced accuracy of those predictions
SMOTE_balanced_acc_score = balanced_accuracy_score(y_true=y_test, y_pred=y_pred_smote)

# Report the score
print(f"Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=0.5 : {SMOTE_balanced_acc_score}")

Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=0.5 : 0.9921159034000586


In [27]:
# Calculating the confusion matrix
# The class order is pinned explicitly so that position 0 is always high_risk and
# position 1 is always low_risk, instead of relying on the implicit sorted-label order.
cm = confusion_matrix(y_test, y_pred_smote, labels=["high_risk", "low_risk"])
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)
# Displaying results
print("Confusion Matrix SMOTE Oversampling using sampling_strategy=0.5")
display(cm_df)
# Row/column "0" is high_risk and "1" is low_risk (the order given in `labels` above).

Confusion Matrix SMOTE Oversampling using sampling_strategy=0.5


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,613,6
Actual 1,114,18651


In [28]:
# Build the imbalanced classification report for the SMOTE (0.5) predictions
imb_classification_report_SMOTE = classification_report_imbalanced(
    y_true=y_test, y_pred=y_pred_smote
)

# Show the report under its title
print("Imbalanced Classification Report SMOTE Oversampling using sampling_strategy=0.5")
print(imb_classification_report_SMOTE)

Imbalanced Classification Report SMOTE Oversampling using sampling_strategy=0.5
                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.84      0.99      0.99      0.91      0.99      0.98       619
   low_risk       1.00      0.99      0.99      1.00      0.99      0.98     18765

avg / total       0.99      0.99      0.99      0.99      0.99      0.98     19384



In [29]:
# Import the SMOTE model from imblearn library
from imblearn.over_sampling import SMOTE

# Resample the training split with SMOTE using sampling_strategy=1.0 (seeded)
X_resampled, y_resampled = SMOTE(
    sampling_strategy=1.0,
    random_state=1,
).fit_resample(X=X_train, y=y_train)

# View the count of target classes with Counter
Counter(y_resampled)

Counter({'low_risk': 56271, 'high_risk': 56271})

In [30]:
# Fit a fresh logistic regression on the SMOTE (sampling_strategy=1.0) training data
model = LogisticRegression(
    solver='lbfgs',
    max_iter=2000,
    random_state=1,
)
model.fit(X=X_resampled, y=y_resampled)

LogisticRegression(max_iter=2000, random_state=1)

In [31]:
# Predict the test split with the SMOTE (1.0) model
y_pred_smote_sample_1 = model.predict(X_test)

# Balanced accuracy of those predictions
SMOTE_sample_1_balanced_acc_score = balanced_accuracy_score(
    y_true=y_test, y_pred=y_pred_smote_sample_1
)

# Report the score
print(f"Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=1.0 : {SMOTE_sample_1_balanced_acc_score}")


Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=1.0 : 0.9936781215845847


In [32]:
# Calculating the confusion matrix
# The class order is pinned explicitly so that position 0 is always high_risk and
# position 1 is always low_risk, instead of relying on the implicit sorted-label order.
cm = confusion_matrix(y_test, y_pred_smote_sample_1, labels=["high_risk", "low_risk"])
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)
# Displaying results
print("Confusion Matrix SMOTE Oversampling using sampling_strategy=1.0")
display(cm_df)
# Row/column "0" is high_risk and "1" is low_risk (the order given in `labels` above).

Confusion Matrix SMOTE Oversampling using sampling_strategy=1.0


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,615,4
Actual 1,116,18649


In [33]:
# Build the imbalanced classification report for the SMOTE (1.0) predictions
imb_classification_report_SMOTE_sample_1 = classification_report_imbalanced(
    y_true=y_test, y_pred=y_pred_smote_sample_1
)

# Show the report under its title
print("Imbalanced Classification Report SMOTE Oversampling using sampling_strategy=1.0")
print(imb_classification_report_SMOTE_sample_1)

Imbalanced Classification Report SMOTE Oversampling using sampling_strategy=1.0
                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.84      0.99      0.99      0.91      0.99      0.99       619
   low_risk       1.00      0.99      0.99      1.00      0.99      0.99     18765

avg / total       0.99      0.99      0.99      0.99      0.99      0.99     19384



# Undersampling

In this section, you will test an undersampling algorithm to determine how its performance compares to the oversampling algorithms above. You will undersample the data using the Cluster Centroids algorithm and complete the following steps:

1. View the count of the target classes using `Counter` from the collections library.
2. Use the resampled data to train a logistic regression model.
3. Calculate the balanced accuracy score from sklearn.metrics.
4. Display the confusion matrix from sklearn.metrics.
5. Generate a classification report using `classification_report_imbalanced` from imbalanced-learn.

Note: Use a random state of 1 for each sampling algorithm to ensure consistency between tests

In [34]:
# Import ClusterCentroids from the imblearn library
from imblearn.under_sampling import ClusterCentroids

# Seeded undersampler so the resampled training set is reproducible
cc = ClusterCentroids(random_state=1)

# Fit the ClusterCentroids resampler to the training split and undersample it
X_resampled, y_resampled = cc.fit_resample(X=X_train, y=y_train)

# View the count of target classes with Counter
Counter(y_resampled)

Counter({'high_risk': 1881, 'low_risk': 1881})

In [35]:
# Fit a fresh logistic regression on the undersampled training data
model = LogisticRegression(
    solver='lbfgs',
    max_iter=2000,
    random_state=1,
)
model.fit(X=X_resampled, y=y_resampled)

LogisticRegression(max_iter=2000, random_state=1)

In [36]:
# Predict the test split with the undersampled-data model
y_pred_undersampling = model.predict(X_test)

# Balanced accuracy of those predictions
cc_balanced_acc_score = balanced_accuracy_score(
    y_true=y_test, y_pred=y_pred_undersampling
)

# Report the score
print(f"Balanced Accuracy Score Undersampling : {cc_balanced_acc_score}")

Balanced Accuracy Score Undersampling : 0.9865149130022852


In [37]:
# Calculating the confusion matrix
# The class order is pinned explicitly so that position 0 is always high_risk and
# position 1 is always low_risk, instead of relying on the implicit sorted-label order.
cm = confusion_matrix(y_test, y_pred_undersampling, labels=["high_risk", "low_risk"])
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)
# Displaying results
print("Confusion Matrix Undersampling")
display(cm_df)
# Row/column "0" is high_risk and "1" is low_risk (the order given in `labels` above).

Confusion Matrix Undersampling


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,606,13
Actual 1,112,18653


In [38]:
# Build the imbalanced classification report for the undersampling predictions
imb_classification_report_Undersampling = classification_report_imbalanced(
    y_true=y_test, y_pred=y_pred_undersampling
)

# Show the report under its title
print("Imbalanced Classification Report Undersampling")
print(imb_classification_report_Undersampling)

Imbalanced Classification Report Undersampling
                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.84      0.98      0.99      0.91      0.99      0.97       619
   low_risk       1.00      0.99      0.98      1.00      0.99      0.97     18765

avg / total       0.99      0.99      0.98      0.99      0.99      0.97     19384



# Combination (Over and Under) Sampling

In this section, you will test a combination over- and under-sampling algorithm to determine whether it performs better than the other sampling algorithms above. You will resample the data using the SMOTEENN algorithm and complete the following steps:

1. View the count of the target classes using `Counter` from the collections library.
2. Use the resampled data to train a logistic regression model.
3. Calculate the balanced accuracy score from sklearn.metrics.
4. Display the confusion matrix from sklearn.metrics.
5. Generate a classification report using `classification_report_imbalanced` from imbalanced-learn.

Note: Use a random state of 1 for each sampling algorithm to ensure consistency between tests

In [39]:
# Import SMOTEENN from the imblearn library
from imblearn.combine import SMOTEENN

# Seeded combined over/under sampler so the resampled training set is reproducible
smote_enn = SMOTEENN(random_state=1)

# Fit the SMOTEENN resampler to the training split and resample it
X_resampled, y_resampled = smote_enn.fit_resample(X=X_train, y=y_train)

# View the count of target classes with Counter
Counter(y_resampled)


Counter({'high_risk': 55622, 'low_risk': 55948})

In [40]:
# Fit a fresh logistic regression on the SMOTEENN-resampled training data
model = LogisticRegression(
    solver='lbfgs',
    max_iter=2000,
    random_state=1,
)
model.fit(X=X_resampled, y=y_resampled)

LogisticRegression(max_iter=2000, random_state=1)

In [41]:
# Predict the test split with the SMOTEENN-data model
y_pred_smoteen = model.predict(X_test)

# Balanced accuracy of those predictions
smoteenn_balanced_acc_score = balanced_accuracy_score(
    y_true=y_test, y_pred=y_pred_smoteen
)

# Report the score
print(f"Balanced Accuracy Score Combination (SMOTEENN) : {smoteenn_balanced_acc_score}")


Balanced Accuracy Score Combination (SMOTEENN) : 0.9935182494822666


In [42]:
# Calculating the confusion matrix
# The class order is pinned explicitly so that position 0 is always high_risk and
# position 1 is always low_risk, instead of relying on the implicit sorted-label order.
cm = confusion_matrix(y_test, y_pred_smoteen, labels=["high_risk", "low_risk"])
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)
# Displaying results
print("Confusion Matrix Combination (SMOTEENN)")
display(cm_df)
# Row/column "0" is high_risk and "1" is low_risk (the order given in `labels` above).

Confusion Matrix Combination (SMOTEENN)


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,615,4
Actual 1,122,18643


In [43]:
# Build the imbalanced classification report for the SMOTEENN predictions
imb_classification_report_SMOTEENN = classification_report_imbalanced(
    y_true=y_test, y_pred=y_pred_smoteen
)

# Show the report under its title
print("Imbalanced Classification Report Combination (SMOTEENN)")
print(imb_classification_report_SMOTEENN)

Imbalanced Classification Report Combination (SMOTEENN)
                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.83      0.99      0.99      0.91      0.99      0.99       619
   low_risk       1.00      0.99      0.99      1.00      0.99      0.99     18765

avg / total       0.99      0.99      0.99      0.99      0.99      0.99     19384



# Final Questions

**1. Which model had the best balanced accuracy score?**

In [44]:
# Side-by-side recap of the balanced accuracy scores computed above, one per line
print(
    f"Balanced Accuracy Score Simple Logistic Regression : {balanced_acc_score}",
    f"Balanced Accuracy Score Naive Random Oversampling : {naive_oversampling_balanced_acc_score}",
    f"Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=0.5 : {SMOTE_balanced_acc_score}",
    f"Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=1.0 : {SMOTE_sample_1_balanced_acc_score}",
    f"Balanced Accuracy Score Undersampling : {cc_balanced_acc_score}",
    f"Balanced Accuracy Score Combination (SMOTEENN) : {smoteenn_balanced_acc_score}",
    sep="\n",
)

Balanced Accuracy Score Simple Logistic Regression : 0.9520479254722232
Balanced Accuracy Score Naive Random Oversampling : 0.9936781215845847
Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=0.5 : 0.9921159034000586
Balanced Accuracy Score SMOTE Oversampling using sampling_strategy=1.0 : 0.9936781215845847
Balanced Accuracy Score Undersampling : 0.9865149130022852
Balanced Accuracy Score Combination (SMOTEENN) : 0.9935182494822666


The naive random oversampling and the SMOTE oversampling with a sampling strategy of 1.0 have the highest balanced accuracy score: 0.993678. Both of these models had the same balanced accuracy score. These models were better at predicting low-risk loans that were actually low risk as well as high-risk loans that were actually high risk.
The simple logistic regression model (without applying oversampling, undersampling or a combination) had the lowest balanced accuracy score (0.952). The oversampling techniques and the combination technique (SMOTEENN) were slightly more effective at increasing the balanced accuracy score than undersampling (about 0.992–0.994 vs. 0.987).

**2. Which model had the best recall score?**

The naive random oversampling, the SMOTE oversampling (with both sampling strategies, 1.0 and 0.5) and the combination (SMOTEENN) have the highest recall score for high-risk loans: 0.99. These four resampling methods offered higher recall for predicting high-risk loans than undersampling (0.98) and the simple logistic regression model without any sampling technique (0.91). All the classification methods had the same recall score (0.99) for predicting low-risk loans.
The oversampling methods had a slightly higher sensitivity (recall score) than undersampling, especially when predicting high_risk loans (0.99 vs. 0.98).


**3. Which model had the best geometric mean score?**

The imbalanced classification reports indicate that all the classification techniques used in the analysis had the same geometric mean score of 0.99, with the exception of the original simple logistic regression (without over or undersampling).
Since the report rounds to two decimal places, the `geometric_mean_score` function was used (in the cell below) to obtain more precise values. The naive random oversampling and the SMOTE oversampling with a sampling strategy of 1.0 have the highest geometric mean score: 0.9937. It can be concluded that all the resampling techniques had a geometric mean score of about 0.99, with random oversampling and SMOTE (sampling strategy 1.0) having a slight advantage over the rest.
The geometric mean (G-mean) is the root of the product of class-wise sensitivity. This measure tries to maximize the accuracy on each of the classes while keeping these accuracies balanced. (from: https://imbalanced-learn.org/dev/references/generated/imblearn.metrics.geometric_mean_score.html)


In [45]:
# Import geometric mean score from the metrics of imblearn
from imblearn.metrics import geometric_mean_score

# Geometric mean score of each model's test-set predictions (full precision)
geometric_mean_score_simple_log_reg = geometric_mean_score(y_true=y_test, y_pred=y_pred)
geometric_mean_score_random = geometric_mean_score(y_true=y_test, y_pred=y_pred_random)
geometric_mean_score_SMOTE = geometric_mean_score(y_true=y_test, y_pred=y_pred_smote)
geometric_mean_score_SMOTE_sample_1 = geometric_mean_score(
    y_true=y_test, y_pred=y_pred_smote_sample_1
)
geometric_mean_score_Undersampling = geometric_mean_score(
    y_true=y_test, y_pred=y_pred_undersampling
)
geometric_mean_score_SMOTEENN = geometric_mean_score(y_true=y_test, y_pred=y_pred_smoteen)

# Print the geometric mean scores, one per line
print(
    f"Geometric Mean Score Simple Logistic Regression Model : {geometric_mean_score_simple_log_reg}",
    f"Geometric Mean Score Naive Random Oversampling : {geometric_mean_score_random}",
    f"Geometric Mean Score SMOTE Oversampling using sampling_strategy=0.5 : {geometric_mean_score_SMOTE}",
    f"Geometric Mean Score SMOTE Oversampling using sampling_strategy=1.0 : {geometric_mean_score_SMOTE_sample_1}",
    f"Geometric Mean Score Undersampling : {geometric_mean_score_Undersampling}",
    f"Geometric Mean Score Combination (SMOTEENN) : {geometric_mean_score_SMOTEENN}",
    sep="\n",
)

Geometric Mean Score Simple Logistic Regression Model : 0.9510981054376492
Geometric Mean Score Naive Random Oversampling : 0.9936781117000861
Geometric Mean Score SMOTE Oversampling using sampling_strategy=0.5 : 0.9921142542343241
Geometric Mean Score SMOTE Oversampling using sampling_strategy=1.0 : 0.9936781117000861
Geometric Mean Score Undersampling : 0.9864862773374231
Geometric Mean Score Combination (SMOTEENN) : 0.9935182492866586
