In [7]:
import pandas as pd
import pickle
pd.set_option('display.max_columns', 500)

df = None
with open('data/normalisation.pk','rb') as f:
    df = pickle.load(f)

df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


## Data balancing
    - class weights, cost-sensitive learning, undersampling, near miss undersampling, oversampling, SMOTE (Syntheic Minority Over-sampling technique), ensemble methods

### Undersampling
The undersampling method restricts the majority class to the number of observartions of the the minority class. This is useful when the minority class still presents a big enough number of observations.

In [8]:
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler

def countClass(df):
    class_counts = df['Class'].value_counts()
    if 1 not in class_counts:
        class_counts[1] = 0
    print(f"Number of zeros: {(class_counts[0])}, percentage: {round(class_counts[0]/len(df)*100,4)}%")
    print(f"Number of ones: {(class_counts[1])}, percentage: {round(class_counts[1]/len(df)*100,4)}%")
    

def random_under_sampling(df):
    # Separate the features from the target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Apply random under sampling to balance the class distribution
    rus = RandomUnderSampler(random_state=42)
    X_resampled, y_resampled = rus.fit_resample(X, y)
    
    # Combine the resampled features and target variable into a new DataFrame
    df_resampled = pd.concat([pd.DataFrame(X_resampled), pd.DataFrame(y_resampled)], axis=1)
    df_resampled.columns = df.columns
    
    return df_resampled

print(df.shape)
countClass(df)
df_undersampling = random_under_sampling(df)
print(df_undersampling.shape)
countClass(df_undersampling)

(284807, 31)
Number of zeros: 284315, percentage: 99.8273%
Number of ones: 492, percentage: 0.1727%
(984, 31)
Number of zeros: 492, percentage: 50.0%
Number of ones: 492, percentage: 50.0%


### NearMiss Undersampling
NearMiss undersamples the data as the normal undersampling method. Nevertheless, the observations on the majority class are not randomly selected. Actually, they are selected based on their distance from the minority class. There are 3 versions that differs on the distance metric:
- NearMiss-1: select the samples whose average distance to the k nearest samples of the minority class is the smallest.
- NearMiss-2: select the samples whose average distance to the farthest k samples of the minority class is the smallest.
- NearMiss-3: select the samples which are farthest from a decision boundary (a hyperplane that separates the classes).

In [9]:
import pandas as pd
from imblearn.under_sampling import NearMiss

def nearmiss_under_sampling(df,v = 1):
    # Separate the features from the target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Apply NearMiss under sampling to balance the class distribution
    nm = NearMiss(version=v)
    X_resampled, y_resampled = nm.fit_resample(X, y)
    
    # Combine the resampled features and target variable into a new DataFrame
    df_resampled = pd.concat([pd.DataFrame(X_resampled), pd.DataFrame(y_resampled)], axis=1)
    df_resampled.columns = df.columns
    
    return df_resampled


print(df.shape)
countClass(df)
df_nearmiss_v1 = nearmiss_under_sampling(df,v=1)
print(df_nearmiss_v1.shape)
countClass(df_nearmiss_v1)
df_nearmiss_v3 = nearmiss_under_sampling(df,v=3)
print(df_nearmiss_v3.shape)
countClass(df_nearmiss_v3)

(284807, 31)
Number of zeros: 284315, percentage: 99.8273%
Number of ones: 492, percentage: 0.1727%
(984, 31)
Number of zeros: 492, percentage: 50.0%
Number of ones: 492, percentage: 50.0%
(984, 31)
Number of zeros: 492, percentage: 50.0%
Number of ones: 492, percentage: 50.0%


### Oversampling
The oversampling method increases the number of instances of the minority class to match the number of the majority class. This may be challenging since we can't produce new observations of a certain class out of the blue. I way to do it is to duplicate observations, as if we have got multiple equal observations. Thus, random oversampling produces new instances by randomly selecting pre-existing instances.

In [10]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler

def random_over_sampling(df):
    # Separate the features from the target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Apply random over-sampling to balance the class distribution
    ros = RandomOverSampler(random_state=42)
    X_resampled, y_resampled = ros.fit_resample(X, y)
    
    # Combine the resampled features and target variable into a new DataFrame
    df_resampled = pd.concat([pd.DataFrame(X_resampled), pd.DataFrame(y_resampled)], axis=1)
    df_resampled.columns = df.columns
    
    return df_resampled

print(df.shape)
countClass(df)
df_oversampling = random_over_sampling(df)
print(df_oversampling.shape)
countClass(df_oversampling)

(284807, 31)
Number of zeros: 284315, percentage: 99.8273%
Number of ones: 492, percentage: 0.1727%
(568630, 31)
Number of zeros: 284315, percentage: 50.0%
Number of ones: 284315, percentage: 50.0%


### SMOTE (Synthetic Minority Over-sampling technique)
The SMOTE method is similar to the oversampling method but they differ on how new instances of the minority class are made. SMOTE is a data augmentation method, i.e. it produces new samples by making small perturbations to the existing ones. The algorithm is:
- select a random sample
- identify the k nearest neighbors for the sample
- take one of the neighbors randomly
- get the vector that transforms the sample into the neighbors
- multiple the vector by a random number between 0 and 1 and add it to the sample
In other words, the new entry is a random point that lies in a line that connects two near already existing points.

https://towardsdatascience.com/smote-fdce2f605729

In [11]:
import pandas as pd
from imblearn.over_sampling import SMOTE

def smote_over_sampling(df):
    # Separate the features from the target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Apply SMOTE over-sampling to balance the class distribution
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    
    # Combine the resampled features and target variable into a new DataFrame
    df_resampled = pd.concat([pd.DataFrame(X_resampled), pd.DataFrame(y_resampled)], axis=1)
    df_resampled.columns = df.columns
    
    return df_resampled

print(df.shape)
countClass(df)
df_smote = smote_over_sampling(df)
print(df_smote.shape)
countClass(df_smote)

(284807, 31)
Number of zeros: 284315, percentage: 99.8273%
Number of ones: 492, percentage: 0.1727%
(568630, 31)
Number of zeros: 284315, percentage: 50.0%
Number of ones: 284315, percentage: 50.0%


### Class weights
Class weights is a form of training a model but addressing different weights to the classification of each class.
The weight of class i has the weight:
$$
w_i = n_samples/(n_classes * n_classes_i)
$$

https://www.analyticsvidhya.com/blog/2020/10/improve-class-imbalance-class-weights/

In [16]:
import pandas as pd
from sklearn.utils.class_weight import compute_class_weight

def class_weight_resampling(df):
    # Separate the features from the target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Calculate class weights for the imbalanced dataset
    class_weights = compute_class_weight('balanced', classes=[0, 1], y=y)
    
    return class_weights

    # Create a dictionary of class weights
    # class_weights_dict = {0: class_weights[0], 1: class_weights[1]}
    
    # # Apply class weights to balance the class distribution
    # df_resampled = df.sample(frac=1, replace=True, weights=y.map(class_weights_dict), random_state=42)
    
    # return df_resampled

print(df.shape)
countClass(df)
class_weights = class_weight_resampling(df)
print(class_weights)
# df_class_weight = class_weight_resampling(df)
# print(df_class_weight.shape)
# countClass(df_class_weight)

(284807, 31)
Number of zeros: 284315, percentage: 99.8273%
Number of ones: 492, percentage: 0.1727%
[  0.50086524 289.43800813]


In [14]:
# df_class_weight.head()

### Ensemble methods

We have seen balancing methods that alter data (undersampling, oversampling, etc) and a method that change the weights of the classifier for each class. The ensemble method has a different approach. As usual ensemble methods, it combines models to improve performance. Typically, it has two steps: resampling (undersampling, oversampling, SMOTE, etc) and model aggregation (prediction voting, weighted voting or stacking (meta-classifier that uses the predictions of all classifiers as input and, then, learn to combine them to produce the final prediction)).

There are many methods:
- Bagging: creates multiples partitions from the original dataset and trains a classifier on each partition. The final prediciton is made by aggregating the predictions from all classifiers.
- Boosting: creates multiple resampled datasets. Each dataset gives a higher weight to misclassified examples from the minority class. A classifier is trained in each dataset and the final prediction is a combination of all classifiers.
- Adaboost: similar to boosting but each classifier is given a weight based on its performance. The aggregation of prediction is, then, also weighted.
- RUSBoost: variant of boosting but uses undersampling instead of SMOTE.
- Gradient Boosting: improves a weak model, usually a decision tree, by adding new models that focus on the mistakes of the previous models. It minimizes a loss function by adjusting the weights of the training examples.
- XGBoost: (extreme gradient boosting) is an effective and optimized implementation of gradient boosting. It includes tree pruning, regularization and early stopping which help prevent overfitting.
- EasyEnsemble: similar to bagging with undersampling. Selects multiples subsets of the majority class and combines with the minority class. The train models on each balanced samples.
- Balanced Random Forest (BRF): a random subset of features is selected, as in Random Forest, then a new dataset is constructed undersampling the majority class or oversampling the minority and, at last, multiple decision trees are trained.The final prediction is an aggregation of all predictions.

https://www.analyticsvidhya.com/blog/2017/03/imbalanced-data-classification/

In [25]:
import pandas as pd
from imblearn.ensemble import BalancedRandomForestClassifier

def random_forest_ensemble(df):
    # Separate the features from the target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Create an ensemble classifier with balanced sampling
    clf = BalancedRandomForestClassifier(n_estimators=100, random_state=42)
    
    # Fit the classifier to the data and make predictions
    clf.fit(X, y)
    y_pred = clf.predict(X)
    
    # Combine the predictions and the original target variable into a new DataFrame
    # df_resampled = pd.concat([X, pd.Series(y_pred, name='Class')], axis=1)
    
    return y_pred,y

ensemble_RF_y_pred, ensemble_RF_y_test = random_forest_ensemble(df)


In [26]:
import pandas as pd
from imblearn.ensemble import EasyEnsembleClassifier

def easy_ensemble(df, method='brf'):
    # Separate the features from the target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Create an ensemble classifier with balanced sampling
    clf = EasyEnsembleClassifier(n_estimators=100, random_state=42)
    
    # Fit the classifier to the data and make predictions
    clf.fit(X, y)
    y_pred = clf.predict(X)
    
    # Combine the predictions and the original target variable into a new DataFrame
    # df_resampled = pd.concat([X, pd.Series(y_pred, name='Class')], axis=1)
    
    return y_pred,y

ensemble_easy_y_pred, ensemble_easy_y_test = easy_ensemble(df)

In [27]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from imblearn.under_sampling import RandomUnderSampler

def bagging_ensemble(df):
    # Separate the features and target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Create an instance of the Decision Tree classifier
    tree = DecisionTreeClassifier()
    
    # Create an instance of the Random Under-Sampler
    rus = RandomUnderSampler(random_state=42)
    
    # Create an instance of the Bagging classifier
    bagging = BaggingClassifier(base_estimator=tree, random_state=42)
    
    # Fit the Bagging classifier to the data after under-sampling
    X_res, y_res = rus.fit_resample(X, y)
    bagging.fit(X_res, y_res)
    y_pred = bagging.predict(X)
    
    return y_pred,y

ensemble_bag_y_pred, ensemble_bag_y_test = bagging_ensemble(df)




In [28]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from imblearn.under_sampling import RandomUnderSampler

def adaboosting_ensemble(df):
    # Separate the features and target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Create an instance of the Decision Tree classifier
    tree = DecisionTreeClassifier()
    
    # Create an instance of the Random Under-Sampler
    rus = RandomUnderSampler(random_state=42)
    
    # Create an instance of the AdaBoost classifier
    boosting = AdaBoostClassifier(base_estimator=tree, random_state=42)
    
    # Fit the AdaBoost classifier to the data after under-sampling
    X_res, y_res = rus.fit_resample(X, y)
    boosting.fit(X_res, y_res)
    y_pred = boosting.predict(X)
    
    return y_pred,y

ensemble_adaB_y_pred, ensemble_adaB_y_test = adaboosting_ensemble(df)




In [29]:


from sklearn.ensemble import GradientBoostingClassifier
from imblearn.under_sampling import RandomUnderSampler

def gradient_boosting_ensemble(df):
    # Separate the features and target variable
    X = df.drop('Class', axis=1)
    y = df['Class']
    
    # Create an instance of the Random Under-Sampler
    rus = RandomUnderSampler(random_state=42)
    
    # Create an instance of the Gradient Boosting classifier
    boosting = GradientBoostingClassifier(random_state=42)
    
    # Fit the Gradient Boosting classifier to the data after under-sampling
    X_res, y_res = rus.fit_resample(X, y)
    boosting.fit(X_res, y_res)
    y_pred = boosting.predict(X)
    
    return y_pred,y

ensemble_gB_y_pred, ensemble_gB_y_test = gradient_boosting_ensemble(df)



In [30]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split

def xgb_ensemble(df):
    # Split the data into features and target
    X = df.drop('Class', axis=1)
    y = df['Class']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create a DMatrix for the XGBoost model
    dtrain = xgb.DMatrix(X_train, label=y_train)

    # Set the XGBoost parameters
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'max_depth': 6,
        'learning_rate': 0.1,
        'scale_pos_weight': len(y_train[y_train == 0]) / len(y_train[y_train == 1])
    }

    # Train the XGBoost model
    model = xgb.train(params, dtrain, num_boost_round=100)

    # Make predictions on the test set
    dtest = xgb.DMatrix(X_test)
    y_pred = model.predict(dtest)

    # Return the predicted probabilities
    return y_pred,y_test

ensemble_xgB_y_pred, ensemble_xgB_y_test = xgb_ensemble(df)

#### Balancing methods comparison

In [22]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score, matthews_corrcoef, balanced_accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression


def applyLogisticRegression(df, method_name = "",class_weights = None):
    # Split data into train and test sets
    X = df.drop('Class', axis=1)
    y = df['Class']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train classifier
    clf = LogisticRegression(random_state=42)
    if class_weights != None:
        clf = LogisticRegression(random_state=42,class_weight={0:class_weights[0], 1:class_weights[1]})
    clf.fit(X_train, y_train)
    
    # Make predictions on test set
    y_pred = clf.predict(X_test)
    
    # Calculate performance metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision_0 = precision_score(y_test, y_pred, pos_label=0)
    precision_1 = precision_score(y_test, y_pred, pos_label=1)
    recall_0 = recall_score(y_test, y_pred, pos_label=0)
    recall_1 = recall_score(y_test, y_pred, pos_label=1)
    f1_0 = f1_score(y_test, y_pred, pos_label=0)
    f1_1 = f1_score(y_test, y_pred, pos_label=1)
    roc_auc = roc_auc_score(y_test, y_pred)
    cohen_kappa = cohen_kappa_score(y_test, y_pred)
    matthews_corr = matthews_corrcoef(y_test, y_pred)
    balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
    g_mean = (recall_0*recall_1)**0.5
    classification_error = 1 - accuracy
    sensitivity_0 = recall_0
    sensitivity_1 = recall_1
    specificity_0 = 1 - recall_0
    specificity_1 = 1 - recall_1
    
    # Return dictionary with performance metrics
    return {
        'method_name': method_name,
        'accuracy': accuracy,
        'precision_0': precision_0,
        'precision_1': precision_1,
        'recall_0': recall_0,
        'recall_1': recall_1,
        'f1_0': f1_0,
        'f1_1': f1_1,
        'roc_auc': roc_auc,
        'cohen_kappa': cohen_kappa,
        'matthews_corr': matthews_corr,
        'balanced_accuracy': balanced_accuracy,
        'g_mean': g_mean,
        'classification_error': classification_error,
        'sensitivity_0': sensitivity_0,
        'sensitivity_1': sensitivity_1,
        'specificity_0': specificity_0,
        'specificity_1': specificity_1
    }


In [23]:
df_ans = applyLogisticRegression(df,method_name="df")
df_undersampling_ans = applyLogisticRegression(df_undersampling,method_name="under")
df_nearmiss_v1_ans = applyLogisticRegression(df_nearmiss_v1,method_name="NM1")
df_nearmiss_v3_ans = applyLogisticRegression(df_nearmiss_v3,method_name="NM2")
df_oversampling_ans = applyLogisticRegression(df_oversampling,method_name="over")
df_smote_ans = applyLogisticRegression(df_smote,method_name="smote")
df_weight_ans = applyLogisticRegression(df,method_name="weights",class_weights=[class_weights[0],class_weights[1]])


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [31]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score, matthews_corrcoef, balanced_accuracy_score, classification_report

def getMetrics(y_pred,y_test,method_name=""):
    
    # Calculate performance metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision_0 = precision_score(y_test, y_pred, pos_label=0)
    precision_1 = precision_score(y_test, y_pred, pos_label=1)
    recall_0 = recall_score(y_test, y_pred, pos_label=0)
    recall_1 = recall_score(y_test, y_pred, pos_label=1)
    f1_0 = f1_score(y_test, y_pred, pos_label=0)
    f1_1 = f1_score(y_test, y_pred, pos_label=1)
    roc_auc = roc_auc_score(y_test, y_pred)
    cohen_kappa = cohen_kappa_score(y_test, y_pred)
    matthews_corr = matthews_corrcoef(y_test, y_pred)
    balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
    g_mean = (recall_0*recall_1)**0.5
    classification_error = 1 - accuracy
    sensitivity_0 = recall_0
    sensitivity_1 = recall_1
    specificity_0 = 1 - recall_0
    specificity_1 = 1 - recall_1
    
    # Return dictionary with performance metrics
    return {
        'method_name': method_name,
        'accuracy': accuracy,
        'precision_0': precision_0,
        'precision_1': precision_1,
        'recall_0': recall_0,
        'recall_1': recall_1,
        'f1_0': f1_0,
        'f1_1': f1_1,
        'roc_auc': roc_auc,
        'cohen_kappa': cohen_kappa,
        'matthews_corr': matthews_corr,
        'balanced_accuracy': balanced_accuracy,
        'g_mean': g_mean,
        'classification_error': classification_error,
        'sensitivity_0': sensitivity_0,
        'sensitivity_1': sensitivity_1,
        'specificity_0': specificity_0,
        'specificity_1': specificity_1
    }


In [35]:
import numpy as np
df_e_rf_ans = getMetrics(ensemble_RF_y_pred,ensemble_RF_y_test,method_name="e_rf")
df_e_easy_ans = getMetrics(ensemble_easy_y_pred, ensemble_easy_y_test,method_name="e_easy")
df_e_bag_ans = getMetrics(ensemble_bag_y_pred, ensemble_bag_y_test,method_name="e_bag")
df_e_adab_ans = getMetrics(ensemble_adaB_y_pred, ensemble_adaB_y_test,method_name="e_adab")
df_e_gb_ans = getMetrics(ensemble_gB_y_pred, ensemble_gB_y_test,method_name="e_gb")

ensemble_xgB_y_pred_b = np.where(ensemble_xgB_y_pred > 0.5, 1, 0)

df_e_xgb_ans = getMetrics(ensemble_xgB_y_pred_b, ensemble_xgB_y_test,method_name="e_xgb")


In [36]:
# Performance comparison

ans = [df_ans,
       df_undersampling_ans,
       df_nearmiss_v1_ans,
       df_nearmiss_v3_ans,
       df_oversampling_ans,
       df_smote_ans,
       df_weight_ans,
       df_e_rf_ans,
       df_e_easy_ans,
       df_e_bag_ans,
       df_e_adab_ans,
       df_e_gb_ans,
       df_e_xgb_ans
       ]

n = len(ans)
name_str = 'method_name'
for score_name in [v for v in df_ans if v != name_str]:
    print(f'====== {score_name} ======')
    ans = sorted(ans, key = lambda x: x[score_name], reverse = True)
    print('score:',end=" ")
    for i in range(n):
        text = f"{ans[i][score_name]} ({ans[i][name_str]})"
        print(text,end='\t')
    print()
    print('improvement(from next/worst):',end=" ")
    for i in range(n-1):
        text = f"{round(100*((ans[i][score_name]/ans[i+1][score_name])-1),6)}%, {round(100*((ans[i][score_name]/ans[n-1][score_name])-1),6)}% ({ans[i][name_str]})"
        print(text,end='\t')
    print()


score: 0.9995259997893332 (e_xgb)	0.9989817773252344 (df)	0.9780377589033977 (e_rf)	0.9739021859557181 (smote)	0.9671356392223506 (e_easy)	0.9637157794576678 (e_bag)	0.9620690502691296 (e_gb)	0.9562164249850778 (weights)	0.949238578680203 (NM1)	0.9289340101522843 (under)	0.9178991611416915 (over)	0.9035532994923858 (NM2)	0.9010803807490687 (e_adab)	
improvement(from next/worst): 0.054478%, 10.925287% (e_xgb)	2.141432%, 10.86489% (df)	0.424639%, 8.540568% (e_rf)	0.699648%, 8.08161% (smote)	0.354862%, 7.330673% (e_easy)	0.171165%, 6.951144% (e_bag)	0.612061%, 6.768394% (e_gb)	0.735099%, 6.118882% (weights)	2.185792%, 5.344495% (NM1)	1.202185%, 3.091137% (under)	1.587716%, 1.866513% (over)	0.274439%, 0.274439% (NM2)	
score: 1.0 (e_rf)	1.0 (e_adab)	0.99999268781351 (e_gb)	0.9999854525881664 (e_easy)	0.9999854008737578 (e_bag)	0.9998529033207075 (weights)	0.999736226634076 (e_xgb)	0.9994724439911371 (df)	0.9661454721952573 (smote)	0.9320388349514563 (NM1)	0.9047619047619048 (under)	0.888090

  text = f"{round(100*((ans[i][score_name]/ans[i+1][score_name])-1),6)}%, {round(100*((ans[i][score_name]/ans[n-1][score_name])-1),6)}% ({ans[i][name_str]})"
  text = f"{round(100*((ans[i][score_name]/ans[i+1][score_name])-1),6)}%, {round(100*((ans[i][score_name]/ans[n-1][score_name])-1),6)}% ({ans[i][name_str]})"


For accuracy, ensemble XGBoost > df > others.  
For 1 precision, smote > NM1 > under > NM2 > over > ensemble XGBoost > df > others.  
For 1 recall, ensemble > smote > others. The worst was df.  
For 1 F1, smote > NM1 > under > over > ...  
For ROC AUX, ensemble > smote > others. The worst was df.  
For Cohen kappa and MCC, smote was the best.  
For balanced accuracy and g-mean, ensemble > smote > others.  
For classification error, ensemble adaBoost > NM2 > over > ...  
For 1 sensitivity, ensemble adaBoost > ensemble > smote > ...  
For 1 specificity, df >> (way better) others.  

The results are in favor of balancing. Nevertheless, there isn't a best method over all metrics. The best performance methods seemed to be smote, ensemble adaBoost and ensemble random forest. 

In [37]:
# store results after normalisation

import pickle

with open("data/balancing.pk",'wb') as f:
    pickle.dump(df_smote,f)