This notebook contains the calculations to compare the signicant difference between non-normalized and normalized data for each of the 12 possible baseline configurations

In [1]:
from scipy import stats
import scikit_posthocs as sp
import numpy as np
import pandas as pd

#https://www.statology.org/nemenyi-test-python/

### 1. Random Forests with None Rebalancing (F1)

In [2]:
rf_none = [0.48,0.567901,0.534161,0.522293,0.503311,0.490066,0.516556,0.5,0.538462,0.532468,0.440367,0.444444,0.388889,0.396226,0.35514,0.472727,0.368932,0.454545,0.365385,0.415094,0.347826,0.461538,0.56,0.583333,0.740741,0.380952,0.714286,0.642857,0.615385,0.692308]
rf_none_normalized = [0.559006,0.596273,0.466667,0.555556,0.512821,0.473684,0.566038,0.529032,0.576687,0.558442,0.262626,0.486486,0.3,0.4,0.333333,0.352941,0.349515,0.429907,0.333333,0.380952,0.615385,0.692308,0.3,0.5,0.583333,0.521739,0.380952,0.521739,0.3,0.689655]


#Friedman Test
stats.mannwhitneyu(rf_none, rf_none_normalized)

MannwhitneyuResult(statistic=395.0, pvalue=0.21014796392011814)

### 2. Random Forests with SMOTE Rebalancing (F1)

In [3]:
rf_smote = [0.628019,0.575916,0.595745,0.65285,0.564972,0.670157,0.637838,0.624339,0.666667,0.603175,0.633803,0.629371,0.571429,0.583942,0.559441,0.602941,0.556391,0.671233,0.68,0.697987,0.538462,0.705882,0.580645,0.580645,0.466667,0.571429,0.785714,0.689655,0.740741,0.689655]
rf_smote_normalized = [0.631579,0.586387,0.583784,0.645833,0.60733,0.707071,0.677083,0.653266,0.642857,0.550285,0.652778,0.536232,0.630872,0.676056,0.619355,0.609929,0.597403,0.591549,0.604317,0.643357,0.5,0.689655,0.538462,0.666667,0.571429,0.8,0.714286,0.571429,0.75,0.774194]

#Friedman Test
stats.mannwhitneyu(rf_smote, rf_smote_normalized)

MannwhitneyuResult(statistic=417.0, pvalue=0.31539779610502894)

### 3. Random Forests with Under Rebalancing (F1)

In [4]:
rf_under = [0.317817,0.26242,0.272069,0.289398,0.3,0.304348,0.310651,0.304821,0.314103,0.31348,0.126661,0.143767,0.160697,0.139655,0.145359,0.132045,0.128878,0.162839,0.127527,0.157171,0.064356,0.088083,0.067797,0.091153,0.090652,0.076696,0.072893,0.08076,0.088398,0.076336]
rf_under_normalized = [0.265583,0.316923,0.291176,0.335505,0.334944,0.309342,0.298273,0.280179,0.277174,0.348993,0.138796,0.136594,0.126883,0.121572,0.109188,0.124402,0.135225,0.113234,0.118902,0.118519,0.075188,0.095238,0.083117,0.052718,0.076577,0.076555,0.087464,0.071599,0.083551,0.091644]

#Friedman Test
stats.mannwhitneyu(rf_under, rf_under_normalized)

MannwhitneyuResult(statistic=420.0, pvalue=0.3313673790880535)

### 4. Random Forests with 5050 Rebalancing (F1)

In [5]:
rf_5050 = [0.675556,0.663366,0.655022,0.71028,0.690583,0.633028,0.651163,0.635071,0.712329,0.666667,0.590604,0.540541,0.576471,0.583851,0.535032,0.695652,0.571429,0.622754,0.581818,0.612717,0.594595,0.588235,0.705882,0.666667,0.555556,0.413793,0.714286,0.457143,0.645161,0.428571,]
rf_5050_normalized = [0.672566,0.632479,0.695279,0.700461,0.621622,0.654867,0.620087,0.651584,0.686957,0.646809,0.611111,0.593023,0.609756,0.556213,0.653595,0.662791,0.59887,0.608187,0.682635,0.658824,0.6,0.555556,0.666667,0.5625,0.645161,0.62069,0.6875,0.615385,0.722222,0.571429]

#Friedman Test
stats.mannwhitneyu(rf_5050, rf_5050_normalized)

MannwhitneyuResult(statistic=397.5, pvalue=0.22098675905123233)

### 5. XGBoost with None Rebalancing (F1)

In [6]:
xgboost_none = [0.731183,0.729064,0.764398,0.771574,0.723618,0.738462,0.72043,0.803828,0.721649,0.727273,0.629921,0.661765,0.680851,0.646154,0.712329,0.690647,0.637681,0.681159,0.725926,0.699301,0.714286,0.583333,0.666667,0.56,0.551724,0.48,0.733333,0.642857,0.8125,0.62069,]
xgboost_none_normalized = [0.744898,0.71875,0.747475,0.757895,0.761421,0.729167,0.764706,0.731183,0.73,0.743169,0.717241,0.621212,0.587302,0.656716,0.636364,0.732394,0.689655,0.695652,0.627737,0.632353,0.666667,0.571429,0.606061,0.714286,0.733333,0.785714,0.875,0.727273,0.709677,0.666667]

#Friedman Test
stats.mannwhitneyu(xgboost_none, xgboost_none_normalized)

MannwhitneyuResult(statistic=390.0, pvalue=0.18949166249526272)

### 6. XGBoost with smote Rebalancing (F1)

In [7]:
xgboost_smote = [0.736364,0.730435,0.746411,0.807339,0.75,0.693467,0.747664,0.663415,0.75122,0.753623,0.696774,0.695122,0.647059,0.707317,0.666667,0.654762,0.670886,0.705882,0.708075,0.77193,0.645161,0.666667,0.666667,0.769231,0.823529,0.592593,0.833333,0.789474,0.75,0.5625]
xgboost_smote_normalized = [0.729858,0.734884,0.757282,0.690909,0.787037,0.792453,0.790244,0.758929,0.738318,0.764706,0.671233,0.678363,0.670886,0.649682,0.721893,0.662921,0.682635,0.683544,0.745342,0.708861,0.727273,0.705882,0.727273,0.580645,0.645161,0.857143,0.758621,0.648649,0.774194,0.645161]

#Friedman Test
stats.mannwhitneyu(xgboost_smote, xgboost_smote_normalized)

MannwhitneyuResult(statistic=443.0, pvalue=0.46171470242808)

### 7. XGBoost with under Rebalancing (F1)

In [8]:
xgboost_under = [0.353741,0.321593,0.373057,0.378092,0.345178,0.323988,0.353333,0.380597,0.34202,0.340289,0.144473,0.143972,0.132662,0.164103,0.121752,0.148624,0.142214,0.126783,0.131886,0.165657,0.055556,0.079602,0.085791,0.052067,0.05102,0.055556,0.075377,0.06422,0.053125,0.035941]
xgboost_under_normalized = [0.368421,0.369128,0.326333,0.352542,0.356643,0.368601,0.342229,0.344942,0.365854,0.323391,0.13828,0.117221,0.14781,0.148571,0.133779,0.150459,0.125863,0.131621,0.136283,0.148216,0.057658,0.081081,0.069519,0.03908,0.081301,0.06,0.049911,0.04261,0.056106,0.036145]

#Friedman Test
stats.mannwhitneyu(xgboost_under, xgboost_under_normalized)

MannwhitneyuResult(statistic=442.0, pvalue=0.4558538768539766)

### 8. XGBoost with 5050 Rebalancing (F1)

In [9]:
xgboost_5050 = [0.766667,0.796537,0.715596,0.798319,0.793249,0.80531,0.755187,0.73251,0.752,0.761506,0.624339,0.662722,0.680412,0.656085,0.695187,0.677419,0.687179,0.656085,0.68,0.695652,0.666667,0.578947,0.55,0.594595,0.685714,0.625,0.685714,0.648649,0.666667,0.615385]
xgboost_5050_normalized = [0.697095,0.713693,0.711864,0.684825,0.732283,0.707819,0.741313,0.74477,0.733333,0.713693,0.663212,0.741117,0.646766,0.648936,0.605128,0.680851,0.659686,0.655914,0.683168,0.659686,0.511628,0.615385,0.594595,0.685714,0.555556,0.588235,0.540541,0.555556,0.585366,0.684211]

#Friedman Test
stats.mannwhitneyu(xgboost_5050, xgboost_5050_normalized)

MannwhitneyuResult(statistic=348.0, pvalue=0.06669736805860728)

### 9. LightGBM with none Rebalancing (F1)

In [10]:
lightgbm_none = [0.677249,0.727273,0.676617,0.728205,0.729167,0.648045,0.684211,0.732984,0.680412,0.783069,0.545455,0.467153,0.633094,0.466667,0.533333,0.618421,0.626667,0.513514,0.556291,0.387665,0.089888,0.322581,0.533333,0.645161,0.173913,0.5,0.175824,0.533333,0.37037,0.8]
lightgbm_none_normalized = [0.751323,0.76,0.708333,0.72549,0.728205,0.698413,0.704663,0.687831,0.729167,0.743719,0.483516,0.512195,0.596273,0.605263,0.64,0.6375,0.604938,0.539474,0.596026,0.547945,0.228571,0.62069,0.191781,0.125,0.625,0.615385,0.758621,0.5,0.466667,0.428571]

#Friedman Test
stats.mannwhitneyu(lightgbm_none, lightgbm_none_normalized)

MannwhitneyuResult(statistic=401.0, pvalue=0.2366486892541172)

### 10. LightGBM with smote Rebalancing (F1)

In [11]:
lightgbm_smote = [0.778846,0.752294,0.772093,0.730594,0.733945,0.673171,0.786026,0.7343,0.707547,0.738318,0.703297,0.647727,0.681818,0.654545,0.729282,0.686391,0.698225,0.638298,0.726257,0.613497,0.709677,0.685714,0.625,0.685714,0.645161,0.756757,0.6875,0.685714,0.742857,0.384615]
lightgbm_smote_normalized = [0.707965,0.697095,0.742358,0.763158,0.717949,0.752212,0.740088,0.712446,0.763158,0.728889,0.723404,0.578035,0.648352,0.611399,0.603175,0.644068,0.655556,0.666667,0.627027,0.642487,0.62069,0.625,0.625,0.588235,0.571429,0.666667,0.62069,0.5,0.666667,0.6875]

#Friedman Test
stats.mannwhitneyu(lightgbm_smote, lightgbm_smote_normalized)

MannwhitneyuResult(statistic=304.5, pvalue=0.01600889057736877)

### 11. LightGBM with under Rebalancing (F1)

In [12]:
lightgbm_under = [0.367857,0.389513,0.380107,0.364865,0.364273,0.331738,0.357751,0.328173,0.384892,0.339623,0.170706,0.133111,0.116901,0.127459,0.142489,0.140773,0.137584,0.123711,0.141176,0.149281,0.054054,0.056239,0.093151,0.05814,0.081013,0.049635,0.056856,0.059406,0.052202,0.04507]
lightgbm_under_normalized = [0.355476,0.358974,0.343053,0.395437,0.38961,0.378378,0.365812,0.33119,0.364286,0.345161,0.138218,0.152985,0.132901,0.132439,0.147033,0.146903,0.126621,0.136249,0.125984,0.149034,0.057495,0.078788,0.037641,0.069672,0.075188,0.041505,0.044099,0.076142,0.087671,0.064]

#Friedman Test
stats.mannwhitneyu(lightgbm_under, lightgbm_under_normalized)

MannwhitneyuResult(statistic=441.0, pvalue=0.4499975186181821)

### 12. LightGBM with 5050 Rebalancing (F1)

In [13]:
lightgbm_5050 = [0.755906,0.728972,0.726563,0.705882,0.718147,0.715447,0.779026,0.767932,0.773109,0.794872,0.698113,0.613861,0.640777,0.659794,0.633663,0.641148,0.648352,0.673575,0.577114,0.58,0.7,0.666667,0.6,0.666667,0.571429,0.636364,0.756757,0.647059,0.571429,0.684211]
lightgbm_5050_normalized = [0.72093,0.677165,0.708955,0.706827,0.701887,0.741313,0.709677,0.730038,0.755187,0.689139,0.641509,0.648402,0.657005,0.602041,0.583691,0.699507,0.589862,0.688995,0.653846,0.650246,0.529412,0.714286,0.604651,0.5,0.62069,0.571429,0.4,0.540541,0.571429,0.606061]

#Friedman Test
stats.mannwhitneyu(lightgbm_5050, lightgbm_5050_normalized)

MannwhitneyuResult(statistic=350.0, pvalue=0.07060835543640612)