## Imports

In [1]:
import pickle
from collections import defaultdict
import numpy as np
import scipy.stats as ss

In [2]:
import warnings
# Install a global "ignore" filter: every warning raised after this point is
# suppressed for the rest of the kernel session.
# NOTE(review): presumably done to keep the printed tables uncluttered, but it
# also hides deprecation and numerical warnings from numpy/scipy — consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

## Load Results

In [3]:
# Names of the 13 datasets whose scores are kept when the result files are read.
# Order matters only for readability; membership is what the loader checks.
DATASETS_13 = [
    "ecoli", "satimage", "abalone", "us_crime",
    "yeast_ml8", "scene", "coil_2000", "solar_flare_m0",
    "oil", "wine_quality", "yeast_me2", "ozone_level",
    "abalone_19",
]

In [4]:
# Maps each experiment's display name to the pickle file holding its scores.
# Insertion order defines the column order of every table printed below.
#
# Paths use forward slashes on purpose: they work on Windows, Linux and macOS,
# whereas a backslash separator is Windows-only and sequences such as "\e" are
# invalid string escapes (SyntaxWarning on recent Python versions).
results = {
    "Baseline": "results/ex01_baseline.pk",
    "Majority Downsample": "results/ex02_majority_downsample.pk",
    "Minority Oversample": "results/ex03_minority_oversample.pk",
    "SMOTE": "results/ex04_smote.pk",
    "Zero Out Noise": "results/ex05_1_zero_out_0.10.pk",
    "Mean Fill": "results/ex05_2_mean_fill_0.10.pk",
    "Gaussian Noise": "results/ex05_3_gaussian_noise_0.10.pk",
    "Permutation": "results/ex05_4_permutation_0.10.pk",
}

In [5]:
# Build two tables keyed by dataset name. Each value is a list holding one
# score per experiment, in the iteration order of `results`, so position k in
# every list corresponds to the k-th experiment (i.e. the k-th table column).
gmeans_scores, f1_scores = defaultdict(list), defaultdict(list)
for experiment, result_file in results.items():
    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # load result files that were produced locally by trusted experiment runs.
    with open(result_file, "rb") as f:
        result = pickle.load(f)
    # The file is closed here; the unpickled mapping is processed afterwards.
    for dataset, scores in result.items():
        if dataset in DATASETS_13:
            # Index 0 is stored as the G-mean and index 2 as the F1 score —
            # presumably matching the layout written by the experiment
            # scripts; TODO confirm against the code that produced the files.
            gmeans_scores[dataset].append(scores[0])
            f1_scores[dataset].append(scores[2])

# Every dataset that was seen must have exactly one score per experiment.
# Otherwise a result file was missing that dataset and the remaining scores
# would silently shift into the wrong experiment's column.
incomplete = {
    dataset: len(collected)
    for dataset, collected in gmeans_scores.items()
    if len(collected) != len(results)
}
if incomplete:
    raise ValueError(
        "Expected %d scores per dataset (one per experiment) but found: %r"
        % (len(results), incomplete)
    )

## Display Results

In [6]:
# Print the values collected in `gmeans_scores`: one row per dataset, one
# column per experiment, each score rounded to four decimals.
header = "Dataset\t\tBase\tMajDown\tMinOvr\tSMOTE\tZero\tMeanF\tGauss\tPerm"
# Left-justified dataset name padded to 15 characters, then eight tab-separated scores.
row_template = "%-15s" + "\t%.4f" * 8
print(header)
for dataset, method_scores in gmeans_scores.items():
    print(row_template % (dataset, *method_scores))

Dataset		Base	MajDown	MinOvr	SMOTE	Zero	MeanF	Gauss	Perm
ecoli          	0.8795	0.8825	0.8818	0.8918	0.8637	0.8887	0.8898	0.8813
satimage       	0.8911	0.8827	0.8987	0.9008	0.8865	0.8938	0.8925	0.8930
abalone        	0.7876	0.7901	0.7816	0.7833	0.7812	0.7820	0.7806	0.7851
us_crime       	0.8581	0.8582	0.8533	0.8515	0.8573	0.8565	0.8616	0.8605
yeast_ml8      	0.5574	0.5738	0.5764	0.5668	0.5866	0.5826	0.5740	0.5759
scene          	0.6914	0.7268	0.7350	0.7290	0.6679	0.7350	0.7353	0.7325
coil_2000      	0.6530	0.6659	0.6591	0.6420	0.6245	0.6317	0.6579	0.6443
solar_flare_m0 	0.7257	0.7193	0.7118	0.7343	0.7192	0.7180	0.7139	0.7112
oil            	0.8448	0.8287	0.8728	0.8858	0.8737	0.8660	0.8744	0.8750
wine_quality   	0.8137	0.7841	0.8108	0.8048	0.8099	0.8042	0.8211	0.8186
yeast_me2      	0.8473	0.8403	0.8531	0.8570	0.8537	0.8653	0.8650	0.8573
ozone_level    	0.8183	0.8177	0.8153	0.8180	0.8189	0.8120	0.8294	0.8236
abalone_19     	0.6721	0.7330	0.6666	0.7314	0.6807	0.6960	0.7156	0.7196


In [7]:
# Rank the methods within each dataset by the values in `gmeans_scores`
# (rank 1 = highest score), print the per-dataset ranks, then print each
# method's mean rank over all datasets.
print("Dataset\t\tBase\tMajDown\tMinOvr\tSMOTE\tZero\tMeanF\tGauss\tPerm")
rank_rows = []
for dataset, r in gmeans_scores.items():
    # rankdata ranks in ascending order, so negate the scores to give the best
    # one rank 1. Tied scores share the average of the ranks they span, which
    # can be fractional (e.g. 2.5).
    ranks = ss.rankdata(-np.asarray(r))
    # %g prints whole ranks without decimals and keeps fractional tie ranks
    # intact instead of truncating them as %d would.
    print("%-15s\t%s" % (dataset, "\t".join("%g" % rank for rank in ranks)))
    rank_rows.append(ranks)
# Average over the datasets actually present rather than a hard-coded count.
method_ranks = np.mean(rank_rows, axis=0) if rank_rows else np.zeros(0)
print("="*80)
print("%-15s\t%s" % ("", "\t".join("%.2f" % mean_rank for mean_rank in method_ranks)))

Dataset		Base	MajDown	MinOvr	SMOTE	Zero	MeanF	Gauss	Perm
ecoli          	7	4	5	1	8	3	2	6
satimage       	6	8	2	1	7	3	5	4
abalone        	2	1	6	4	7	5	8	3
us_crime       	4	3	7	8	5	6	1	2
yeast_ml8      	8	6	3	7	1	2	5	4
scene          	7	6	3	5	8	2	1	4
coil_2000      	4	1	2	6	8	7	3	5
solar_flare_m0 	2	3	7	1	4	5	6	8
oil            	7	8	5	1	4	6	3	2
wine_quality   	3	8	4	6	5	7	1	2
yeast_me2      	7	8	6	4	5	1	2	3
ozone_level    	4	6	7	5	3	8	1	2
abalone_19     	7	1	8	2	6	5	4	3
               	5.23	4.85	5.00	3.92	5.46	4.62	3.23	3.69


In [8]:
# Print the values collected in `f1_scores`: one row per dataset, one column
# per experiment, each score rounded to four decimals.
header = "Dataset\t\tBase\tMajDown\tMinOvr\tSMOTE\tZero\tMeanF\tGauss\tPerm"
# Left-justified dataset name padded to 15 characters, then eight tab-separated scores.
row_template = "%-15s" + "\t%.4f" * 8
print(header)
for dataset, method_scores in f1_scores.items():
    print(row_template % (dataset, *method_scores))

Dataset		Base	MajDown	MinOvr	SMOTE	Zero	MeanF	Gauss	Perm
ecoli          	0.6666	0.6128	0.6212	0.6531	0.6067	0.6178	0.6402	0.6191
satimage       	0.6932	0.6478	0.6920	0.6914	0.6810	0.6930	0.6786	0.6803
abalone        	0.3996	0.4037	0.4013	0.3915	0.3964	0.3925	0.3955	0.3943
us_crime       	0.5490	0.5247	0.5249	0.5193	0.5460	0.5302	0.5392	0.5357
yeast_ml8      	0.1608	0.1726	0.1705	0.1640	0.1743	0.1725	0.1683	0.1688
scene          	0.3015	0.3040	0.3178	0.3142	0.2740	0.3192	0.3269	0.3296
coil_2000      	0.2079	0.2196	0.2139	0.2012	0.1754	0.2004	0.2255	0.2053
solar_flare_m0 	0.2542	0.2368	0.2111	0.2454	0.2363	0.2532	0.2462	0.2360
oil            	0.5733	0.4647	0.6037	0.6232	0.5865	0.5954	0.6119	0.5978
wine_quality   	0.4520	0.3321	0.4264	0.3926	0.4469	0.4081	0.4424	0.4346
yeast_me2      	0.4322	0.3715	0.4592	0.4193	0.4120	0.4124	0.4389	0.4060
ozone_level    	0.3703	0.3381	0.3696	0.3618	0.3788	0.3943	0.4135	0.4007
abalone_19     	0.0767	0.0588	0.0894	0.0832	0.0705	0.0670	0.0608	0.0649


In [9]:
# Rank the methods within each dataset by the values in `f1_scores`
# (rank 1 = highest score), print the per-dataset ranks, then print each
# method's mean rank over all datasets.
print("Dataset\t\tBase\tMajDown\tMinOvr\tSMOTE\tZero\tMeanF\tGauss\tPerm")
rank_rows = []
for dataset, r in f1_scores.items():
    # rankdata ranks in ascending order, so negate the scores to give the best
    # one rank 1. Tied scores share the average of the ranks they span, which
    # can be fractional (e.g. 2.5).
    ranks = ss.rankdata(-np.asarray(r))
    # %g prints whole ranks without decimals and keeps fractional tie ranks
    # intact instead of truncating them as %d would.
    print("%-15s\t%s" % (dataset, "\t".join("%g" % rank for rank in ranks)))
    rank_rows.append(ranks)
# Average over the datasets actually present rather than a hard-coded count.
method_ranks = np.mean(rank_rows, axis=0) if rank_rows else np.zeros(0)
print("="*80)
print("%-15s\t%s" % ("", "\t".join("%.2f" % mean_rank for mean_rank in method_ranks)))

Dataset		Base	MajDown	MinOvr	SMOTE	Zero	MeanF	Gauss	Perm
ecoli          	1	7	4	2	8	6	3	5
satimage       	1	8	3	4	5	2	7	6
abalone        	3	1	2	8	4	7	5	6
us_crime       	1	7	6	8	2	5	3	4
yeast_ml8      	8	2	4	7	1	3	6	5
scene          	7	6	4	5	8	3	2	1
coil_2000      	4	2	3	6	8	7	1	5
solar_flare_m0 	1	5	8	4	6	2	3	7
oil            	7	8	3	1	6	5	2	4
wine_quality   	1	8	5	7	2	6	3	4
yeast_me2      	3	8	1	4	6	5	2	7
ozone_level    	5	8	6	7	4	3	1	2
abalone_19     	3	8	1	2	4	5	7	6
               	3.46	6.00	3.85	5.00	4.92	4.54	3.46	4.77
