In [17]:
import pickle
import numpy as np
import pandas as pd
from scipy import stats

In [18]:
# Training-set sizes. A TRAIN_SIZE_ID used elsewhere in this notebook is an index into this list.
TRAIN_SIZE_LIST = [
    500,
    5_000,
    50_000,
    162_946,
]

In [47]:
def t_test(MODEL_LIST, result_dict, n_train_sizes=4):
    """Print a paired t-test between the two best-ranked models per train size.

    For every train-size index the models are ranked by
    ``result_dict[model_id]['mean'][train_size_id]``. The per-repetition scores
    of the top two are then compared with ``scipy.stats.ttest_rel`` using the
    one-sided alternative ``'greater'`` (best model > runner-up).

    Parameters
    ----------
    MODEL_LIST : sequence of str
        Display names; position ``i`` names the model stored under key ``i`` of
        ``result_dict``. At least two entries are required.
    result_dict : dict
        ``result_dict[model_id][train_size_id]`` is the list of per-repetition
        scores and ``result_dict[model_id]['mean'][train_size_id]`` the value
        used for ranking.
    n_train_sizes : int, optional
        Number of train-size indices to test. Defaults to 4, the value that
        used to be hard-coded.

    Returns
    -------
    None
        One line is printed per train size: both model names, a marker and the
        p-value rounded to 4 decimals. Markers: ``'__'`` for p > 0.05, ``'*'``
        for 0.01 < p <= 0.05, ``'**'`` for p <= 0.01 and ``'n/a'`` when the
        p-value is NaN.

    Raises
    ------
    ValueError
        If fewer than two models are supplied.
    """
    if len(MODEL_LIST) < 2:
        raise ValueError('t_test needs at least two models to compare')

    for TRAIN_SIZE_ID in range(n_train_sizes):
        mean_scores = [result_dict[MODEL_ID]['mean'][TRAIN_SIZE_ID]
                       for MODEL_ID in range(len(MODEL_LIST))]
        # argsort is ascending, so reverse it to obtain best and runner-up.
        first, second = np.argsort(mean_scores)[::-1][:2]
        p_value = stats.ttest_rel(result_dict[first][TRAIN_SIZE_ID],
                                  result_dict[second][TRAIN_SIZE_ID],
                                  alternative='greater')[1]
        if np.isnan(p_value):
            # NaN compares False against every threshold and would otherwise
            # be reported as '**' (most significant).
            marker = 'n/a'
        elif p_value > 0.05:
            marker = '__'
        elif p_value > 0.01:
            marker = '*'
        else:
            marker = '**'
        print('first:', MODEL_LIST[first], 'second:', MODEL_LIST[second],
              marker, np.round(p_value, 4))

# Preliminary Experiments

## MFE model comparison

In [19]:
# Display names of the MFE models; position i names the model stored under MODEL_ID i.
MODEL_LIST = [
    'LR',
    'SVM',
    'RF',
    'GBM',
    'FNN',
]

In [20]:
# Score containers per model: keys 0-3 hold the per-repetition scores of each
# train size; 'mean' / 'std' receive one aggregate per train size, in order.
macro_dict = {MODEL_ID: {TRAIN_SIZE_ID: [] for TRAIN_SIZE_ID in [0,1,2,3, 'mean', 'std']} for MODEL_ID in range(5)}
micro_dict = {MODEL_ID: {TRAIN_SIZE_ID: [] for TRAIN_SIZE_ID in [0,1,2,3, 'mean', 'std']} for MODEL_ID in range(5)}
# 'total' keeps every repetition's matrix, 'summary' their element-wise sum.
cm_dict = {MODEL_ID: {TRAIN_SIZE_ID: {'total':[],'summary': np.zeros((9,9))} for TRAIN_SIZE_ID in range(4)} for MODEL_ID in range(5)}

for MODEL_ID in range(5):
    for TRAIN_SIZE_ID in range(4):
        TRAIN_SIZE = TRAIN_SIZE_LIST[TRAIN_SIZE_ID]
        for REP_ID in range(10):
            filename = '../result/MFE/WMPC_'+'MFE_'+str(MODEL_ID)+'_'+str(TRAIN_SIZE)+'_'+str(REP_ID)+'_'
            # NOTE(security): pickle.load can execute arbitrary code; only read
            # result files produced by a trusted run.
            with open(filename+'f1_score.pickle', 'rb') as f:
                macro, micro, cm = pickle.load(f)
            macro_dict[MODEL_ID][TRAIN_SIZE_ID].append(macro)
            micro_dict[MODEL_ID][TRAIN_SIZE_ID].append(micro)
            cm_dict[MODEL_ID][TRAIN_SIZE_ID]['total'].append(cm)
            # Add the whole running total. Indexing it with [0] would broadcast
            # only its first row onto cm and drop the earlier repetitions.
            # Assumes cm is a 9x9 array-like matching the zero-initialised
            # summary -- TODO confirm.
            cm_dict[MODEL_ID][TRAIN_SIZE_ID]['summary'] = cm_dict[MODEL_ID][TRAIN_SIZE_ID]['summary'] + cm
        # Aggregate over the repetitions of this (model, train size) pair.
        # np.std uses its default ddof=0 (population standard deviation).
        macro_dict[MODEL_ID]['mean'].append(np.mean(macro_dict[MODEL_ID][TRAIN_SIZE_ID]))
        macro_dict[MODEL_ID]['std'].append(np.std(macro_dict[MODEL_ID][TRAIN_SIZE_ID]))
        micro_dict[MODEL_ID]['mean'].append(np.mean(micro_dict[MODEL_ID][TRAIN_SIZE_ID]))
        micro_dict[MODEL_ID]['std'].append(np.std(micro_dict[MODEL_ID][TRAIN_SIZE_ID]))

In [21]:
# Macro-score summary table: a 'TS' column plus one "mean+-std" column per model.
macro_df = {'TS': TRAIN_SIZE_LIST}
for MODEL_ID in range(5):
    model_name = MODEL_LIST[MODEL_ID]
    # Round the aggregates once per model instead of once per table cell.
    rounded_mean = np.round(macro_dict[MODEL_ID]['mean'], 4)
    rounded_std = np.round(macro_dict[MODEL_ID]['std'], 4)
    macro_df[model_name] = [
        str(rounded_mean[i]) + '+-' + str(rounded_std[i])
        for i in range(4)
    ]
pd.DataFrame(macro_df)

Unnamed: 0,TS,LR,SVM,RF,GBM,FNN
0,500,0.5787+-0.0259,0.3689+-0.0357,0.539+-0.0418,0.5065+-0.0234,0.5558+-0.0376
1,5000,0.7078+-0.0307,0.542+-0.0287,0.6655+-0.0176,0.6527+-0.0279,0.6983+-0.0371
2,50000,0.7988+-0.017,0.7636+-0.0166,0.7965+-0.0089,0.7797+-0.0138,0.831+-0.0109
3,162946,0.8143+-0.0133,0.8254+-0.0127,0.8345+-0.0126,0.8112+-0.0123,0.8599+-0.0117


In [22]:
# For each train size, rank the five models by mean macro score and run a
# one-sided paired t-test (best > runner-up) on their per-repetition scores.
# Markers: '__' for p > 0.05, '*' for 0.01 < p <= 0.05, '**' for p <= 0.01.
for TRAIN_SIZE_ID in range(4):
    a_list = [macro_dict[MODEL_ID]['mean'][TRAIN_SIZE_ID] for MODEL_ID in range(5)]
    # argsort is ascending, so reverse it to obtain best and runner-up.
    first, second = np.argsort(a_list)[::-1][:2]
    p_value = stats.ttest_rel(macro_dict[first][TRAIN_SIZE_ID],
                              macro_dict[second][TRAIN_SIZE_ID], alternative='greater')[1]
    if p_value > 0.05:
        marker = '__'
    elif p_value > 0.01:
        marker = '*'
    else:
        marker = '**'
    # The runner-up is labelled 'second:' (it used to be printed as a second 'first:').
    print('first:', MODEL_LIST[first], 'second:', MODEL_LIST[second], marker, np.round(p_value, 4))

first: LR first: FNN * 0.0184
first: LR first: FNN __ 0.2906
first: FNN first: LR ** 0.0009
first: FNN first: RF ** 0.0006


In [23]:
# Micro-score summary table: a 'TS' column plus one "mean+-std" column per model.
micro_df = {'TS': TRAIN_SIZE_LIST}
for MODEL_ID in range(5):
    model_name = MODEL_LIST[MODEL_ID]
    mean_4dp = np.round(micro_dict[MODEL_ID]['mean'], 4)
    std_4dp = np.round(micro_dict[MODEL_ID]['std'], 4)
    micro_df[model_name] = ['+-'.join((str(mean_4dp[i]), str(std_4dp[i]))) for i in range(4)]
pd.DataFrame(micro_df)

Unnamed: 0,TS,LR,SVM,RF,GBM,FNN
0,500,0.9401+-0.0033,0.9244+-0.0033,0.9377+-0.0036,0.9286+-0.0033,0.9415+-0.0044
1,5000,0.9581+-0.0013,0.9494+-0.0022,0.9555+-0.0021,0.9514+-0.0019,0.96+-0.0011
2,50000,0.9657+-0.0017,0.9662+-0.0013,0.966+-0.0018,0.965+-0.0011,0.9712+-0.001
3,162946,0.967+-0.0014,0.9718+-0.001,0.9703+-0.0014,0.9683+-0.0013,0.9741+-0.001


In [24]:
# For each train size, rank the five models by mean micro score and run a
# one-sided paired t-test (best > runner-up) on their per-repetition scores.
# Markers: '__' for p > 0.05, '*' for 0.01 < p <= 0.05, '**' for p <= 0.01.
for TRAIN_SIZE_ID in range(4):
    a_list = [micro_dict[MODEL_ID]['mean'][TRAIN_SIZE_ID] for MODEL_ID in range(5)]
    # argsort is ascending, so reverse it to obtain best and runner-up.
    first, second = np.argsort(a_list)[::-1][:2]
    p_value = stats.ttest_rel(micro_dict[first][TRAIN_SIZE_ID],
                              micro_dict[second][TRAIN_SIZE_ID], alternative='greater')[1]
    if p_value > 0.05:
        marker = '__'
    elif p_value > 0.01:
        marker = '*'
    else:
        marker = '**'
    # The runner-up is labelled 'second:' (it used to be printed as a second 'first:').
    print('first:', MODEL_LIST[first], 'second:', MODEL_LIST[second], marker, np.round(p_value, 5))

first: FNN first: LR __ 0.1566
first: FNN first: LR ** 0.00082
first: FNN first: SVM ** 0.0
first: FNN first: SVM ** 0.0001


## CNN model comparison

In [25]:
# Display names of the CNN models; position i names the model stored under MODEL_ID i.
MODEL_LIST = [
    'VGGNet',
    'ResNet',
    'DenseNet',
]

In [26]:
# Score containers per model: keys 0-3 hold the per-repetition scores of each
# train size; 'mean' / 'std' receive one aggregate per train size, in order.
macro_dict = {MODEL_ID: {TRAIN_SIZE_ID: [] for TRAIN_SIZE_ID in [0,1,2,3, 'mean', 'std']} for MODEL_ID in range(3)}
micro_dict = {MODEL_ID: {TRAIN_SIZE_ID: [] for TRAIN_SIZE_ID in [0,1,2,3, 'mean', 'std']} for MODEL_ID in range(3)}
# 'total' keeps every repetition's matrix, 'summary' their element-wise sum.
cm_dict = {MODEL_ID: {TRAIN_SIZE_ID: {'total':[],'summary': np.zeros((9,9))} for TRAIN_SIZE_ID in range(4)} for MODEL_ID in range(3)}

for MODEL_ID in range(3):
    for TRAIN_SIZE_ID in range(4):
        TRAIN_SIZE = TRAIN_SIZE_LIST[TRAIN_SIZE_ID]
        # NOTE(review): only 9 repetitions are read here, while the other
        # loaders in this notebook read 10 -- confirm this is intentional.
        for REP_ID in range(9):
            filename = '../result/CNN/WMPC_'+'CNN_'+str(MODEL_ID)+'_'+str(TRAIN_SIZE)+'_'+str(REP_ID)+'_'
            # NOTE(security): pickle.load can execute arbitrary code; only read
            # result files produced by a trusted run.
            with open(filename+'f1_score.pickle', 'rb') as f:
                macro, micro, cm = pickle.load(f)
            macro_dict[MODEL_ID][TRAIN_SIZE_ID].append(macro)
            micro_dict[MODEL_ID][TRAIN_SIZE_ID].append(micro)
            cm_dict[MODEL_ID][TRAIN_SIZE_ID]['total'].append(cm)
            # Add the whole running total. Indexing it with [0] would broadcast
            # only its first row onto cm and drop the earlier repetitions.
            # Assumes cm is a 9x9 array-like matching the zero-initialised
            # summary -- TODO confirm.
            cm_dict[MODEL_ID][TRAIN_SIZE_ID]['summary'] = cm_dict[MODEL_ID][TRAIN_SIZE_ID]['summary'] + cm
        # Aggregate over the repetitions of this (model, train size) pair.
        # np.std uses its default ddof=0 (population standard deviation).
        macro_dict[MODEL_ID]['mean'].append(np.mean(macro_dict[MODEL_ID][TRAIN_SIZE_ID]))
        macro_dict[MODEL_ID]['std'].append(np.std(macro_dict[MODEL_ID][TRAIN_SIZE_ID]))
        micro_dict[MODEL_ID]['mean'].append(np.mean(micro_dict[MODEL_ID][TRAIN_SIZE_ID]))
        micro_dict[MODEL_ID]['std'].append(np.std(micro_dict[MODEL_ID][TRAIN_SIZE_ID]))

In [27]:
# Macro-score summary table for the CNN models: 'TS' plus one "mean+-std" column each.
macro_df = {'TS': TRAIN_SIZE_LIST}
for MODEL_ID in range(3):
    model_name = MODEL_LIST[MODEL_ID]
    # Round the aggregates once per model instead of once per table cell.
    rounded_mean = np.round(macro_dict[MODEL_ID]['mean'], 4)
    rounded_std = np.round(macro_dict[MODEL_ID]['std'], 4)
    macro_df[model_name] = [
        str(rounded_mean[i]) + '+-' + str(rounded_std[i])
        for i in range(4)
    ]
pd.DataFrame(macro_df)

Unnamed: 0,TS,VGGNet,ResNet,DenseNet
0,500,0.491+-0.0672,0.3809+-0.0846,0.3931+-0.0481
1,5000,0.6996+-0.0422,0.6466+-0.0225,0.6767+-0.0425
2,50000,0.8398+-0.0225,0.8167+-0.0196,0.8267+-0.0127
3,162946,0.8685+-0.0131,0.8714+-0.0143,0.8802+-0.0171


In [46]:
# Run the shared top-two paired t-test helper on the macro scores currently in macro_dict.
# NOTE(review): the saved output below comes from execution In [46] and differs from the
# output of the equivalent inline test in the following cell -- it was probably produced
# with a different macro_dict in the kernel; re-run on a fresh kernel to confirm.
t_test(MODEL_LIST, macro_dict)

first: VGGNet first: DenseNet __ 0.265
first: DenseNet first: VGGNet __ 0.3891
first: ResNet first: VGGNet __ 0.1527
first: ResNet first: VGGNet __ 0.0781


In [29]:
# For each train size, rank the three models by mean macro score and run a
# one-sided paired t-test (best > runner-up) on their per-repetition scores.
# Markers: '__' for p > 0.05, '*' for 0.01 < p <= 0.05, '**' for p <= 0.01.
for TRAIN_SIZE_ID in range(4):
    a_list = [macro_dict[MODEL_ID]['mean'][TRAIN_SIZE_ID] for MODEL_ID in range(3)]
    # argsort is ascending, so reverse it to obtain best and runner-up.
    first, second = np.argsort(a_list)[::-1][:2]
    p_value = stats.ttest_rel(macro_dict[first][TRAIN_SIZE_ID],
                              macro_dict[second][TRAIN_SIZE_ID], alternative='greater')[1]
    if p_value > 0.05:
        marker = '__'
    elif p_value > 0.01:
        marker = '*'
    else:
        marker = '**'
    # The runner-up is labelled 'second:' (it used to be printed as a second 'first:').
    print('first:', MODEL_LIST[first], 'second:', MODEL_LIST[second], marker, np.round(p_value, 4))

first: VGGNet first: DenseNet ** 0.0008
first: VGGNet first: DenseNet __ 0.1134
first: VGGNet first: DenseNet __ 0.0736
first: DenseNet first: ResNet __ 0.1624


In [30]:
# Micro-score summary table for the CNN models: 'TS' plus one "mean+-std" column each.
micro_df = {'TS': TRAIN_SIZE_LIST}
for MODEL_ID in range(3):
    model_name = MODEL_LIST[MODEL_ID]
    mean_4dp = np.round(micro_dict[MODEL_ID]['mean'], 4)
    std_4dp = np.round(micro_dict[MODEL_ID]['std'], 4)
    micro_df[model_name] = ['+-'.join((str(mean_4dp[i]), str(std_4dp[i]))) for i in range(4)]
pd.DataFrame(micro_df)

Unnamed: 0,TS,VGGNet,ResNet,DenseNet
0,500,0.9285+-0.0095,0.9151+-0.0079,0.9171+-0.0056
1,5000,0.9607+-0.0028,0.9455+-0.0037,0.9516+-0.0044
2,50000,0.9729+-0.0024,0.9677+-0.0032,0.9693+-0.0019
3,162946,0.9776+-0.0013,0.9757+-0.0013,0.9766+-0.0016


In [31]:
# For each train size, rank the three models by mean micro score and run a
# one-sided paired t-test (best > runner-up) on their per-repetition scores.
# Markers: '__' for p > 0.05, '*' for 0.01 < p <= 0.05, '**' for p <= 0.01.
for TRAIN_SIZE_ID in range(4):
    a_list = [micro_dict[MODEL_ID]['mean'][TRAIN_SIZE_ID] for MODEL_ID in range(3)]
    # argsort is ascending, so reverse it to obtain best and runner-up.
    first, second = np.argsort(a_list)[::-1][:2]
    p_value = stats.ttest_rel(micro_dict[first][TRAIN_SIZE_ID],
                              micro_dict[second][TRAIN_SIZE_ID], alternative='greater')[1]
    if p_value > 0.05:
        marker = '__'
    elif p_value > 0.01:
        marker = '*'
    else:
        marker = '**'
    # The runner-up is labelled 'second:' (it used to be printed as a second 'first:').
    print('first:', MODEL_LIST[first], 'second:', MODEL_LIST[second], marker, np.round(p_value, 5))

first: VGGNet first: DenseNet ** 0.002
first: VGGNet first: DenseNet ** 0.00076
first: VGGNet first: DenseNet ** 0.00437
first: VGGNet first: DenseNet * 0.01922


# Main Experiment

In [38]:
# Display names of the compared approaches; position i names the approach stored under MODE_ID i.
MODE_LIST = [
    'MFE+FNN',
    'CNN',
    'MultiNN',
    'Stacking-DT',
    'Stacking-FNN',
    'Stacking-MLR',
]

In [40]:
# Score containers per approach: keys 0-3 hold the per-repetition scores of each
# train size; 'mean' / 'std' receive one aggregate per train size, in order.
macro_dict = {MODE_ID: {TRAIN_SIZE_ID: [] for TRAIN_SIZE_ID in [0,1,2,3, 'mean', 'std']}
              for MODE_ID in range(6)}
micro_dict = {MODE_ID: {TRAIN_SIZE_ID: [] for TRAIN_SIZE_ID in [0,1,2,3, 'mean', 'std']}
              for MODE_ID in range(6)}
# 'total' keeps every repetition's matrix, 'summary' their element-wise sum.
cm_dict = {MODE_ID: {TRAIN_SIZE_ID: {'total':[],'summary': np.zeros((9,9))} for TRAIN_SIZE_ID in range(4)}
           for MODE_ID in range(6)}

# For each MODE_ID: (result sub-directory / filename prefix, model id used in the filename).
MODE_SOURCE = {
    0: ('MFE', 4),
    1: ('CNN', 0),
    2: ('MultiNN', 0),
    3: ('Stacking', 1),
    4: ('Stacking', 2),
    5: ('Stacking', 0),
}

for MODE_ID in range(6):
    # The source of an approach does not depend on train size or repetition,
    # so resolve it once per MODE_ID.
    MODE, MODEL_ID = MODE_SOURCE[MODE_ID]
    for TRAIN_SIZE_ID in range(4):
        TRAIN_SIZE = TRAIN_SIZE_LIST[TRAIN_SIZE_ID]
        for REP_ID in range(10):
            filename = '../result/'+ MODE +'/WMPC_'+ MODE +'_'+str(MODEL_ID)+'_'+str(TRAIN_SIZE)+'_'+str(REP_ID)+'_'
            # NOTE(security): pickle.load can execute arbitrary code; only read
            # result files produced by a trusted run.
            with open(filename+'f1_score.pickle', 'rb') as f:
                macro, micro, cm = pickle.load(f)
            macro_dict[MODE_ID][TRAIN_SIZE_ID].append(macro)
            micro_dict[MODE_ID][TRAIN_SIZE_ID].append(micro)
            cm_dict[MODE_ID][TRAIN_SIZE_ID]['total'].append(cm)
            # Add the whole running total. Indexing it with [0] would broadcast
            # only its first row onto cm and drop the earlier repetitions.
            # Assumes cm is a 9x9 array-like matching the zero-initialised
            # summary -- TODO confirm.
            cm_dict[MODE_ID][TRAIN_SIZE_ID]['summary'] = cm_dict[MODE_ID][TRAIN_SIZE_ID]['summary'] + cm
        # Aggregate over the repetitions of this (approach, train size) pair.
        # np.std uses its default ddof=0 (population standard deviation).
        macro_dict[MODE_ID]['mean'].append(np.mean(macro_dict[MODE_ID][TRAIN_SIZE_ID]))
        macro_dict[MODE_ID]['std'].append(np.std(macro_dict[MODE_ID][TRAIN_SIZE_ID]))
        micro_dict[MODE_ID]['mean'].append(np.mean(micro_dict[MODE_ID][TRAIN_SIZE_ID]))
        micro_dict[MODE_ID]['std'].append(np.std(micro_dict[MODE_ID][TRAIN_SIZE_ID]))

In [41]:
# Macro-score summary table for the main experiment: 'TS' plus one "mean+-std" column per approach.
macro_df = {'TS': TRAIN_SIZE_LIST}
for MODE_ID in range(6):
    model_name = MODE_LIST[MODE_ID]
    # Round the aggregates once per approach instead of once per table cell.
    rounded_mean = np.round(macro_dict[MODE_ID]['mean'], 4)
    rounded_std = np.round(macro_dict[MODE_ID]['std'], 4)
    macro_df[model_name] = [
        str(rounded_mean[i]) + '+-' + str(rounded_std[i])
        for i in range(4)
    ]
pd.DataFrame(macro_df)

Unnamed: 0,TS,MFE+FNN,CNN,MultiNN,Stacking-DT,Stacking-FNN,Stacking-MLR
0,500,0.5558+-0.0376,0.4937+-0.0642,0.545+-0.0654,0.5068+-0.0427,0.5261+-0.0564,0.5872+-0.0542
1,5000,0.6983+-0.0371,0.6954+-0.042,0.7018+-0.0086,0.7184+-0.0203,0.7328+-0.0472,0.7599+-0.0202
2,50000,0.831+-0.0109,0.8397+-0.0213,0.8089+-0.0141,0.8459+-0.0141,0.8742+-0.0082,0.8686+-0.0078
3,162946,0.8599+-0.0117,0.8679+-0.0126,0.8372+-0.0247,0.88+-0.008,0.8991+-0.0098,0.8949+-0.0121


In [None]:

TS	MFE+FNN	CNN	MultiNN	Stacking-DT	Stacking-FNN	Stacking-MLR
0	500	0.5558+-0.0376	0.4937+-0.0642	0.545+-0.0654	0.5177+-0.043	0.5261+-0.0564	0.5872+-0.0542
1	5000	0.6983+-0.0371	0.6954+-0.042	0.7018+-0.0086	0.7213+-0.0233	0.7328+-0.0472	0.7599+-0.0202
2	50000	0.831+-0.0109	0.8397+-0.0213	0.8089+-0.0141	0.8422+-0.0118	0.8742+-0.0082	0.8686+-0.0078
3	162946	0.8599+-0.0117	0.8679+-0.0126	0.8372+-0.0247	0.8799+-0.0082	0.8991+-0.0098	0.8949+-0.0121

In [42]:
# For each train size, rank the six approaches by mean macro score and run a
# one-sided paired t-test (best > runner-up) on their per-repetition scores.
# Markers: '__' for p > 0.05, '*' for 0.01 < p <= 0.05, '**' for p <= 0.01.
for TRAIN_SIZE_ID in range(4):
    a_list = [macro_dict[MODE_ID]['mean'][TRAIN_SIZE_ID] for MODE_ID in range(6)]
    # argsort is ascending, so reverse it to obtain best and runner-up.
    first, second = np.argsort(a_list)[::-1][:2]
    p_value = stats.ttest_rel(macro_dict[first][TRAIN_SIZE_ID],
                              macro_dict[second][TRAIN_SIZE_ID], alternative='greater')[1]
    if p_value > 0.05:
        marker = '__'
    elif p_value > 0.01:
        marker = '*'
    else:
        marker = '**'
    # The runner-up is labelled 'second:' (it used to be printed as a second 'first:').
    print('first:', MODE_LIST[first], 'second:', MODE_LIST[second], marker, np.round(p_value, 4))

first: Stacking-MLR first: MFE+FNN * 0.0414
first: Stacking-MLR first: Stacking-FNN * 0.0237
first: Stacking-FNN first: Stacking-MLR __ 0.0617
first: Stacking-FNN first: Stacking-MLR __ 0.1008


In [43]:
# Micro-score summary table for the main experiment: 'TS' plus one "mean+-std" column per approach.
micro_df = {'TS': TRAIN_SIZE_LIST}
for MODE_ID in range(6):
    model_name = MODE_LIST[MODE_ID]
    mean_4dp = np.round(micro_dict[MODE_ID]['mean'], 4)
    std_4dp = np.round(micro_dict[MODE_ID]['std'], 4)
    micro_df[model_name] = ['+-'.join((str(mean_4dp[i]), str(std_4dp[i]))) for i in range(4)]
pd.DataFrame(micro_df)

Unnamed: 0,TS,MFE+FNN,CNN,MultiNN,Stacking-DT,Stacking-FNN,Stacking-MLR
0,500,0.9415+-0.0044,0.9292+-0.0092,0.9367+-0.0062,0.9347+-0.0042,0.9447+-0.0035,0.9462+-0.0042
1,5000,0.96+-0.0011,0.9607+-0.0027,0.9578+-0.0017,0.9606+-0.0026,0.9674+-0.002,0.9664+-0.0018
2,50000,0.9712+-0.001,0.9731+-0.0023,0.9693+-0.002,0.9723+-0.0023,0.978+-0.0009,0.9772+-0.0013
3,162946,0.9741+-0.001,0.9775+-0.0014,0.9731+-0.0015,0.9757+-0.0012,0.9809+-0.0008,0.9801+-0.001


In [44]:
# For each train size, rank the six approaches by mean micro score and run a
# one-sided paired t-test (best > runner-up) on their per-repetition scores.
# Markers: '__' for p > 0.05, '*' for 0.01 < p <= 0.05, '**' for p <= 0.01.
for TRAIN_SIZE_ID in range(4):
    a_list = [micro_dict[MODE_ID]['mean'][TRAIN_SIZE_ID] for MODE_ID in range(6)]
    # argsort is ascending, so reverse it to obtain best and runner-up.
    first, second = np.argsort(a_list)[::-1][:2]
    p_value = stats.ttest_rel(micro_dict[first][TRAIN_SIZE_ID],
                              micro_dict[second][TRAIN_SIZE_ID], alternative='greater')[1]
    if p_value > 0.05:
        marker = '__'
    elif p_value > 0.01:
        marker = '*'
    else:
        marker = '**'
    # The runner-up is labelled 'second:' (it used to be printed as a second 'first:').
    print('first:', MODE_LIST[first], 'second:', MODE_LIST[second], marker, np.round(p_value, 5))

first: Stacking-MLR first: Stacking-FNN __ 0.1405
first: Stacking-FNN first: Stacking-MLR * 0.01531
first: Stacking-FNN first: Stacking-MLR * 0.01724
first: Stacking-FNN first: Stacking-MLR * 0.01466
