# Import needed libraries

## Import libraries for data manipulation and statistics

In [142]:
from math import pi

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from statsmodels.stats.weightstats import  ttest_ind as t_test
from scipy.stats import ttest_1samp, wilcoxon, ttest_ind, mannwhitneyu
from scipy.stats import mstats
import scipy.special as special
import emoji
from statsmodels.stats.multicomp import pairwise_tukeyhsd, MultiComparison
from statsmodels.formula.api import ols
import statsmodels.stats.api as sms

## Import libraries for static plotting

In [143]:
# Plotting stack used for static figures.
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
# Render figures inside the notebook output cells.
%matplotlib inline
from IPython.display import set_matplotlib_formats
# Request both PNG and PDF renderings of inline figures.
# NOTE(review): newer IPython releases deprecate this import location in favour
# of matplotlib_inline.backend_inline.set_matplotlib_formats - confirm against
# the installed version before changing it.
set_matplotlib_formats('png', 'pdf')
# some nice colors from http://colorbrewer2.org/
COLOR1 = '#7fc97f'
COLOR2 = '#beaed4'
COLOR3 = '#fdc086'
COLOR4 = '#ffff99'
COLOR5 = '#386cb0'

# Start the tests and visualize the data

## Load the data for pruning the weights using random exploration

In [144]:
# Input files: one table with a row per dataset and a column per pruning
# method, and one table with the LeNet pruning results.
datafile, datafileLeNet = "all.csv", "LecunPruningWeights.csv"

# Read both tables; the first one is shown as the cell output.
df1 = pd.read_csv(filepath_or_buffer=datafile)
dfLcun = pd.read_csv(filepath_or_buffer=datafileLeNet)
df1

Unnamed: 0,Dataset,Model,E.Greedy,WSLS,UCB1,KLUCB,BayUCB,OBD,OBS,Thom. Sam,Magnitude,random
0,banknote authentication,0.01,0.01,0.04,0.01,0.01,0.01,0.01,0.02,0.01,3.23,5.13
1,Blood Tra. Service Centre,0.08,0.08,0.2,0.08,0.08,0.08,0.08,0.08,0.08,0.44,0.08
2,Credit Approval,0.08,0.1,0.08,0.08,0.11,0.11,0.08,8.62,0.78,2.55,22.19
3,Haberman's Survival,0.09,0.08,0.09,0.08,0.08,0.08,0.08,0.08,0.08,0.63,0.65
4,Liver Disorders,0.1,0.1,0.11,0.1,0.1,0.1,0.1,0.85,0.1,0.62,0.15
5,MAGIC Gamma Tele.,0.06,0.1,0.32,0.06,0.06,0.06,0.06,0.12,0.06,2.49,0.43
6,Mammographic Mass,0.09,0.09,0.09,0.09,0.09,0.09,0.09,0.09,0.09,2.59,0.13
7,MONK's Problems,0.1,0.12,0.29,0.1,0.1,0.1,0.1,5.28,0.1,0.15,0.13
8,Connectionist Bench,0.12,0.29,0.73,0.4,0.5,0.5,0.12,0.12,1.07,0.16,0.16
9,Spambase,0.08,0.1,0.09,0.64,0.64,0.64,0.08,4.37,0.09,1.67,5.01


In [145]:
# Display the LeNet table read from LecunPruningWeights.csv.
dfLcun

Unnamed: 0,Layer,Model,TS Prune half the weights,EG Prune half the weights,UCB1 Prune half the weights
0,FC,0.9906,0.993,0.9908,0.994
1,Conv,0.9906,0.991,0.9907,0.992


## Using Nonparametric tests

I am not sure the data come from a Gaussian distribution, and there are fewer than 30 samples, so nonparametric tests are used.

### Alternative to the paired t-test when the data are on an ordinal scale or are not
### normally distributed

## Start comparing all pruning algorithms

### Compute the Friedman test by ranks between pruning methods

In [146]:
# Friedman test by ranks: do the pruning methods listed below differ across
# the rows of df1?  The columns are passed in the same order as before.
H, pval = mstats.friedmanchisquare(
    *(df1[method] for method in ['Thom. Sam', 'BayUCB', 'OBD',
                                 'E.Greedy', 'WSLS', 'Magnitude']))
print("H-statistic:", H)
print("P-Value:", pval)
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.  "Accept" in the second message means
# "fail to reject at the 0.05 level".
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

H-statistic: 34.6617647059
P-Value: 1.75755369209e-06
Reject NULL hypothesis - Significant differences exist between groups.


# Compute the Friedman test by ranks between UCB1 and other pruning methods

In [147]:
# Friedman test by ranks between UCB1 and the other pruning methods listed
# below, over the rows of df1.  The columns are passed in the original order.
H, pval = mstats.friedmanchisquare(
    *(df1[method] for method in ['UCB1', 'BayUCB', 'OBD', 'OBS', 'Magnitude']))
print("H-statistic:", H)
print("P-Value:", pval)
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.  "Accept" in the second message means
# "fail to reject at the 0.05 level".
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

H-statistic: 26.8602150538
P-Value: 2.12152413603e-05
Reject NULL hypothesis - Significant differences exist between groups.


# Compute the Friedman test by ranks between Thompson Sampling and other pruning methods

In [148]:
# Friedman test by ranks between Thompson Sampling and the other pruning
# methods listed below, over the rows of df1 (original column order kept).
H, pval = mstats.friedmanchisquare(
    *(df1[method] for method in ['Thom. Sam', 'OBS', 'OBD', 'WSLS', 'Magnitude']))
print("H-statistic:", H)
print("P-Value:", pval)
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.  "Accept" in the second message means
# "fail to reject at the 0.05 level".
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

H-statistic: 26.7102803738
P-Value: 2.27480277719e-05
Reject NULL hypothesis - Significant differences exist between groups.


# Compute the Friedman test by ranks between Epsilon Greedy, WSLS and other pruning methods

In [149]:
# Friedman test by ranks between E.Greedy, WSLS and the other pruning methods
# listed below, over the rows of df1 (original column order kept).
H, pval = mstats.friedmanchisquare(
    *(df1[method] for method in ['E.Greedy', 'WSLS', 'Magnitude', 'OBD', 'OBS']))
print("H-statistic:", H)
print("P-Value:", pval)
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.  "Accept" in the second message means
# "fail to reject at the 0.05 level".
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

H-statistic: 27.5023041475
P-Value: 1.57317296651e-05
Reject NULL hypothesis - Significant differences exist between groups.


## Rank-sum tests between our method and each other method separately, treating the two samples as independent

In [150]:
# Wilcoxon rank-sum test of the UCB1 column against the random-pruning column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
# NOTE(review): ranksums treats the two samples as independent, while both
# columns are measured on the same rows of df1 - confirm that a paired test
# is not what is wanted here.
print('UCB vs random Pruning')
H, pval = stats.ranksums(df1['UCB1'], df1['random'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

UCB vs random Pruning
H-statistic:	-2.39600361714
P-value:	0.0165749267499
Reject NULL hypothesis - Significant differences exist between groups.


In [151]:
# Wilcoxon rank-sum test of the UCB1 column against the OBD column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('UCB vs Optimal Brain Damage')
H, pval = stats.ranksums(df1['UCB1'], df1['OBD'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

UCB vs Optimal Brain Damage
H-statistic:	0.923760430703
P-value:	0.355611061095
Accept NULL hypothesis - No significant difference between groups.


In [152]:
# Wilcoxon rank-sum test of the UCB1 column against the OBS column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('UCB vs Optimal Brain Surgeon')
H, pval = stats.ranksums(df1['UCB1'], df1['OBS'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

UCB vs Optimal Brain Surgeon
H-statistic:	-1.29903810568
P-value:	0.193930852282
Accept NULL hypothesis - No significant difference between groups.


In [153]:
# Wilcoxon rank-sum test of the UCB1 column against the Magnitude column
# (printed under the title "Deep Compression").
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('UCB vs Deep Compression')
H, pval = stats.ranksums(df1['UCB1'], df1['Magnitude'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

UCB vs Deep Compression
H-statistic:	-3.63730669589
P-value:	0.000275503811434
Reject NULL hypothesis - Significant differences exist between groups.


## ranksums test between KLUCB and other pruning methods.

In [154]:
# Wilcoxon rank-sum test of the KLUCB column against the random-pruning column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('KLUCB vs random Pruning')
H, pval = stats.ranksums(df1['KLUCB'], df1['random'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

KLUCB vs random Pruning
H-statistic:	-2.30940107676
P-value:	0.0209213353378
Reject NULL hypothesis - Significant differences exist between groups.


In [155]:
# Wilcoxon rank-sum test of the KLUCB column against the OBD column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('KLUCB vs Optimal Brain Damage')
H, pval = stats.ranksums(df1['KLUCB'], df1['OBD'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

KLUCB vs Optimal Brain Damage
H-statistic:	1.2124355653
P-value:	0.225345693702
Accept NULL hypothesis - No significant difference between groups.


In [156]:
# Wilcoxon rank-sum test of the KLUCB column against the OBS column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('KLUCB vs Optimal Brain Surgeon')
H, pval = stats.ranksums(df1['KLUCB'], df1['OBS'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

KLUCB vs Optimal Brain Surgeon
H-statistic:	-1.18356805184
P-value:	0.236584093322
Accept NULL hypothesis - No significant difference between groups.


In [157]:
# Wilcoxon rank-sum test of the KLUCB column against the Magnitude column
# (printed under the title "Deep Compression").
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('KLUCB vs Deep Compression')
H, pval = stats.ranksums(df1['KLUCB'], df1['Magnitude'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

KLUCB vs Deep Compression
H-statistic:	-3.57957166898
P-value:	0.000344157853847
Reject NULL hypothesis - Significant differences exist between groups.


## ranksums test between BayUCB and other pruning methods.

In [158]:
# Wilcoxon rank-sum test of the BayUCB column against the random-pruning column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('BayUCB vs random Pruning')
H, pval = stats.ranksums(df1['BayUCB'], df1['random'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

BayUCB vs random Pruning
H-statistic:	-2.30940107676
P-value:	0.0209213353378
Reject NULL hypothesis - Significant differences exist between groups.


In [159]:
# Wilcoxon rank-sum test of the BayUCB column against the OBD column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('BayUCB vs Optimal Brain Damage')
H, pval = stats.ranksums(df1['BayUCB'], df1['OBD'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

BayUCB vs Optimal Brain Damage
H-statistic:	1.2124355653
P-value:	0.225345693702
Accept NULL hypothesis - No significant difference between groups.


In [160]:
# Wilcoxon rank-sum test of the BayUCB column against the OBS column.
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('BayUCB vs Optimal Brain Surgeon')
H, pval = stats.ranksums(df1['BayUCB'], df1['OBS'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

BayUCB vs Optimal Brain Surgeon
H-statistic:	-1.18356805184
P-value:	0.236584093322
Accept NULL hypothesis - No significant difference between groups.


In [161]:
# Wilcoxon rank-sum test of the BayUCB column against the Magnitude column
# (printed under the title "Deep Compression").
# ranksums returns (statistic, pvalue); the statistic is kept in H to match
# the printed label.
print('BayUCB vs Deep Compression')
H, pval = stats.ranksums(df1['BayUCB'], df1['Magnitude'])
print ("H-statistic:\t%s\nP-value:\t%s" % (H, pval))
# if/else instead of two independent ifs, so that every p-value - including
# exactly 0.05 - produces a verdict.
if pval < 0.05:
    print("Reject NULL hypothesis - Significant differences exist between groups.")
else:
    print("Accept NULL hypothesis - No significant difference between groups.")

BayUCB vs Deep Compression
H-statistic:	-3.57957166898
P-value:	0.000344157853847
Reject NULL hypothesis - Significant differences exist between groups.


In [162]:
# Models of interest: only pairs involving at least one of them have their
# p-value collected below.
interstModel = ['BayUCB', 'UCB1', 'KLUCB',
               'E.Greedy', 'WSLS', 'Thom. Sam']

# Every column of df1 except 'Dataset' is treated as a model to compare.
lst = list(df1.columns.values)
lst.remove('Dataset')

# All unordered pairs (lst[i], lst[j]) with i < j, in column order.
n_models = len(df1.columns) - 1
model_pairs = [(lst[i], lst[j])
               for i in range(n_models - 1)
               for j in range(i + 1, n_models)]

# Run the rank-sum test on every pair and print each result; p-values are
# stored only for pairs that touch a model of interest.
pvalueList = []
new_model_pairs = []
for m1, m2 in model_pairs:
    print('\n',m1, m2)
    pvalue = stats.ranksums(df1[m1], df1[m2])
    if any(model in interstModel for model in (m1, m2)):
        new_model_pairs.append((m1, m2))
        pvalueList.append(pvalue[1])
    print(pvalue)


 Model E.Greedy
RanksumsResult(statistic=-1.0103629710818449, pvalue=0.31232142167621613)

 Model WSLS
RanksumsResult(statistic=-2.3960036171369468, pvalue=0.01657492674985327)

 Model UCB1
RanksumsResult(statistic=-0.808290376865476, pvalue=0.4189234409463829)

 Model KLUCB
RanksumsResult(statistic=-1.1258330249197701, pvalue=0.26023620261718883)

 Model BayUCB
RanksumsResult(statistic=-1.1258330249197701, pvalue=0.26023620261718883)

 Model OBD
RanksumsResult(statistic=0.14433756729740643, pvalue=0.88523391447320166)

 Model OBS
RanksumsResult(statistic=-2.0207259421636898, pvalue=0.043308142810791976)

 Model Thom. Sam
RanksumsResult(statistic=-0.57735026918962573, pvalue=0.5637028616507731)

 Model Magnitude
RanksumsResult(statistic=-4.1569219381653051, pvalue=3.2256414562437673e-05)

 Model random
RanksumsResult(statistic=-3.233161507461904, pvalue=0.0012242832644312096)

 E.Greedy WSLS
RanksumsResult(statistic=-1.7320508075688772, pvalue=0.083264516663550406)

 E.Greedy UCB1
Ran

In [163]:
# Print one verdict line per collected pair, using 0.05 as the threshold.
for pair, p in zip(new_model_pairs, pvalueList):
    if p < 0.05:
        relation = '< 0.05 then'
        verdict = 'REJECT the NULL Hypothesis :thumbs_up_sign:'
    else:
        relation = '> 0.05 then'
        verdict = 'FAIL to REJECT the NULL Hypothesis :thumbs_down_sign:'
    print('The pvalue between', pair, 'is', p, relation, emoji.emojize(verdict))

The pvalue between ('Model', 'E.Greedy') is 0.312321421676 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('Model', 'WSLS') is 0.0165749267499 < 0.05 then REJECT the NULL Hypothesis 👍
The pvalue between ('Model', 'UCB1') is 0.418923440946 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('Model', 'KLUCB') is 0.260236202617 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('Model', 'BayUCB') is 0.260236202617 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('Model', 'Thom. Sam') is 0.563702861651 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('E.Greedy', 'WSLS') is 0.0832645166636 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('E.Greedy', 'UCB1') is 0.953959692772 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('E.Greedy', 'KLUCB') is 0.686105956996 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('E.Greedy', 'BayUCB') is 0.6

## Prune LeCun Model

In [164]:
# Get all models pairs
interstModel = ['TS Prune half the weights', 'EG Prune half the weights',
                'UCB1 Prune half the weights']
lst = list(dfLcun.columns.values)
lst.remove('Layer')

# dfLcun_ranked is not created by any cell shown in this notebook, so on a
# fresh kernel this cell stopped with a NameError.  Build it here: rank the
# values across the model columns within each row and keep the 'Layer' column.
# NOTE(review): reconstructed from the saved output of this cell (row-wise
# ranks reproduce the recorded statistics) - confirm against the original
# definition.
dfLcun_ranked = dfLcun.copy()
dfLcun_ranked[lst] = dfLcun[lst].rank(axis=1)

# All unordered pairs of model columns, in column order.
model_pairs = [(lst[i], lst[j])
               for i in range(len(lst))
               for j in range(i + 1, len(lst))]

# Conduct a rank-sum test on each pair (stats.ranksums, not a t-test).
# NOTE(review): the saved output shows the same p-value for every pair; with
# only two rows per model this test may be unable to reach significance.
pvalueList = []
new_model_pairs = []
for m1, m2 in model_pairs:
    print('\n',m1,'<--- VS --->', m2)
    pvalue = stats.ranksums(dfLcun_ranked[m1], dfLcun_ranked[m2])
    # Keep the p-value only for pairs that involve a model of interest.
    if (m1 in interstModel or m2 in interstModel):
        new_model_pairs.append((m1,m2))
        pvalueList.append(pvalue[1])
    print(pvalue)


 Model <--- VS ---> TS Prune half the weights
RanksumsResult(statistic=-1.5491933384829668, pvalue=0.12133525035848211)

 Model <--- VS ---> EG Prune half the weights
RanksumsResult(statistic=-1.5491933384829668, pvalue=0.12133525035848211)

 Model <--- VS ---> UCB1 Prune half the weights
RanksumsResult(statistic=-1.5491933384829668, pvalue=0.12133525035848211)

 TS Prune half the weights <--- VS ---> EG Prune half the weights
RanksumsResult(statistic=1.5491933384829668, pvalue=0.12133525035848211)

 TS Prune half the weights <--- VS ---> UCB1 Prune half the weights
RanksumsResult(statistic=-1.5491933384829668, pvalue=0.12133525035848211)

 EG Prune half the weights <--- VS ---> UCB1 Prune half the weights
RanksumsResult(statistic=-1.5491933384829668, pvalue=0.12133525035848211)


In [165]:
# Print one verdict line per collected pair, using 0.05 as the threshold.
for pair, p in zip(new_model_pairs, pvalueList):
    if p < 0.05:
        relation = '< 0.05 then'
        verdict = 'REJECT the NULL Hypothesis :thumbs_up_sign:'
    else:
        relation = '> 0.05 then'
        verdict = 'FAIL to REJECT the NULL Hypothesis :thumbs_down_sign:'
    print('The pvalue between', pair, 'is', p, relation, emoji.emojize(verdict))

The pvalue between ('Model', 'TS Prune half the weights') is 0.121335250358 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('Model', 'EG Prune half the weights') is 0.121335250358 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('Model', 'UCB1 Prune half the weights') is 0.121335250358 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('TS Prune half the weights', 'EG Prune half the weights') is 0.121335250358 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('TS Prune half the weights', 'UCB1 Prune half the weights') is 0.121335250358 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
The pvalue between ('EG Prune half the weights', 'UCB1 Prune half the weights') is 0.121335250358 > 0.05 then FAIL to REJECT the NULL Hypothesis 👎
