# Statistics

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind

# Turn off pandas' chained-assignment check (the SettingWithCopy warning) for
# the whole session; with None pandas neither warns nor raises on such writes.
pd.options.mode.chained_assignment = None

## Search strategy comparison

### Ablation

In [2]:
# Ablation: for each result file print the per-column means, then one-sided
# t-tests (alternative='greater') of the mtnas column against re and rs.
ablation_files = (
    'experiments/results/nasbench201/cifar10/tier1absent/accuracies.json',
    'experiments/results/nasbench201/cifar10/tier2absent/accuracies.json',
)
for position, accuracies_path in enumerate(ablation_files):
    if position:
        print()  # blank line separating the two reports
    df = pd.read_json(accuracies_path)
    print(df.mean())
    for baseline in ('re', 'rs'):
        print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.426250
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=6.042331524711918, pvalue=5.2559089977953775e-09)
Ttest_indResult(statistic=17.877307026359638, pvalue=4.312771850302285e-40)

mtnas    91.280625
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=1.1070503891769659, pvalue=0.13497736015456394)
Ttest_indResult(statistic=11.796581338527181, pvalue=9.780309661115345e-24)


### Trials

#### default

In [3]:
# cifar10/default: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/default/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.331875
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=2.70478360863588, pvalue=0.003792298242765802)
Ttest_indResult(statistic=13.85699587468626, pvalue=2.1877451179047022e-29)


#### cifar100default

In [4]:
# cifar100/default: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar100/default/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    73.10050
re       72.99750
rs       71.72375
dtype: float64
Ttest_indResult(statistic=1.7873404624603846, pvalue=0.037899716841888656)
Ttest_indResult(statistic=14.72350171449106, pvalue=9.621701229917489e-32)


#### in120default

In [5]:
# ImageNet16-120/default: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/ImageNet16-120/default/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    47.062083
re       46.743333
rs       45.827083
dtype: float64
Ttest_indResult(statistic=4.9380237526085295, pvalue=9.946582569053761e-07)
Ttest_indResult(statistic=16.4280295919051, pvalue=2.6233442342414757e-36)


#### tse

In [6]:
# cifar10/tier2sum: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/tier2sum/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.351375
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=3.2189864769011, pvalue=0.0007806978420492226)
Ttest_indResult(statistic=14.13602738367062, pvalue=3.793075729529861e-30)


#### tier1high

In [7]:
# cifar10/tier1high: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/tier1high/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.332375
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=2.707642794292826, pvalue=0.003761246514229957)
Ttest_indResult(statistic=13.815925177455476, pvalue=2.832398069243961e-29)


#### tier2high

In [8]:
# cifar10/tier2high: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/tier2high/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.349875
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=3.1921788577755947, pvalue=0.000852001849426781)
Ttest_indResult(statistic=14.162629557596885, pvalue=3.210216856460548e-30)


#### equal

In [9]:
# cifar10/equal: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/equal/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.387125
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=4.186463066247939, pvalue=2.3459116518486994e-05)
Ttest_indResult(statistic=14.87336450080814, pvalue=3.782575488584805e-32)


#### disjoint

In [10]:
# cifar10/disjoint: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/disjoint/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.288750
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=1.3143969670827855, pvalue=0.09530876290963342)
Ttest_indResult(statistic=11.831507621793492, pvalue=7.844047879388568e-24)


#### reevaluate

In [11]:
# cifar10/reevaluate: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/reevaluate/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.372750
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=3.748931208197849, pvalue=0.00012435073223082557)
Ttest_indResult(statistic=14.397023513863479, pvalue=7.394124840632757e-31)


#### adjusted reevaluate

In [12]:
# cifar10/adj_reevaluate: per-column means, then one-sided t-tests
# (alternative='greater') of mtnas against each baseline column.
df = pd.read_json('experiments/results/nasbench201/cifar10/adj_reevaluate/accuracies.json')
print(df.mean())
for baseline in ('re', 'rs'):
    print(ttest_ind(df.mtnas, df[baseline], alternative='greater'))

mtnas    91.305875
re       91.241500
rs       90.842125
dtype: float64
Ttest_indResult(statistic=1.9643504937546357, pvalue=0.025621897027183906)
Ttest_indResult(statistic=13.350150441221414, pvalue=5.3251860508014255e-28)


## Averages

In [13]:
def per_epoch_averages(paths, re_offset=50):
    """Load per-epoch result files and average them per epoch and algorithm.

    Each file is read with ``pd.read_json``; columns that are entirely NaN are
    discarded. The column labels are strings that evaluate to tuples: the
    first two tuple fields are named ``algorithm`` and ``seed``, and the
    remaining field ends up as the outer column level of the result (the code
    relies on it containing ``'true'`` and ``'estm'``). Values are averaged
    over everything except ``epoch`` and ``algorithm``.

    After averaging, the ``('estm', 're')`` column is dropped and the non-NaN
    run of ``('true', 're')`` is moved so that it starts at row ``re_offset``,
    with the first ``re_offset`` rows set to NaN.

    Parameters
    ----------
    paths : iterable of str
        JSON files to load; one frame is returned per path, in the same order.
    re_offset : int, default 50
        Row position at which the ``('true', 're')`` values start after the
        shift. The default reproduces the previously hard-coded value.
        NOTE(review): presumably the number of epochs that precede the first
        ``re`` measurement -- confirm against the experiment setup.

    Returns
    -------
    list of pandas.DataFrame
        Frames indexed by ``epoch`` with two-level columns whose inner level
        is ``algorithm``.

    Raises
    ------
    ValueError
        From the NumPy slice assignment when the shifted values do not fit
        into the rows available after ``re_offset``.
    """
    re_true = ('true', 're')
    dfs = []
    for path in paths:
        df = pd.read_json(path).dropna(axis=1, how='all')
        # NOTE(review): eval() executes whatever text the column labels hold,
        # so only feed this trusted result files. If the labels are always
        # plain tuple literals, ast.literal_eval would be the safe choice.
        df.columns = pd.MultiIndex.from_tuples(df.columns.map(eval)).rename(['algorithm', 'seed'], level=[0, 1])
        df.index = df.index.rename('epoch')
        df = df.stack([0, 1]).groupby(['epoch', 'algorithm']).mean().unstack(1).drop(('estm', 're'), axis=1)

        # Build the shifted curve on a private array and write it back with a
        # single column assignment. The earlier chained form
        # (df.true.re[...] = ...) only reached `df` when pandas returned views
        # and does nothing under Copy-on-Write.
        observed = df[re_true].dropna().to_numpy()
        shifted = df[re_true].to_numpy(dtype=float, copy=True)
        shifted[re_offset:re_offset + observed.shape[0]] = observed
        shifted[:re_offset] = np.nan
        df[re_true] = shifted

        dfs.append(df)
    return dfs

In [14]:
# Per-epoch averages for the tier2high and default cifar10 runs, in that order.
t2h, dflt = per_epoch_averages([
    'experiments/results/nasbench201/cifar10/tier2high/averages.json',
    'experiments/results/nasbench201/cifar10/default/averages.json',
])

In [15]:
# Independent-samples t-test (scipy's default two-sided alternative) between
# the ('true', 'mtnas') columns of the tier2high and default frames.
ttest_ind(t2h[('true', 'mtnas')], dflt[('true', 'mtnas')])

Ttest_indResult(statistic=0.12343171690053982, pvalue=0.9018065780669378)