In [12]:
import os
import numpy as np 
import pandas as pd 
from subprocess import check_output
import matplotlib.pyplot as plt

# Data Load

In [13]:
# Directory that holds the individual submission CSVs.
sub_path = "data/submissions"
# os.listdir returns entries in arbitrary order; sort them so the listing is
# stable from run to run and across platforms.
all_files = sorted(os.listdir(sub_path))
all_files

['fastai_256px_full_noclamp.csv',
 'stacks',
 'submission_352px_full_tta.csv',
 'submission_512px_full_noclamp.csv',
 'submission_efficientnet.csv',
 'submission_efficientnet2.csv',
 'submission_fastai_352px_full.csv',
 'submission_fastai_352px_full_noclamp.csv',
 'submission_fastai_512px_full.csv',
 'submission_fastai_original.csv']

In [14]:
# Submissions that go into the blend. The EfficientNet submissions
# ('submission_efficientnet2.csv', 'submission_efficientnet.csv') are
# currently excluded.
selected_files = [
    'fastai_256px_full_noclamp.csv',
    'submission_fastai_352px_full_noclamp.csv',
    'submission_512px_full_noclamp.csv',
]

In [15]:
# Read each selected submission (the first CSV column becomes the index) and
# place them side by side, one prediction column per file.
outs = [pd.read_csv(os.path.join(sub_path, f), index_col=0) for f in selected_files]
concat_sub = pd.concat(outs, axis=1)
cols = ["rnsa" + str(i) for i in range(len(concat_sub.columns))]
concat_sub.columns = cols

# pd.concat(axis=1) aligns rows on the index and fills the gaps with NaN, so a
# submission with missing or extra IDs would silently distort the row-wise
# statistics computed from these columns. Fail loudly instead.
if concat_sub.isnull().any().any():
    raise ValueError(
        "NaN found after concatenation: the selected submissions contain "
        "missing values or do not share the same set of IDs"
    )

# Move the index back into a regular column. `ncol` counts that column too,
# so iloc[:, 1:ncol] selects exactly the per-file prediction columns.
concat_sub = concat_sub.reset_index()
ncol = concat_sub.shape[1]
concat_sub.head()

Unnamed: 0,ID,rnsa0,rnsa1,rnsa2
0,ID_e3674b189_any,0.000629,0.000609,0.004737
1,ID_e3674b189_epidural,9.7e-05,9e-05,0.000212
2,ID_e3674b189_intraparenchymal,0.000208,0.000285,0.000339
3,ID_e3674b189_intraventricular,1.9e-05,1.9e-05,3e-05
4,ID_e3674b189_subarachnoid,5.2e-05,0.000144,0.000155


In [16]:
# Pairwise correlation between the per-file prediction columns
# (position 0 is skipped, so the slice starts at 1).
concat_sub[concat_sub.columns[1:ncol]].corr()

Unnamed: 0,rnsa0,rnsa1,rnsa2
rnsa0,1.0,0.971374,0.951366
rnsa1,0.971374,1.0,0.967945
rnsa2,0.951366,0.967945,1.0


In [17]:
# reset_index() calls the restored column "index" when the original index had
# no name; normalise that to "ID" so later cells can select it. Only the
# column label is touched: the row labels are left as they are rather than
# being converted to strings, which nothing later in the notebook relies on.
concat_sub = concat_sub.rename(columns={"index": "ID"})

In [18]:
# Row-wise summary statistics over the per-file prediction columns; the new
# columns are appended after position `ncol`, so the slice itself is unchanged.
model_preds = concat_sub.iloc[:, 1:ncol]
concat_sub['rsna_max'] = model_preds.max(axis=1)
concat_sub['rsna_min'] = model_preds.min(axis=1)
concat_sub['rsna_mean'] = model_preds.mean(axis=1)
concat_sub['rsna_median'] = model_preds.median(axis=1)

In [19]:
# Summary statistics for the per-file prediction columns only.
concat_sub[concat_sub.columns[1:ncol]].describe()

Unnamed: 0,rnsa0,rnsa1,rnsa2
count,471270.0,471270.0,471270.0
mean,0.054521,0.050757,0.053082
std,0.190148,0.185376,0.18763
min,0.0,0.0,0.0
25%,4.1e-05,2.4e-05,6.5e-05
50%,0.000392,0.000191,0.000486
75%,0.004682,0.002463,0.00483
max,0.999894,0.999891,0.999999


# Median Stacking

In [18]:
# Median blend: the row-wise median becomes the submission's Label.
# The submission frame is built locally instead of overwriting
# concat_sub['Label'], so concat_sub does not end up holding whichever
# blend happened to be written last.
median_sub = concat_sub[['ID', 'rsna_median']].rename(columns={'rsna_median': 'Label'})
median_sub.to_csv('data/submissions/stacks/subs_median.csv', index=False)

# Mean Stacking

In [20]:
# Mean blend: the row-wise mean becomes the submission's Label.
# The submission frame is built locally instead of overwriting
# concat_sub['Label'], so concat_sub does not end up holding whichever
# blend happened to be written last.
mean_sub = concat_sub[['ID', 'rsna_mean']].rename(columns={'rsna_mean': 'Label'})
mean_sub.to_csv('data/submissions/stacks/mean_stack_final1_fastai_only.csv', index=False)

### Weighted Average

In [58]:
# NOTE(review): the output recorded for this cell lists different file names
# than the `selected_files` assignment earlier in the notebook, so it reflects
# kernel state that the visible code does not recreate. Confirm which
# submissions the weighted average is meant to use before re-running this
# section.
selected_files

['submission_efficient_net.csv',
 'submission_inceptionv3.csv',
 'submission_original.csv']

In [59]:
# Load the first three entries of `selected_files`, each indexed by its first
# CSV column.
# NOTE(review): the variables are named after specific models but are filled
# purely by position in `selected_files` — verify the list order matches.
sub_efficient_net, sub_inceptionv3, sub_fastai = [
    pd.read_csv(os.path.join(sub_path, selected_files[pos]), index_col=0)
    for pos in range(3)
]

In [60]:
# Start the blended frame from an independent copy of sub_fastai so that
# assigning to it later leaves sub_fastai untouched.
cc = sub_fastai.copy(deep=True)

In [61]:
# Weighted blend of the three Label columns: 0.2 * sub_fastai,
# 0.4 * sub_inceptionv3, 0.4 * sub_efficient_net (the weights sum to 1).
cc['Label'] = (
    sub_fastai['Label'].mul(0.2)
    .add(sub_inceptionv3['Label'].mul(0.4))
    .add(sub_efficient_net['Label'].mul(0.4))
)

In [62]:
# How far the blend's average Label sits from each input's average Label,
# printed in the order: sub_fastai, sub_inceptionv3, sub_efficient_net.
blend_mean = cc['Label'].mean()
for single_sub in (sub_fastai, sub_inceptionv3, sub_efficient_net):
    print(blend_mean - single_sub['Label'].mean())

0.004855821950315385
-0.000620620574337892
-0.0018072904008283944


In [63]:
# Write the weighted blend; the frame's index is written as the leading column.
cc.to_csv('data/submissions/stacks/leaderboard_weighted_average2.csv', index=True)

In [56]:
# Preview the first five rows of the blended frame.
cc.head(5)

Unnamed: 0_level_0,Label
ID,Unnamed: 1_level_1
ID_e3674b189_any,0.002339
ID_e3674b189_epidural,2.8e-05
ID_e3674b189_intraparenchymal,0.000302
ID_e3674b189_intraventricular,0.000107
ID_e3674b189_subarachnoid,0.00134
