In [64]:
import os
import numpy as np 
import pandas as pd 
from subprocess import check_output
import matplotlib.pyplot as plt

# Data Load

In [65]:
# Directory that holds the submission CSVs read (and written) by this notebook.
sub_path = "data/submissions"
# os.listdir returns entries in arbitrary, platform-dependent order; sort them
# so the displayed listing is stable across runs and machines.
all_files = sorted(os.listdir(sub_path))
all_files

['leaderboard_subs_mean.csv',
 'leaderboard_subs_median.csv',
 'leaderboard_weighted_average.csv',
 'leaderboard_weighted_average2.csv',
 'submission_efficient_net.csv',
 'submission_inceptionv3.csv',
 'submission_original.csv']

In [66]:
# Submission files to blend. Order matters: later cells pick files out of this
# list by position.
selected_files = [
    'submission_efficient_net.csv',
    'submission_inceptionv3.csv',
    'submission_original.csv',
]

In [67]:
# Load every selected submission, using the first CSV column as the index, and
# place them side by side (pd.concat aligns the rows on that index).
outs = []
for fname in selected_files:
    outs.append(pd.read_csv(os.path.join(sub_path, fname), index_col=0))
concat_sub = pd.concat(outs, axis=1)

# One positional name per model column: rnsa0, rnsa1, ...
# NOTE(review): "rnsa" looks like a typo for "rsna" (the derived columns added
# later use the "rsna_" prefix); the prefix is kept unchanged here.
cols = ["rnsa" + str(i) for i in range(concat_sub.shape[1])]
concat_sub.columns = cols

# Move the index back into a regular first column, so the model columns sit at
# positions 1..ncol-1; ncol is captured now, before any derived columns exist.
concat_sub = concat_sub.reset_index()
ncol = concat_sub.shape[1]
concat_sub.head()

Unnamed: 0,ID,rnsa0,rnsa1
0,ID_000012eaf_any,0.126943,0.071976
1,ID_000012eaf_epidural,0.003692,0.001348
2,ID_000012eaf_intraparenchymal,0.01559,0.023166
3,ID_000012eaf_intraventricular,0.00417,0.005164
4,ID_000012eaf_subarachnoid,0.010137,0.011681


In [68]:
# Pairwise Pearson correlation between the per-model prediction columns.
# Column 0 holds the identifier, so the slice starts at position 1.
concat_sub.iloc[:, 1:ncol].corr(method="pearson")

Unnamed: 0,rnsa0,rnsa1
rnsa0,1.0,0.954013
rnsa1,0.954013,1.0


In [69]:
# Make sure the identifier column is named "ID": reset_index() names it "index"
# when the CSV's index column had no header. Only the column label is changed.
# The previous `index=str` argument also converted every row label to a string,
# which none of the visible cells rely on (they select by position or by column
# name and write CSVs with index=False), so it is dropped.
concat_sub = concat_sub.rename(columns={"index": "ID"})

In [70]:
# get the data fields ready for stacking
concat_sub['rsna_max'] = concat_sub.iloc[:, 1:ncol].max(axis=1)
concat_sub['rsna_min'] = concat_sub.iloc[:, 1:ncol].min(axis=1)
concat_sub['rsna_mean'] = concat_sub.iloc[:, 1:ncol].mean(axis=1)
concat_sub['rsna_median'] = concat_sub.iloc[:, 1:ncol].median(axis=1)

In [71]:
# Summary statistics for the per-model prediction columns only: the slice stops
# at ncol, which was computed before the rsna_* columns were added.
concat_sub.iloc[:, 1:ncol].describe()

Unnamed: 0,rnsa0,rnsa1
count,471270.0,471270.0
mean,0.05670364,0.055517
std,0.1875964,0.180802
min,4.730527e-10,0.0
25%,7.617355e-05,0.000262
50%,0.000854461,0.001766
75%,0.008308892,0.01188
max,0.9997467,0.999984


# Median Stacking

In [45]:
# Median blend: the submitted value is the row-wise median of the model
# predictions. Only the ID and Label columns are written, without the row index.
concat_sub['Label'] = concat_sub['rsna_median']
median_submission = concat_sub.loc[:, ['ID', 'Label']]
median_submission.to_csv('data/submissions/leaderboard_subs_median.csv', index=False)

# Mean Stacking

In [72]:
# Mean blend: the submitted value is the row-wise mean of the model
# predictions. This overwrites any Label column already on concat_sub.
concat_sub['Label'] = concat_sub['rsna_mean']
mean_submission = concat_sub.loc[:, ['ID', 'Label']]
mean_submission.to_csv('data/submissions/leaderboard_subs_mean2.csv', index=False)

# Weighted Average

In [58]:
# Display the file list: the next cell picks files out of it by position.
selected_files

['submission_efficient_net.csv',
 'submission_inceptionv3.csv',
 'submission_original.csv']

In [59]:
def _read_submission(position):
    """Read the submission at `selected_files[position]`, indexed by its first CSV column."""
    return pd.read_csv(os.path.join(sub_path, selected_files[position]), index_col=0)


# The variable names are tied to the order of `selected_files`.
sub_efficient_net = _read_submission(0)
sub_inceptionv3 = _read_submission(1)
sub_fastai = _read_submission(2)

In [60]:
# Work on a copy so that assigning the blended Label later does not modify
# sub_fastai; the copy keeps sub_fastai's index and row order.
cc = sub_fastai.copy()

In [61]:
# Weighted blend of the three models' Label columns; the weights
# 0.2 + 0.4 + 0.4 sum to 1.0. pandas aligns the Series on their index, so the
# files do not have to share row order.
fastai_part = 0.2 * sub_fastai['Label']
inception_part = 0.4 * sub_inceptionv3['Label']
effnet_part = 0.4 * sub_efficient_net['Label']
cc['Label'] = fastai_part + inception_part + effnet_part

In [62]:
# Sanity check: how far the blend's mean prediction sits from each individual
# model's mean (printed in the order fastai, inceptionv3, efficient_net).
blend_mean = cc['Label'].mean()
for model_sub in (sub_fastai, sub_inceptionv3, sub_efficient_net):
    print(blend_mean - model_sub['Label'].mean())

0.004855821950315385
-0.000620620574337892
-0.0018072904008283944


In [63]:
# Write the weighted blend. The index is written too (to_csv default), which is
# how the identifier column reaches the file, since cc was read with index_col=0.
cc.to_csv('data/submissions/leaderboard_weighted_average2.csv')

In [56]:
# Preview the first rows of the blended submission.
cc.head()

Unnamed: 0_level_0,Label
ID,Unnamed: 1_level_1
ID_e3674b189_any,0.002339
ID_e3674b189_epidural,2.8e-05
ID_e3674b189_intraparenchymal,0.000302
ID_e3674b189_intraventricular,0.000107
ID_e3674b189_subarachnoid,0.00134
