In [None]:
import os
import re
import sys 
import datetime
import time
import math

import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
from matplotlib import pyplot as plt

%cd MEYER_CTAI_SEG_CNN

In [None]:
def merge(regex, filename):
    """Concatenate every file of the current directory matching ``regex`` into ``filename``.

    Files are processed in sorted name order. The first line of each file is
    treated as a header: it is copied once (from the first non-empty file) and
    skipped for every other file.

    Parameters
    ----------
    regex : str
        Pattern applied with ``re.match`` (anchored at the start of the name)
        to the entries of the current working directory.
    filename : str
        Path of the merged file; it is overwritten if it already exists.
    """
    target = os.path.abspath(filename)
    # Never read the output file back as one of its own inputs.
    sources = sorted(name for name in os.listdir('.')
                     if re.match(regex, name) and os.path.abspath(name) != target)

    header_written = False
    # Context managers close every handle even if a read or write fails.
    with open(filename, "w") as merged:
        for name in sources:
            with open(name, "r") as part:
                for line_no, line in enumerate(part):
                    if line_no == 0:
                        if header_written:
                            continue
                        header_written = True
                    # A file whose last row lacks a newline must not be glued
                    # to the first row of the next file.
                    if not line.endswith("\n"):
                        line += "\n"
                    merged.write(line)
    
# Build the merged CSVs: one with every result file, then one per I3 variant
# (the negative lookahead keeps the I3_MI / I3_ER files out of the plain I3 merge).
# The dot before "csv" is escaped and the pattern is anchored with `$`, so only
# names that really end in ".csv" are picked up (re.match anchors the start only).
merge(r'CTAI_SEG_CNN.*\.csv$', 'MERGE.csv')
merge(r'CTAI_SEG_CNN_I3_(?!ER|MI).*\.csv$', 'MERGE_I3.csv')
merge(r'CTAI_SEG_CNN_I3_MI.*\.csv$', 'MERGE_I3_MI.csv')
merge(r'CTAI_SEG_CNN_I3_ER.*\.csv$', 'MERGE_I3_ER.csv')

In [None]:
# Load the four merged CSV files, in the same order as the names on the left.
result, result_i3, result_i3_mi, result_i3_er = [
    pd.read_csv(csv_path, sep=",")
    for csv_path in ("MERGE.csv", "MERGE_I3.csv", "MERGE_I3_MI.csv", "MERGE_I3_ER.csv")
]

In [None]:
# Checking that we don't have missing results
# 2 (best / last) * 3 (I3, I3_MI, I3_ER) * 11 (10 SETUP + BASELINE) * 11 ([A-J] + Z TRAIN)
N_BEST_OR_LAST = 2
N_DATASETS = 3
N_SETUPS = 11
N_TRAININGS = 11
EXPECTED_ROWS = N_BEST_OR_LAST * N_DATASETS * N_SETUPS * N_TRAININGS

# Exact comparison: the former `/ 2 / 3 // 11 // 11 == 1` relied on floor
# division and therefore accepted any row count from 726 up to 1451.
n_rows = result.count()['ID']
assert n_rows == EXPECTED_ROWS, f"expected {EXPECTED_ROWS} rows in MERGE.csv, found {n_rows}"

# Every (dataset, best/last, setup) combination must hold one row per training.
cpt = 0
for D in ['I3', 'I3_MI', 'I3_ER']:
    for X in ['BEST', 'LAST']:
        for Y in result[(result.DATASET == D) & (result.BEST_OR_LAST == X)].groupby('SETUP').count()['ID']:
            cpt += 1
            assert Y == N_TRAININGS, f"{D}/{X}: a setup has {Y} rows instead of {N_TRAININGS}"
assert cpt == N_BEST_OR_LAST * N_DATASETS * N_SETUPS, f"unexpected number of setup groups: {cpt}"

# Same per-setup check on each of the per-dataset frames.
for X in ['BEST', 'LAST']:
    for frame_name, frame in (('result_i3', result_i3),
                              ('result_i3_mi', result_i3_mi),
                              ('result_i3_er', result_i3_er)):
        for Y in frame[(frame.BEST_OR_LAST == X)].groupby('SETUP').count()['ID']:
            assert Y == N_TRAININGS, f"{frame_name}/{X}: a setup has {Y} rows instead of {N_TRAININGS}"

In [None]:
# Display labels for the raw SETUP identifiers (used in tables and figures).
# Labels contain math markup, so braces must stay balanced: the Complexity and
# Contrast entries previously carried a stray closing brace.
rename_dict = {'BASELINE':               'Baseline',
               'AREA_2048_1048576':      'Area $\\alpha$',
               'AREA_4096':              'Area $\\beta$',
               'AREA_DUAL_256_262144':   'Area $\\gamma$',
               'COMPACITY_MAX_50-MAX_AREA_D_AREAN_H_D': 'Compacity$_{\\Delta_A}$',
               'COMPLEXITY-MAX_AREA_D_AREAN_H_D-LIMIT_AREA': 'Complexity$_{\\Delta_A}$',
               'CONTRAST-MAX_AREA_D_AREAN_H_D-LIMIT_AREA': 'Contrast$_{\\Delta_A}$',
               'CONTRAST_10_150':        'Contrast $\\alpha$',
               'CONTRAST_DUAL_10_150':   'Contrast $\\beta$',
               'MGB-MAX_MGB-LIMIT_AREA': 'MGB',
               'VOLUME-MAX_AREA_D_AREAN_H_D-LIMIT_AREA': 'Volume$_{\\Delta_A}$'}

# Swap the raw SETUP identifiers for their display labels in every result frame.
for frame in (result, result_i3, result_i3_mi, result_i3_er):
    frame['SETUP'] = frame['SETUP'].replace(rename_dict)

# Display order of the setups, shared by the tables and the box plots.
# Listed by raw identifier and translated through rename_dict.
order_array = [
    rename_dict[setup_key]
    for setup_key in (
        'BASELINE',
        'CONTRAST-MAX_AREA_D_AREAN_H_D-LIMIT_AREA',
        'COMPLEXITY-MAX_AREA_D_AREAN_H_D-LIMIT_AREA',
        'COMPACITY_MAX_50-MAX_AREA_D_AREAN_H_D',
        'VOLUME-MAX_AREA_D_AREAN_H_D-LIMIT_AREA',
        'MGB-MAX_MGB-LIMIT_AREA',
        'CONTRAST_10_150',
        'CONTRAST_DUAL_10_150',
        'AREA_2048_1048576',
        'AREA_4096',
        'AREA_DUAL_256_262144',
    )
]

In [None]:
def get_table(df, c_val=['F1_1'], c_sel='VAL_F1_1', n_sel=8, latex=False, caption='Result', order=None):
    """Summarise, per SETUP, the ``n_sel`` rows with the largest ``c_sel`` value.

    For every SETUP group the ``n_sel`` rows with the highest ``c_sel`` are
    kept, then the mean and standard deviation of each ``c_val`` column are
    rendered as ``"<mean> ±<std>"`` strings with three decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``SETUP`` column, ``c_sel`` and every column of ``c_val``.
    c_val : list of str or str
        Column(s) to report. A bare string is treated as a single column.
    c_sel : str
        Column used to pick the rows kept in each SETUP group.
    n_sel : int
        Number of rows kept per SETUP group.
    latex : bool
        When True, return the LaTeX source produced by ``to_latex``.
    caption : str
        Caption forwarded to ``to_latex`` (only used when ``latex`` is True).
    order : list, optional
        Row order of the resulting table; defaults to the module-level
        ``order_array``.

    Returns
    -------
    pandas.DataFrame or str
        The formatted table, or its LaTeX source when ``latex`` is True.

    Raises
    ------
    AssertionError
        If a SETUP group does not end up with exactly ``n_sel`` non-null values
        in the first reported column.
    """
    c_val = [c_val] if isinstance(c_val, str) else list(c_val)
    if order is None:
        order = order_array

    # The last index level of the grouped nlargest result carries the original
    # row labels of the selected rows.
    selected = df.groupby('SETUP')[c_sel].nlargest(n_sel).index.get_level_values(-1)
    df = df[df.index.isin(selected)]

    for setup, n_rows in df.groupby('SETUP')[c_val[0]].count().items():
        assert n_rows == n_sel, f"SETUP {setup!r}: {n_rows} rows selected, expected {n_sel}"

    def _as_text(stats):
        # Column-wise Series.map instead of the deprecated DataFrame.applymap.
        # Values are formatted straight from full precision: rounding to four
        # decimals first could flip the third decimal (double rounding).
        return stats.apply(lambda column: column.map('{0:.3f}'.format))

    grouped = df.groupby('SETUP')[c_val]
    mean = _as_text(grouped.mean())
    std = _as_text(grouped.std())

    table = mean + u" \u00B1" + std
    table = table.reindex(order)

    if latex:
        return to_latex(table, len(c_val), caption)
    return table

def to_latex(df, length, caption):
    """Render ``df`` as a LaTeX table with vertical bars and ``\\hline`` rules.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export; cell contents are emitted unescaped.
    length : int
        Number of centred value columns placed after the left-aligned index column.
    caption : str
        Caption passed through to ``DataFrame.to_latex``.

    Returns
    -------
    str
        LaTeX source in which every row terminator is followed by ``\\hline``
        and the ``\\toprule`` / ``\\midrule`` / ``\\bottomrule`` rules are
        replaced by ``\\hline``.
    """
    latex = df.to_latex(column_format=f'|l|{"c|"*length}',
                        escape=False,
                        caption=caption)
    # Matching only the last backslash of a row's trailing "\\" plus the newline
    # turns "\\<newline>" into "\\ \hline<newline>". The backslash of "\hline" is
    # now escaped explicitly ("\h" is an invalid escape sequence).
    latex = latex.replace("\\\n", "\\ \\hline\n")
    for rule in ('\\toprule', '\\midrule', '\\bottomrule'):
        latex = latex.replace(rule, '\\hline')
    return latex

# Single-source tables: (result frame, reported columns, selection column, caption).
# Only the rows flagged BEST are used.
for source, value_cols, select_col, title in (
    (result_i3_mi, ['F1_1', 'ASSD_1'], 'VAL_F1_1', 'Mitochondrion'),
    (result_i3,    ['F1_1', 'ASSD_1'], 'VAL_F1_1', 'Mitochondrion multiclass'),
    (result_i3_er, ['F1_1', 'ASSD_1'], 'VAL_F1_1', 'Endoplasmic reticulum'),
    (result_i3,    ['F1_2', 'ASSD_2'], 'VAL_F1_2', 'Endoplasmic reticulum multiclass'),
):
    df = source[source.BEST_OR_LAST == 'BEST']
    print(get_table(df, c_val=value_cols, c_sel=select_col, latex=True, caption=title))

# Side-by-side tables: the binary frame on the left, the matching columns of the
# multiclass frame (result_i3) on the right.
for binary_source, multi_cols, multi_select, title in (
    (result_i3_mi, ['IOU_1', 'F1_1', 'ASSD_1'], 'VAL_F1_1', 'Mitochondrion binary and multiclass'),
    (result_i3_er, ['IOU_2', 'F1_2', 'ASSD_2'], 'VAL_F1_2', 'Endoplasmic reticulum binary and multiclass'),
):
    df = binary_source[binary_source.BEST_OR_LAST == 'BEST']
    df_bin = get_table(df, c_val=['IOU_1', 'F1_1', 'ASSD_1'], c_sel='VAL_F1_1', latex=False)
    df = result_i3[result_i3.BEST_OR_LAST == 'BEST']
    df_multi = get_table(df, c_val=multi_cols, c_sel=multi_select, latex=False)
    print(to_latex(pd.concat([df_bin, df_multi], axis=1), 6, caption=title))

In [None]:
hatches = ["/", "oo"]*11
whis = 10

# One figure per structure, comparing the binary frame with the matching F1
# column of the multiclass frame (result_i3):
# (multiclass F1 column, binary frame, DATASET label mapping, output file).
for multi_f1, binary_frame, dataset_labels, figure_path in (
    ('F1_1', result_i3_mi,
     {'I3_MI': 'Mitochondrion binary', 'I3': 'Mitochondrion multiclass'},
     "../figures/result_i3_mi.eps"),
    ('F1_2', result_i3_er,
     {'I3_ER': 'Endoplasmic reticulum binary', 'I3': 'Endoplasmic reticulum multiclass'},
     "../figures/result_i3_er.eps"),
):
    df1 = result_i3[['SETUP', multi_f1, 'DATASET']].rename({multi_f1: 'F1'}, axis=1)
    df2 = binary_frame[['SETUP', 'F1_1', 'DATASET']].rename({'F1_1': 'F1'}, axis=1)
    df = pd.concat([df1, df2])
    df = df.replace(dataset_labels)

    plt.figure(figsize=(18,8))
    ax = sns.boxplot(x="SETUP",y="F1", hue="DATASET", data=df, order=order_array, whis=whis)

    # Hatch whatever is found in both containers.
    # NOTE(review): presumably the boxes live in ax.artists or ax.patches
    # depending on the seaborn/matplotlib version - confirm.
    for box_container in (ax.artists, ax.patches):
        for hatch, patch in zip(hatches, box_container):
            patch.set_hatch(hatch)

    ax.set_xticklabels(ax.get_xticklabels(), rotation=30)
    ax.set_xlabel('')
    ax.tick_params(axis='x', which='major', labelsize=22)
    ax.set_ylabel('F1', fontsize=18)
    ax.tick_params(axis='y', which='major', labelsize=14)
    ax.legend(fontsize=22, loc='lower right')
    plt.tight_layout()
    plt.savefig(figure_path, format="eps")
    plt.show()