### Dependencies

In [2]:
# Base / Native
import math
import os
from os.path import join
import pickle
import re
import warnings
# Suppress every Python warning for the rest of the session. Note that this also
# hides deprecation notices raised by the libraries imported in this cell.
warnings.filterwarnings('ignore')

# Numerical / Array
import numpy as np
import pandas as pd
import scipy
from scipy import interp
from scipy.stats import ttest_ind
from tqdm import tqdm

### Table 1

In [3]:
results_dir = './results_cvpr2022_class/'
summary_df_all = []
for task in ['tcga_brca_subtype', 'tcga_lung_subtype', 'tcga_kidney_subtype']: # os.listdir(os.path.join(results_dir))
    props = [0.25, 1.0]
    for prop in props:
        models = ['mil', 'clam_sb', 'mi_fcn', 'dsmil', 'dgcn',
                  'hipt_lgp[vit4k_xs_dino]_freeze_[None]']
        model_names = ['MIL', 'CLAM', 'DeepAttnMISL', 'DSMIL', 'DGCN',
                       'HIPT (SA-DINO-Freeze, SA-None)']
        features = ['vits_tcga_pancancer_dino']

        exps = []
        for feature in features:
            for model in models:
                exp_name = '%s_%s_%s_%0.2f_none_s1' % (task, model, feature, prop)
                exps.append(exp_name)
        
        summary_df = []
        for exp in exps:
            summary_path = os.path.join(results_dir, task, exp, 'summary.csv')
            if os.path.isfile(summary_path):
                results_df = pd.read_csv(summary_path, index_col=0)
                exp_auc = '%0.3f +/- %0.3f' % (results_df['test_auc'].mean(), results_df['test_auc'].std())
                summary_df.append(exp_auc)
            else:
                summary_df.append('-')
        summary_df_all.append(summary_df)

summary_df_all = pd.DataFrame(summary_df_all).T
summary_df_all.columns = ['BRCA (25%)', 'BRCA (100%)', 'Lung (25%)', 'Lung (100%)',
                          'Kidney (25%)', 'Kidney (100%)']
model_names = [ 'MIL', 'CLAM',  'DeepAttnMISL', 'DS-MIL', 'DGCN',
               'HIPT (ViT-256-F + ViT-4096)']
summary_df_all.index = model_names
print(summary_df_all.to_latex())
summary_df_all

\begin{tabular}{lllllll}
\toprule
{} &       BRCA (25\%) &      BRCA (100\%) &       Lung (25\%) &      Lung (100\%) &     Kidney (25\%) &    Kidney (100\%) \\
\midrule
MIL                         &  0.673 +/- 0.112 &  0.778 +/- 0.091 &  0.857 +/- 0.059 &  0.892 +/- 0.042 &  0.904 +/- 0.055 &  0.959 +/- 0.015 \\
CLAM                        &  0.796 +/- 0.063 &  0.858 +/- 0.067 &  0.852 +/- 0.034 &  0.928 +/- 0.021 &  0.957 +/- 0.012 &  0.973 +/- 0.017 \\
DeepAttnMISL                &  0.685 +/- 0.110 &  0.784 +/- 0.061 &  0.663 +/- 0.077 &  0.778 +/- 0.045 &  0.904 +/- 0.024 &  0.943 +/- 0.016 \\
DS-MIL                      &  0.760 +/- 0.088 &  0.838 +/- 0.074 &  0.787 +/- 0.073 &  0.920 +/- 0.024 &  0.949 +/- 0.028 &  0.971 +/- 0.016 \\
DGCN                        &  0.727 +/- 0.076 &  0.840 +/- 0.073 &  0.748 +/- 0.050 &  0.831 +/- 0.034 &  0.923 +/- 0.012 &  0.957 +/- 0.012 \\
HIPT (ViT-256-F + ViT-4096) &  0.821 +/- 0.069 &  0.874 +/- 0.060 &  0.923 +/- 0.020 &  0.952 +/- 0.021 &  0.974 +/- 0.012 &  0.980 +/- 0.013 \\
\bottomrule
\end{tabular}

Unnamed: 0,BRCA (25%),BRCA (100%),Lung (25%),Lung (100%),Kidney (25%),Kidney (100%)
MIL,0.673 +/- 0.112,0.778 +/- 0.091,0.857 +/- 0.059,0.892 +/- 0.042,0.904 +/- 0.055,0.959 +/- 0.015
CLAM,0.796 +/- 0.063,0.858 +/- 0.067,0.852 +/- 0.034,0.928 +/- 0.021,0.957 +/- 0.012,0.973 +/- 0.017
DeepAttnMISL,0.685 +/- 0.110,0.784 +/- 0.061,0.663 +/- 0.077,0.778 +/- 0.045,0.904 +/- 0.024,0.943 +/- 0.016
DS-MIL,0.760 +/- 0.088,0.838 +/- 0.074,0.787 +/- 0.073,0.920 +/- 0.024,0.949 +/- 0.028,0.971 +/- 0.016
DGCN,0.727 +/- 0.076,0.840 +/- 0.073,0.748 +/- 0.050,0.831 +/- 0.034,0.923 +/- 0.012,0.957 +/- 0.012
HIPT (ViT-256-F + ViT-4096),0.821 +/- 0.069,0.874 +/- 0.060,0.923 +/- 0.020,0.952 +/- 0.021,0.974 +/- 0.012,0.980 +/- 0.013


In [4]:
results_dir = './results_cvpr2022_class/'
summary_df_all = []
for task in ['tcga_brca_subtype', 'tcga_lung_subtype', 'tcga_kidney_subtype']: # os.listdir(os.path.join(results_dir))
    props = [0.25, 1.0]
    for prop in props:
        models = ['mil', 'clam_sb', 'mi_fcn', 'dsmil', 'dgcn',
                  'hipt_lgp[vit4k_xs_dino]_freeze_[None]']
        model_names = ['MIL', 'CLAM', 'DeepAttnMISL', 'DSMIL', 'DGCN',
                       'HIPT (SA-DINO-Freeze, SA-None)']
        features = ['vits_tcga_pancancer_dino']

        exps = []
        for feature in features:
            for model in models:
                exp_name = '%s_%s_%s_%0.2f_none_s1' % (task, model, feature, prop)
                exps.append(exp_name)
        
        summary_df = []
        for exp in exps:
            summary_path = os.path.join(results_dir, task, exp, 'summary.csv')
            if os.path.isfile(summary_path):
                results_df = pd.read_csv(summary_path, index_col=0)
                exp_auc = '%0.3f' % (results_df['test_auc'][0])
                summary_df.append(exp_auc)
            else:
                summary_df.append('-')
        summary_df_all.append(summary_df)

summary_df_all = pd.DataFrame(summary_df_all).T
summary_df_all.columns = ['BRCA (25%)', 'BRCA (100%)', 'Lung (25%)', 'Lung (100%)',
                          'Kidney (25%)', 'Kidney (100%)']
model_names = [ 'MIL', 'CLAM',  'DeepAttnMISL', 'DS-MIL', 'DGCN',
               'HIPT (ViT-256-F + ViT-4096)']
summary_df_all.index = model_names
print(summary_df_all.to_latex())
summary_df_all

\begin{tabular}{lllllll}
\toprule
{} & BRCA (25\%) & BRCA (100\%) & Lung (25\%) & Lung (100\%) & Kidney (25\%) & Kidney (100\%) \\
\midrule
MIL                         &      0.640 &       0.830 &      0.822 &       0.831 &        0.904 &         0.953 \\
CLAM                        &      0.850 &       0.943 &      0.832 &       0.936 &        0.947 &         0.969 \\
DeepAttnMISL                &      0.702 &       0.817 &      0.752 &       0.800 &        0.867 &         0.941 \\
DS-MIL                      &      0.778 &       0.904 &      0.760 &       0.916 &        0.937 &         0.940 \\
DGCN                        &      0.698 &       0.923 &      0.673 &       0.841 &        0.915 &         0.951 \\
HIPT (ViT-256-F + ViT-4096) &      0.843 &       0.902 &      0.920 &       0.942 &        0.975 &         0.977 \\
\bottomrule
\end{tabular}



Unnamed: 0,BRCA (25%),BRCA (100%),Lung (25%),Lung (100%),Kidney (25%),Kidney (100%)
MIL,0.64,0.83,0.822,0.831,0.904,0.953
CLAM,0.85,0.943,0.832,0.936,0.947,0.969
DeepAttnMISL,0.702,0.817,0.752,0.8,0.867,0.941
DS-MIL,0.778,0.904,0.76,0.916,0.937,0.94
DGCN,0.698,0.923,0.673,0.841,0.915,0.951
HIPT (ViT-256-F + ViT-4096),0.843,0.902,0.92,0.942,0.975,0.977


### Table 4

In [5]:
results_dir = './results_cvpr2022_class/'
summary_df_all = []
for task in ['tcga_brca_subtype', 'tcga_lung_subtype', 'tcga_kidney_subtype']: # os.listdir(os.path.join(results_dir))
    props = [0.25, 1.0]
    for prop in props:
        models = [
                  'hipt_n',
                  'hipt_lgp[None]_[None]',
                  'hipt_lgp[vit4k_xs_dino]_[None]',
                  'hipt_lgp[vit4k_xs_dino]_freeze_[None]']
        model_names = [
                       'HIPT (No ViT)',
                       'HIPT (ViT-256 + ViT-4096)',
                       'HIPT (ViT-256-P + ViT-4096)',
                       'HIPT (ViT-256-PF + ViT-4096)']
        features = ['vits_tcga_pancancer_dino']

        exps = []
        for feature in features:
            for model in models:
                exp_name = '%s_%s_%s_%0.2f_none_s1' % (task, model, feature, prop)
                exps.append(exp_name)
        
        summary_df = []
        for exp in exps:
            summary_path = os.path.join(results_dir, task, exp, 'summary.csv')
            if os.path.isfile(summary_path):
                results_df = pd.read_csv(summary_path, index_col=0)
                exp_auc = '%0.3f +/- %0.3f' % (results_df['test_auc'].mean(), results_df['test_auc'].std())
                summary_df.append(exp_auc)
            else:
                summary_df.append('-')
        summary_df_all.append(summary_df)

summary_df_all = pd.DataFrame(summary_df_all).T
summary_df_all.columns = ['BRCA (25%)', 'BRCA (100%)', 'Lung (25%)', 'Lung (100%)',
                          'Kidney (25%)', 'Kidney (100%)']
model_names = [
              'HIPT (No ViT)',
               'HIPT (ViT-256 + ViT-4096)',
               'HIPT (ViT-256-P + ViT-4096)',
               'HIPT (ViT-256-PF + ViT-4096)']
summary_df_all.index = model_names
print(summary_df_all.to_latex())
summary_df_all

\begin{tabular}{lllllll}
\toprule
{} &       BRCA (25\%) &      BRCA (100\%) &       Lung (25\%) &      Lung (100\%) &     Kidney (25\%) &    Kidney (100\%) \\
\midrule
HIPT (No ViT)                &  0.784 +/- 0.061 &  0.837 +/- 0.062 &  0.835 +/- 0.050 &  0.928 +/- 0.023 &  0.955 +/- 0.016 &  0.965 +/- 0.013 \\
HIPT (ViT-256 + ViT-4096)    &  0.758 +/- 0.076 &  0.823 +/- 0.071 &  0.695 +/- 0.069 &  0.786 +/- 0.096 &                - &  0.956 +/- 0.016 \\
HIPT (ViT-256-P + ViT-4096)  &  0.762 +/- 0.089 &  0.827 +/- 0.069 &  0.652 +/- 0.076 &  0.820 +/- 0.047 &  0.935 +/- 0.022 &  0.956 +/- 0.013 \\
HIPT (ViT-256-PF + ViT-4096) &  0.821 +/- 0.069 &  0.874 +/- 0.060 &  0.923 +/- 0.020 &  0.952 +/- 0.021 &  0.974 +/- 0.012 &  0.980 +/- 0.013 \\
\bottomrule
\end{tabular}



Unnamed: 0,BRCA (25%),BRCA (100%),Lung (25%),Lung (100%),Kidney (25%),Kidney (100%)
HIPT (No ViT),0.784 +/- 0.061,0.837 +/- 0.062,0.835 +/- 0.050,0.928 +/- 0.023,0.955 +/- 0.016,0.965 +/- 0.013
HIPT (ViT-256 + ViT-4096),0.758 +/- 0.076,0.823 +/- 0.071,0.695 +/- 0.069,0.786 +/- 0.096,-,0.956 +/- 0.016
HIPT (ViT-256-P + ViT-4096),0.762 +/- 0.089,0.827 +/- 0.069,0.652 +/- 0.076,0.820 +/- 0.047,0.935 +/- 0.022,0.956 +/- 0.013
HIPT (ViT-256-PF + ViT-4096),0.821 +/- 0.069,0.874 +/- 0.060,0.923 +/- 0.020,0.952 +/- 0.021,0.974 +/- 0.012,0.980 +/- 0.013
