In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from scipy import stats

from keras_transfer_learning.utils import visualize, utils

Using TensorFlow backend.


In [2]:
# Short alias for utils.model_name_regex; the cells below call it to build the
# regexes that are handed to the Welch-test helpers.
regex_for = utils.model_name_regex

def result_list_for_regex(exp_reg, metric='ap_dsb2018#mean'):
    """Return the values of one metric column for the models matching a regex.

    Args:
        exp_reg: Regex handed to ``visualize.get_models`` to select models.
        metric: Name of the column to read from the results DataFrame.

    Returns:
        The ``.values`` array of the ``metric`` column of the DataFrame
        produced by ``visualize._get_results_last_df`` for the selected models.
    """
    last_results = visualize._get_results_last_df(visualize.get_models(exp_reg))
    metric_column = last_results[metric]
    return metric_column.values

def welch_test(exp_reg_1, exp_reg_2, metric='ap_dsb2018#mean'):
    """Welch's unequal-variance t-test between two groups of models.

    Args:
        exp_reg_1: Regex selecting the first group of models.
        exp_reg_2: Regex selecting the second group of models.
        metric: Results column whose values are compared.

    Returns:
        The result of ``scipy.stats.ttest_ind(..., equal_var=False)``, which
        unpacks as ``(statistic, pvalue)``. A positive statistic means the
        first group has the larger sample mean.
    """
    samples = [result_list_for_regex(pattern, metric)
               for pattern in (exp_reg_1, exp_reg_2)]
    return stats.ttest_ind(*samples, equal_var=False)

def welch_test_transfer(exp_reg_1, exp_reg_2, metric='ap_dsb2018#mean',
                        num_trains=(2, 5, 10, 50, 200)):
    """Run Welch's t-test separately for each training-set size.

    Both model groups are loaded once; for every value in ``num_trains`` the
    rows whose ``'Num Train'`` column equals that value are compared with an
    unequal-variance (Welch) t-test on the ``metric`` column.

    Args:
        exp_reg_1: Regex selecting the first group of models.
        exp_reg_2: Regex selecting the second group of models.
        metric: Results column whose values are compared.
        num_trains: Iterable of ``'Num Train'`` values to test. Defaults to
            the sizes this notebook originally hard-coded.

    Returns:
        dict mapping each ``num_train`` to ``{'tstat': ..., 'pval': ...}``.
        A positive ``tstat`` means the first group has the larger sample mean.
        Entries are NaN when a group has too few rows for that size (this is
        what ``scipy.stats.ttest_ind`` returns for such input).
    """
    results_1 = visualize._get_results_last_df(visualize.get_models(exp_reg_1))
    results_2 = visualize._get_results_last_df(visualize.get_models(exp_reg_2))

    res = {}
    for num_train in num_trains:
        # Single .loc selection (row mask + column) instead of chained indexing.
        results_1_list = results_1.loc[results_1['Num Train'] == num_train, metric].values
        results_2_list = results_2.loc[results_2['Num Train'] == num_train, metric].values
        tstat, pval = stats.ttest_ind(results_1_list, results_2_list, equal_var=False)
        res[num_train] = {'tstat': tstat, 'pval': pval}
    return res

# Experiment "resunet"

## Res-U-Net vs U-Net on DSB2018

In [3]:
# The two regexes differ only in the fifth argument ('resnet-unet' vs 'unet');
# welch_test compares the default metric of the two matched model groups.
exp_reg_1 = regex_for('R', 'none', 'dsb2018', 'stardist', 'resnet-unet', 'F')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'unet', 'F')
tstat, pval = welch_test(exp_reg_1, exp_reg_2)
print(f'tstat={tstat}, pval={pval}')

tstat=6.565784596262767, pval=0.0001419430620745522


## Res-U-Net vs U-Net on STARDIST_DSB2018

In [4]:
# Same 'resnet-unet' vs 'unet' comparison as a single Welch's t-test, here with
# 'stardist-dsb2018' as the third regex argument.
exp_reg_1 = regex_for('R', 'none', 'stardist-dsb2018', 'stardist', 'resnet-unet', 'F')
exp_reg_2 = regex_for('R', 'none', 'stardist-dsb2018', 'stardist', 'unet', 'F')
tstat, pval = welch_test(exp_reg_1, exp_reg_2)
print(f'tstat={tstat}, pval={pval}')

tstat=1.997768136468581, pval=0.09521728059100179


# Experiment "noise"

## HL60_LOW_NOISE -> HL60_HIGH_NOISE

In [5]:
# Per-'Num Train' Welch's t-test: models matched by the ('P', 'hl60low') regex
# vs. those matched by the ('R', 'none') regex, both on 'hl60high'.
# NOTE(review): 'P'/'R' presumably mean pretrained / random init -- confirm
# against utils.model_name_regex.
exp_reg_1 = regex_for('P', 'hl60low', 'hl60high', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'hl60high', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 1.4095541582703315, 'pval': 0.20463545208851633},
 5: {'tstat': 3.669906975596099, 'pval': 0.011341625606191716},
 10: {'tstat': 1.4767762735296883, 'pval': 0.18411904479424993},
 50: {'tstat': 1.5422046912893852, 'pval': 0.1615997035722803},
 200: {'tstat': 2.248294126513173, 'pval': 0.07542753401560591}}

## HL60_HIGH_NOISE -> HL60_LOW_NOISE

In [6]:
# Per-'Num Train' Welch's t-test: ('P', 'hl60high') models vs. ('R', 'none')
# models, both on 'hl60low' with the 'unet' backbone argument.
exp_reg_1 = regex_for('P', 'hl60high', 'hl60low', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'hl60low', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 5.837045501220411, 'pval': 0.0010003641637779353},
 5: {'tstat': 6.2114283129127275, 'pval': 0.0007873367036317932},
 10: {'tstat': 4.171637981920288, 'pval': 0.007363825309171871},
 50: {'tstat': 0.6925474689194567, 'pval': 0.5159098520926949},
 200: {'tstat': -0.11359730578082836, 'pval': 0.9122324338722978}}

# Experiment "natural"

## CITYSCAPES -> HL60_LOW_NOISE

In [7]:
# Per-'Num Train' Welch's t-test: ('P', 'cityscapes') models vs. ('R', 'none')
# models, both on 'hl60low' with 'resnet-unet'.
exp_reg_1 = regex_for('P', 'cityscapes', 'hl60low', 'stardist', 'resnet-unet')
exp_reg_2 = regex_for('R', 'none', 'hl60low', 'stardist', 'resnet-unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': -1.2401778090668358, 'pval': 0.26137036072426095},
 5: {'tstat': -3.0775731578319547, 'pval': 0.015177219391470998},
 10: {'tstat': -3.018766115673872, 'pval': 0.01707083634218986},
 50: {'tstat': 0.3313109753118702, 'pval': 0.7544945699718135},
 200: {'tstat': -0.8275089296980899, 'pval': 0.4384101696477022}}

## CITYSCAPES -> DSB2018

In [8]:
# Per-'Num Train' Welch's t-test: ('P', 'cityscapes') models vs. ('R', 'none')
# models, both on 'dsb2018' with 'resnet-unet'.
exp_reg_1 = regex_for('P', 'cityscapes', 'dsb2018', 'stardist', 'resnet-unet')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'resnet-unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 6.240875607787007, 'pval': 0.0003129348780865917},
 5: {'tstat': 2.1730973552001527, 'pval': 0.061538461617891185},
 10: {'tstat': 0.9567773462465909, 'pval': 0.36774450619583654},
 50: {'tstat': -0.3955468215565188, 'pval': 0.703453431912259},
 200: {'tstat': 0.5794859673285163, 'pval': 0.5797201078009582}}

## IMAGENET -> DSB2018

In [9]:
# Per-'Num Train' Welch's t-test: ('P', 'imagenet') models vs. ('R', 'none')
# models, both on 'dsb2018' with 'imagenet-resnet-unet'.
exp_reg_1 = regex_for('P', 'imagenet', 'dsb2018', 'stardist', 'imagenet-resnet-unet')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'imagenet-resnet-unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 6.7622351010661585, 'pval': 0.008573847368841866},
 5: {'tstat': 0.3485888832475987, 'pval': 0.7451862390819357},
 10: {'tstat': -0.1807110383970501, 'pval': 0.8660024784080693},
 50: {'tstat': 0.7986941563816746, 'pval': 0.5037747024246484},
 200: {'tstat': 0.13603497952113433, 'pval': 0.8983678675564895}}

# Experiment "simulated"

## HL60_LOW_NOISE -> DSB2018

In [10]:
# Per-'Num Train' Welch's t-test: ('P', 'hl60low') models vs. ('R', 'none')
# models, both on 'dsb2018' with 'unet'.
exp_reg_1 = regex_for('P', 'hl60low', 'dsb2018', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': -2.082341958052606, 'pval': 0.05882971858628898},
 5: {'tstat': 1.5300211228451428, 'pval': 0.14892644559452387},
 10: {'tstat': 2.4405310931561996, 'pval': 0.027417651166113496},
 50: {'tstat': 0.3911617386174003, 'pval': 0.7011190800034659},
 200: {'tstat': 0.4827606743250054, 'pval': 0.6368512237306375}}

## HL60_HIGH_NOISE -> DSB2018

In [11]:
# Per-'Num Train' Welch's t-test: ('P', 'hl60high') models vs. ('R', 'none')
# models, both on 'dsb2018' with 'unet'.
exp_reg_1 = regex_for('P', 'hl60high', 'dsb2018', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': -6.281602337575954, 'pval': 0.0002532797414096654},
 5: {'tstat': -2.7590979083362486, 'pval': 0.020142012037750612},
 10: {'tstat': -0.6274235630711913, 'pval': 0.5423948180584326},
 50: {'tstat': -1.0963276963194024, 'pval': 0.2981935922682462},
 200: {'tstat': -3.5616286124337755, 'pval': 0.006231655462025591}}

## GRANULOCYTE -> DSB2018

In [12]:
# Per-'Num Train' Welch's t-test: ('P', 'granulocyte') models vs. ('R', 'none')
# models, both on 'dsb2018' with 'unet'.
exp_reg_1 = regex_for('P', 'granulocyte', 'dsb2018', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': -0.49648350039731587, 'pval': 0.6265335477903049},
 5: {'tstat': 2.1071193597195803, 'pval': 0.054363002979493966},
 10: {'tstat': 3.71632574626126, 'pval': 0.002632774332886333},
 50: {'tstat': 3.1492493470455307, 'pval': 0.006096777498749658},
 200: {'tstat': 0.5547555255105506, 'pval': 0.586904491829275}}

# Experiment "dsb2018"

## DSB2018 -> HL60_LOW_NOISE

In [13]:
# Per-'Num Train' Welch's t-test: ('P', 'dsb2018') models vs. ('R', 'none')
# models, both on 'hl60low' with 'unet'.
exp_reg_1 = regex_for('P', 'dsb2018', 'hl60low', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'hl60low', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 3.681471233261329, 'pval': 0.0015014321451298222},
 5: {'tstat': 4.99345499412356, 'pval': 0.0001127226261482432},
 10: {'tstat': 7.4437596535156825, 'pval': 1.819191013554962e-07},
 50: {'tstat': 2.236078043005938, 'pval': 0.03543393108907091},
 200: {'tstat': 0.2690685603663977, 'pval': 0.7903456606002771}}

## DSB2018 -> HL60_HIGH_NOISE

In [14]:
# Per-'Num Train' Welch's t-test: ('P', 'dsb2018') models vs. ('R', 'none')
# models, both on 'hl60high' with 'unet'.
exp_reg_1 = regex_for('P', 'dsb2018', 'hl60high', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'hl60high', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 0.008601030825975403, 'pval': 0.9933493605683203},
 5: {'tstat': -0.6638983476320488, 'pval': 0.5262439804668415},
 10: {'tstat': 1.075641110115368, 'pval': 0.3196011826586579},
 50: {'tstat': -0.1988152206512906, 'pval': 0.8491092239695547},
 200: {'tstat': -0.41829021951338013, 'pval': 0.6874054002565521}}

## DSB2018 -> GRANULOCYTE

In [15]:
# Per-'Num Train' Welch's t-test: ('P', 'dsb2018') models vs. ('R', 'none')
# models, both on 'granulocyte' with 'unet'.
# NOTE(review): welch_test_transfer is called with its default metric
# 'ap_dsb2018#mean' here as well -- confirm that is the intended column for
# the 'granulocyte' results.
exp_reg_1 = regex_for('P', 'dsb2018', 'granulocyte', 'stardist', 'unet')
exp_reg_2 = regex_for('R', 'none', 'granulocyte', 'stardist', 'unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 4.341421802247028, 'pval': 0.00041923084066682135},
 5: {'tstat': 3.5611437314183734, 'pval': 0.002853019778394442},
 10: {'tstat': 0.8609457314578913, 'pval': 0.4005918768061144},
 50: {'tstat': 0.32836364279021657, 'pval': 0.7464731505975704},
 200: {'tstat': 2.0182259321313207, 'pval': 0.06295344317558316}}

# Experiment "multi"

## SIMULATED (hl60low + hl60high + granulocyte) -> DSB2018

In [16]:
# Per-'Num Train' Welch's t-test: ('P', 'hl60low-hl60high-granulocyte') models
# vs. ('R', 'none') models, both on 'dsb2018' with 'resnet-unet'.
exp_reg_1 = regex_for('P', 'hl60low-hl60high-granulocyte', 'dsb2018', 'stardist', 'resnet-unet')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'resnet-unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 5.979715074865375, 'pval': 0.001255481421300519},
 5: {'tstat': 4.836934676484098, 'pval': 0.00315860618409932},
 10: {'tstat': 3.4398069864215537, 'pval': 0.008828937990187915},
 50: {'tstat': 1.464424682424375, 'pval': 0.1816888206460144},
 200: {'tstat': -0.3230997615863624, 'pval': 0.7565745001047921}}

## SIMULATED (hl60low + granulocyte, aug) -> DSB2018

In [17]:
# Per-'Num Train' Welch's t-test: ('P', 'hl60low-granulocyte-aug') models
# vs. ('R', 'none') models, both on 'dsb2018' with 'resnet-unet'.
exp_reg_1 = regex_for('P', 'hl60low-granulocyte-aug', 'dsb2018', 'stardist', 'resnet-unet')
exp_reg_2 = regex_for('R', 'none', 'dsb2018', 'stardist', 'resnet-unet')
welch_test_transfer(exp_reg_1, exp_reg_2)

{2: {'tstat': 6.251458597807002, 'pval': 0.0005591971220201877},
 5: {'tstat': 6.741008029121024, 'pval': 0.0006102769423317721},
 10: {'tstat': 6.7733644077168735, 'pval': 0.000196788536225865},
 50: {'tstat': 2.879122192449447, 'pval': 0.020670738043891874},
 200: {'tstat': 0.3578591389935312, 'pval': 0.7298098884721189}}