In [1]:
import glob
import os

import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import ttest_ind

In [2]:
# Marker names used throughout the notebook; the t-test cells iterate over
# this list.
markers = [
    'Atubulin', 'Bcatenin', 'CoxIV', 'DAPI',
    'Factin', 'Fibrillarin', 'GM130', 'NFkB',
]

In [3]:
# Load the metric tables of all experiments into a single DataFrame.
# The glob pattern implies the directory layout
#   <path>/<model>/multiplex/<...-input-marker>/<fold dir>/<run dir>/test_results/results.csv
# and the experiment settings are recovered from those directory names.
path = r'./iqm_results/'
# sorted() makes the row order reproducible; glob itself gives no ordering guarantee
all_files = sorted(glob.glob(os.path.join(path, "*/multiplex/*/*/*/test_results/results.csv")))

if not all_files:
    # fail here with a clear message instead of an opaque error further down
    raise FileNotFoundError(f"no results.csv files found below {path!r}")


def _experiment_labels(csv_path):
    """Return the fold, marker, input and model encoded in a results.csv path.

    Forward and backward slashes are both treated as separators, so the
    parsing works for paths produced on Windows as well as on POSIX systems.
    """
    parts = [p for p in csv_path.replace('\\', '/').split('/') if p]
    experiment = parts[-5].split('-')
    return {
        'fold': parts[-4][-1],      # last character of the fold directory name
        'marker': experiment[-1],   # last '-'-separated field of the experiment directory
        'input': experiment[-2],    # second-to-last '-'-separated field
        'model': parts[-7],
    }


# Read every table, tag it with its experiment labels and concatenate once
# (concatenating inside the loop copies the growing frame on every iteration).
image_df = pd.concat(
    [pd.read_csv(f).assign(**_experiment_labels(f)) for f in all_files],
    ignore_index=True,
)

# number of '_'-separated names in the input label
image_df['in_num'] = image_df['input'].str.split('_').str.len()


In [None]:
# For every target marker, find the two-input combination with the highest
# median score (ResViT only). The median is taken over all rows that share
# the same (input, marker) pair.
resvit_pairs = image_df[(image_df.model == 'ResViT') & (image_df.in_num == 2)]

# Aggregate only the numeric columns: 'fold' is a string column and makes a
# frame-wide median raise a TypeError on pandas >= 2.0.
best_performing_pairs = (
    resvit_pairs
    .groupby(['input', 'marker'])[['in_num', 'SSIM', 'PCC', 'L2']]
    .median()
    .reset_index()
)

# best_performing_pairs.loc[best_performing_pairs.groupby('marker')['SSIM'].idxmax()][['marker', 'input', 'SSIM']]
best_performing_pairs.loc[best_performing_pairs.groupby('marker')['PCC'].idxmax()][['marker', 'input', 'PCC']]

In [None]:
# only best performing pairs
# Median SSIM/PCC for every (in_num, input, marker, fold, model) combination.
filtered_df = (
    image_df
    .groupby(['in_num', 'input', 'marker', 'fold', 'model'])[['SSIM', 'PCC']]
    .median()
    .reset_index()
)

# Two-input combination that is kept for each target marker.
best_pair_input = {
    'Atubulin': 'TD_GM130',
    'Bcatenin': 'TD_Factin',
    'CoxIV': 'TD_NFkB',
    'DAPI': 'TD_Fibrillarin',
    'Factin': 'TD_Fibrillarin',
    'Fibrillarin': 'TD_DAPI',
    'GM130': 'TD_Atubulin',
    'NFkB': 'TD_Atubulin',
}
is_best_pair = filtered_df['input'] == filtered_df['marker'].map(best_pair_input)

# keep the selected pairs plus every row that is not a two-input experiment
filter_conditions = is_best_pair | (filtered_df['in_num'] != 2)
filtered_df = filtered_df[filter_conditions]
# single-input rows are kept only for the plain 'TD' input
filtered_df = filtered_df[(filtered_df['in_num'] != 1) | (filtered_df['input'] == 'TD')]
filtered_df = filtered_df.sort_values(['marker', 'in_num'])

# One box per marker, coloured by the number of inputs; rows of the facet grid
# are the metrics (SSIM, PCC) and columns are the models.
fig = px.box(
    filtered_df.melt(id_vars=['fold', 'marker', 'input', 'in_num', 'model']),
    x='marker',
    y='value',
    facet_row='variable',
    facet_col='model',
    color='in_num',
)

fig.update_layout(yaxis_range=[0.2, 1])

# show the mean inside each box and hide the individual points
fig.update_traces(boxmean=True, boxpoints=False)
fig.show()


In [None]:
# Welch's t-test (unequal variances) on the selected metric, per model and
# marker: 1 input vs 2 inputs, then 2 inputs vs 8 inputs.
iqm = 'SSIM'
# iqm = 'PCC'
for model in ['ResViT', 'UNet', 'cGAN']:
    for m in markers:
        test_pop = filtered_df[(filtered_df['marker'] == m) & (filtered_df['model'] == model)]

        # metric values of this marker/model, keyed by the number of inputs
        by_inputs = {
            count: test_pop.loc[test_pop['in_num'] == count, iqm]
            for count in (1, 2, 8)
        }

        test = ttest_ind(by_inputs[1], by_inputs[2], equal_var=False)
        print(f'{model} 1v2 {m}, : {test.pvalue}')

        test = ttest_ind(by_inputs[2], by_inputs[8], equal_var=False)
        print(f'{model} 2v8 {m}, : {test.pvalue}')

In [None]:
# GM130 and B-cat case study for multi inputs (resvit)
case_mask = (
    image_df.marker.isin(['GM130', 'Bcatenin'])
    & ((image_df.in_num == 2) | (image_df.input == 'TD'))
    & (image_df.model == 'ResViT')
)
# .copy() gives an independent frame, so adding a column below does not act on
# a slice of image_df (which triggers SettingWithCopyWarning).
doubles = image_df[case_mask].copy()
# last '_'-separated name of the input label; for the plain 'TD' input this is 'TD'
doubles['addition'] = doubles['input'].str.split('_').str[-1]
# median PCC/SSIM for every (fold, addition, marker) combination
doubles = (
    doubles
    .groupby(['fold', 'addition', 'marker'])[['PCC', 'SSIM']]
    .median()
    .reset_index()
)

fig = px.box(
    doubles.melt(id_vars=['fold', 'addition', 'marker']),
    x='addition',
    y='value',
    facet_row='variable',
    facet_col='marker',
    # 'TD' first, then the markers in the order of the `markers` list
    category_orders={'addition': ['TD', *markers]},
)
# show the mean inside each box and hide the individual points
fig.update_traces(boxpoints=False, boxmean=True)
fig

In [None]:
#perform Welch's t-test against DIC only input
# Compares, for one target marker, the 'TD' rows of `doubles` with the rows of
# every added marker.

# iqm = 'SSIM'
iqm = 'PCC'
# marker_of_interest = 'Bcatenin'
marker_of_interest = 'GM130'

# The target-marker subset and the 'TD' reference sample do not depend on the
# loop variable, so they are selected once.
test_pop = doubles[doubles['marker'] == marker_of_interest]
group1 = test_pop[test_pop['addition'] == 'TD'][iqm].astype('float')

for m in markers:
    group2 = test_pop[test_pop['addition'] == m][iqm].astype('float')

    # Welch's test needs a variance estimate for both groups; with fewer than
    # two samples it only yields nan, so report that explicitly instead.
    if len(group1) < 2 or len(group2) < 2:
        print(f'{m}, : skipped (fewer than 2 samples in a group)')
        continue

    test = ttest_ind(group1, group2, equal_var=False)
    print(f'{m}, : {test.pvalue}')

