In [None]:
# Import all relevant packages
import pandas as pd
import numpy as np
from fcsy import DataFrame
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
import os
from utils import load_data
from scipy.stats import pearsonr
from statistics import mean, median, stdev
from statannot import add_stat_annotation
%load_ext autoreload
%autoreload 2
plt.style.use('plotstyle.mplstyle')

In [None]:
# --- Input locations (fill in before running) ---
# Passed to load_data() together: ground-truth data and imputed data.
gt_path = ""
imputed_path = ""
# Directory tree that is walked for '*_exprs.fcs' backbone-experiment files.
backbone_results = ""

# --- Output location ---
# NOTE(review): not referenced by the cells visible here; presumably the
# folder where figures are saved -- confirm.
figures_path = ""

In [None]:
# Load the ground-truth / imputed data and keep only the rows with
# imp_state == 0 that belong to the 'CyTOFmerge' method.
# NOTE(review): imp_state == 0 presumably marks non-imputed (measured) cells -- confirm in utils.load_data.
data = load_data(gt_path, imputed_path)
keep_rows = (data['imp_state'] == 0) & (data['method'] == 'CyTOFmerge')
data = data[keep_rows]

In [None]:
# Every fluorescence channel considered in this notebook.
all_markers = [
    'FITC-A',
    'APC-A',
    'BV605-A',
    'BV786-A',
    'PE-A',
    'PE-CF594-A',
    'BV711-A',
    'PC7-A',
    'HV500c-A',
    'BUV395-A',
    'PerCP-Cy5-5-A',
    'BUV737-A',
    'BUV496-A',
    'BV421-A',
    'APC-R700-A',
]

# Channel name -> marker name used as the label in plots.
# 'APC-R700-A' has no entry here.
flow_rename = {
    'APC-A': 'KLRG1',
    'BV711-A': 'TIM-3',
    'FITC-A': 'CD57',
    'BV786-A': 'CD27',
    'PE-A': 'CD28',
    'PE-CF594-A': 'CD95',
    'PC7-A': 'TIGIT',
    'BV605-A': 'PD-1',
    'PerCP-Cy5-5-A': 'CD14',
    'BV421-A': 'CCR7',
    'HV500c-A': 'CD45',
    'BUV395-A': 'CD3',
    'BUV496-A': 'CD8',
    'BUV737-A': 'CD4',
}

In [None]:
# Per sample, compute the absolute Pearson correlation between each marker and
# the 'APC-R700-A' channel. The target channel itself and two further channels
# ('BUV395-A', 'PerCP-Cy5-5-A') are left out.
skipped_channels = ['BUV395-A', 'APC-R700-A', 'PerCP-Cy5-5-A']
compared_channels = [channel for channel in all_markers if channel not in skipped_channels]

# Slice the data once per sample instead of once per (marker, sample) pair.
cells_by_sample = {sid: data[data['sample_id'] == sid] for sid in data['sample_id'].unique()}

results = pd.DataFrame([
    {'Marker': flow_rename[channel],
     'Sample': sid,
     'Absolute Pearson correlation': abs(pearsonr(cells[channel], cells['APC-R700-A'])[0])}
    for channel in compared_channels
    for sid, cells in cells_by_sample.items()
])
results = results.sort_values('Absolute Pearson correlation')

In [None]:
# For every '*_exprs.fcs' file found under `backbone_results`, compute the Pearson
# correlation between the imputed and the ground-truth 'APC-R700-A' values and
# record which backbone experiment the file belongs to.
IMP_RESULT_COLUMNS = ['Sample', 'Backbone', 'Seed', 'Sampled backbone size',
                      'Experiment', 'Pearson correlation']

imp_results = []
for root, dirs, files in os.walk(backbone_results):
    for file in files:
        if not file.endswith('_exprs.fcs'):
            continue

        # The part of the file name before the first '_' identifies the backbone.
        backbone = file.split('_')[0]
        print(backbone)

        # Imputed expressions and the matching ground-truth file in the same folder;
        # only rows with dataset == 1 are compared.
        # NOTE(review): dataset == 1 presumably selects the cells whose target
        # marker was imputed -- confirm against the export script.
        ff = DataFrame.from_fcs(os.path.join(root, file))
        ff = ff[ff['dataset']==1]
        gt = DataFrame.from_fcs(os.path.join(root, file.replace('CyTOFmerge_exprs', 'gt')))
        gt = gt[gt['dataset']==1]

        # The experiment type is encoded in the backbone prefix.
        if backbone.startswith('Seed'):
            experiment = 'Sampled'
        elif backbone.startswith('CD28'):
            experiment = 'Descending order'
        else:
            experiment = 'Ascending order'

        if experiment == 'Sampled':
            # Sampled backbones are named '<seed>|Sampled<n>'; <n> is kept as a string.
            name_parts = backbone.split('|')
            seed = name_parts[0]
            n_sampled_markers = name_parts[1].split('Sampled')[1]
        else:
            seed = None
            n_sampled_markers = None

        imp_results.append({'Sample': os.path.basename(root),
                            'Backbone': backbone,
                            'Seed': seed,
                            'Sampled backbone size': n_sampled_markers,
                            'Experiment': experiment,
                            'Pearson correlation': pearsonr(ff['APC-R700-A'], gt['APC-R700-A'])[0]})

# Explicit columns so that an empty search still yields a frame with the expected
# columns instead of a column-less frame.
imp_results = pd.DataFrame(imp_results, columns=IMP_RESULT_COLUMNS)

In [None]:
# Four-panel summary figure:
#   top-left     box plot of imputation correlation per sampled backbone size
#   top-right    markers ranked by median absolute correlation with the target channel
#   bottom-left  'Ascending order' backbone experiment
#   bottom-right 'Descending order' backbone experiment
fig = plt.figure(figsize=(14, 8))
gs = fig.add_gridspec(nrows=2, ncols=2)

# Styling shared by all bar charts: bar height is the median, error bars span
# the 50% percentile interval.
BAR_STYLE = dict(color='#009cb4', capsize=.05, errwidth=1.25, saturation=1,
                 estimator=median, errorbar=('pi', 50))

# --- Random permutations ---
ax = fig.add_subplot(gs[0, 0])
sampled = imp_results[~imp_results['Sampled backbone size'].isna()]
# The sizes are parsed from file names and therefore strings; sort them by their
# numeric value so that e.g. '10' is not placed before '2'.
sampled = sampled.sort_values('Sampled backbone size', key=lambda sizes: sizes.astype(int))
size_order = list(sampled['Sampled backbone size'].unique())
PROPS = {'boxprops':{'facecolor':'none', 'edgecolor':'black'}, 'medianprops':{'color':'black'},
         'whiskerprops':{'color':'black'},'capprops':{'color':'black'}}
sns.boxplot(data=sampled, x='Sampled backbone size', y='Pearson correlation', order=size_order,
            color='white', saturation=1, **PROPS, ax=ax)

# --- Ranked correlations of markers ---
ax = fig.add_subplot(gs[0, 1])
# Select the numeric column before aggregating: taking the median of the whole
# frame would also try to aggregate the non-numeric 'Sample' column.
order = (results.groupby('Marker')['Absolute Pearson correlation']
         .median()
         .sort_values(ascending=True)
         .index.tolist())
# Markers are drawn from lowest to highest median, so the right-most one gets rank 1.
cellText = [order, [str(rank) for rank in range(len(order), 0, -1)]]
sns.barplot(data=results, x='Marker', y='Absolute Pearson correlation', order=order, ax=ax,
            **BAR_STYLE)
ax.set_ylabel('Absolute correlation with CD45RA')
ax.set_xlabel('')
ax.set_yticks([0.1, 0.2, 0.3, 0.4, 0.5])
ax.set_xticks([])
# Marker names and ranks are shown in a borderless table below the axis
# instead of regular tick labels.
table = ax.table(cellText=cellText, rowLabels=['Marker', 'Rank'], bbox=[0, -0.22, 1, 0.2],
                 cellLoc='center', edges='open')
table.auto_set_font_size(False)
table.set_fontsize(10)

# --- Backbone experiments ---
# Order backbones by the length of their name.
# NOTE(review): this presumably orders them by number of markers, and the label
# lists below assume exactly 12 backbones per experiment -- confirm.
imp_results = imp_results.sort_values(by="Backbone", key=lambda x: x.str.len())

# Tick labels give the marker ranks contained in each backbone.
descending_ranks = ['12', '11-12', '10-12', '9-12', '8-12', '7-12', '6-12', '5-12', '4-12', '3-12', '2-12', '1-12']
ascending_ranks = ['1', '1-2', '1-3', '1-4', '1-5', '1-6', '1-7', '1-8', '1-9', '1-10', '1-11', '1-12']

# The 'Ascending order' experiment is labelled starting at rank 12, the
# 'Descending order' experiment starting at rank 1.
bottom_panels = [('Ascending order', descending_ranks),
                 ('Descending order', ascending_ranks)]
for column, (experiment_name, rank_labels) in enumerate(bottom_panels):
    ax = fig.add_subplot(gs[1, column])
    experiment_rows = imp_results[imp_results['Experiment']==experiment_name]
    sns.barplot(data=experiment_rows, order=experiment_rows['Backbone'].unique(),
                x='Backbone', y='Pearson correlation', ax=ax, **BAR_STYLE)
    ax.set_xticklabels(rank_labels)
    ax.set_xlabel('Backbone composition (ranks)')

fig.subplots_adjust(wspace=0.2, hspace=0.3)