Author: Tim Mocking

Contact: t.r.mocking@amsterdamumc.nl

In [None]:
# Import all relevant packages
import os
import argparse
import pandas as pd
import numpy as np
from fcsy import DataFrame
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
from sklearn.metrics import recall_score
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
plt.style.use('plotstyle.mplstyle')

In [None]:
# File locations
# All four values are left empty here and must be filled in before running
# the cells below.
# Folder that is walked (os.walk) for the R (FNN) nearest-neighbour CSV files.
kNN_path = ''
# Folder that is walked (os.walk) for the '*Infinicyt_exprs.fcs' exports.
imp_path = ''
# NOTE(review): presumably the folder holding the aggregated
# 'Flow_Infinicyt_agg.fcs' / 'Flow_Infinicyt_labels.csv' files - confirm.
Aggregate_path = ''
# Prefix for the saved figures; it is concatenated directly with the file
# name when saving, so a non-empty value should end with a path separator.
figures_path =  ''

In [None]:
# Gate names whose '+' / '-' variants are relabelled for plotting.
populations = [
    'CD27-, TIGIT subset',
    'SCM',
    'TIGIT+, KLRG1+ CM',
    'TIM3 positive EM CD4',
    'CD57, CD28 subset',
    'KLRG1, TIGIT subset',
]

# Raw gate label (gate name followed by '+' or '-') -> display label.
# Labels use matplotlib mathtext ($...$); some contain an explicit line break.
# 'CD57, CD28 subset' and 'KLRG1, TIGIT subset' only have a '-' entry.
rename = {
    # CD4+ effector memory
    'TIM3 positive EM CD4+': 'TIM-3+ \n CD4+ $T_{EM}$',
    'TIM3 positive EM CD4-': 'TIM-3- \n CD4+ $T_{EM}$',
    # CD4+ stem-cell memory
    'SCM+': 'CD95+ \n CD4+ $T_{SCM}$',
    'SCM-': 'CD95- \n CD4+ $T_{SCM}$',
    # CD8+ central memory
    'TIGIT+, KLRG1+ CM+': 'TIGIT+/KLRG1+ \n CD8+ $T_{CM}$',
    'TIGIT+, KLRG1+ CM-': 'TIGIT- and/or KLRG1- \n CD8+ $T_{CM}$',
    # CD8+ gates leading up to the senescent (T_SN) population
    'CD57, CD28 subset-': 'CD57- and/or CD28+ CD8+ $T_{cell}$',
    'KLRG1, TIGIT subset-': 'KLRG1- and/or TIGIT- CD8+/CD57+/CD28- $T_{cell}$',
    'CD27-, TIGIT subset+': 'CD27- CD8+/CD57+/CD28-/KLRG1+/TIGIT+ ($T_{SN}$)',
    'CD27-, TIGIT subset-': 'CD27+ CD8+/CD57+/CD28-/KLRG1+/TIGIT+ $T_{cell}$',
}

# Parent population -> [positive display label, negative display label].
# The labels are looked up in `rename` so each one is spelled only once.
subsets = {
    'CD4+ $T_{EM}$': [rename['TIM3 positive EM CD4+'],
                      rename['TIM3 positive EM CD4-']],
    'CD4+ $T_{SCM}$': [rename['SCM+'],
                       rename['SCM-']],
    'CD8+ $T_{CM}$': [rename['TIGIT+, KLRG1+ CM+'],
                      rename['TIGIT+, KLRG1+ CM-']],
    'CD8+ $T_{SN}$': [rename['CD27-, TIGIT subset+'],
                      rename['CD27-, TIGIT subset-']],
}

In [None]:
def _duplicate_counts(events, channel, dataset):
    """Count how often each distinct value of ``channel`` occurs in ``events``.

    Parameters
    ----------
    events : DataFrame holding the cells of one dataset.
    channel : name of the column whose repeated values are counted.
    dataset : dataset number stored in the 'dataset' column of the result.

    Returns
    -------
    DataFrame indexed by the distinct channel values, with a 'Duplicates'
    column (number of rows sharing that value) and a 'dataset' column.
    """
    # Name the Series before converting it to a frame: since pandas 2.0
    # value_counts() calls its result 'count' instead of reusing the column
    # name, so renaming the column by the channel name would silently do
    # nothing there and no 'Duplicates' column would exist.
    counts = events[channel].value_counts().rename('Duplicates').to_frame()
    counts['dataset'] = dataset
    return counts


dfs = []

# R (FNN) output: every file found below kNN_path is read as a CSV.
for root, dirs, files in os.walk(kNN_path):
    for file in files:
        # os.path.join also works for files in sub-directories and when
        # kNN_path has no trailing separator.
        r = pd.read_csv(os.path.join(root, file), index_col=0)
        # The R-data is not shuffled: the first half of the rows is treated
        # as dataset 1 and the second half as dataset 2.
        half = len(r) // 2
        combined = pd.concat([
            # Use PE-A (CD28) for dataset1
            _duplicate_counts(r[:half], 'PE-A', 1),
            # Use FITC-A (CD57) for dataset2
            _duplicate_counts(r[half:], 'FITC-A', 2),
        ])
        # The first seven characters of the file name are used as patient ID.
        combined['Patient'] = file[:7]
        combined['Method'] = 'R (FNN)'
        dfs.append(combined)


# Infinicyt output: only the '*Infinicyt_exprs.fcs' exports are used.
for root, dirs, files in os.walk(imp_path):
    for file in files:
        if not file.endswith('Infinicyt_exprs.fcs'):
            continue
        infinicyt = DataFrame.from_fcs(os.path.join(root, file))
        # Keep only the rows flagged imp_state == 1.
        infinicyt = infinicyt[infinicyt['imp_state'] == 1]
        combined = pd.concat([
            # Use BV786-A (CD27) for dataset1
            _duplicate_counts(infinicyt[infinicyt['dataset'] == 1], 'BV786-A', 1),
            # Use FITC-A (CD57) for dataset2
            _duplicate_counts(infinicyt[infinicyt['dataset'] == 2], 'FITC-A', 2),
        ])
        combined['Patient'] = file[:7]
        combined['Method'] = 'Infinicyt'
        dfs.append(combined)

if not dfs:
    # pd.concat([]) raises a ValueError as well, but without saying why.
    raise ValueError('No input files found; check kNN_path and imp_path.')

# One row per distinct channel value per dataset, patient and method.
df = pd.concat(dfs)

In [None]:
# Aggregated expression data plus the matching per-row gate labels.
# Aggregate_path is the location set in the 'File locations' cell.
exprs = DataFrame.from_fcs(os.path.join(Aggregate_path, 'Flow_Infinicyt_agg.fcs'))
exprs = exprs.reset_index(drop=True)
gates = pd.read_csv(os.path.join(Aggregate_path, 'Flow_Infinicyt_labels.csv'), index_col=0)

# Both frames get a fresh 0..n-1 index so they are joined row by row.
gates = gates.reset_index(drop=True)
exprs = pd.concat([exprs, gates], axis=1)
# 'V1' holds a '/'-separated gate path; the last element is the population.
exprs['Population'] = [i.split('/')[-1] for i in exprs['V1']]

flow_variable1 = ['FITC-A', 'APC-A', 'BV605-A', 'BV786-A']
flow_variable2 = ['PE-A', 'PE-CF594-A', 'BV711-A', 'PC7-A']
variable = flow_variable1 + flow_variable2

# Relabel the '-' and '+' variant of every listed population in one pass.
# Variants without an entry in `rename` keep their raw label.
label_map = {key: rename[key]
             for population in populations
             for key in (population + '-', population + '+')
             if key in rename}
exprs['Population'] = exprs['Population'].replace(label_map)

# Mean channel value per file for the first ("positive") label of each
# entry in `subsets`, computed separately for imp_state 0 and imp_state 1.
results = []
for file in exprs['File'].unique():
    file_events = exprs[exprs['File'] == file]
    gt_data = file_events[file_events['imp_state'] == 0].sort_values('original_ID')
    imp_data = file_events[file_events['imp_state'] == 1].sort_values('original_ID')

    for subset_name, labels in subsets.items():
        positive_label = labels[0]
        gt_temp = gt_data[gt_data['Population'] == positive_label]
        imp_temp = imp_data[imp_data['Population'] == positive_label]
        for channel in variable:
            results.append({'File': file,
                            'Population': subset_name,
                            'Channel': channel,
                            'GT MFI': gt_temp[channel].mean(),
                            'IMP MFI': imp_temp[channel].mean()})
data = pd.DataFrame(results)

In [None]:
# Keep only the CD8+ T_SN rows for the channels listed in `cols`.
cols = ['FITC-A', 'PE-A', 'APC-A', 'PC7-A', 'BV786-A']
is_senescent = data['Population'] == 'CD8+ $T_{SN}$'
is_listed_channel = data['Channel'].isin(cols)
data = data[is_senescent & is_listed_channel]

In [None]:
# Rows of file 3 carrying the T_SN display label, used by the scatter panel.
file3 = exprs[(exprs['File'] == 3)
              & (exprs['Population'] == 'CD27- CD8+/CD57+/CD28-/KLRG1+/TIGIT+ ($T_{SN}$)')]

# One row of three panels.
fig = plt.figure(figsize=(9, 3))
gs = fig.add_gridspec(nrows=1, ncols=3)
ax_dup, ax_mfi, ax_scatter = [fig.add_subplot(gs[0, col]) for col in range(3)]

# Panel 1: duplicate counts per method (rows with a count of 1 are dropped).
palette = ['dimgray', '#f47f2a']
order = ['R (FNN)', 'Infinicyt']
df = df[df['Duplicates'] != 1]
sns.boxplot(data=df, x='Method', y='Duplicates', order=order,
            palette=palette, saturation=1, ax=ax_dup)
ax_dup.set_yscale('log')
ax_dup.set_xlabel('')
ax_dup.set_ylabel('Number of duplicate cells')

# Panel 2: 'GT MFI' versus 'IMP MFI' for PE-A, one connecting line per file
# drawn underneath a box plot of the same values.
melt = pd.melt(data, id_vars=['File', 'Population', 'Channel'],
               value_vars=['GT MFI', 'IMP MFI'])
channel = 'PE-A'
channel_rows = melt[melt['Channel'] == channel]
for file_id in melt['File'].unique():
    sns.pointplot(data=channel_rows[channel_rows['File'] == file_id],
                  x='variable', y='value', ax=ax_mfi,
                  color='black', scale=0.5)
sns.boxplot(data=channel_rows, x='variable', y='value', ax=ax_mfi,
            saturation=1, palette=palette)
ax_mfi.set_xlabel('')
ax_mfi.set_ylabel('CD28 MFI on senescent T-cells')
ax_mfi.set_xticklabels(['Ground truth', 'Infinicyt imputed'], rotation=15, ha='right')
ax_mfi.set_ylim(-0.7, 0.5)
# Red open circle at (1, 0.4) in data coordinates, i.e. on the second
# category; presumably it highlights one outlying file.
ax_mfi.add_patch(plt.Circle((1, 0.4), 0.08, color='r', fill=False))

# Panel 3: BUV395-A against PE-A for the file-3 rows, coloured by imp_state.
sns.scatterplot(data=file3, x='BUV395-A', y='PE-A', color='black', s=10,
                palette=palette, hue='imp_state', ax=ax_scatter)
ax_scatter.set_xlabel('CD3 expression')
ax_scatter.set_ylabel('CD28 expression')
ax_scatter.set_title('CD8+ Senescent T-cells')
# Hand-built legend entries replace the automatic hue legend.
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label=label,
           markerfacecolor=face, markersize=10)
    for label, face in zip(['Ground truth', 'Infinicyt imputed'], palette)
]
ax_scatter.legend(handles=legend_elements, loc='lower center',
                  facecolor='white', framealpha=1)

plt.subplots_adjust(wspace=0.4, hspace=0)

# Save the same figure as PNG and as TIFF.
for file_name in ('Supplemental Figure 4.png', 'Supplemental Figure 4.tiff'):
    plt.savefig(figures_path + file_name, dpi=300, bbox_inches='tight')