In [None]:
# Import all relevant packages
import os
import argparse
import pandas as pd
import numpy as np
from fcsy import DataFrame
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.colors import ListedColormap
import seaborn as sns
from statistics import mean, median, stdev
from scipy.stats import iqr
from math import log2
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score
import umap
plt.style.use('plotstyle.mplstyle')

In [None]:
# Directory prefix of the imputed input files (the '*_exprs.fcs' and '*_labels.csv'
# files read in the scoring cell). It is joined to the file names by plain string
# concatenation, so a non-empty value must end with a path separator.
imputed_path = ""

In [None]:
# Score how well each imputation method reproduces a manually gated population.
#
# For every method the concatenated expression file and its gate-label table are
# loaded, restricted to the parent gate, split into ground-truth (imp_state == 0)
# and imputed (imp_state == 1) cells, and the number/proportion of cells falling
# in each population of interest is compared. For methods that keep a one-to-one
# cell correspondence, per-cell sensitivity, specificity and F-score are added.
# One row per (method, population) is collected in `scores` and finally turned
# into the `statistics` data frame.

order = ['CyTOFmerge', 'CytoBackBone', 'cyCombine', 'Infinicyt']
scores = []

# Methods for which no per-cell classification metrics are computed.
# NOTE(review): presumably CytoBackBone does not return a one-to-one match
# between ground-truth and imputed cells - confirm.
unpaired_methods = {'CytoBackBone'}

# All populations are evaluated within this parent gate.
parent_gate = '/CD14_CD3pos/CD8neg_CD4pos/CD45RAneg_CD69pos'

# Populations
pops = ['/CD14_CD3pos/CD8neg_CD4pos/CD45RAneg_CD69pos/TNFapos_IFNgneg']


def _gate_paths(gate):
    """Return the label columns for `gate` and all its ancestors, outermost first.

    '/A/B/C' -> ['/A', '/A/B', '/A/B/C']
    """
    parts = [part for part in gate.split('/') if part != '']
    return ['/' + '/'.join(parts[:depth]) for depth in range(1, len(parts) + 1)]


def _in_gate(cells, gate):
    """Return a boolean Series marking the rows of `cells` that pass `gate`.

    A cell is inside a gate only if it is labelled 1 for the gate itself and
    for every ancestor gate.
    """
    return (cells[_gate_paths(gate)] == 1).all(axis=1)


for name in order:
    print(name)
    # Load the concatenated expression data and attach the gate labels.
    # Both frames get a fresh 0..n-1 index so they are joined row by row.
    exprs = DataFrame.from_fcs(imputed_path + 'Mosmann_rare_' + name + '_exprs.fcs')
    exprs = exprs.reset_index(drop=True)
    gates = pd.read_csv(imputed_path + name + '_labels.csv', index_col=0)
    gates = gates.reset_index(drop=True)
    exprs = pd.concat([exprs, gates], axis=1)

    # Keep only the cells inside the parent gate
    subset = exprs[_in_gate(exprs, parent_gate)]

    # Split into ground-truth and imputed cells for *every* method. Previously
    # this was skipped for CytoBackBone, so that iteration silently reused the
    # frames of the preceding method and reported its counts.
    # NOTE(review): assumes the CytoBackBone files also carry an 'imp_state'
    # column - confirm.
    paired = name not in unpaired_methods
    gt_data = subset[subset['imp_state'] == 0].copy()
    imp_data = subset[subset['imp_state'] == 1].copy()
    if paired:
        # Sorting both halves by the shared cell ID puts them in the same row
        # order; the per-cell metrics below compare them position by position.
        gt_data = gt_data.sort_values('original_ID')
        imp_data = imp_data.sort_values('original_ID')

    for gate in pops:
        # Flag each cell as inside (1) / outside (0) the population. The mask
        # is evaluated per row; this matches an original_ID-based lookup as
        # long as IDs are unique within each half.
        leaf = gate.split('/')[-1]
        gt_data[leaf] = _in_gate(gt_data, gate).astype(int)
        imp_data[leaf] = _in_gate(imp_data, gate).astype(int)

        # Store counts/proportions (proportion is None when a half is empty)
        gt_count = int((gt_data[leaf] == 1).sum())
        imp_count = int((imp_data[leaf] == 1).sum())
        gt_prop = gt_count / len(gt_data) if len(gt_data) != 0 else None
        imp_prop = imp_count / len(imp_data) if len(imp_data) != 0 else None
        if gt_count != 0 and imp_prop is not None:
            perc_change = ((imp_prop - gt_prop) / gt_prop) * 100
        else:
            perc_change = None

        sensitivity = None
        specificity = None
        f1 = None
        # Classification statistics need paired cells and at least one
        # ground-truth positive.
        if paired and gt_count != 0:
            y_true = gt_data[leaf].to_numpy()
            y_pred = imp_data[leaf].to_numpy()
            # Specificity is the recall of the negative class.
            specificity = recall_score(y_true, y_pred, pos_label=0)
            if imp_count != 0:
                sensitivity = recall_score(y_true, y_pred, pos_label=1)
                f1 = f1_score(y_true, y_pred, pos_label=1)
            else:
                # No cell was imputed into the population: there are no true
                # positives, so sensitivity and F-score are 0. They are set
                # directly because precision is undefined in this case.
                sensitivity = 0
                f1 = 0

        # Save results
        result = {'Method': name,
                  'Population': leaf,
                  'Sensitivity': sensitivity,
                  'Specificity': specificity,
                  'F-score': f1,
                  'Ground-truth count': gt_count,
                  'Ground-truth proportion': gt_prop,
                  'Imputed count': imp_count,
                  'Imputed proportion': imp_prop,
                  '% Change': perc_change}
        scores.append(result)

statistics = pd.DataFrame(scores)