Author: Tim Mocking

Contact: t.r.mocking@amsterdamumc.nl

In [None]:
# File locations -- fill these in before running the notebook.
imputed_path = ""  # passed to load_data as the imputed-data location
gt_path = ""  # passed to load_data as the ground-truth location
figures_path = ""  # prefix/directory used when saving the figures

In [None]:
# Import all relevant packages
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from fcsy import DataFrame
from matplotlib.lines import Line2D
from scipy.stats import gaussian_kde, pearsonr

from utils import load_data
%load_ext autoreload
%autoreload 2
plt.style.use('plotstyle.mplstyle')

In [None]:
# Load the data set from the locations configured in `gt_path` and
# `imputed_path` using the project helper `load_data`.
# NOTE(review): the code below indexes the result by column labels such as
# 'sample_id', 'method', 'imp_state' and 'dataset' -- presumably a pandas
# DataFrame; confirm against utils.load_data.
data = load_data(gt_path, imputed_path)

# Figure 3

In [None]:
# Notebook-wide plotting defaults.
# `bins` and `cmap` are only referenced by a commented-out histogram call in
# this part of the notebook.
bins, cmap = 200, "Spectral_r"
n = 10_000

def plot_bivariate_density(ax, data, n, x, y, xlim, ylim, hide_x, hide_y, x_name, y_name):
    """Draw a density-coloured scatter plot of two columns on ``ax``.

    A Gaussian KDE is fitted on a seeded random subsample of the rows and then
    evaluated at every row; the resulting density is used to colour the points.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    data : pandas.DataFrame
        Table holding the columns ``x`` and ``y``.
    n : int
        Accepted for call-site compatibility. It is not used: the KDE is
        fitted on at most 10000 rows, as before.
    x, y : str
        Column labels plotted on the horizontal / vertical axis.
    xlim, ylim : tuple
        Axis limits passed to ``set_xlim`` / ``set_ylim``.
    hide_x, hide_y : bool
        Hide the bottom / left spine and the corresponding axis.
    x_name, y_name : str
        Axis labels. ``y_name`` is only applied when ``hide_y`` is false.

    Raises
    ------
    Whatever ``scipy.stats.gaussian_kde`` raises when ``data`` has too few
    (or degenerate) rows to estimate a 2-D density.
    """
    # Fit on at most 10000 rows. Clamping to the available row count avoids
    # the ValueError pandas raises when asked for a larger sample than exists.
    fit_size = min(10000, len(data))
    # Sample whole rows once so the x/y values are guaranteed to stay paired;
    # the fixed seed keeps the colouring reproducible between runs.
    fit_rows = data.sample(n=fit_size, random_state=1)
    kernel = gaussian_kde(np.vstack([fit_rows[x], fit_rows[y]]))

    # Evaluate the density at every point (not just the fitted subsample).
    density = kernel(np.vstack([data[x], data[y]]))
    # "Spectral_r" is a matplotlib colormap, so it can be passed by name.
    ax.scatter(data[x], data[y], s=1, c=density, edgecolor='none',
               rasterized=True, cmap="Spectral_r")

    ax.set_xlabel(x_name)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    if hide_x:
        ax.spines.bottom.set_visible(False)
        ax.axes.get_xaxis().set_visible(False)
    if hide_y:
        ax.spines.left.set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
    else:
        ax.set_ylabel(y_name)


def plot_methods(data, patient_id):
    """Plot ground-truth vs. imputed bivariate densities for one sample.

    Creates a new 16x6 inch figure with two rows of five panels (ground truth,
    Infinicyt, CyTOFmerge, cyCombine, CytoBackBone). Both rows plot
    'BUV737-A' (CD4) on the x axis; the first row shows 'FITC-A' (CD57) and
    the second 'PE-CF594-A' (CD95) on the y axis. The figure is left as the
    current pyplot figure so the caller can save it with ``plt.savefig``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least the columns 'sample_id', 'method', 'imp_state',
        'dataset', 'cyCombine_NA' and the plotted channels.
    patient_id : str
        Value of 'sample_id' selecting the rows to plot.
    """
    patient_data = data[data['sample_id'] == patient_id]

    def imputed_by(method):
        # Rows of this sample produced by `method` with imp_state == 1.
        return patient_data.loc[(patient_data['method'] == method)
                                & (patient_data['imp_state'] == 1)]

    infinicyt_data = imputed_by('Infinicyt')
    cytofmerge_data = imputed_by('CyTOFmerge')
    cytobackbone_data = imputed_by('CytoBackBone')
    cycombine_data = imputed_by('cyCombine')
    # Only keep cyCombine rows whose 'cyCombine_NA' flag is explicitly False.
    cycombine_data = cycombine_data[cycombine_data['cyCombine_NA'] == False]
    # The ground truth is taken from the Infinicyt rows; this choice is arbitrary.
    gt_data = patient_data.loc[(patient_data['method'] == 'Infinicyt')
                               & (patient_data['imp_state'] == 0)]

    # Size forwarded to plot_bivariate_density: CytoBackBone row count minus 1000.
    n = len(cytobackbone_data) - 1000

    # Fluorochrome and marker name shared by both rows (x axis).
    x = 'BUV737-A'
    x_name = 'CD4 (backbone)'
    xlim = (-2, 5.5)

    # One entry per row: (y channel, y label, y limits, 'dataset' value used to
    # filter the Infinicyt / CyTOFmerge / cyCombine rows for that channel).
    rows = [
        ('FITC-A', 'CD57 (imputed)', (-1.5, 7), 2),
        ('PE-CF594-A', 'CD95 (imputed)', (0, 5.5), 1),
    ]

    # A single figure; the grid is larger than the 2x5 panels actually used,
    # which determines the size of each panel.
    fig = plt.figure(figsize=(16, 6))
    gs = fig.add_gridspec(nrows=4, ncols=8)

    for row, (y, y_name, ylim, dataset) in enumerate(rows):
        panels = [
            ('Ground truth', gt_data),
            ('Infinicyt', infinicyt_data[infinicyt_data['dataset'] == dataset]),
            ('CyTOFmerge', cytofmerge_data[cytofmerge_data['dataset'] == dataset]),
            ('cyCombine', cycombine_data[cycombine_data['dataset'] == dataset]),
            ('CytoBackBone', cytobackbone_data),
        ]
        top_row = row == 0
        for col, (title, panel_data) in enumerate(panels):
            ax = fig.add_subplot(gs[row, col])
            if top_row:
                # Column titles are only shown above the first row.
                ax.set_title(title, fontsize=12)
            # The x axis is hidden on the top row; the y axis is hidden on
            # every column except the first.
            plot_bivariate_density(ax, panel_data, n, x, y, xlim, ylim,
                                   top_row, col > 0, x_name, y_name)

    fig.subplots_adjust(wspace=0.2, hspace=0.05)

In [None]:
# Render Figure 3 for one sample and save it as PNG and TIFF.
plot_methods(data, "08B_008")
for extension in ('png', 'tiff'):
    # os.path.join keeps the file name intact whether or not `figures_path`
    # ends with a path separator (and yields a bare file name when it is '').
    plt.savefig(os.path.join(figures_path, f'Figure 3.{extension}'),
                dpi=300, bbox_inches='tight')

# Figure 4

In [None]:
def density_kernel(x, y, ax=None):
    """Scatter ``y`` against ``x``, coloured by a Gaussian KDE estimate.

    The KDE is fitted on a seeded random subsample of at most 500 points and
    evaluated at every point to obtain the colours.

    Parameters
    ----------
    x, y : pandas.Series
        Paired values of equal length (they are sampled with the same seed so
        the same positions are drawn from both).
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current pyplot axes, so calling this
        right after ``ax = fig.add_subplot(...)`` draws on that subplot.
    """
    if ax is None:
        ax = plt.gca()
    # Clamp the sample size so inputs with fewer than 500 rows do not make
    # pandas raise "cannot take a larger sample than population".
    fit_size = min(500, len(x))
    kernel = gaussian_kde(np.vstack([
        x.sample(n=fit_size, random_state=1),
        y.sample(n=fit_size, random_state=1)]))
    density = kernel(np.vstack([x, y]))
    # "Spectral_r" is a matplotlib colormap, so it can be passed by name.
    ax.scatter(x, y, s=1, c=density, edgecolor='none', rasterized=True,
               cmap="Spectral_r")

In [None]:
# Figure 4: for three marker/method combinations, an imputed-vs-ground-truth
# density scatter (top row) above the per-state histograms (bottom row).
fig = plt.figure(figsize=(8, 5))
gs = fig.add_gridspec(nrows=2, ncols=3, height_ratios=[0.7, 0.3])

# NOTE(review): this id is assigned but never used to filter the data below,
# so every panel is drawn from all samples in `data` -- confirm this is intended.
patient_id = "15B_014"

# Colours for imp_state 0 (ground truth) and 1 (imputed), in that order.
state_palette = ['#003a44', '#ee1d24']

# One entry per column of the figure.
panels = [
    {'method': 'CyTOFmerge', 'channel': 'PE-CF594-A', 'marker': 'CD95', 'dataset': 1,
     'scatter_lim': (-1.2, 5.5), 'hist_xlim': (-1.2, 5.5), 'legend': False},
    {'method': 'cyCombine', 'channel': 'FITC-A', 'marker': 'CD57', 'dataset': 2,
     'scatter_lim': (-2, 7), 'hist_xlim': (-2, 7), 'legend': True},
    {'method': 'cyCombine', 'channel': 'BV711-A', 'marker': 'TIM-3', 'dataset': 1,
     'scatter_lim': (-1.75, 2.75), 'hist_xlim': (-2, 2.8), 'legend': False},
]

for col, panel in enumerate(panels):
    channel = panel['channel']
    imputed_column = channel + '_imp'

    method_data = data[data['method'] == panel['method']]
    if panel['method'] == 'cyCombine':
        # Only keep cyCombine rows whose 'cyCombine_NA' flag is explicitly False.
        method_data = method_data[method_data['cyCombine_NA'] == False]

    # Copy before adding a column so the assignment never targets a slice of
    # `data` (avoids pandas' SettingWithCopy warning).
    gt = method_data[method_data['imp_state'] == 0].copy()
    imp = method_data[method_data['imp_state'] == 1]
    # Positional pairing: assumes the ground-truth and imputed rows have the
    # same count and order (pandas raises ValueError if the counts differ).
    gt[imputed_column] = imp[channel].to_numpy()
    gt = gt[gt['dataset'] == panel['dataset']]

    # Top row: imputed vs. ground truth. density_kernel is called without an
    # explicit axes, so the subplot must be created (and bound to the
    # module-level name `ax`) immediately before the call.
    ax = fig.add_subplot(gs[0, col])
    density_kernel(x=gt[channel], y=gt[imputed_column])
    # Identity line, drawn in axes coordinates (corner to corner).
    ax.plot([0, 1], [0, 1], transform=ax.transAxes, color='black', linestyle='--')
    ax.set_xlim(panel['scatter_lim'])
    ax.set_ylim(panel['scatter_lim'])
    ax.set_title(f"{panel['marker']} expression", fontweight='bold')
    ax.set_ylabel('Imputed')
    ax.set_xlabel('Ground truth')

    # Bottom row: step histograms of the channel, split by imp_state.
    ax = fig.add_subplot(gs[1, col])
    sns.histplot(data=method_data[method_data['dataset'] == panel['dataset']],
                 x=channel, hue='imp_state', element='step', stat="count",
                 fill=False, bins=250, ax=ax, legend=False, palette=state_palette,
                 hue_order=[0, 1], rasterized=True)
    ax.set_ylabel('Count')
    ax.set_yticks([])
    ax.set_xlabel(f"{panel['marker']} expression")
    ax.set_xlim(panel['hist_xlim'])

    if panel['legend']:
        # A single shared legend, anchored below the middle histogram.
        legend_elements = [
            Line2D([0], [0], label='Ground truth', lw=4, color=state_palette[0]),
            Line2D([0], [0], label='Imputed', lw=4, color=state_palette[1]),
        ]
        ax.legend(handles=legend_elements, bbox_to_anchor=(1.2, -0.5),
                  facecolor='white', framealpha=1, ncol=2)

plt.subplots_adjust(wspace=0.5, hspace=0.35)

for extension in ('png', 'tiff'):
    # os.path.join keeps the file name intact whether or not `figures_path`
    # ends with a path separator (and yields a bare file name when it is '').
    plt.savefig(os.path.join(figures_path, f'Figure 4.{extension}'),
                dpi=300, bbox_inches='tight')