# Analysis Part V - Figures for Paper

In [None]:
%load_ext autoreload
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore')
import os
import scanpy as sc
import scirpy as ir
import anndata as ann
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib
import matplotlib.backend_bases
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mudata import MuData
import mudata

import tarfile
import warnings
from glob import glob

import anndata
import muon as mu
import pandas as pd
import scanpy as sc
import scirpy as ir

%autoreload 2
import sys
sys.path.append('..')
import utility.annotation as utils_annotation
import utility.representation as utils_representation
import utility.visualisation as utils_vis

from sklearn.mixture import GaussianMixture

from scipy.stats import zscore

In [None]:
# Global plotting defaults for the whole notebook.
sc.settings.verbosity = 3
# dpi is passed in the same call as the other options: set_figure_params
# writes figure.dpi on every call, so a second call without dpi would replace
# the requested 150 with the function's default.
sc.set_figure_params(dpi=150, vector_friendly=True, color_map='viridis', transparent=False)
sb.set_style('whitegrid')

from matplotlib.colors import LinearSegmentedColormap

# Blue -> yellow -> red colour map interpolated between the named colours.
colors = ['darkblue', 'blue', 'lightblue', '#FFEA00', 'orange', 'red', 'darkred']
cust = LinearSegmentedColormap.from_list('custom_cmap', colors)
colormap = cust  # second name for the same colour map object

In [None]:
# Five hex colour stops, listed from yellow to dark purple, interpolated into
# a continuous colour map.
colors_viridis = [
    '#FDE725',
    '#5EC962',
    '#21918C',
    '#3B528B',
    '#440154',
]
cust_viridis = LinearSegmentedColormap.from_list(name='custom_cmap', colors=colors_viridis)

In [None]:
# Seven hex colour stops (blue tones -> pale yellow -> red tones); this colour
# map is passed as `cmap` to most expression plots further down.
colors_katha = [
    '#33378F', '#669AC7', '#BFE0EC',
    '#FDF8C0',
    '#FBBC6C', '#EB5638', '#A71D2B',
]
cust_katha = LinearSegmentedColormap.from_list(name='custom_cmap', colors=colors_katha)

In [None]:
# Load the MuData object every following cell works on.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
h5mu_path = '/Users/mimi/Sina/data_specificity_annotated_final_pseudotime_cite.h5mu'
mdata = mu.read(h5mu_path)

## Figure 1

### UMAP showing leiden clustering

In [None]:
# UMAP of the 'gex' modality coloured by Leiden cluster.
# savefig does not create missing directories, so make sure the output
# folder used by the Figure 1 cells exists before the first write.
os.makedirs('Figure1', exist_ok=True)

sb.set(rc={'figure.figsize': (6, 5)})
sb.set_style('whitegrid')
sc.pl.umap(mdata["gex"], color=['leiden'], show=False, size=30)
plt.tight_layout()
plt.savefig("Figure1/UMAP_leiden_all.pdf", dpi=300)
plt.show()

### Gene expression heatmap

In [None]:
# Export the ranked gene names stored under uns['rank_genes_groups_leiden'].
# NOTE(review): the file name says "Top20"; how many genes end up in the CSV
# depends on how the ranking was computed upstream - confirm.
ranked_gene_names = mdata["gex"].uns['rank_genes_groups_leiden']['names']
marker = pd.DataFrame(ranked_gene_names)
marker.to_csv('Figure1/Top20_DEG_genes_per_cluster.csv')

In [None]:
# Marker genes grouped by label; the dict keys are used by scanpy as the
# group labels of the second dot plot.
markers_short = {
    'Naive': ['RPL32', 'RPS13', 'SELL'],
    'CM': ['IL7R', 'PABPC1', 'RPLP1', 'RPS12', 'FTH1', 'S100A11', 'RPL8'],
    'EM': ['GZMK', 'HLA-A', 'CLIC3', 'TRAV12-2'],
    'EF': ['APOBEC3G', 'GZMH', 'GNLY', 'GZMB'],
    'IFN EF': ['IFITM1', 'LY6E', 'ISG20', 'IFI6', 'MX1'],
    'Cycling': ['CD74', 'ACTG1', 'ACTB', 'PFN1'],
    'Resting memory': ['LTB', 'EIF1', 'NFKB2', 'ZFAS1', 'BTG1', 'RELB', 'CREM'],
}

# Flat gene list for the first (ungrouped) dot plot.
genes = [
    'SELL', 'LTB', 'CCR7', 'TCF7', 'IL7R', 'CXCR3', 'GZMK', 'HLA-DRB1', 'GZMA', 'NKG7',
    'GZMH', 'GZMB', 'FGFBP2', 'CCL5', 'LY6E', 'IFITM1', 'IRF7', 'MX1', 'JUNB', 'FOS',
    'JAML', 'KLRB1', 'ALOX5AP', 'CREM', 'SNHG15', 'SNHG1', 'NFKBIA', 'PIM3',
]

# Version 1: flat gene list, expression scaled per gene.
sc.pl.dotplot(mdata["gex"], genes, groupby='leiden', dendrogram=False,
              standard_scale='var', cmap=cust_katha, show=False)
plt.savefig("Figure1/DEG_Dotplot_marker_genes_labelled_v1.pdf", dpi=300)

# Version 2: grouped genes. No var_group_positions is passed: when var_names
# is a dict, scanpy derives group labels and positions from it and discards
# any explicitly supplied value, so the former [(0, 3)] had no effect.
sc.pl.dotplot(mdata["gex"], markers_short, groupby='leiden', dendrogram=False,
              standard_scale='var', cmap=cust_katha, show=False)
plt.savefig("Figure1/DEG_Dotplot_marker_genes_labelled_v2.pdf", dpi=300)

### Specific cells over time - UMAP

In [None]:
# One UMAP per time point: all cells as uncoloured background, with the
# NS4B214-specific cells of that time point drawn on top.
gex = mdata["gex"]
for timepoint in gex.obs['time'].unique():
    is_selected = (gex.obs['specific_new'] == 'NS4B214') & (gex.obs['time'] == timepoint)
    ax = sc.pl.umap(gex, show=False, size=30)
    sc.pl.umap(gex[is_selected], color='specific_new', ax=ax, show=False, size=30)
    plt.title('NS4B214 ' + str(timepoint))
    plt.tight_layout()
    plt.savefig(f'Figure1/Specific_cells_NS4B214_{timepoint}.pdf', dpi=300)
    # The former `plt.figsize=(10,10)` was removed: it only set an attribute on
    # the pyplot module (after the figure was already saved) and never changed
    # any figure size.
    plt.show()

In [None]:
# Colour per epitope label: NS4B214 is highlighted in blue, every other
# listed epitope shares light blue.
pal = {'NS4B214': 'blue'}
pal.update(dict.fromkeys(
    ['NS2B117', 'NS3293', 'NS3286', 'NS324', 'NS5672', 'NS2A97', 'NS4B165'],
    'lightblue',
))

In [None]:
# One UMAP per time point: all cells as uncoloured background, with the cells
# specific for any of the listed epitopes at that time point drawn on top
# (default colours).
gex = mdata["gex"]
yf_epitopes = ['NS4B214', 'NS2B117', 'NS3293', 'NS3286', 'NS324', 'NS5672', 'NS2A97', 'NS4B165']
for timepoint in gex.obs['time'].unique():
    is_selected = gex.obs['specific_new'].isin(yf_epitopes) & (gex.obs['time'] == timepoint)
    ax = sc.pl.umap(gex, show=False, size=30)
    sc.pl.umap(gex[is_selected], color='specific_new', ax=ax, show=False, size=30)
    plt.title('Specific cells ' + str(timepoint))
    plt.tight_layout()
    plt.savefig(f'Figure1/Specific_cells_all_YF_{timepoint}.pdf', dpi=300)
    # The former `plt.figsize=(10,10)` was removed: assigning an attribute on
    # the pyplot module does not resize any figure.
    plt.show()

In [None]:
# Same per-time-point overlay as the previous cell, but coloured with the
# `pal` dictionary instead of the default colours.
gex = mdata["gex"]
yf_epitopes = ['NS4B214', 'NS2B117', 'NS3293', 'NS3286', 'NS324', 'NS5672', 'NS2A97', 'NS4B165']
for timepoint in gex.obs['time'].unique():
    is_selected = gex.obs['specific_new'].isin(yf_epitopes) & (gex.obs['time'] == timepoint)
    ax = sc.pl.umap(gex, show=False, size=30)
    sc.pl.umap(gex[is_selected], color='specific_new', ax=ax, show=False, size=30, palette=pal)
    plt.title('Specific cells ' + str(timepoint))
    plt.tight_layout()
    plt.savefig(f'Figure1/Specific_cells_all_YF_{timepoint}_different_color.pdf', dpi=300)
    # The former `plt.figsize=(10,10)` was removed: assigning an attribute on
    # the pyplot module does not resize any figure.
    plt.show()

### NS4B214 specific cells over time per leiden cluster

In [None]:
# Count cells per 'specific_new' label within every (time, leiden) group and
# flatten the result into a table with one row per combination.
specificity_counts = mdata["gex"].obs.groupby(['time', 'leiden'])['specific_new'].value_counts()
table = pd.DataFrame(specificity_counts.reset_index())

In [None]:
# Keep only NS4B214 rows that actually contain cells.
is_ns4b214 = table['specific_new'] == 'NS4B214'
has_cells = table['count'] != 0
table = table[is_ns4b214 & has_cells]

In [None]:
# Share (in percent) of each row's count within the total count of its time
# point. Computed in one vectorised step: the previous row-wise chained
# assignment (`table[col][i] = ...`) writes to a temporary object and is not
# guaranteed to reach `table` (it never does with pandas Copy-on-Write).
table = table.copy()  # detach from the frame `table` was filtered from
time_totals = table.groupby('time', observed=True)['count'].transform('sum')
table['percent_cluster'] = table['count'] * 100 / time_totals

In [None]:
# Write the per-cluster table to disk.
per_cluster_csv = 'Figure1/specific_cells_per_cluster_over_time.csv'
table.to_csv(per_cluster_csv)

### Pseudotime

In [None]:
# Run the project's diffusion-map helper on the 'gex' modality.
diffmap_options = {'n_high_var': 5000, 'remove_tcr_genes': True}
utils_representation.calculate_diffmap(mdata["gex"], **diffmap_options)

In [None]:
# Grid of UMAPs, one panel per diffusion component (at most nrows * ncols
# panels). In each panel the cell with the minimum value of that component is
# marked in red, to help choose a root cell.
nrows, ncols = 20, 5
fig, axes = plt.subplots(ncols=ncols, nrows=nrows, figsize=(ncols * 3, nrows * 3))
axes = axes.reshape(-1)

diffmap = mdata['gex'].obsm['X_diffmap']
umap_coords = mdata['gex'].obsm['X_umap']
for component, panel in enumerate(axes[:diffmap.shape[1]]):
    candidate_root = diffmap[:, component].argmin()
    root_x, root_y = umap_coords[candidate_root][0], umap_coords[candidate_root][1]

    sc.pl.umap(mdata['gex'], show=False, title=str(component), ax=panel)
    panel.plot(root_x, root_y, marker='o', markersize=5, color="red")
fig.tight_layout()
plt.show()

In [None]:
# Root cell = cell with the smallest value along diffusion component `root_nr`;
# it is stored under uns['iroot'] before the project's DPT helper is run.
root_nr = 69
root_component = mdata['gex'].obsm['X_diffmap'][:, root_nr]
root_ixs = root_component.argmin()
mdata['gex'].uns['iroot'] = root_ixs
utils_representation.calculate_dpt(mdata['gex'], n_high_var=5000, remove_tcr_genes=True)

# Copy the pseudotime to the MuData-level obs and back to the modality.
pseudotime = mdata['gex'].obs['dpt_pseudotime']
mdata.obs['dpt_pseudotime'] = pseudotime
mdata["gex"].obs['dpt_pseudotime'] = mdata.obs['dpt_pseudotime']

In [None]:
# UMAP coloured by pseudotime; the colour scale is capped at 0.5.
pseudotime_pdf = 'Figure1/UMAP_Pseudotime_root_69_cluster_4_scale_05_katha_smaller_dots.pdf'
sc.pl.umap(mdata["gex"], color='dpt_pseudotime', cmap=cust_katha, vmax=0.5, size=30, show=False)
plt.title('Pseudotime')
plt.tight_layout()
plt.savefig(pseudotime_pdf, dpi=300)
plt.show()

In [None]:
# Pseudotime UMAP restricted to the selected Leiden clusters (7, 8, 12 and 13
# are left out); all cells are drawn first as uncoloured background.
selected_clusters = ['0', '1', '2', '3', '4', '5', '6', '9', '10', '11']
in_selected = mdata['gex'].obs['leiden'].isin(selected_clusters)

ax = sc.pl.umap(mdata["gex"], show=False, size=30)
sc.pl.umap(mdata["gex"][in_selected], color='dpt_pseudotime', cmap=cust_katha, vmax=0.5,
           size=30, show=False, ax=ax)
plt.title('Pseudotime')
plt.tight_layout()
plt.savefig('Figure1/UMAP_Pseudotime_root_69_cluster_4_selected_clusters.pdf', dpi=300)
plt.show()

In [None]:
# Violin plot of pseudotime per Leiden cluster for the selected clusters,
# drawn in the explicit order given below.
selected_clusters = ['0', '1', '2', '3', '4', '5', '6', '9', '10', '11']
cluster_order = ['4', '10', '9', '3', '11', '2', '1', '5', '6', '0']
in_selected = mdata['gex'].obs['leiden'].isin(selected_clusters)

sc.pl.violin(mdata['gex'][in_selected], keys='dpt_pseudotime', groupby='leiden',
             show=False, order=cluster_order)
plt.savefig('Figure1/Violin_Pseudotime_root_69_cluster_4_selected_clusters.pdf', dpi=300)
plt.show()

In [None]:
# Violin plot of pseudotime per time point for NS4B214-specific cells within
# the selected Leiden clusters.
selected_clusters = ['0', '1', '2', '3', '4', '5', '6', '9', '10', '11']
is_specific = mdata["gex"].obs['specific_new'] == 'NS4B214'
in_selected = mdata['gex'].obs['leiden'].isin(selected_clusters)

x = mdata["gex"][is_specific & in_selected]
ax = sc.pl.violin(x, keys='dpt_pseudotime', groupby='time', show=False,
                  rotation=90, jitter=0.1, scale='area')
ax.set_ylim(bottom=0, top=0.65)
plt.savefig('Figure1/Time_vs_Pseudotime_NS4B214_specific_root_69_cluster_4_selected_clusters_violin_scale.pdf', dpi=300)
plt.show()

In [None]:
# Strip plot of pseudotime per time point for the same selection of cells
# (NS4B214-specific, selected Leiden clusters), taken from the obs table.
selected_clusters = ['0', '1', '2', '3', '4', '5', '6', '9', '10', '11']
is_specific = mdata["gex"].obs['specific_new'] == 'NS4B214'
in_selected = mdata['gex'].obs['leiden'].isin(selected_clusters)

x = mdata["gex"].obs[is_specific & in_selected]
ax = sb.stripplot(data=x, y='dpt_pseudotime', x='time')
ax.set_ylim(bottom=0, top=0.65)
plt.savefig('Figure1/Time_vs_Pseudotime_NS4B214_specific_root_69_cluster_4_selected_clusters_stripplot.pdf', dpi=300)
plt.show()

### Cite-Seq Gating strategy

In [None]:
# Contour (KDE) plot of CD45RA vs CD62L for all cells, with dashed gate lines
# and the percentage of cells in each of the four quadrants.
obs = mdata["gex"].obs
cd62l = obs['clr_CD62L']
cd45ra = obs['clr_CD45RA']
sb.kdeplot(data=obs, x=cd62l, y=cd45ra,
           legend=False, fill=False, color='grey', levels=30, bw_adjust=0.6)

# Quadrant percentages relative to all cells
# (gates: CD45RA at 1.3, CD62L at 1.6).
n_cells = len(obs)
a = (len(obs[(cd45ra > 1.3) & (cd62l <= 1.6)]) / n_cells) * 100   # upper left
b = (len(obs[(cd45ra > 1.3) & (cd62l > 1.6)]) / n_cells) * 100    # upper right
c = (len(obs[(cd45ra <= 1.3) & (cd62l > 1.6)]) / n_cells) * 100   # lower right
d = (len(obs[(cd45ra <= 1.3) & (cd62l <= 1.6)]) / n_cells) * 100  # lower left

plt.axvline(x=1.6, color='black', linestyle='dashed')
plt.axhline(y=1.3, color='black', linestyle='dashed')
plt.xlabel('CD62L')
plt.ylabel('CD45RA')
plt.xlim(right=4)
plt.ylim(-0.4, 3)

# One label per quadrant corner, in the order a, b, c, d.
label_positions = [(-0.25, 2.75), (2.8, 2.75), (2.8, -0.35), (-0.25, -0.35)]
for (label_x, label_y), percent in zip(label_positions, [a, b, c, d]):
    plt.text(label_x, label_y, f'{percent:.2f}%',
             bbox=dict(facecolor='white', alpha=0, edgecolor='black'))

plt.savefig('Figure1/Gating_strategy_CD45RA_CD62L_contour_FACS_v3.pdf', dpi=300)

In [None]:
# Contour (KDE) plot of CD45RA vs CD62L for one subset of cells (a donor, a
# time point, or both), with gate lines and quadrant percentages.

# Choose the subset by enabling exactly one assignment of `x`, together with
# the matching savefig line at the end of the cell:
#x = mdata["gex"].obs[mdata["gex"].obs['donor']=='D1']
#x = mdata["gex"].obs[mdata["gex"].obs['donor']=='D2']
#x = mdata["gex"].obs[(mdata["gex"].obs['donor']=='D1') & (mdata["gex"].obs['time']=='d14')]
#x = mdata["gex"].obs[(mdata["gex"].obs['donor']=='D2') & (mdata["gex"].obs['time']=='d14')]
#x = mdata["gex"].obs[(mdata["gex"].obs['time']=='d14')]
all_obs = mdata["gex"].obs
x = all_obs[all_obs['time'] == 'd365']

cd62l = x['clr_CD62L']
cd45ra = x['clr_CD45RA']
sb.kdeplot(data=x, x=cd62l, y=cd45ra,
           legend=False, fill=False, color='grey', levels=30, bw_adjust=0.6)

# Quadrant percentages relative to the cells of the subset
# (gates: CD45RA at 1.3, CD62L at 1.6).
n_cells = len(x)
a = (len(x[(cd45ra > 1.3) & (cd62l <= 1.6)]) / n_cells) * 100   # upper left
b = (len(x[(cd45ra > 1.3) & (cd62l > 1.6)]) / n_cells) * 100    # upper right
c = (len(x[(cd45ra <= 1.3) & (cd62l > 1.6)]) / n_cells) * 100   # lower right
d = (len(x[(cd45ra <= 1.3) & (cd62l <= 1.6)]) / n_cells) * 100  # lower left

plt.axvline(x=1.6, color='black', linestyle='dashed')
plt.axhline(y=1.3, color='black', linestyle='dashed')
plt.xlabel('CD62L')
plt.ylabel('CD45RA')
plt.xlim(right=4)
plt.ylim(-0.4, 3)

# One label per quadrant corner, in the order a, b, c, d.
label_positions = [(-0.25, 2.75), (2.8, 2.75), (2.8, -0.35), (-0.25, -0.35)]
for (label_x, label_y), percent in zip(label_positions, [a, b, c, d]):
    plt.text(label_x, label_y, f'{percent:.2f}%',
             bbox=dict(facecolor='white', alpha=0, edgecolor='black'))

#plt.savefig(f'Figure1/Gating_strategy_CD45RA_CD62L_contour_FACS_v3_D1.pdf', dpi=300)
#plt.savefig(f'Figure1/Gating_strategy_CD45RA_CD62L_contour_FACS_v3_D2.pdf', dpi=300)
#plt.savefig(f'Figure1/Gating_strategy_CD45RA_CD62L_contour_FACS_v3_D1_day14.pdf', dpi=300)
#plt.savefig(f'Figure1/Gating_strategy_CD45RA_CD62L_contour_FACS_v3_D2_day14.pdf', dpi=300)
#plt.savefig(f'Figure1/Gating_strategy_CD45RA_CD62L_contour_FACS_v3_day14.pdf', dpi=300)
plt.savefig('Figure1/Gating_strategy_CD45RA_CD62L_contour_FACS_v3_day365.pdf', dpi=300)

In [None]:
# Density of CD95 for cells pre-gated on CD45RA > 1.3 and CD62L > 1.6, with a
# dashed line at 1.
gex_all = mdata['gex']
double_positive = (gex_all.obs['clr_CD45RA'] > 1.3) & (gex_all.obs['clr_CD62L'] > 1.6)
adata = gex_all[double_positive]

sb.kdeplot(data=adata.obs, x=adata.obs['clr_CD95'],
           bw_adjust=0.2, fill=True, color='#d4d4d4')
plt.axvline(x=1, color='black', linestyle='dashed')
plt.savefig('Figure1/Gating_strategy_CD95_histogram_FACS_v3_pregated.pdf', dpi=300)
plt.show()

#### Table FACS Phenotypes over time - specific cells

In [None]:
# Count cells per 'specific_new' label within every (time, FACS_Phenotype_v3)
# group and flatten the result into a table.
phenotype_counts = mdata["gex"].obs.groupby(['time', 'FACS_Phenotype_v3'])['specific_new'].value_counts()
table = pd.DataFrame(phenotype_counts.reset_index())

In [None]:
# Keep only NS4B214 rows that actually contain cells.
is_ns4b214 = table['specific_new'] == 'NS4B214'
has_cells = table['count'] != 0
table = table[is_ns4b214 & has_cells]

In [None]:
# Share (in percent) of each row's count within the total count of its time
# point. Computed in one vectorised step: the previous row-wise chained
# assignment (`table[col][i] = ...`) writes to a temporary object and is not
# guaranteed to reach `table` (it never does with pandas Copy-on-Write).
table = table.copy()  # detach from the frame `table` was filtered from
time_totals = table.groupby('time', observed=True)['count'].transform('sum')
table['percent_FACS'] = table['count'] * 100 / time_totals

In [None]:
# Write the per-phenotype table to disk.
per_phenotype_csv = 'Figure1/specific_cells_per_FACS_Phenotype_v3_over_time.csv'
table.to_csv(per_phenotype_csv)

#### FACS Phenotypes over time (d0, d14, d365, dx) UMAP - specific cells

In [None]:
# Colour per FACS phenotype label (replaces the epitope palette stored under
# the same name earlier in the notebook).
phenotype_labels = ['N', 'SCM', 'CM', 'EM', 'EF']
phenotype_colors = ['black', '#90bff9', '#0e4d92', '#21918c', '#00c000']
pal = dict(zip(phenotype_labels, phenotype_colors))

In [None]:
# For each listed time point and epitope: all cells as uncoloured background,
# with the epitope-specific cells of that time point on top, coloured by FACS
# phenotype. The loop variable `ep` now drives both the cell selection and the
# file name (previously 'NS4B214' was hard-coded in both and `ep` was unused);
# for the current epitope list the output file names are unchanged.
gex = mdata["gex"]
for timepoint in ['d0', 'd14', 'd365', 'dx']:
    for ep in ['NS4B214']:
        is_selected = (gex.obs['time'] == timepoint) & (gex.obs['specific_new'] == ep)
        ax = sc.pl.umap(gex, show=False, size=30)
        sc.pl.umap(gex[is_selected], color='FACS_Phenotype_v3', ax=ax,
                   show=False, size=30, palette=pal)
        plt.title(str(timepoint))
        plt.tight_layout()
        plt.savefig(f'Figure1/Specific_cells_{ep}_{timepoint}_colored_by_FACS.pdf', dpi=300)
        plt.show()

## Figure 2

### Protein versus RNA 

In [None]:
# Protein vs RNA UMAPs. For every feature two figures are written:
#   * "specific_cells": all cells as uncoloured background, NS4B214-specific
#     cells on top, coloured by the feature
#   * "all_cells": every cell coloured by the feature
# The twelve formerly copy-pasted stanzas are generated from the table below;
# plot order and output file names are unchanged.
os.makedirs('Figure2', exist_ok=True)  # savefig does not create missing directories

gex = mdata["gex"]
specific_cells = gex[gex.obs['specific_new'] == 'NS4B214']

# (key passed to `color`, label used in the file name, modality word in the file name)
features = [
    ('clr_CD45RA', 'CD45RA', 'protein'),
    ('PTPRC', 'PTPRC', 'RNA'),
    ('clr_CD95', 'CD95', 'protein'),
    ('FAS', 'FAS', 'RNA'),
    ('clr_CD62L', 'CD62L', 'protein'),
    ('SELL', 'SELL', 'RNA'),
]

for color_key, label, modality in features:
    file_stem = f'Figure2/{label}_{modality}_expression_UMAP'

    # Specific cells on top of the uncoloured background.
    ax = sc.pl.umap(gex, size=30, cmap=cust_katha, show=False)
    sc.pl.umap(specific_cells, color=color_key, size=30, cmap=cust_katha, show=False, ax=ax)
    plt.savefig(f'{file_stem}_specific_cells_kathas_color.pdf', dpi=600)

    # All cells.
    sc.pl.umap(gex, color=color_key, size=30, cmap=cust_katha, show=False)
    plt.savefig(f'{file_stem}_all_cells_kathas_color.pdf', dpi=600)

### Ki67 UMAPs & quantification

In [None]:
# MKI67 expression of NS4B214-specific cells from d7, d11 and d14, drawn on
# top of all cells as uncoloured background.
early_timepoints = ['d7', 'd11', 'd14']
is_specific = mdata['gex'].obs['specific_new'] == 'NS4B214'
in_window = mdata['gex'].obs['time'].isin(early_timepoints)

ax = sc.pl.umap(mdata["gex"], size=30, cmap=cust, show=False)
sc.pl.umap(mdata["gex"][is_specific & in_window], color='MKI67',
           size=30, cmap=cust_katha, show=False, ax=ax)
plt.savefig('Figure2/MKI67_expression_UMAP_d7-14_kathas_color.pdf', dpi=600)

In [None]:
# MKI67 expression of NS4B214-specific cells from d90, d365 and dx, drawn on
# top of all cells as uncoloured background.
late_timepoints = ['d90', 'd365', 'dx']
is_specific = mdata['gex'].obs['specific_new'] == 'NS4B214'
in_window = mdata['gex'].obs['time'].isin(late_timepoints)

ax = sc.pl.umap(mdata["gex"], size=30, cmap=cust, show=False)
sc.pl.umap(mdata["gex"][is_specific & in_window], color='MKI67',
           size=30, cmap=cust_katha, show=False, ax=ax)
plt.savefig('Figure2/MKI67_expression_UMAP_d90-x_kathas_color.pdf', dpi=600)

In [None]:
# MKI67 expression of all NS4B214-specific cells (every time point), drawn on
# top of all cells as uncoloured background.
is_specific = mdata['gex'].obs['specific_new'] == 'NS4B214'

ax = sc.pl.umap(mdata["gex"], size=30, cmap=cust, show=False)
sc.pl.umap(mdata["gex"][is_specific], color='MKI67',
           size=30, cmap=cust_katha, show=False, ax=ax)
plt.savefig('Figure2/MKI67_expression_UMAP_specific_kathas_color.pdf', dpi=600)

In [None]:
# MKI67 expression of all cells; the uncoloured background layer is drawn
# first and the coloured layer is placed on the same axes.
mki67_all_pdf = 'Figure2/MKI67_expression_UMAP_all_kathas_color.pdf'

ax = sc.pl.umap(mdata["gex"], size=30, cmap=cust, show=False)
sc.pl.umap(mdata["gex"], color='MKI67', size=30, cmap=cust_katha, show=False, ax=ax)
plt.savefig(mki67_all_pdf, dpi=600)

In [None]:
# Copy the MKI67 column of X into obs as a flat per-cell vector.
# NOTE(review): afterwards 'MKI67' exists both as a gene name and as an obs
# column, which may make later lookups by that key ambiguous - confirm.
mki67_expression = mdata['gex'][:, 'MKI67'].X.toarray()
mdata['gex'].obs['MKI67'] = mki67_expression.flatten()

In [None]:
# Per-cell table with the annotation columns and the MKI67 values to export.
export_columns = ['time', 'leiden', 'donor', 'FACS_Phenotype_v3', 'specific_new', 'MKI67']
table = pd.DataFrame(mdata["gex"].obs[export_columns])

In [None]:
# Write the MKI67 table to disk.
mki67_csv = 'Figure2/MKI67_expression.csv'
table.to_csv(mki67_csv)

### Metabolism etc. → see Notebook 8 (requires its own environment)

## Supplement

### Table

In [None]:
# Count cells per 'specific_new' label within every (time, donor, leiden)
# group and flatten the result into a table.
donor_counts = mdata["gex"].obs.groupby(['time', 'donor', 'leiden'])['specific_new'].value_counts()
table = pd.DataFrame(donor_counts.reset_index())

In [None]:
# Keep only NS4B214 rows that actually contain cells.
is_ns4b214 = table['specific_new'] == 'NS4B214'
has_cells = table['count'] != 0
table = table[is_ns4b214 & has_cells]

In [None]:
# Share (in percent) of each row's count within the total count of its
# (time, donor) combination. Computed in one vectorised step: the previous
# row-wise chained assignment (`table[col][i] = ...`) writes to a temporary
# object and is not guaranteed to reach `table` (it never does with pandas
# Copy-on-Write).
table = table.copy()  # detach from the frame `table` was filtered from
donor_time_totals = table.groupby(['time', 'donor'], observed=True)['count'].transform('sum')
table['percent_cluster_per_donor'] = table['count'] * 100 / donor_time_totals

In [None]:
# Write the per-donor table to disk.
# NOTE(review): unlike the other exports this path has no figure sub-folder,
# so the file lands in the working directory - confirm this is intended.
per_donor_csv = 'specific_cells_percent_leiden_per_donor.csv'
table.to_csv(per_donor_csv)

### Strip plots
Strip plots for a selection of representative TCRs (in any case 7, 22, 67, 489, 1256, 1807)

In [None]:
# Colour per 'specific_new' label for the strip plot: the two NS4B214 labels
# in green tones, 'no_binding' in purple, every other listed label in black.
pal2 = {'NS4B214': 'green', 'NS4B214_c': 'lightgreen'}
pal2.update(dict.fromkeys(
    ['NS2B117', 'NS3286', 'NS3293', 'NS2A97', 'COV', 'EBV1', 'EBV2', 'FLU', 'HHV'],
    'black',
))
pal2['no_binding'] = 'purple'

In [None]:
# Strip plot of the 'NS4B214' obs column per clone, coloured by the
# 'specific_new' label, for clone ids 0-30 plus four additional clones.
# The figure size comes from the rc setting below; the former
# `plt.figsize=(150,10)` was removed because assigning an attribute on the
# pyplot module does not resize any figure.
sb.set(rc={'figure.figsize': (27, 5)})
sb.set_style("whitegrid")

clones = [*range(31), 67, 489, 1256, 1807]
old = mdata['gex'].obs[mdata['gex'].obs['clone_id'].isin(clones)]

sb.stripplot(data=old,
             x=old['clone_id'],
             y=old['NS4B214'],
             hue=old['specific_new'],
             palette=pal2, legend=False, jitter=True)
# NOTE(review): absolute, machine-specific output path.
plt.savefig('/Users/mimi/Sina/5_Visualisations/Supplement/NS4B214_clones_0-30_plus_tested.pdf', dpi=300)
plt.show()

### Specific cells over time per epitope

In [None]:
# One UMAP per epitope listed in uns['epitopes']: all cells as uncoloured
# background, with the cells specific for that epitope on top, coloured by
# time point.
os.makedirs('Supplement', exist_ok=True)  # savefig does not create missing directories

gex = mdata["gex"]
for epitope in gex.uns['epitopes']:
    ax = sc.pl.umap(gex, show=False, size=30)
    sc.pl.umap(gex[gex.obs['specific_new'] == epitope],
               color='time', ax=ax, show=False, size=30)
    plt.title(epitope)
    plt.tight_layout()
    plt.savefig(f'Supplement/Specific_cells_over_time_{epitope}.pdf', dpi=300)
    # The former `plt.figsize=(10,10)` was removed: assigning an attribute on
    # the pyplot module does not resize any figure.
    plt.show()

## CCR7 in CD62L gated subsets

In [None]:
# Box plots of the 'clr_CCR7-1' column per FACS phenotype, split into
# NS4B214-specific and 'no_binding' cells (to spot potentially naive-like
# cells).
obs = mdata["gex"].obs
custom_categories = ['NS4B214', 'no_binding']
plot_columns = ['FACS_Phenotype_v3', 'clr_CCR7-1', 'specific_new']
data_tmp = obs[obs['specific_new'].isin(custom_categories)][plot_columns]
# Restrict the hue categories to the two groups, in this order.
data_tmp['specific_new'] = data_tmp['specific_new'].astype(pd.CategoricalDtype(categories=custom_categories))

plt.figure(figsize=(8, 6))

sb.boxplot(data=data_tmp,
           x='FACS_Phenotype_v3', y='clr_CCR7-1',
           hue='specific_new', dodge=True,
           order=['N', 'SCM', 'CM', 'EM', 'EF'])
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('')
plt.ylabel('CCR7')
plt.title('CCR7 in CD62L gated populations')
# NOTE(review): absolute, machine-specific output path.
plt.savefig('/media/agschober/HDD12/3_scRNA-Seq_Sina/5_Visualisations/Figures_Paper/CCR7_in_CD62L_gated_populations.pdf', dpi=600)

In [None]:
# Box plots of the 'clr_CD62L' column per FACS phenotype, split into
# NS4B214-specific and 'no_binding' cells (to spot potentially naive-like
# cells).
obs = mdata["gex"].obs
custom_categories = ['NS4B214', 'no_binding']
plot_columns = ['FACS_Phenotype_v3', 'clr_CD62L', 'specific_new']
data_tmp = obs[obs['specific_new'].isin(custom_categories)][plot_columns]
# Restrict the hue categories to the two groups, in this order.
data_tmp['specific_new'] = data_tmp['specific_new'].astype(pd.CategoricalDtype(categories=custom_categories))

plt.figure(figsize=(8, 6))

sb.boxplot(data=data_tmp,
           x='FACS_Phenotype_v3', y='clr_CD62L',
           hue='specific_new', dodge=True,
           order=['N', 'SCM', 'CM', 'EM', 'EF'])
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.xlabel('')
plt.ylabel('CD62L')
plt.title('CD62L in CD62L gated populations')
# NOTE(review): absolute, machine-specific output path.
plt.savefig('/media/agschober/HDD12/3_scRNA-Seq_Sina/5_Visualisations/Figures_Paper/CD62L_in_CD62L_gated_populations.pdf', dpi=600)

### Gates with CCR7

In [None]:
# Scatter plot of CD45RA vs CD62L coloured by the 'clr_CCR7-1' column, with
# KDE contours, gate lines and quadrant percentages on top.
obs = mdata["gex"].obs
cd62l = obs['clr_CD62L']
cd45ra = obs['clr_CD45RA']
ccr7 = obs['clr_CCR7-1']

sb.scatterplot(data=obs, x=cd62l, y=cd45ra,
               alpha=0.5, hue=ccr7, palette='Spectral_r', legend=False)
sb.kdeplot(data=obs, x=cd62l, y=cd45ra,
           legend=False, fill=False, color='grey', levels=10, bw_adjust=0.6)

# Quadrant percentages relative to all cells
# (gates: CD45RA at 1.3, CD62L at 1.6).
n_cells = len(obs)
a = (len(obs[(cd45ra > 1.3) & (cd62l <= 1.6)]) / n_cells) * 100   # upper left
b = (len(obs[(cd45ra > 1.3) & (cd62l > 1.6)]) / n_cells) * 100    # upper right
c = (len(obs[(cd45ra <= 1.3) & (cd62l > 1.6)]) / n_cells) * 100   # lower right
d = (len(obs[(cd45ra <= 1.3) & (cd62l <= 1.6)]) / n_cells) * 100  # lower left

plt.axvline(x=1.6, color='black', linestyle='dashed')
plt.axhline(y=1.3, color='black', linestyle='dashed')
plt.xlabel('CD62L')
plt.ylabel('CD45RA')
plt.xlim(right=4)
plt.ylim(-0.4, 3)

# One label per quadrant corner, in the order a, b, c, d.
label_positions = [(-0.25, 2.75), (2.8, 2.75), (2.8, -0.35), (-0.25, -0.35)]
for (label_x, label_y), percent in zip(label_positions, [a, b, c, d]):
    plt.text(label_x, label_y, f'{percent:.2f}%',
             bbox=dict(facecolor='white', alpha=0, edgecolor='black'))

plt.savefig('Gating_strategy_CD45RA_CD62L_version_3_hue_CCR7.pdf', dpi=300)

In [None]:
# Scatter plot of CD45RA vs the 'clr_CCR7-1' column (also used as hue), with
# KDE contours and the horizontal CD45RA gate line only.
obs = mdata["gex"].obs
ccr7 = obs['clr_CCR7-1']
cd45ra = obs['clr_CD45RA']

sb.scatterplot(data=obs, x=ccr7, y=cd45ra,
               alpha=0.5, hue=ccr7, palette='Spectral_r', legend=False)
sb.kdeplot(data=obs, x=ccr7, y=cd45ra,
           legend=False, fill=False, color='grey', levels=10, bw_adjust=0.6)

# No vertical gate line is drawn on the CCR7 axis.
#plt.axvline(x=1.6, color='black', linestyle='dashed')
plt.axhline(y=1.3, color='black', linestyle='dashed')
plt.xlabel('CCR7')
plt.ylabel('CD45RA')
plt.xlim(right=4)
plt.ylim(-0.4, 3)

plt.savefig('Gating_strategy_CD45RA_CCR7_version_3_hue_CCR7.pdf', dpi=300)

In [None]:
# Density of the 'clr_CCR7-1' column for cells pre-gated on CD45RA > 1.3 and
# CD62L > 1.6 (double positive).
gex_all = mdata['gex']
double_positive = (gex_all.obs['clr_CD45RA'] > 1.3) & (gex_all.obs['clr_CD62L'] > 1.6)
adata = gex_all[double_positive]

sb.kdeplot(data=adata.obs, x=adata.obs['clr_CCR7-1'],
           bw_adjust=0.2, fill=True, color='#d4d4d4')
#plt.axvline(x=1, color='black', linestyle='dashed')
plt.savefig('Gating_strategy_CCR7_histogram_version_3_pregated.pdf', dpi=300)
plt.show()

In [None]:
# Density of the 'clr_CCR7-1' column for all cells (no pre-gating applied).
adata = mdata['gex']
ccr7_values = adata.obs['clr_CCR7-1']

sb.kdeplot(data=adata.obs, x=ccr7_values,
           bw_adjust=0.2, fill=True, color='#d4d4d4')
#plt.axvline(x=1, color='black', linestyle='dashed')
plt.savefig('Gating_strategy_CCR7_histogram_version_3_not_pregated.pdf', dpi=300)
plt.show()