# Analysis Part IV - CITE-seq annotations

In [None]:
%load_ext autoreload
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore')
import os
import scanpy as sc
import scirpy as ir
import anndata as ann
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mudata import MuData
import mudata

import tarfile
import warnings
from glob import glob

import anndata
import matplotlib.pyplot as plt
import muon as mu
import pandas as pd
import scanpy as sc
import scirpy as ir

%autoreload 2
import sys
sys.path.append('..')
import utility.annotation as utils_annotation
import utility.representation as utils_representation
import utility.visualisation as utils_vis

from sklearn.mixture import GaussianMixture
from matplotlib.colors import LinearSegmentedColormap

In [None]:
# Global plotting configuration.
# All scanpy figure parameters are set in ONE call on purpose: every call to
# set_figure_params re-applies its own defaults, so a second call that omits
# `dpi` resets the figure dpi and silently discards the requested dpi=150.
sc.settings.verbosity = 3
sc.set_figure_params(dpi=150, vector_friendly=True, color_map='viridis', transparent=True)
sb.set_style('whitegrid')

# Custom blue -> yellow -> red colormap used for the marker UMAPs below.
colors = ['darkblue', 'blue', 'lightblue', '#FFEA00', 'orange', 'red', 'darkred']
cust = LinearSegmentedColormap.from_list('custom_cmap', colors)
colormap = cust  # second name for the same colormap object

## Get input data

In [None]:
# Load the MuData object that the rest of the notebook annotates.
input_h5mu = '/Users/mimi/Sina/data_specificity_annotated_final_pseudotime_cite.h5mu'
mdata = mu.read(input_h5mu)

In [None]:
## Exclude false positive cells from B15 and B20 for NS4B214!!!
# A single .loc assignment is used instead of chained indexing
# (obs[col][mask] = ...), which may write to a temporary copy and leave the
# original column unchanged.
gex_obs = mdata["gex"].obs
false_positive = gex_obs['donor'].isin(['B15', 'B20']) & gex_obs['specific_new'].isin(['NS4B214'])
# A categorical column only accepts known categories, so register the label first if needed.
if (isinstance(gex_obs['specific_new'].dtype, pd.CategoricalDtype)
        and 'no_binding' not in gex_obs['specific_new'].cat.categories):
    gex_obs['specific_new'] = gex_obs['specific_new'].cat.add_categories(['no_binding'])
gex_obs.loc[false_positive, 'specific_new'] = 'no_binding'

## CITE-seq

In [None]:
def clr(x):
    """Apply a centered-log-ratio style transform to a 1-D array of values.

    Every value is divided by ``exp(sum(log1p(x)) / len(x))`` and the scaled
    values are then passed through ``log1p``. The input array is not modified.
    """
    mean_log = np.log1p(x).sum() / x.shape[0]
    scaled = x / np.exp(mean_log)
    return np.log1p(scaled)

In [None]:
# obs columns holding the CITE-seq signals used throughout this notebook.
custom_cite_ids = [
    'CD45RA',
    'CCR7-1',
    'CD95',
    'CD62L',
    'CXCR3-1',
    'CD27',
]

In [None]:
# Keep the list of CITE-seq column names in the MuData object's unstructured metadata.
mdata.uns['cite_ids'] = custom_cite_ids

In [None]:
# Write a `clr_<marker>` column per marker; only cells with a measured
# (non-NaN) value are transformed, all other cells stay NaN.
gex_obs = mdata["gex"].obs
for c in custom_cite_ids:
    measured = ~gex_obs[c].isna()
    gex_obs.loc[measured, f'clr_{c}'] = clr(gex_obs.loc[measured, c].values)

In [None]:
# Build a protein-only AnnData (cells x CITE-seq markers) from the CLR columns
# and rank the markers per leiden cluster.
gex_obs = mdata["gex"].obs
clr_columns = [f'clr_{el}' for el in custom_cite_ids]
mdata_cite = ann.AnnData(X=gex_obs[clr_columns].values,
                         obs=gex_obs[['leiden'] + custom_cite_ids])
mdata_cite.var_names = custom_cite_ids
# Keep only cells with a CITE-seq measurement. `.copy()` turns the filtered
# view into a standalone object so rank_genes_groups can store its result in .uns.
mdata_cite = mdata_cite[~mdata_cite.obs[custom_cite_ids[0]].isna()].copy()

sc.tl.rank_genes_groups(mdata_cite, groupby='leiden', n_genes=20)
# The plot reads the grouping from .uns['rank_genes_groups']; the former
# misspelled `groubpy=` keyword was silently ignored and has been dropped.
sc.pl.rank_genes_groups(mdata_cite, show=False)
plt.tight_layout()
plt.show()
mdata.uns['rank_genes_groups_leiden_cite'] = mdata_cite.uns['rank_genes_groups']

### 'Gating' using the Cite-Seq signals

In [None]:
# Contour (KDE) plot of CD62L vs CD45RA with the gate lines and the share of
# cells per quadrant.
# NOTE(review): percentages use ALL cells as denominator; cells with NaN CLR
# values fall into no quadrant, so the four numbers may not add up to 100.
#sb.scatterplot(data=mdata["gex"].obs, x=mdata["gex"].obs['clr_CD62L'], y=mdata["gex"].obs['clr_CD45RA'],
                # alpha=0.1, hue=mdata["gex"].obs['clr_CD95'], palette='Spectral_r', legend=False)
gex_obs = mdata["gex"].obs
cd62l_gate = 2
cd45ra_gate = 1.3

sb.kdeplot(data=gex_obs, x=gex_obs['clr_CD62L'], y=gex_obs['clr_CD45RA'],
           legend=False, fill=False, color='grey', levels=30, bw_adjust=0.6)

cd45ra_hi = gex_obs['clr_CD45RA'] > cd45ra_gate
cd45ra_lo = gex_obs['clr_CD45RA'] <= cd45ra_gate
cd62l_hi = gex_obs['clr_CD62L'] > cd62l_gate
cd62l_lo = gex_obs['clr_CD62L'] <= cd62l_gate
n_cells = len(gex_obs)

a = (int((cd45ra_hi & cd62l_lo).sum()) / n_cells) * 100
b = (int((cd45ra_hi & cd62l_hi).sum()) / n_cells) * 100
c = (int((cd45ra_lo & cd62l_hi).sum()) / n_cells) * 100
d = (int((cd45ra_lo & cd62l_lo).sum()) / n_cells) * 100

plt.axvline(x=cd62l_gate, color='black', linestyle='dashed')
plt.axhline(y=cd45ra_gate, color='black', linestyle='dashed')
plt.xlabel('CD62L')
plt.ylabel('CD45RA')
plt.xlim(right=4)
plt.ylim(-0.4, 3)

# One label per quadrant: (x, y, percentage).
quadrant_labels = [(-0.25, 2.75, a), (2.8, 2.75, b), (2.8, -0.35, c), (-0.25, -0.35, d)]
for x_pos, y_pos, share in quadrant_labels:
    plt.text(x_pos, y_pos, f'{share:.2f}%', bbox=dict(facecolor='white', alpha=0, edgecolor='black'))

plt.savefig('Gating_strategy_CD45RA_CD62L_version_4_contour.png', dpi=600)

In [None]:
# CD62L vs CD95 for all cells: scatter coloured by CD45RA with KDE contours on top.
gex_obs = mdata["gex"].obs
cd62l_clr = gex_obs['clr_CD62L']
cd95_clr = gex_obs['clr_CD95']

sb.scatterplot(data=gex_obs, x=cd62l_clr, y=cd95_clr, alpha=0.2,
               hue=gex_obs['clr_CD45RA'], palette='Spectral_r', legend=False)
sb.kdeplot(data=gex_obs, x=cd62l_clr, y=cd95_clr,
           legend=False, fill=False, color='grey', levels=20, bw_adjust=0.6)

# Dashed gate lines.
gate_line = dict(color='black', linestyle='dashed')
plt.axvline(x=2, **gate_line)
plt.axhline(y=1, **gate_line)
plt.xlabel('CD62L')
plt.ylabel('CD95')
plt.xlim(right=4)
plt.ylim(-0.4, 3)
plt.savefig('Gating_strategy_CD95_CD62L_version_4.png', dpi=600)

In [None]:
# CD95 density for cells pre-gated on CD45RA > 1 and CD62L > 1.6,
# with the CD95 cut-off (0.5) drawn as a dashed line.
gex = mdata['gex']
pregate = (gex.obs['clr_CD45RA'] > 1) & (gex.obs['clr_CD62L'] > 1.6)
adata = gex[pregate]
sb.kdeplot(data=adata.obs, x=adata.obs['clr_CD95'],
           bw_adjust=0.2, fill=True, color='#d4d4d4')
plt.axvline(x=0.5, color='black', linestyle='dashed')
plt.savefig('Gating_strategy_CD95_histogram_version_1_N.png', dpi=600)
plt.show()

In [None]:
# One UMAP per CITE-seq marker, coloured by its CLR value on a fixed 0-2 scale.
# The six formerly copy-pasted cells are expressed as one loop so that the
# plotting parameters cannot drift apart; output file names are unchanged.
marker_umap_files = {
    'clr_CD27': 'UMAP_CD27.pdf',
    'clr_CXCR3-1': 'UMAP_CXCR3.pdf',
    'clr_CCR7-1': 'UMAP_CCR7.pdf',
    'clr_CD95': 'UMAP_CD95.pdf',
    'clr_CD45RA': 'UMAP_CD45RA.pdf',
    'clr_CD62L': 'UMAP_CD62L.pdf',
}
for clr_key, out_file in marker_umap_files.items():
    sc.pl.umap(mdata['gex'], color=[clr_key], size=50, show=False,
               cmap=cust, vmax=2, vmin=0)
    plt.tight_layout()
    plt.savefig(out_file, dpi=600)
    plt.show()

In [None]:
# Binary gate flags (1 = CLR value >= cut-off, else 0) for four threshold sets.
# Columns are named '<marker>+<version>', e.g. 'CD45RA+v1'.
# NOTE(review): a NaN CLR value compares False and is therefore stored as 0,
# i.e. cells without a measurement are flagged negative - confirm this is intended.
gate_cutoffs = {
    'v1': {'CD45RA': 1, 'CD62L': 1.6, 'CD95': 0.5},
    'v2': {'CD45RA': 1, 'CD62L': 1.6, 'CD95': 1},
    'v3': {'CD45RA': 1.3, 'CD62L': 1.6, 'CD95': 1},
    'v4': {'CD45RA': 1.3, 'CD62L': 2, 'CD95': 1},
}

gex_obs = mdata["gex"].obs
for version, cutoffs in gate_cutoffs.items():
    for marker, cutoff in cutoffs.items():
        gex_obs[f'{marker}+{version}'] = (gex_obs[f'clr_{marker}'] >= cutoff).astype('int64')

In [None]:
# Derive a FACS-like phenotype per cell and threshold version from the binary
# gate flags:
#   CD45RA+ CD62L-        -> EF
#   CD45RA- CD62L-        -> EM
#   CD45RA- CD62L+        -> CM
#   CD45RA+ CD62L+ CD95+  -> SCM
#   CD45RA+ CD62L+ CD95-  -> N
# Anything else (only possible if a flag is neither 0 nor 1) -> 'NONE'.
# The labels are computed for all cells at once; the former per-cell loop used
# chained positional assignment (obs[col][i] = ...), which is slow and is not
# guaranteed to write back into obs.
gex_obs = mdata["gex"].obs
for j in ['v1', 'v2', 'v3', 'v4']:
    cd45ra = gex_obs['CD45RA+' + j].to_numpy()
    cd62l = gex_obs['CD62L+' + j].to_numpy()
    cd95 = gex_obs['CD95+' + j].to_numpy()

    # Conditions are mutually exclusive, so their order does not affect the result.
    conditions = [
        (cd45ra == 1) & (cd62l == 0),
        (cd45ra == 0) & (cd62l == 0),
        (cd45ra == 0) & (cd62l == 1),
        (cd45ra == 1) & (cd62l == 1) & (cd95 == 1),
        (cd45ra == 1) & (cd62l == 1) & (cd95 == 0),
    ]
    labels = ['EF', 'EM', 'CM', 'SCM', 'N']
    gex_obs['FACS_Phenotype_' + j] = np.select(conditions, labels, default='NONE')

In [None]:
# Overlay UMAPs: all cells as background, cells positive for one gate on top,
# coloured by the flag of a second gate. The four formerly separate,
# copy-pasted cells are driven by one table of (gated marker, colour marker);
# titles and output file names are unchanged.
overlay_pairs = [
    ('CD45RA', 'CD95'),
    ('CD45RA', 'CD62L'),
    ('CD62L', 'CD45RA'),
    ('CD95', 'CD45RA'),
]
for gated, coloured in overlay_pairs:
    for j in ['v1', 'v2', 'v3', 'v4']:
        ax = sc.pl.umap(mdata["gex"], show=False, size=30)
        positive = mdata["gex"].obs[gated + '+' + j] == 1
        ax = sc.pl.umap(mdata["gex"][positive], color=coloured + '+' + j, ax=ax,
                        show=False, size=30, cmap=cust)
        plt.title(f'{gated}+ colored by {coloured} ' + str(j))
        plt.savefig(f'UMAP_{gated}_colored_by_{coloured}_{j}.png', dpi=600)
        plt.show()

In [None]:
# Grid of UMAPs: per marker the CLR signal followed by its four gate versions.
rcParams['figure.figsize'] = (5, 5)

panel_keys = []
for marker in ['CD45RA', 'CD62L', 'CD95']:
    panel_keys.append(f'clr_{marker}')
    panel_keys.extend(f'{marker}+{version}' for version in ['v1', 'v2', 'v3', 'v4'])

sc.pl.umap(mdata["gex"], color=panel_keys, show=False, color_map=cust, size=30, ncols=5)
plt.tight_layout()
plt.savefig('UMAP_comparison_v1-4.png', dpi=600)
plt.show()

### Histograms stacked per Leiden Cluster

In [None]:
# Stacked histograms of the CLR signal per leiden cluster with the gate drawn
# as a dashed line. The three formerly copy-pasted cells are driven by one
# table: (marker, right x-limit, gate position, version tag of the file name).
histogram_specs = [
    ('CD62L', 4, 2, 4),
    ('CD45RA', 3, 1, 1),
    ('CD95', 2.5, 0.5, 1),
]
gex_obs = mdata["gex"].obs
for marker, x_right, gate, version in histogram_specs:
    sb.histplot(data=gex_obs, x=gex_obs[f'clr_{marker}'],
                hue=gex_obs['leiden'], multiple='stack', legend=True)
    plt.xlim(right=x_right)
    plt.ylabel('number of cells')
    plt.xlabel(marker)
    plt.axvline(x=gate, color='black', linestyle='dashed')
    plt.tight_layout()
    plt.savefig(f'Histogram_stacked_leiden_{marker}_version_{version}.png', dpi=600)
    plt.show()

### Correlation Protein / RNA

In [None]:
# Protein (CLR) signal against the expression of the gene named next to it.
protein_rna_pairs = [('clr_CD62L', 'SELL'), ('clr_CD45RA', 'PTPRC'), ('clr_CD95', 'FAS')]
for protein_key, gene in protein_rna_pairs:
    sc.pl.scatter(mdata["gex"], x=protein_key, y=gene)

In [None]:
# Side-by-side UMAPs: protein signal (left) and the paired gene (right), one row per marker.
protein_rna_panels = ['clr_CD45RA', 'PTPRC',
                      'clr_CD62L', 'SELL',
                      'clr_CD95', 'FAS']
sc.pl.umap(mdata["gex"], color=protein_rna_panels, show=False,
           color_map=cust, size=30, ncols=2)
plt.tight_layout()
plt.savefig('UMAP_comparison_protein_rna.png', dpi=600)
plt.show()

## Subset distribution

### UMAP all cells

In [None]:
# UMAP of the leiden clusters followed by one UMAP per FACS phenotype version.
pal = {'N': '#d4d4d4',
      'SCM': '#90bff9',
      'CM': '#0e4d92',
      'EM': '#21918c',
      'EF': '#00c000'}

cite_dir = '/media/agschober/HDD12/3_scRNA-Seq_Sina/4_Cite-Seq'

rcParams['figure.figsize'] = (6, 6)
sc.pl.umap(mdata["gex"], color=['leiden'], show=False, size=30)
plt.tight_layout()
plt.savefig(f'{cite_dir}/UMAPs_leiden.png', dpi=600)
plt.show()

for version in ['v1', 'v2', 'v3', 'v4']:
    rcParams['figure.figsize'] = (6, 6)
    sc.pl.umap(mdata["gex"], color=[f'FACS_Phenotype_{version}'], show=False, size=30, palette=pal)
    plt.tight_layout()
    plt.savefig(f'{cite_dir}/Gating_strategy_v1-4/UMAP_FACS_Phenotype_{version}.png', dpi=600)
    plt.show()

In [None]:
# violin plots
# Phenotype columns are put into a fixed biological order before plotting.
specific_order = ['N', 'SCM', 'CM', 'EM', 'EF']
violin_dir = '/media/agschober/HDD12/3_scRNA-Seq_Sina/4_Cite-Seq/Gating_strategy_v1-4'

gex_obs = mdata["gex"].obs
for j in ['v1', 'v2', 'v3', 'v4']:
    column = 'FACS_Phenotype_' + j
    # astype('category') makes the `.cat` accessor available even if no earlier
    # plotting call has converted the string column yet.
    # reorder_categories raises if the observed labels are not exactly `specific_order`.
    gex_obs[column] = gex_obs[column].astype('category').cat.reorder_categories(
        specific_order, ordered=True)

rcParams['figure.figsize'] = (5, 5)
for j in ['v1', 'v2', 'v3', 'v4']:
    # show=False keeps the figure open; without it the figure is shown
    # immediately and savefig would write a new, empty figure.
    sc.pl.violin(mdata["gex"], keys=['FACS_Phenotype_' + j], groupby='leiden', show=False)
    plt.tight_layout()
    plt.savefig(f'{violin_dir}/Violin_FACS_Phenotype_{j}.png', dpi=600)
    plt.show()

### UMAP specific cells over time

In [None]:
#plot specific cells annotated with FACS phenotypes over time
# For every threshold version and time point: all cells as background and the
# epitope-specific cells of that time point on top, coloured by FACS phenotype.
pal = {'N': 'black',
      'SCM': '#90bff9',
      'CM': '#0e4d92',
      'EM': '#21918c',
      'EF': '#00c000'}

out_dir = '/media/agschober/HDD12/3_scRNA-Seq_Sina/4_Cite-Seq/Gating_strategy_v1-4'

for j in ['v1', 'v2', 'v3', 'v4']:
    for time in (mdata['gex'].obs['time']).unique():
        for ep in ['NS4B214']:
            ax = sc.pl.umap(mdata["gex"], show=False, size=30)
            # The loop variable `ep` now drives the filter (it was previously
            # unused and the epitope was hard-coded a second time).
            selected = (mdata["gex"].obs['time'] == time) & (mdata["gex"].obs['specific_new'] == ep)
            sc.pl.umap(mdata["gex"][selected],
                       color='FACS_Phenotype_'+j, ax=ax, show=False, size=60, palette=pal)
            plt.title(str(time)+str(j))
            plt.tight_layout()
            plt.savefig(f'{out_dir}/Specific_cells_over_time_{time}_colored_by_FACS_{j}.png')
            # The former `plt.figsize=(10,10)` only set an attribute on the
            # pyplot module and never resized a figure, so it was removed.
            plt.show()

In [None]:
#single umaps per FACS phenotype
# For each threshold version and phenotype: all cells as background, the cells
# of that phenotype on top.

pal = {'N': 'black',
      'SCM': '#90bff9',
      'CM': '#0e4d92',
      'EM': '#21918c',
      'EF': '#00c000'}

# phenotype label -> prefix of the output file name
file_prefix = {'N': 'Naive', 'SCM': 'SCM', 'CM': 'CM', 'EM': 'EM', 'EF': 'EF'}

for j in ['v1', 'v2', 'v3', 'v4']:
    phenotype_col = 'FACS_Phenotype_'+j
    for phenotype, prefix in file_prefix.items():
        ax = sc.pl.umap(mdata["gex"], show=False, size=30)
        members = mdata["gex"].obs[phenotype_col] == phenotype
        ax = sc.pl.umap(mdata["gex"][members], ax=ax, show=False, size=30,
                        color=phenotype_col, palette=pal)
        plt.title(phenotype + ' ' + j)
        plt.savefig(f'{prefix}_FACS_Phenotype_UMAP_{j}.png', dpi=600)
        plt.show()

In [None]:
# Normalised FACS phenotype composition of every leiden cluster, one figure per version.
pal = {'N': '#d4d4d4',
      'SCM': '#90bff9',
      'CM': '#0e4d92',
      'EM': '#21918c',
      'EF': '#00c000'}

for version in ['v1', 'v2', 'v3', 'v4']:
    ir.pl.group_abundance(
        mdata["gex"],
        groupby='leiden',
        target_col=f'FACS_Phenotype_{version}',
        normalize=True,
        fig_kws={'figsize': (12, 5)},
        color=pal,
    )
    plt.savefig(f'FACS_Phenotypes_over_Leiden_{version}.png', dpi=600)
    plt.show()

In [None]:
# FACS phenotype composition per time point, separately for the first epitope
# list (titled 'YF specific') and the second (titled 'control specific').
pal = {'N': '#d4d4d4',
      'SCM': '#90bff9',
      'CM': '#0e4d92',
      'EM': '#21918c',
      'EF': '#00c000'}

yf_epitopes = ['NS4B214', 'NS2B117', 'NS3293', 'NS3286', 'NS324', 'NS5672',
               'NS2A97', 'NS4B165']
control_epitopes = ['COV', 'HHV', 'FLU', 'EBV1', 'EBV2']
yf_times = ['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx']
control_times = ['d14', 'd21', 'd90', 'd365', 'dx']

for j in ['v1', 'v2', 'v3', 'v4']:
    phenotype_col = 'FACS_Phenotype_'+j

    is_yf = mdata["gex"].obs['specific_new'].isin(yf_epitopes)
    ir.pl.group_abundance(mdata["gex"][is_yf], groupby='time', target_col=phenotype_col,
                          normalize=True, fig_kws={'figsize': (8, 4)},
                          sort=yf_times, color=pal)
    plt.title('YF specific cells over time '+j)
    plt.savefig(f'YF_specific_cells_over_FACS_Phenotype_over_time_{j}.png')
    plt.show()

    is_control = mdata["gex"].obs['specific_new'].isin(control_epitopes)
    ir.pl.group_abundance(mdata["gex"][is_control], groupby='time', target_col=phenotype_col,
                          normalize=True, fig_kws={'figsize': (8, 4)},
                          sort=control_times, color=pal)
    plt.title('control specific cells over time')
    plt.savefig(f'Control_specific_cells_over_FACS_Phenotype_over_time_{j}.png')
    plt.show()

In [None]:
# NS4B214-specific cells only: FACS phenotype composition per leiden cluster,
# one figure per threshold version and time point.
time_points = ['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx']
for j in ['v1', 'v2', 'v3', 'v4']:
    for time in time_points:
        keep = (mdata["gex"].obs['specific_new'] == 'NS4B214') & (mdata["gex"].obs['time'] == time)
        ir.pl.group_abundance(mdata["gex"][keep],
                              groupby='leiden',
                              target_col='FACS_Phenotype_'+j,
                              normalize=True, fig_kws={'figsize': (12, 5)})
        plt.title(time)
        plt.savefig(f'FACS_Phenotypes_specific_cells_over_leiden_and_time_{time}_normalised_{j}.png', dpi=600)
        plt.show()

In [None]:
#Plot specific cell distribution per leiden cluster over time
# `vals` holds the number of NS4B214-specific cells per leiden cluster (rows)
# and time point (columns).
time_points = ['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx']
vals = pd.DataFrame(0, columns=time_points, index=list(range(14)))

gex_obs = mdata["gex"].obs
specific = gex_obs[gex_obs['specific_new'] == 'NS4B214']
for time in time_points:
    leiden_at_time = specific.loc[specific['time'] == time, 'leiden']
    # Iterate over the frame's own index: the former `range(13)` left the last
    # row (cluster 13) unfilled although the index lists 14 clusters.
    for cluster in vals.index:
        # .loc writes into `vals` itself; chained `vals[time][i] = ...` may not.
        vals.loc[cluster, time] = int((leiden_at_time == str(cluster)).sum())

In [None]:
# Convert counts to percentages per time point (each column sums to 100).
# A new frame is created so the raw counts in `vals` are kept; the former
# `valsper = vals` was only a second name for the same object.
# Casting to float makes a time point without any cells yield NaN instead of
# an error from dividing Python integers by zero.
counts = vals.astype(float)
valsper = counts / counts.sum() * 100

In [None]:
# Write the percentage table to a CSV file (relative path, i.e. the current working directory).
valsper.to_csv('percentage_NS4B214_specific_cells_over_time_and_leiden.csv')

In [None]:
# Line plot: one line per row of `valsper`, x axis = its columns (time points).
time_axis = valsper.columns
plt.plot(time_axis, valsper.T, label=valsper.index)
plt.legend(title='leiden cluster', loc='upper left', bbox_to_anchor=(1, 1), handlelength=1.5)
plt.ylabel('% of cells')
plt.title('NS4B214 specific cells over leiden clusters')
plt.savefig('NS4B214_specific_cells_over_time_over_leiden_clusters.pdf')
plt.show()

In [None]:
# Reference UMAP coloured by leiden cluster (figure is left open because show=False).
sc.pl.umap(mdata["gex"], color=['leiden'], cmap=cust, show=False)

In [None]:
#same but grouped by Phenotypes
# Number of NS4B214-specific cells per time point, with leiden clusters pooled
# into phenotype groups.
time_points = ['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx']
vals = pd.DataFrame(columns=time_points,
                    index=['N', 'SCM', 'CM', 'RM', 'EM', 'EF', 'CYC'])

# NOTE(review): clusters '9' and '7' are listed in two groups each (CM/EF and
# RM/EF) and are therefore counted twice; clusters 5, 6 and 13 are in no
# group - confirm the mapping. 'SCM' has no clusters and 'd0' is not filled,
# so that row and column stay NaN.
phenotype_clusters = {
    'N': ['4', '10'],
    'CM': ['9', '11'],
    'RM': ['7', '8'],
    'EM': ['2', '3'],
    'EF': ['9', '0', '7', '1'],
    'CYC': ['12'],
}

gex_obs = mdata["gex"].obs
# 'specific_new' (not 'specific') is used so the false-positive exclusion for
# donors B15/B20 applies here as in the other NS4B214 analyses.
is_specific = gex_obs['specific_new'] == 'NS4B214'
for time in time_points[1:]:
    at_time = is_specific & (gex_obs['time'] == time)
    for phenotype, clusters in phenotype_clusters.items():
        # .loc writes into `vals` itself; chained `vals[time][...] = ...` may not.
        vals.loc[phenotype, time] = int((at_time & gex_obs['leiden'].isin(clusters)).sum())

In [None]:
# Convert counts to percentages per time point (each filled column sums to 100).
# A new frame is created so the raw counts in `vals` are kept; the former
# `valsper = vals` was only a second name for the same object.
# Rows/columns that were never filled stay NaN, and an all-zero column gives
# NaN instead of a division-by-zero error.
counts = vals.astype(float)
valsper = counts / counts.sum() * 100

In [None]:
# Line plot of the grouped percentages: one line per row of `valsper`.
plt.figure(figsize=(8, 6))
time_axis = valsper.columns
plt.plot(time_axis, valsper.T, label=valsper.index)
plt.legend(title='Phenotypes', loc='upper left', bbox_to_anchor=(1, 1), handlelength=1.5)
plt.ylabel('% of cells')
plt.title('NS4B214 specific cells over grouped leiden clusters')

In [None]:
#same plot for FACS Phenotypes
# `valsf`: number of NS4B214-specific cells per FACS phenotype (rows) and time
# point (columns); `valsfper`: the same as percentage per time point.
# NOTE(review): the column 'FACS_Phenotype' (without version suffix) is not
# created in the visible part of this notebook - presumably it is already
# present in the loaded file; confirm which threshold version it holds.
time_points = ['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx']
phenotypes = ['N', 'SCM', 'CM', 'EM', 'EF']
valsf = pd.DataFrame(0, columns=time_points, index=phenotypes)

gex_obs = mdata["gex"].obs
specific = gex_obs[gex_obs['specific_new'] == 'NS4B214']
for time in time_points:
    phenotype_at_time = specific.loc[specific['time'] == time, 'FACS_Phenotype']
    for phenotype in phenotypes:
        # .loc writes into `valsf` itself; chained `valsf[time][i] = ...` may not.
        valsf.loc[phenotype, time] = int((phenotype_at_time == phenotype).sum())

# Separate frame for the percentages so the counts in `valsf` are kept;
# a time point without cells yields NaN.
valsfper = valsf.astype(float) / valsf.sum() * 100

valsfper.to_csv('percentage_NS4B214_specific_cells_over_time_and_FACS_Phenotype.csv')

In [None]:
# Line plot of the FACS phenotype percentages: one line per row of `valsfper`.
plt.figure(figsize=(8, 6))
time_axis = valsfper.columns
plt.plot(time_axis, valsfper.T, label=valsfper.index)
plt.legend(title='FACS Phenotypes', loc='upper left', bbox_to_anchor=(1, 1), handlelength=1.5)
plt.ylabel('% of cells')
plt.title('NS4B214 specific cells over FACS Phenotypes')
plt.savefig('NS4B214_specific_cells_over_FACS_Phenotypes.pdf', dpi=600)
plt.show()

In [None]:
# Display the percentage table as the cell output.
valsfper

In [None]:
# Display the 'epitopes' entry stored in the gex modality's .uns.
mdata['gex'].uns['epitopes']

In [None]:
# Leiden cluster composition of the NS4B214-specific cells at each time point.
time_order = ['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx']
ns4b214_cells = mdata["gex"][mdata["gex"].obs['specific_new'] == 'NS4B214']
ir.pl.group_abundance(ns4b214_cells,
                      groupby='time',
                      target_col='leiden',
                      normalize=True, fig_kws={'figsize': (8, 4)},
                      sort=time_order)
plt.title('NS4B214 specific cells over time')
plt.savefig('NS4B214_specific_cells_over_leiden_over_time.png')
plt.show()

In [None]:
# Leiden cluster composition per time point, separately for the first epitope
# list (titled 'YF specific') and the second (titled 'control specific').
yf_epitopes = ['NS4B214', 'NS2B117', 'NS3293', 'NS3286', 'NS324', 'NS5672',
               'NS2A97', 'NS4B165']
control_epitopes = ['COV', 'HHV', 'FLU', 'EBV1', 'EBV2']

is_yf = mdata["gex"].obs['specific_new'].isin(yf_epitopes)
ir.pl.group_abundance(mdata["gex"][is_yf],
                      groupby='time',
                      target_col='leiden',
                      normalize=True, fig_kws={'figsize': (8, 4)},
                      sort=['d0', 'd7', 'd11', 'd14', 'd21', 'd28', 'd49', 'd90', 'd365', 'dx'])
plt.title('YF specific cells over time')
plt.savefig('YF_specific_cells_over_leiden_over_time.png')
plt.show()

is_control = mdata["gex"].obs['specific_new'].isin(control_epitopes)
ir.pl.group_abundance(mdata["gex"][is_control],
                      groupby='time',
                      target_col='leiden',
                      normalize=True, fig_kws={'figsize': (8, 4)},
                      sort=['d14', 'd21', 'd90', 'd365', 'dx'])
plt.title('control specific cells over time')
plt.savefig('Control_specific_cells_over_leiden_over_time.png')
plt.show()

In [None]:
# Leiden cluster composition per control epitope, pooled over all time points.
# NOTE(review): this cell reads the 'specific' column while the neighbouring
# cells use 'specific_new' - confirm that this is intended.
control_epitopes = ['COV', 'FLU', 'HHV', 'EBV1', 'EBV2']
is_control = mdata["gex"].obs['specific'].isin(control_epitopes)
ir.pl.group_abundance(mdata["gex"][is_control],
                      groupby='specific',
                      target_col='leiden',
                      normalize=True, fig_kws={'figsize': (8, 4)})
plt.title('Control epitope specific cells phenotype by Cluster')
plt.show()


In [None]:
#Plot leiden clusters with hue FACS
# Absolute (normalize=False) numbers of cells per FACS phenotype in each leiden cluster.
# NOTE(review): 'FACS_Phenotype' (no version suffix) is not created in the
# visible part of this notebook - presumably it comes from the loaded file; confirm.
ir.pl.group_abundance(mdata["gex"], groupby='leiden', target_col='FACS_Phenotype', 
                      normalize=False, fig_kws={'figsize': (12, 5)})

In [None]:
# Count cells per (donor, time, specificity, phenotype) combination and export to CSV.
# NOTE(review): 'FACS_Phenotype_streng' is not created in the visible part of
# this notebook - presumably it comes from the loaded file; confirm.
count_columns = ['donor', 'time', 'specific_new', 'FACS_Phenotype_streng']
d = mdata['gex'].obs[count_columns].value_counts()
d.to_csv('Cells_per_Donor_per_Time_per_FACS_per_Specificity_CD95.csv')

## Save Data

In [None]:
# Write the annotated MuData object to disk.
# NOTE(review): this absolute path differs from the location the data was read
# from at the top of the notebook - confirm the target on the executing machine.
mdata.write("/media/agschober/HDD12/3_scRNA-Seq_Sina/4_Cite-Seq/data_specificity_annotated_final_pseudotime_cite.h5mu")

In [None]:
# Print the session/package information for reproducibility.
import session_info
session_info.show()