In [None]:
%load_ext autoreload

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os

import scanpy as sc
import scirpy as ir
import anndata as ann
import numpy as np
import pandas as pd
import seaborn as sb
import decoupler as dc
from tqdm import tqdm
import math
from scipy import stats, sparse

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as clrs

from matplotlib import rcParams
import matplotlib.gridspec as gridspec

In [None]:
%autoreload 2
import sys
sys.path.append('..')
import utils.visualisation as utils_vis
import utils.representation as utils_rep

In [None]:
# Global plotting configuration for the notebook.
path_figs = '../../figures/dextramer/'

sc.settings.set_figure_params(dpi=600)
# Set the figure directory on the active settings object. The previous
# `sc._settings.ScanpyConfig(figdir=...)` only built a new, unused config object.
sc.settings.figdir = path_figs
sc.settings.verbosity = 3
# `vector_friendly=False` is passed directly instead of being set to True here and
# then overridden through the private `_vector_friendly` attribute afterwards.
# NOTE(review): this call does not pass `dpi`, so it presumably resets the figure dpi
# chosen two lines above to scanpy's default - confirm which value is intended.
sc.set_figure_params(vector_friendly=False, color_map='viridis', transparent=True)
sb.set_style('whitegrid')

colormap = 'flare'
binding_mode = 'binding_ct'
dpi = 600

In [None]:
# Load the annotated CD8 dataset.
adata = sc.read('../../data/dextramer/02_dex_annotated_cd8.h5ad')
# Make sure uns['log1p']['base'] exists and is None; `setdefault` avoids a KeyError
# when the file was written without a 'log1p' entry.
adata.uns.setdefault('log1p', {})['base'] = None

In [None]:
# Cell-type specific figure folder; all plots below are written to its 'avidity' subfolder.
path_figs = f'../../figures/dextramer/{adata.uns["celltype"]}'
# Create the output folder up front so the later plt.savefig calls cannot fail on a missing directory.
os.makedirs(f'{path_figs}/avidity', exist_ok=True)

In [None]:
# Identifier lists stored in adata.uns; `cite_ids_full` is the standard CITE ids followed by the custom ones.
epitopes, cite_ids, custom_cite_ids = (
    adata.uns[key] for key in ('epitopes', 'cite_ids', 'custom_cite_ids')
)
cite_ids_full = [*cite_ids.tolist(), *custom_cite_ids.tolist()]

In [None]:
# Shared constants: binding column, time-point order (with and without 'X3') and the cell type label.
cell = adata.uns['celltype']
binding_mode = 'binding_ct'
time_order_wo_X = ['P1', 'S1', 'S2', 'S3', 'T1', 'T2', 'T3']
time_order = time_order_wo_X + ['X3']

In [None]:
# Order the Leiden clusters by their mean diffusion pseudotime (ascending).
mean_dpt_per_cluster = adata.obs.groupby('leiden_CD8')['dpt_pseudotime'].mean()
leiden_dpt_order = list(mean_dpt_per_cluster.sort_values().index)
# Same order without cluster '11'.
leiden_dpt_order_wo11 = list(leiden_dpt_order)
leiden_dpt_order_wo11.remove('11')

In [None]:
# Pseudotime order restricted to the clusters shown in the combined old/new clone figure.
leiden_order_tmp = list(leiden_dpt_order)
for excluded_cluster in ('11', '12', '7', '9'):
    leiden_order_tmp.remove(excluded_cluster)
leiden_order_tmp

In [None]:
# Clonotypes handled separately from the main analysis: they are filtered out of most
# plots below and shown on their own in the "New clones IFNG" section.
new_cts = ['9130.0', '11251.0', '15599.0', '18755.0', '19327.0', '20205.0']

In [None]:
sc.pl.umap(adata, color='leiden_CD8')
# scanpy stores `<key>_colors` in the order of the categorical's categories. Pairing the
# colours with `value_counts().index` (sorted by cluster size) only matches the UMAP
# colours when cluster sizes happen to follow category order, so use the categories.
colors_leiden = dict(zip(adata.obs['leiden_CD8'].cat.categories, adata.uns['leiden_CD8_colors']))
colors_leiden

In [None]:
# Quick look at donor / time point / binding for a handful of clonotypes.
clones_to_inspect = ['41.0', '5546.0', '1701.0', '16112.0', '2495.0']
is_inspected = adata.obs['clone_id'].isin(clones_to_inspect)
adata[is_inspected].obs[['clone_id', 'donor', 'time', 'binding_ct']]

## Detection Pattern

In [None]:
mapping_a04 = {
    '7808.0': 'early',
    '6319.0': 'early',
    '772.0': 'persisting',
    '646.0': 'early',
    '201.0': 'early',
    '297.0': 'persisting',
    '7100.0': 'persisting',
    '6012.0': 'persisting',
    '5918.0': 'persisting',
    '99.0': 'persisting',
    '225.0': 'persisting',
    '718.0': 'persisting',
    '832.0': 'persisting',
    '11949.0': 'late',
    '11195.0': 'late',
    '11155.0': 'late',
    '10748.0': 'late',
    '841.0': 'early',
    '11085.0': 'late',
    '10623.0': 'non-reactive',
}

In [None]:
mapping_a08 ={
    '6844.0': 'early',
    '7287.0': 'early',
    '7094.0': 'early',
    '5458.0': 'persisting',
    '6265.0': 'early',
    '4160.0': 'early',
    '3936.0': 'persisting',
    '7156.0': 'persisting',
    '6808.0': 'persisting',
    '6254.0': 'persisting',
    '6928.0': 'persisting',
    '5170.0': 'persisting',
    '5469.0': 'persisting',
    '6302.0': 'persisting',
    '7734.0': 'persisting',
    '5251.0': 'persisting',
    '8915.0': 'early',
    '8483.0': 'early',
    '8395.0': 'early',
    '8172.0': 'early',
    '11870.0': 'late',
    '11751.0': 'late',
    '11439.0': 'late',
    '11151.0': 'late',
    '10832.0': 'persisting',
    '11192.0': 'not-expressed',
}

In [None]:
mapping_a15 = {
    '3155.0': 'early',
    '2722.0': 'early',
    '1735.0': 'early',
    '7894.0': 'early',
    '6993.0': 'early',
    '6755.0': 'early',
    '6743.0': 'persisting',
    '509.0': 'early',
    '386.0': 'early',
    '277.0': 'early',
    '545.0': 'early',
    '763.0': 'early',
    '66.0': 'early',
    '4386.0': 'early',
    '317.0': 'persisting',
    '4545.0': 'persisting',
    '7911.0': 'persisting',
    '7758.0': 'persisting',
    '6548.0': 'persisting',
    '5245.0': 'persisting',
    '684.0': 'persisting',
    '489.0': 'persisting',
    '11964.0': 'late',
    '11253.0': 'late',
    '10813.0': 'late',
    '772.0': 'late',
    '5576.0': 'early',
    '4350.0': 'non-reactive',
    '2123.0': 'non-reactive',
    '5405.0': 'not-expressed',
}

In [None]:
mapping_a29 = {
    '52.0': 'early',
    '323.0': 'early',
    '691.0': 'early',
    '6467.0': 'early',
    '6579.0': 'early',
    '6688.0': 'early',
    '7411.0': 'early',
    '3695.0': 'persisting',
    '5766.0': 'early',
    '6140.0': 'early',
    '8191.0': 'early',
    '9825.0': 'early',
    '9961.0': 'persisting',
    '10749.0': 'late',
    '11445.0': 'late',
    '9973.0': 'non-reactive',
}

In [None]:
mapping_detection = {
    'A04': mapping_a04,
    'A08': mapping_a08,
    'A15': mapping_a15,
    'A29': mapping_a29,
}

In [None]:
# Annotate every cell with the detection pattern of its clonotype, donor by donor
# (clone ids are only unique within a donor, e.g. '772.0' occurs in A04 and A15).
# The column is created with object dtype so that string labels can be written
# without relying on the deprecated implicit float -> object upcast.
adata.obs['detection_pattern'] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
for donor, mapping in mapping_detection.items():
    is_donor = adata.obs['donor'] == donor
    adata.obs.loc[is_donor, 'detection_pattern'] = adata.obs.loc[is_donor, 'clone_id'].map(mapping).astype(object)
    # Sanity check: every clonotype listed for this donor must be present in the data.
    is_annotated = is_donor & adata.obs['detection_pattern'].notna()
    n_clones_found = adata.obs.loc[is_annotated, 'clone_id'].nunique()
    assert n_clones_found == len(mapping), donor
adata.obs['detection_pattern'].value_counts()

## Table avidity

In [None]:
columns = ['clone_id', 'score_IFNg_EC50', 'level_IFNg_EC50', 'score_mTRBC_EC50', 'level_mTRBC_EC50']

In [None]:
# In which donor(s) does clonotype 20205.0 occur?
is_clone_20205 = adata.obs['clone_id'] == '20205.0'
adata[is_clone_20205].obs['donor'].value_counts()

In [None]:
df_avidity_a04 = [
    [646, 0.000000124, 'high', 0.000000212, 'high'],
    [772, 0.0000001347, 'high', 0.0000002227, 'high'],
    [99, 0.0000001562, 'high', 0.000002787, 'mid'],
    [201, 0.0000001677, 'high', 0.0000002081, 'high'],
    [718, 0.0000001723, 'high', 0.000002601, 'mid'],
    [6012, 0.0000001848, 'high', 0.000002883, 'mid'],
    [841, 0.0000001976, 'high', 0.000001636, 'mid'],
    [10748, 0.0000002415, 'mid', 0.000002038, 'mid'],
    [11195, 0.0000003633, 'mid', 0.000001246, 'mid'],
    [6319, 0.0000003878, 'mid', 0.000002359, 'mid'],
    [11155, 0.0000004235, 'mid', 0.000003125, 'mid'],
    [5918, 0.0000004403, 'mid', 0.0000008157, 'mid'],
    [11949, 0.0000004541, 'mid', 0.000002241, 'mid'],
    [7100, 0.0000004854, 'mid', 0.000002326, 'mid'],
    [7808, 0.0000005036, 'mid', 0.000004321, 'mid'],
    [832, 0.0000005102, 'mid', 0.000001061, 'mid'],
    [225, 0.0000006018, 'mid', 0.000002991, 'mid'],
    [11085, 0.0000008714, 'low', 0.0000008348, 'mid'],
    [297, 0.000001532, 'low', 0.00001274, 'low'],
]
# Turn the raw rows into a table indexed by clonotype id in the '<id>.0' string form,
# then verify that every listed clonotype exists among the A04 cells.
df_avidity_a04 = pd.DataFrame(data=df_avidity_a04, columns=columns).set_index('clone_id')
df_avidity_a04.index = df_avidity_a04.index.astype(str) + '.0'
clones_a04 = adata[adata.obs['donor']=='A04'].obs['clone_id']
assert df_avidity_a04.index.isin(clones_a04).sum() == len(df_avidity_a04)
df_avidity_a04

In [None]:
df_avidity_a08 = [
    [11751, 0.0000001512, 'high', 0.000003054, 'mid'],
    [7287, 0.0000002045, 'high', 0.000001352, 'mid'],
    [7734, 0.0000002046, 'high', 0.0000008852, 'high'],
    [7156, 0.0000002196, 'high', 0.000003289, 'mid'],
    [11151, 0.0000002267, 'high', 0.000003088, 'mid'],
    [8915, 0.0000002914, 'high', 0.0000008946, 'high'],
    [11439, 0.0000004716, 'mid', 0.000001509, 'mid'],
    [6928, 0.000000496, 'mid', 0.00000349, 'mid'],
    [6844, 0.0000005604, 'mid', 0.000003, 'mid'],
    [10832, 0.0000005972, 'mid', 0.000001287, 'mid'],
    [6254, 0.0000006055, 'mid', 0.000002252, 'mid'],
    [7094, 0.0000006848, 'mid', 0.00000466, 'mid'],
    [4160, 0.0000007182, 'mid', 0.000005749, 'mid'],
    [6808, 0.0000007512, 'mid', 0.000001852, 'mid'],
    [6265, 0.0000007757, 'mid', 0.000005094, 'mid'],
    [5469, 0.0000009556, 'mid', 0.000004288, 'mid'],
    [6302, 0.0000009596, 'mid', 0.000003502, 'mid'],
    [5251, 0.0000009612, 'mid', 0.0000031, 'mid'],
    [5458, 0.000001003, 'mid', 0.000001951, 'mid'],
    [5170, 0.000001006, 'mid', 0.00000677,  'low'],
    [8172, 0.000001096, 'mid', 0.000003348, 'mid'],
    [8483, 0.000001127, 'mid', 0.000001264, 'high'],
    [3936, 0.000001136, 'mid', 0.000006437, 'low'],
    [11870, 0.000001894, 'low', np.nan, np.nan],
    [8395, 0.000002766, 'low', 0.000002382, 'mid'],
    [9130, 0.00000008254, np.nan, 0.000001153, np.nan],
    [11251,0.0000003493, np.nan, 0.000004135, np.nan],
]
# Turn the raw rows into a table indexed by clonotype id in the '<id>.0' string form,
# then verify that every listed clonotype exists among the A08 cells.
df_avidity_a08 = pd.DataFrame(data=df_avidity_a08, columns=columns).set_index('clone_id')
df_avidity_a08.index = df_avidity_a08.index.astype(str) + '.0'
clones_a08 = adata[adata.obs['donor']=='A08'].obs['clone_id']
assert df_avidity_a08.index.isin(clones_a08).sum() == len(df_avidity_a08)
df_avidity_a08

In [None]:
# Avidity table for donor A15: [clone_id, IFNg EC50, IFNg level, mTRBC EC50, mTRBC level].
# Three rows had no mTRBC level and were one element short; the gap is now an explicit
# np.nan instead of relying on pandas padding ragged rows.
# NOTE(review): the missing mTRBC levels for 6993, 7911 and 386 look like data-entry gaps - please fill in.
df_avidity_a15 = [
    [6993, 0.00000006643, 'high', 0.000000627, np.nan],
    [7758, 0.00000006978, 'high', 0.0000003042, 'high'],
    [7911, 0.00000008178, 'high', 0.0000003719, np.nan],
    [386, 0.00000008187, 'high', 0.0000003234, np.nan],
    [66, 0.00000008321, 'high', 0.000000218, 'high'],
    [772, 0.00000009391, 'high', 0.0000001429, 'high'],
    [277, 0.0000001074, 'mid', 0.0000003362, 'mid'],
    [11253, 0.0000001078, 'mid', 0.000001294, 'mid'],
    [509, 0.0000001151, 'mid', 0.0000003426, 'mid'],
    [6743, 0.0000001199, 'mid', 0.0000007033, 'mid'],
    [5245, 0.0000001201, 'mid', 0.0000009971, 'mid'],
    [5576, 0.0000001499, 'mid', 0.00000107, 'mid'],
    [4545, 0.0000001557, 'mid', 0.0000003805, 'mid'],
    [763, 0.0000001562, 'mid', 0.0000003707, 'mid'],
    [684, 0.0000001702, 'mid', 0.000002945, 'low'],
    [1735, 0.0000001898, 'mid', 0.000001518, 'mid'],
    [6755, 0.0000001924, 'mid', 0.000001003, 'mid'],
    [6548, 0.0000002097, 'mid', 0.000001736, 'mid'],
    [10813, 0.000000243, 'mid', 0.000001064, 'mid'],
    [545, 0.0000003402, 'mid', 0.000001512, 'mid'],
    [2722, 0.0000003449, 'mid', 0.0000006764, 'mid'],
    [3155, 0.000000388, 'mid', 0.0000009333, 'mid'],
    [4386, 0.0000004069, 'mid', 0.0000007293, 'mid'],
    [489, 0.0000004128, 'mid', 0.0000006835, 'mid'],
    [317, 0.0000005827, 'mid', 0.000001118, 'mid'],
    [7894, 0.000002096, 'low', 0.000000489, 'mid'],
    [11964, 0.0001143, 'low', 0.0001015, 'low'],
]
df_avidity_a15 = pd.DataFrame(df_avidity_a15, columns=columns).set_index('clone_id')
# Clone ids in adata.obs are strings of the form '<id>.0'.
df_avidity_a15.index = df_avidity_a15.index.astype(str) + '.0'
# Every listed clonotype must exist among the A15 cells.
assert np.sum(df_avidity_a15.index.isin(adata[adata.obs['donor']=='A15'].obs['clone_id']))==len(df_avidity_a15)
df_avidity_a15

In [None]:
df_avidity_a29 = [
    [52, 0.00000002355, 'high', 0.00000001207,  'high'],
    [5766, 0.00000003258, 'high', 0.0000001136, 'mid'],
    [691, 0.00000003349, 'high', 0.000000007647,  'high'],
    [6467, 0.00000003569, 'high', 0.0000000908, 'mid'],
    [10749, 0.00000003876, 'high', 0.0000001733, 'mid'],
    [6688, 0.00000004496, 'high', 0.0000002203, 'mid'],
    [6579, 0.00000004571, 'high', 0.0000001151, 'mid'],
    [6140, 0.00000004614, 'high', 0.0000001734, 'mid'],
    [3695, 0.00000004756, 'high', 0.000000106, 'mid'],
    [9825, 0.00000005136, 'high', 0.0000003264, 'mid'],
    [323, 0.00000003406, 'high', 0.0000001557,  'high'],
    [11445, 0.00000008472, 'mid', 0.000000371, 'mid'],
    [9961, 0.0000001021, 'mid', 0.0000003585, 'mid'],
    [7411, 0.0000001325, 'mid', 0.0000002942, 'mid'],
    [8191, 0.00001236, 'low', 0.000008164, 'low'],
]
# Turn the raw rows into a table indexed by clonotype id in the '<id>.0' string form,
# then verify that every listed clonotype exists among the A29 cells.
df_avidity_a29 = pd.DataFrame(data=df_avidity_a29, columns=columns).set_index('clone_id')
df_avidity_a29.index = df_avidity_a29.index.astype(str) + '.0'
clones_a29 = adata[adata.obs['donor']=='A29'].obs['clone_id']
assert df_avidity_a29.index.isin(clones_a29).sum() == len(df_avidity_a29)
df_avidity_a29

In [None]:
df_avidity_him = [
    [5226, 0.00000008818, 'high', 0.0000004583, 'mid'],
    [2495, 0.00000009974, 'high', 0.0000005816, 'mid'],
    [272, 0.0000001118, 'mid', 0.0000006998, 'mid'],
    [2058, 0.0000001187, 'mid', 0.0000001299,  'high'],
    [1650, 0.000000126, 'mid', 0.0000009917, 'mid'],
    [41, 0.0000001349, 'mid', 0.000000911, 'mid'],
    [219, 0.0000001352, 'mid', 0.0000001564,  'high'],
    [1361, 0.0000001375, 'mid', 0.00000168, 'mid'],
    [1701, 0.0000001807, 'mid', 0.000002074, 'mid'],
    [2997, 0.0000002962, 'mid', 0.000001611, 'mid'],
    [475, 0.0000002986, 'mid', 0.000002468, 'mid'],
    [5546, 0.0000004082, 'mid', 0.000002859, 'mid'],
    [1276, np.nan, 'low', 0.00004086, 'low'],
]
# Turn the raw rows into a table indexed by clonotype id in the '<id>.0' string form,
# then verify that every listed clonotype exists among the HIM cells.
df_avidity_him = pd.DataFrame(data=df_avidity_him, columns=columns).set_index('clone_id')
df_avidity_him.index = df_avidity_him.index.astype(str) + '.0'
clones_him = adata[adata.obs['donor']=='HIM'].obs['clone_id']
assert df_avidity_him.index.isin(clones_him).sum() == len(df_avidity_him)
df_avidity_him

In [None]:
donor_2_avidity = {
    'A04': df_avidity_a04,
    'A08': df_avidity_a08,
    'A15': df_avidity_a15,
    'A29': df_avidity_a29,
    'HIM': df_avidity_him,
}

# Copy the per-donor avidity tables onto the cells (matched on donor + clone id).
for col in ['level_IFNg_EC50', 'level_mTRBC_EC50', 'score_IFNg_EC50', 'score_mTRBC_EC50']:
    # Level columns hold strings and score columns hold floats; create each column with
    # the matching dtype instead of writing strings into a float NaN column.
    col_dtype = object if col.startswith('level_') else float
    adata.obs[col] = pd.Series(np.nan, index=adata.obs.index, dtype=col_dtype)
    for donor, df_avidity in donor_2_avidity.items():
        mask = adata.obs['donor'] == donor
        dict_avidity = dict(zip(df_avidity.index.values, df_avidity[col].values))
        adata.obs.loc[mask, col] = adata.obs.loc[mask, 'clone_id'].map(dict_avidity)
adata.obs['log_score_IFNg_EC50'] = np.log10(adata.obs['score_IFNg_EC50'].values)

## Level Annotation - Top5 per Donor

In [None]:
# Per donor: rank clonotypes by EC50 (ascending) and label the 5 lowest EC50 values
# 'high', the 5 highest 'low' and everything in between 'mid'.
# The two exclusion filters used to be computed and thrown away, so the outlier clones
# and the new clonotypes still took part in the ranking. They are now really excluded
# and therefore keep NaN in the resulting column.
# NOTE(review): this changes the top5 labels compared to earlier runs - confirm the exclusion is intended.
clones_excluded_from_ranking = ['11964.0', '8191.0'] + list(new_cts)

for mod in ['IFNg_EC50', 'mTRBC_EC50']:
    col = f'level_{mod}_top5'
    # object dtype because the column stores the string labels
    adata.obs[col] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
    for donor, df_tmp in donor_2_avidity.items():
        sort_scores = df_tmp[f'score_{mod}'].dropna().sort_values()
        sort_scores = sort_scores[~sort_scores.index.isin(clones_excluded_from_ranking)]

        cts_high = sort_scores[:5].index.tolist()
        cts_low = sort_scores[-5:].index.tolist()
        cts_mid = [el for el in sort_scores.index if (el not in cts_high) and (el not in cts_low)]

        is_donor = adata.obs['donor'] == donor
        # Assignment order matters when fewer than 10 clones are ranked: 'low' overrides 'high'.
        for level, cts in (('high', cts_high), ('low', cts_low), ('mid', cts_mid)):
            adata.obs.loc[adata.obs['clone_id'].isin(cts) & is_donor, col] = level

adata.obs[col].value_counts()

## Level Annotation - Top5 per population (HIM separate)

In [None]:
# Clonotype 772.0 was measured twice (it is listed for A04 and A15): replace the two
# rows of the pooled A04/A08/A15 table by one row holding the mean scores.
df_avidity_a040815 = pd.concat([df_avidity_a04, df_avidity_a08, df_avidity_a15])
df_avidity_fix_772 = df_avidity_a040815.loc['772.0']
df_avidity_a040815 = df_avidity_a040815[df_avidity_a040815.index != '772.0']
# numeric_only=True: average the score columns only; a plain .mean() raises on the
# string level columns with pandas >= 2.
df_avidity_fix_772 = df_avidity_fix_772.mean(numeric_only=True).to_frame().transpose()
df_avidity_fix_772.index = ['772.0']
df_avidity_fix_772[['level_IFNg_EC50', 'level_mTRBC_EC50']] = 'high'
df_avidity_a040815 = pd.concat([df_avidity_a040815, df_avidity_fix_772])

population_2_avidity = [
    (['A04', 'A08', 'A15'], df_avidity_a040815),
    (['A29'], df_avidity_a29),
    (['HIM'], df_avidity_him)
]

# Per population: the 5 lowest EC50 values are 'high', the 5 highest 'low', the rest 'mid'.
# The two exclusion filters used to be computed and thrown away; they are now applied,
# so the excluded clonotypes keep NaN in the resulting column.
# NOTE(review): this changes the top5 labels compared to earlier runs - confirm the exclusion is intended.
clones_excluded_from_ranking = ['11964.0', '8191.0'] + list(new_cts)

for mod in ['IFNg_EC50', 'mTRBC_EC50']:
    col = f'level_{mod}_population_top5'
    # object dtype because the column stores the string labels
    adata.obs[col] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
    for donors, df_tmp in population_2_avidity:
        sort_scores = df_tmp[f'score_{mod}'].dropna().sort_values()
        sort_scores = sort_scores[~sort_scores.index.isin(clones_excluded_from_ranking)]
        cts_high = sort_scores[:5].index.tolist()
        cts_low = sort_scores[-5:].index.tolist()
        cts_mid = [el for el in sort_scores.index if (el not in cts_high) and (el not in cts_low)]

        in_population = adata.obs['donor'].isin(donors)
        # Assignment order matters when fewer than 10 clones are ranked: 'low' overrides 'high'.
        for level, cts in (('high', cts_high), ('low', cts_low), ('mid', cts_mid)):
            adata.obs.loc[adata.obs['clone_id'].isin(cts) & in_population, col] = level

adata.obs[col].value_counts()

## Annotate tested reactivity

In [None]:
# Reactivity annotation: cells of clonotypes with an IFNg score or level are 'reactive',
# cells of the explicitly listed clonotypes are 'unreactive', everything else stays NaN.
# object dtype so the string labels are not written into a float NaN column.
adata.obs['reactivity'] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
has_ifng_readout = adata.obs['score_IFNg_EC50'].notna() | adata.obs['level_IFNg_EC50'].notna()
adata.obs.loc[has_ifng_readout, 'reactivity'] = 'reactive'

unreactive_clones = ['10623.0', '4350.0', '2123.0', '1690.0', '9973.0',
                    '15599.0', '18755.0', '19327.0', '20205.0']
# Applied last, so a listed clone is 'unreactive' even if it also has a score.
adata.obs.loc[adata.obs['clone_id'].isin(unreactive_clones), 'reactivity'] = 'unreactive'
adata.obs['reactivity'].value_counts()

## Metabolic scores

In [None]:
import omnipath as op

In [None]:
# Gene sets for the pathway scores: a handful of KEGG pathways from MSigDB, each gene with weight 1.
msigdb = dc.get_resource('MSigDB')
selected_kegg_pathways = [
    'KEGG_OXIDATIVE_PHOSPHORYLATION',
    'KEGG_GLYCOLYSIS_GLUCONEOGENESIS',
    'KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY',
    'KEGG_RNA_POLYMERASE',
    'KEGG_SPLICEOSOME'
]
# `.assign` returns a new frame, so the weight column is not written onto a filtered
# slice of `msigdb` (which triggered pandas' SettingWithCopyWarning).
msigdb_selected = (
    msigdb[msigdb['geneset'].isin(selected_kegg_pathways)]
    .assign(weight=1)
    .drop_duplicates()
)
msigdb_selected.head()

In [None]:
# Score the selected pathways per cell with decoupler's MLM; results land in adata.obsm['mlm_estimate'].
dc.run_mlm(mat=adata, net=msigdb_selected, source='geneset', target='genesymbol',
           weight='weight', use_raw=False, verbose=True)

# Expose every pathway estimate as an obs column named 'score_<pathway in lower case>'.
mlm_estimates = adata.obsm['mlm_estimate']
for pathway in mlm_estimates.columns:
    adata.obs[f'score_{pathway.lower()}'] = mlm_estimates[pathway]

kegg_scores = ['score_' + pathway.lower() for pathway in selected_kegg_pathways]
sc.pl.umap(adata, color=kegg_scores, ncols=3)

In [None]:
# One boxplot per pathway score, grouped by Leiden cluster.
rcParams['figure.figsize'] = (8, 4)
for col in kegg_scores:
    plot = sb.boxplot(x='leiden_CD8', y=col, data=adata.obs)
    plt.show()

# Plots

## Score by cluster

In [None]:
rcParams['figure.figsize'] = (8, 4)

# IFNg EC50 per Leiden cluster on an inverted log axis, once without donor HIM and once
# with all donors; the new clonotypes are always left out.
for suffix, excluded_donors in (('_withoutHim', ['HIM']), ('', [])):
    keep_cells = ~adata.obs['donor'].isin(excluded_donors) & ~adata.obs['clone_id'].isin(new_cts)
    plot = sb.boxplot(
        data=adata[keep_cells].obs, x='leiden_CD8', y='score_IFNg_EC50',
        palette=colors_leiden, flierprops={'marker': 'd', 'markerfacecolor': 'black'},
    )
    plot.set_yscale('log')
    plot.invert_yaxis()
    plt.savefig(f'{path_figs}/avidity/avidity_leiden_ifng_box{suffix}.pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Strip + violin plot of log10 IFNg EC50 per Leiden cluster (pseudotime order), once
# without donor HIM and once with all donors; new clonotypes are always left out.
for n, ds in (('_withoutHim', ['HIM']), ('', [])):
    keep_cells = ~adata.obs['donor'].isin(ds) & ~adata.obs['clone_id'].isin(new_cts)
    adata_tmp = adata[keep_cells].copy()
    adata_tmp.obs['log_score_IFNg_EC50'] = np.log10(adata_tmp.obs['score_IFNg_EC50'].values)

    shared_kwargs = dict(data=adata_tmp.obs, x='leiden_CD8', y='log_score_IFNg_EC50',
                         order=leiden_dpt_order)
    # Points first (high zorder keeps them on top), violins second on the same axes.
    sb.stripplot(color='gray', size=3, jitter=0.3, zorder=10, **shared_kwargs)
    plot = sb.violinplot(scale='width', palette=colors_leiden, inner=None, **shared_kwargs)

    plot.invert_yaxis()
    plot.legend().remove()
    sb.despine(ax=plot)
    plot.grid(False)
    plot.set_title(n.replace('_', ''))

    plt.savefig(f'{path_figs}/avidity/avidity_leiden_ifng_scatter{n}.pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# mTRBC EC50 per Leiden cluster on an inverted log axis, once without donor HIM and once
# with all donors; the new clonotypes are always left out.
for suffix, excluded_donors in (('_withoutHim', ['HIM']), ('', [])):
    keep_cells = ~adata.obs['donor'].isin(excluded_donors) & ~adata.obs['clone_id'].isin(new_cts)
    plot = sb.boxplot(
        data=adata[keep_cells].obs, x='leiden_CD8', y='score_mTRBC_EC50',
        palette=colors_leiden, flierprops={'marker': 'd', 'markerfacecolor': 'black'},
    )
    plot.set_yscale('log')
    plot.invert_yaxis()
    plt.savefig(f'{path_figs}/avidity/avidity_leiden_mtrbc_box{suffix}.pdf', bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
# Strip + violin plot of log10 mTRBC EC50 per Leiden cluster (pseudotime order), once
# without donor HIM and once with all donors; new clonotypes are always left out.
for n, ds in (('_withoutHim', ['HIM']), ('', [])):
    keep_cells = ~adata.obs['donor'].isin(ds) & ~adata.obs['clone_id'].isin(new_cts)
    adata_tmp = adata[keep_cells].copy()
    adata_tmp.obs['log_score_mTRBC_EC50'] = np.log10(adata_tmp.obs['score_mTRBC_EC50'].values)

    shared_kwargs = dict(data=adata_tmp.obs, x='leiden_CD8', y='log_score_mTRBC_EC50',
                         order=leiden_dpt_order)
    # Points first (high zorder keeps them on top), violins second on the same axes.
    sb.stripplot(color='gray', size=3, jitter=0.3, zorder=10, **shared_kwargs)
    plot = sb.violinplot(scale='width', palette=colors_leiden, inner=None, **shared_kwargs)

    plot.invert_yaxis()
    plot.legend().remove()
    sb.despine(ax=plot)
    plot.grid(False)
    plot.set_title(n.replace('_', ''))

    plt.savefig(f'{path_figs}/avidity/avidity_leiden_mtrbc_scatter{n}.pdf', bbox_inches='tight', dpi=300)
    plt.show()

### IFNG by leiden with unreactive

In [None]:
# Split-axis figure (donor HIM excluded, new clonotypes excluded): the upper panel shows
# log10 IFNg EC50 of the measured clones per Leiden cluster, the lower panel shows the
# unreactive clones at a placeholder value labelled 'N.R.'.
fig = plt.figure(figsize=(8, 4))

gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], hspace=0.05)

df_tmp = adata[adata.obs['donor']!='HIM'].obs.copy()
# Keep cells that either have a measured score or are annotated as unreactive.
df_tmp = df_tmp[df_tmp['score_IFNg_EC50'].notna() | (df_tmp['reactivity']=='unreactive')]
df_tmp = df_tmp[~df_tmp['clone_id'].isin(new_cts)]
# Cells without a score get the placeholder value 1, which separates them from the
# measured clones in the `< 1` / `>= 1` filters below.
df_tmp['log_score_IFNg_EC50'] = df_tmp['log_score_IFNg_EC50'].fillna(1)

# ax_full spans the whole grid and only carries the shared y label and title;
# ax_u (upper) and ax_b (bottom) hold the actual data.
ax_full = fig.add_subplot(gs[:])
ax_u = fig.add_subplot(gs[0])
ax_b = fig.add_subplot(gs[1])


# Short slanted lines drawn in axes coordinates at the left edge of both panels
# (presumably the axis-break marks between the two panels).
d = 0.03
ax_b.plot((-d*4/7, +d*4/7), (-d*2+1, +d*2+1), transform=ax_b.transAxes, color='silver', clip_on=False, linewidth=0.5)
ax_u.plot((-d*4/7, +d*4/7), (-d*2/3, +d*2/3), transform=ax_u.transAxes, color='silver', clip_on=False, linewidth=0.5)

# Upper panel: measured clones as points with violins drawn on the same axes.
plot = sb.stripplot(data=df_tmp[df_tmp['log_score_IFNg_EC50']<1], x='leiden_CD8', y='log_score_IFNg_EC50', 
                        color='gray', ax=ax_u,
                        size=3, jitter=0.3,
                        order=leiden_dpt_order, zorder=10,
                        )
plot = sb.violinplot(data=df_tmp[df_tmp['log_score_IFNg_EC50']<1], x='leiden_CD8', y='log_score_IFNg_EC50', 
                     scale='width', palette=colors_leiden, inner=None, ax=ax_u,
                    order=leiden_dpt_order)
sb.despine(ax=plot, bottom=True)
plot.grid(False)
plot.set_ylabel(None)
plot.invert_yaxis()
# The (inverted) y limits are kept in `ylim`; a later cell reads this variable.
ylim = plot.get_ylim()


# Bottom panel: the placeholder rows (value 1), i.e. clones without a measured score.
plot = sb.stripplot(data=df_tmp[df_tmp['log_score_IFNg_EC50']>=1], jitter=0.3,
                    x='leiden_CD8', y='log_score_IFNg_EC50', color='gray', ax=ax_b,
                    size=3,
                    order=leiden_dpt_order)

sb.despine(ax=plot)
plot.grid(False)
plot.set_ylabel(None)
plot.set_yticks([1])
plot.set_yticklabels(['N.R.'])


# The spanning axes only provides the common y label and the title.
sb.despine(ax=ax_full, bottom=True, left=True)
ax_full.set_yticklabels([])
ax_full.set_xticklabels([])
ax_full.set_ylabel('log$_{10}$(IFNg EC50)', labelpad=30)
ax_full.grid(False)
ax_full.set_title('wo HIM')

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/avidity_leiden_ifng_scatter_woHIM_withUnreactives.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/avidity_leiden_ifng_scatter_woHIM_withUnreactives.png', bbox_inches='tight', dpi=300,)
plt.show()



### New clones IFNG

In [None]:
# NOTE(review): `new_cts` repeats the definition from the top of the notebook, and the
# Series stored in `df_tmp` is overwritten by the next cell before it is used - this
# cell looks like leftover scratch work.
new_cts = ['9130.0', '11251.0', '15599.0', '18755.0', '19327.0', '20205.0']
df_tmp = adata[adata.obs['clone_id'].isin(new_cts)].obs.copy()['log_score_IFNg_EC50']

In [None]:
# Split-axis figure for the new clonotypes only: the upper panel shows their log10 IFNg
# EC50 per Leiden cluster, the lower panel shows clones without a score at the
# placeholder value labelled 'N.R.'.
fig = plt.figure(figsize=(4, 4))
gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], hspace=0.05)

new_cts = ['9130.0', '11251.0', '15599.0', '18755.0', '19327.0', '20205.0']
df_tmp = adata[adata.obs['clone_id'].isin(new_cts)].obs.copy()
# Cells without a score get the placeholder value 1 used by the `< 1` / `>= 1` filters below.
df_tmp['log_score_IFNg_EC50'] = df_tmp['log_score_IFNg_EC50'].fillna(1)
# Only clusters that contain at least one of the new clonotypes, in pseudotime order.
order_tmp = [el for el in leiden_dpt_order if el in df_tmp['leiden_CD8'].values]

# ax_full spans the whole grid and only carries the shared y label;
# ax_u (upper) and ax_b (bottom) hold the actual data.
ax_full = fig.add_subplot(gs[:])
ax_u = fig.add_subplot(gs[0])
ax_b = fig.add_subplot(gs[1])


# Short slanted lines drawn in axes coordinates at the left edge of both panels
# (presumably the axis-break marks between the two panels).
d = 0.03
ax_b.plot((-d*4/7, +d*4/7), (-d*2+1, +d*2+1), transform=ax_b.transAxes, color='silver', clip_on=False, linewidth=0.5)
ax_u.plot((-d*4/7, +d*4/7), (-d*2/3, +d*2/3), transform=ax_u.transAxes, color='silver', clip_on=False, linewidth=0.5)

# Upper panel: clones with a measured score.
plot = sb.swarmplot(data=df_tmp[df_tmp['log_score_IFNg_EC50']<1], #jitter=0.3,
                    x='leiden_CD8', y='log_score_IFNg_EC50', color='gray', ax=ax_u,
                   order=order_tmp)
# NOTE(review): `ylim` is not defined in this cell; it is whatever an earlier figure
# cell stored, so this cell depends on that cell having been run first.
# The limits are set in swapped order and the axis is inverted again below.
plot.set_ylim((ylim[1], ylim[0]))
sb.despine(ax=plot, bottom=True)
plot.grid(False)
plot.set_ylabel(None)
plot.invert_yaxis()
plot.set_xlabel(None)
plot.set_xticklabels([])

# Bottom panel: the placeholder rows (value 1), i.e. clones without a measured score.
plot = sb.swarmplot(data=df_tmp[df_tmp['log_score_IFNg_EC50']>=1], #jitter=0.3,
                    x='leiden_CD8', y='log_score_IFNg_EC50', color='gray', ax=ax_b,
                    order=order_tmp)


sb.despine(ax=plot)
plot.grid(False)
plot.set_ylabel(None)
plot.set_yticks([1])
plot.set_yticklabels(['N.R.'])


# The spanning axes only provides the common y label.
sb.despine(ax=ax_full, bottom=True, left=True)
ax_full.set_yticklabels([])
ax_full.set_xticklabels([])
ax_full.set_ylabel('log$_{10}$(IFNg EC50)', labelpad=30)
ax_full.grid(False)

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/scatter_ifng_newClones.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/scatter_ifng_newClones.png', bbox_inches='tight', dpi=300,)
plt.show()

### IFNg by leiden with unreactives - all clones

In [None]:
# Split-axis figure (donor HIM excluded) combining previously measured and new
# clonotypes: previously measured clones are coloured by Leiden cluster, new clonotypes
# are drawn as black diamonds. Upper panel: measured log10 IFNg EC50; lower panel:
# clones without a score at the placeholder value labelled 'N.R.'.
fig = plt.figure(figsize=(8, 4))

gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], hspace=0.05)

df_tmp = adata[adata.obs['donor']!='HIM'].obs.copy()
# Keep cells that either have a measured score or are annotated as unreactive.
df_tmp = df_tmp[df_tmp['score_IFNg_EC50'].notna() | (df_tmp['reactivity']=='unreactive')]
# Cells without a score get the placeholder value 1 used by the `< 1` / `>= 1` filters below.
df_tmp['log_score_IFNg_EC50'] = df_tmp['log_score_IFNg_EC50'].fillna(1)
# ax_full spans the whole grid and only carries the shared y label and title;
# ax_u (upper) and ax_b (bottom) hold the actual data.
ax_full = fig.add_subplot(gs[:])
ax_u = fig.add_subplot(gs[0])
ax_b = fig.add_subplot(gs[1])


# Short slanted lines drawn in axes coordinates at the left edge of both panels
# (presumably the axis-break marks between the two panels).
d = 0.03
ax_b.plot((-d*4/7, +d*4/7), (-d*2+1, +d*2+1), transform=ax_b.transAxes, color='silver', clip_on=False, linewidth=0.5)
ax_u.plot((-d*4/7, +d*4/7), (-d*2/3, +d*2/3), transform=ax_u.transAxes, color='silver', clip_on=False, linewidth=0.5)

# Upper panel, layer 1: previously measured clones, coloured by cluster.
plot = sb.stripplot(data=df_tmp[(df_tmp['log_score_IFNg_EC50']<1)
                               & (~df_tmp['clone_id'].isin(new_cts))], x='leiden_CD8', y='log_score_IFNg_EC50', 
                        palette=colors_leiden, ax=ax_u,
                        size=3, jitter=0.3,
                        order=leiden_order_tmp, zorder=10,
                        )
# Upper panel, layer 2: new clonotypes as black diamonds.
plot = sb.swarmplot(data=df_tmp[(df_tmp['log_score_IFNg_EC50']<1)
                               & (df_tmp['clone_id'].isin(new_cts))], x='leiden_CD8', y='log_score_IFNg_EC50', 
                        color='black', ax=ax_u, marker='D',
                        size=3, #jitter=0.3,
                        order=leiden_order_tmp, zorder=10,
                        )
# Upper panel, layer 3: white violins over all measured clones (old and new).
plot = sb.violinplot(data=df_tmp[df_tmp['log_score_IFNg_EC50']<1], x='leiden_CD8', y='log_score_IFNg_EC50', 
                     scale='width', color='white', linecolor='black', inner=None, ax=ax_u,
                    order=leiden_order_tmp)

# Reference line: log10 of the arithmetic mean EC50 of all measured cells
# (the mean is taken on the raw scores, not on the log10 values).
mean_value = np.log10(df_tmp[df_tmp['log_score_IFNg_EC50']<1]['score_IFNg_EC50'].mean())
ax_u.axhline(y=mean_value, color='silver', linestyle='--', zorder=5)

sb.despine(ax=plot, bottom=True)
plot.grid(False)
plot.set_ylabel(None)
plot.invert_yaxis()
ylim = plot.get_ylim()
plot.set_xlabel(None)
plot.set_xticklabels([])


# Bottom panel: placeholder rows (value 1), again split into previously measured
# clones (cluster colours) and new clonotypes (black diamonds).
plot = sb.stripplot(data=df_tmp[(df_tmp['log_score_IFNg_EC50']>=1)
                               & (~df_tmp['clone_id'].isin(new_cts))], jitter=0.3,
                    x='leiden_CD8', y='log_score_IFNg_EC50', palette=colors_leiden, ax=ax_b,
                    size=3,
                    order=leiden_order_tmp)
plot = sb.stripplot(data=df_tmp[(df_tmp['log_score_IFNg_EC50']>=1)
                               & (df_tmp['clone_id'].isin(new_cts))], jitter=0.3,
                    x='leiden_CD8', y='log_score_IFNg_EC50', color='black', ax=ax_b,
                    size=3, marker='D',
                    order=leiden_order_tmp)

sb.despine(ax=plot)
plot.grid(False)
plot.set_ylabel(None)
plot.set_yticks([1])
plot.set_yticklabels(['N.R.'])


# The spanning axes only provides the common y label and the title.
sb.despine(ax=ax_full, bottom=True, left=True)
ax_full.set_yticklabels([])
ax_full.set_xticklabels([])
ax_full.set_ylabel('log$_{10}$(IFNg EC50)', labelpad=30)
ax_full.grid(False)
ax_full.set_title('wo HIM')

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/avidity_leiden_ifng_scatter_woHIM_withUnreactivesOldNew.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/avidity_leiden_ifng_scatter_woHIM_withUnreactivesOldNew.png', bbox_inches='tight', dpi=300,)
plt.show()

In [None]:
# Mean of the log10 IFNg EC50 per Leiden cluster (cells with a score only).
obs_scored = adata[adata.obs['log_score_IFNg_EC50'].notna()].obs
obs_scored.groupby('leiden_CD8')['log_score_IFNg_EC50'].mean()

In [None]:
# log10 of the mean raw IFNg EC50 per Leiden cluster (cells with a score only).
obs_scored = adata[adata.obs['log_score_IFNg_EC50'].notna()].obs
mean_ec50_per_cluster = obs_scored.groupby('leiden_CD8')['score_IFNg_EC50'].mean()
np.log10(mean_ec50_per_cluster)

## UMAP

In [None]:
# UMAP per time point for LTDEMIAQY-binding cells: measured clones are coloured by
# log10 IFNg EC50, unreactive clones are drawn in silver on top of all cells.
epitope = 'LTDEMIAQY'
score = 'score_IFNg_EC50'

fig, axes = plt.subplots(1, len(time_order), figsize=(5*len(time_order), 5))

# Shared colour range over all measured cells so the panels are comparable.
vmin = np.log10(adata.obs[score].min())
vmax = np.log10(adata.obs[score].max())
for i, time in enumerate(time_order):
    ax = axes[i]
    # Background layer: all cells, uncoloured.
    sc.pl.umap(adata, ax=ax, show=False)

    in_panel = ((adata.obs['binding_ct']==epitope) & (adata.obs['time']==time)
                & (~adata.obs['clone_id'].isin(new_cts)))

    # Layer with the measured clones. `.copy()` because a column is added to .obs,
    # which must not be done on an AnnData view.
    scored_points = None
    adata_tmp = adata[in_panel & adata.obs[score].notna()].copy()
    if len(adata_tmp) > 0:
        adata_tmp.obs[f'log_{score}'] = np.log10(adata_tmp.obs[score])
        sc.pl.umap(adata_tmp, color=f'log_{score}', ax=ax, show=False,
                   vmin=vmin, vmax=vmax, cmap='rocket_r', size=len(adata)/120,
                  )
        # Remember the scatter that owns the colorbar of this panel.
        scored_points = ax.collections[-1]

    # Layer with the unreactive clones.
    adata_tmp = adata[in_panel & (adata.obs['reactivity']=='unreactive')]
    if len(adata_tmp) > 0:
        plot = sc.pl.umap(adata_tmp, color='reactivity', ax=ax, show=False,
                   palette=['silver'], size=len(adata)/120,)
        plot.legend().remove()

    ax.set_ylabel(None)
    ax.set_xlabel(None)
    ax.set_title(None)
    # Keep the colorbar only on the last panel. The removal is skipped when the panel has
    # no measured cells; the former `ax.collections[1].colorbar.remove()` failed then.
    if scored_points is not None and i != len(time_order)-1:
        scored_points.colorbar.remove()


for i, time in enumerate(time_order):
    axes[i].set_title(time)


axes[0].set_ylabel(f'log10 {score}')

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/ltd_scores_over_time.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/ltd_scores_over_time.png', bbox_inches='tight', dpi=300,)
plt.show()

In [None]:
# Same figure as above without donor HIM and without the last time point ('X3'):
# measured clones coloured by log10 IFNg EC50, unreactive clones in silver.
epitope = 'LTDEMIAQY'
score = 'score_IFNg_EC50'

# All time points except the trailing 'X3'.
times_wo_x3 = time_order[:-1]
fig, axes = plt.subplots(1, len(times_wo_x3), figsize=(5*len(times_wo_x3), 5))

# Shared colour range over all measured cells so the panels are comparable.
vmin = np.log10(adata.obs[score].min())
vmax = np.log10(adata.obs[score].max())
for i, time in enumerate(times_wo_x3):
    ax = axes[i]
    # Background layer: all cells, uncoloured.
    sc.pl.umap(adata, ax=ax, show=False)

    in_panel = ((adata.obs['binding_ct']==epitope) & (adata.obs['time']==time)
                & (adata.obs['donor']!='HIM')
                & (~adata.obs['clone_id'].isin(new_cts)))

    # Layer with the measured clones. `.copy()` because a column is added to .obs,
    # which must not be done on an AnnData view.
    scored_points = None
    adata_tmp = adata[in_panel & adata.obs[score].notna()].copy()
    if len(adata_tmp) > 0:
        adata_tmp.obs[f'log_{score}'] = np.log10(adata_tmp.obs[score])
        sc.pl.umap(adata_tmp, color=f'log_{score}', ax=ax, show=False,
                   vmin=vmin, vmax=vmax, cmap='rocket_r', size=len(adata)/120,
                  )
        # Remember the scatter that owns the colorbar of this panel.
        scored_points = ax.collections[-1]

    # Layer with the unreactive clones.
    adata_tmp = adata[in_panel & (adata.obs['reactivity']=='unreactive')]
    if len(adata_tmp) > 0:
        plot = sc.pl.umap(adata_tmp, color='reactivity', ax=ax, show=False,
                   palette=['silver'], size=len(adata)/120,)
        plot.legend().remove()

    ax.set_ylabel(None)
    ax.set_xlabel(None)
    ax.set_title(None)
    # Keep the colorbar only on the last panel. The removal is skipped when the panel has
    # no measured cells; the former `ax.collections[1].colorbar.remove()` failed then.
    if scored_points is not None and i != len(times_wo_x3)-1:
        scored_points.colorbar.remove()


for i, time in enumerate(times_wo_x3):
    axes[i].set_title(time)


axes[0].set_ylabel(f'log10 {score}')

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/ltd_scores_over_time_woHIM.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/ltd_scores_over_time_woHIM.png', bbox_inches='tight', dpi=300,)
plt.show()

In [None]:
# UMAP per time point for YLQPRTFLL-binding cells: measured clones are coloured by
# log10 IFNg EC50, unreactive clones are drawn in silver on top of all cells.
epitope = 'YLQPRTFLL'
score = 'score_IFNg_EC50'

fig, axes = plt.subplots(1, len(time_order), figsize=(5*len(time_order), 5))

# NOTE(review): the colorbar is kept on the panel with this hard-coded index only
# (time_order[4]); confirm that this is still the intended panel.
colorbar_panel = 4

# Shared colour range over all measured cells so the panels are comparable.
vmin = np.log10(adata.obs[score].min())
vmax = np.log10(adata.obs[score].max())
for i, time in enumerate(time_order):
    ax = axes[i]
    # Background layer: all cells, uncoloured.
    sc.pl.umap(adata, ax=ax, show=False)

    in_panel = ((adata.obs['binding_ct']==epitope) & (adata.obs['time']==time)
                & (~adata.obs['clone_id'].isin(new_cts)))

    # Layer with the measured clones. `.copy()` because a column is added to .obs,
    # which must not be done on an AnnData view.
    adata_tmp = adata[in_panel & adata.obs[score].notna()].copy()
    if len(adata_tmp) > 0:
        adata_tmp.obs[f'log_{score}'] = np.log10(adata_tmp.obs[score])
        sc.pl.umap(adata_tmp, color=f'log_{score}', ax=ax, show=False,
                   vmin=vmin, vmax=vmax, cmap='rocket_r', size=len(adata)/120,
                  )
        if i != colorbar_panel:
            ax.collections[1].colorbar.remove()

    # Layer with the unreactive clones.
    adata_tmp = adata[in_panel & (adata.obs['reactivity']=='unreactive')]
    if len(adata_tmp) > 0:
        plot = sc.pl.umap(adata_tmp, color='reactivity', ax=ax, show=False,
                   palette=['silver'], size=len(adata)/120,)
        plot.legend().remove()

    # scanpy sets labels and a title on every call; clear them once all layers are drawn.
    ax.set_ylabel(None)
    ax.set_xlabel(None)
    ax.set_title(None)

for i, time in enumerate(time_order):
    axes[i].set_title(time)

axes[0].set_ylabel(f'log10 {score}')

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/ylq_scores_over_time.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/ylq_scores_over_time.png', bbox_inches='tight', dpi=300,)
plt.show()

In [None]:
# Number of cells per IFNg avidity level over all donors.
adata.obs['level_IFNg_EC50'].value_counts()

In [None]:
# Number of cells per IFNg avidity level, donor HIM excluded.
is_not_him = adata.obs['donor'] != 'HIM'
adata[is_not_him].obs['level_IFNg_EC50'].value_counts()

In [None]:
# Number of cells per IFNg avidity level for donor A29 only.
is_a29 = adata.obs['donor'] == 'A29'
adata[is_a29].obs['level_IFNg_EC50'].value_counts()

In [None]:
# NOTE(review): same expression as three cells above (level counts over all donors).
adata.obs['level_IFNg_EC50'].value_counts()

## Reactive vs Unreactive

In [None]:
# Colours used for the reactivity annotation.
colors_reactive = dict(reactive='tab:orange', unreactive='tab:blue')

In [None]:
# One UMAP per donor group showing reactive vs. unreactive cells on top of all cells;
# new clonotypes are left out and only the last panel keeps its legend.
cats = [['A04', 'A08', 'A15'], ['HIM'], ['A29']]
n_panels = len(cats)

fig, axes = plt.subplots(1, n_panels, figsize=(n_panels*5, 5))

is_annotated = adata.obs['reactivity'].notna() & (~adata.obs['clone_id'].isin(new_cts))
for i, (ax, donors) in enumerate(zip(axes, cats)):
    # Background layer: all cells, uncoloured.
    sc.pl.umap(adata, ax=ax, show=False)

    adata_tmp = adata[adata.obs['donor'].isin(donors) & is_annotated]
    sc.pl.umap(adata_tmp, color='reactivity', ax=ax, show=False,
               size=len(adata)/1200, palette=colors_reactive)

    ax.set_title(','.join(donors))
    ax.set_ylabel(None)
    ax.set_xlabel(None)

    is_last_panel = i == n_panels-1
    if not is_last_panel:
        ax.legend().remove()

plt.tight_layout()
for extension in ('pdf', 'png'):
    plt.savefig(f'{path_figs}/avidity/reactivity.{extension}', bbox_inches='tight', dpi=300)
plt.show()

## Phenotype over time

In [None]:
# Numeric value per time-point label (plotted as 'Days' further down); time points
# missing from the mapping become NaN in 'time_numeric'.
time_2_numeric = dict(zip(
    ['P1', 'S1', 'S2', 'S3', 'T1', 'T2', 'T3'],
    [10, 31, 90, 231, 258, 356, 437],
))
adata.obs['time_numeric'] = adata.obs['time'].map(time_2_numeric)
adata.obs['time_numeric'].value_counts(dropna=False)

In [None]:
# Fine-grained phenotype label per Leiden cluster; clusters missing from the mapping become NaN.
leiden_2_pheno1 = {
    '0': 'TEff',
    '2': 'TEM',
    '3': 'CM',
    '4': 'early TEff',
    **dict.fromkeys(['6', '8'], 'IFN-TEff'),
}
adata.obs['phenotype_1'] = adata.obs['leiden_CD8'].map(leiden_2_pheno1)
adata.obs['phenotype_1'].value_counts()

In [None]:
# Coarse phenotype label per Leiden cluster; clusters missing from the mapping become NaN.
clusters_per_phenotype = {
    'Effector': ['0', '4'],
    'Memory': ['2', '3'],
    'IFN-TEff': ['6', '8'],
}
leiden_2_pheno2 = {
    cluster: phenotype
    for phenotype, clusters in clusters_per_phenotype.items()
    for cluster in clusters
}
adata.obs['phenotype_2'] = adata.obs['leiden_CD8'].map(leiden_2_pheno2)
adata.obs['phenotype_2'].value_counts()

In [None]:
def pheno_line_plot(adata_tmp, col, group, ax, color, epitope='LTDEMIAQY'):
    """Plot, for one phenotype group, the fraction of cells binding `epitope` over time.

    For every (`col`, `time_numeric`) group in `adata_tmp.obs` the normalised
    value counts of `binding_ct` are computed; the share belonging to `epitope`
    is drawn as a line for the rows where `col == group`. Time points listed in
    the notebook-level `time_2_numeric` that have no row for `group` are added
    with a fraction of 0 so that every line spans all time points.

    Parameters
    ----------
    adata_tmp : object with an `.obs` DataFrame holding `col`, 'time_numeric'
        and 'binding_ct'.
    col : str
        obs column with the phenotype labels.
    group : str
        Phenotype label to draw; also used as legend label.
    ax : matplotlib Axes or None
        Axes passed on to `seaborn.lineplot`.
    color : colour of the line.
    epitope : str
        `binding_ct` value whose fraction is plotted.

    Returns
    -------
    None. The axes are modified in place.
    """
    # Share of each `binding_ct` value within every (phenotype, time point) group.
    df_frac = adata_tmp.obs.groupby([col, 'time_numeric'])['binding_ct'].value_counts(normalize=True)
    df_frac = df_frac.unstack().fillna(0.)
    df_frac = pd.DataFrame(df_frac[epitope]).reset_index()
    df_frac = df_frac[df_frac[col]==group]

    # Pad time points without any row for `group` with an explicit zero.
    missing_times = [el for el in time_2_numeric.values() if el not in df_frac['time_numeric'].values]
    if missing_times:
        # Concatenating only when needed avoids mixing in an empty, object-typed frame.
        n = len(missing_times)
        df_add = pd.DataFrame(data={col: [group]*n, 'time_numeric': missing_times, epitope: [0.0]*n})
        df_frac = pd.concat([df_frac, df_add])
    df_frac = df_frac.sort_values('time_numeric').reset_index(drop=True)

    plot = sb.lineplot(data=df_frac, x='time_numeric', y=epitope, ax=ax, color=color, label=group)
    # The plotted values are fractions in [0, 1] (value_counts(normalize=True)), not percentages.
    plot.set_ylabel('Fraction of Cells')
    plot.set_xlabel('Days')

    # Ticks and their labels come from the same mapping that produced `time_numeric`.
    ticks = sorted(time_2_numeric.items(), key=lambda item: item[1])
    plot.set_xticks([position for _, position in ticks])
    plot.set_xticklabels([label for label, _ in ticks])

    sb.despine(ax=plot)
    plot.grid(False)

In [None]:
# Colour per phenotype: the fine labels take the first five colours of seaborn's
# 'bright' palette, the coarse labels the first three of 'colorblind'.
bright_colors = sb.color_palette('bright')
palette_pheno_1 = dict(zip(
    ['TEff', 'TEM', 'CM', 'early TEff', 'IFN-TEff'],
    bright_colors,
))

colorblind_colors = sb.color_palette('colorblind')
palette_pheno_2 = dict(zip(
    ['Effector', 'Memory', 'IFN-TEff'],
    colorblind_colors,
))

# Lookup keyed by the obs column that holds the labels.
palette = dict(
    phenotype_1=palette_pheno_1,
    phenotype_2=palette_pheno_2,
)

In [None]:
# Line plots of the epitope-binding fraction over time: one figure per phenotype
# column, one column of panels per donor group. Row 0 overlays all phenotypes,
# each following row shows a single phenotype.
color_vacs = 'gray'
# x-positions of the vertical marker lines (presumably vaccination days, going by
# the name `color_vacs` -- TODO confirm).
vacs_days = (0, 21, 248)
cats = [['A04', 'A08', 'A15'], ['A04'], ['A08'], ['A15']]

for pheno_col in ['phenotype_1', 'phenotype_2']:
    # Phenotype labels in order of first appearance; independent of the donor group.
    phenos = list(adata.obs[pheno_col].dropna().unique())
    n_rows = len(phenos) + 1
    n_cols = len(cats)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols*8, n_rows*4))

    for i, donors in enumerate(cats):
        adata_tmp = adata[adata.obs['donor'].isin(donors)]
        ax_overview = axes[0][i]

        for j, pheno in enumerate(phenos):
            ax = axes[j+1][i]
            color = palette[pheno_col][pheno]
            pheno_line_plot(adata_tmp, pheno_col, pheno, ax, color)
            pheno_line_plot(adata_tmp, pheno_col, pheno, ax_overview, color)

            for day in vacs_days:
                ax.axvline(day, color=color_vacs)

        # Marker lines on the overview panel are drawn once, after all phenotype lines.
        for day in vacs_days:
            ax_overview.axvline(day, color=color_vacs)
        ax_overview.set_title(','.join(donors))

    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/fraction_ltd_{pheno_col}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/fraction_ltd_{pheno_col}.png', bbox_inches='tight', dpi=300,)
    plt.show()

## UMAP level

In [None]:
# Colour per level category, picked by index from seaborn's 'bright' palette.
level_base_colors = sb.color_palette('bright')
palette_level = {
    name: level_base_colors[idx]
    for name, idx in [('high', 2), ('mid', 0), ('low', 3)]
}

In [None]:
# Grid of UMAPs per level column: rows are donor groups (`cats`), columns are
# the time points of `time_order`. `levels` is read again by a later cell.
cats = [['A04'], ['A08'], ['A15'], ['A29'], ['HIM'], ['A04', 'A08', 'A15']] 
levels = ['level_IFNg_EC50', 'level_mTRBC_EC50', 
          'level_IFNg_EC50_top5', 'level_mTRBC_EC50_top5',
         'level_IFNg_EC50_population_top5', 'level_mTRBC_EC50_population_top5']


for level in levels:
    fig, axes = plt.subplots(len(cats), len(time_order), figsize=(5*len(time_order), 5*len(cats)))

    for j, donors in enumerate(cats):
        for i, time in enumerate(time_order):
            ax = axes[j][i]
            # Uncoloured embedding of all cells as backdrop.
            sc.pl.umap(adata, ax=ax, show=False)

            # Cells of this donor group and time point that have a value for
            # `level` and whose clone is not listed in `new_cts`.
            adata_tmp = adata[(adata.obs['donor'].isin(donors))
                              & (adata.obs['time']==time) 
                              & (adata.obs[level].notna())
                             & (~adata.obs['clone_id'].isin(new_cts))]

            # Panels without matching cells only show the backdrop.
            if len(adata_tmp) > 0:
                sc.pl.umap(adata_tmp, color=level, ax=ax, show=False, 
                           size=len(adata)/1200*5, palette=palette_level,
                          )
                #if i != 4:
                    #ax.collections[1].colorbar.remove()
            # Labels/titles are cleared here and re-set for the first row/column below.
            ax.set_ylabel(None)
            ax.set_xlabel(None)
            ax.set_title(None)  
            # NOTE(review): 7 is the index of the last entry of `time_order` and 4 the
            # index of ['HIM'] in `cats`. With `and`, the legend is kept on every panel
            # of the last column AND every panel of the 'HIM' row -- confirm that this
            # is intended. `ax.legend()` builds a legend that is removed right away.
            if i != 7 and j != 4:
                ax.legend().remove()

    # Column headers: time point.
    for i, time in enumerate(time_order):
        axes[0][i].set_title(time)

    # Row labels: donor group.
    for j, donors in enumerate(cats):
        axes[j][0].set_ylabel(','.join(donors))

    fig.suptitle(level)

    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/umap_{level}_over_donorTime.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/umap_{level}_over_donorTime.png', bbox_inches='tight', dpi=300,)
    plt.show()

### New clones

In [None]:
# UMAP highlighting the clones listed in `new_cts`, coloured by log10 IFNg EC50.
# Side effect: adds the obs column 'log_score_IFNg_EC50', which later cells read.
adata.obs['log_score_IFNg_EC50'] = np.log10(adata.obs['score_IFNg_EC50'])
log_scores = adata.obs['log_score_IFNg_EC50']
marker_size = len(adata)/1200*5

fig, ax = plt.subplots(1, 1, figsize=(5.75, 5))
# Uncoloured embedding of all cells as backdrop.
sc.pl.umap(adata, ax=ax, show=False)

adata_d = adata[adata.obs['clone_id'].isin(new_cts)]

# Scored cells; colour limits span the score range of the whole data set.
has_score = adata_d.obs['score_IFNg_EC50'].notna()
sc.pl.umap(adata_d[has_score], color='log_score_IFNg_EC50', cmap='rocket_r', s=marker_size,
           vmin=log_scores.min(), vmax=log_scores.max(),
           ax=ax, show=False)

# Unreactive cells are drawn last, in silver.
is_unreactive = adata_d.obs['reactivity']=='unreactive'
plot = sc.pl.umap(adata_d[is_unreactive], color='reactivity', palette=['silver'], s=marker_size,
                  ax=ax, show=False)
plot.legend().remove()

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/umap_ifngEC_newCTs.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig(f'{path_figs}/avidity/umap_ifngEC_newCTs.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Number of cells per clone in `adata_d`, i.e. in whichever subset the most
# recently executed cell assigned to that name.
adata_d.obs['clone_id'].value_counts()

### All Avidity

In [None]:
# UMAP coloured by log10 IFNg EC50 for all clones except those in `new_cts`.
# Requires the obs column 'log_score_IFNg_EC50' to exist already.
log_scores = adata.obs['log_score_IFNg_EC50']
marker_size = len(adata)/1200*2

fig, ax = plt.subplots(1, 1, figsize=(5.75, 5))
# Uncoloured embedding of all cells as backdrop.
sc.pl.umap(adata, ax=ax, show=False)

adata_d = adata[~adata.obs['clone_id'].isin(new_cts)]

# Scored cells; colour limits span the score range of the whole data set.
has_score = adata_d.obs['score_IFNg_EC50'].notna()
sc.pl.umap(adata_d[has_score], color='log_score_IFNg_EC50', cmap='rocket_r', s=marker_size,
           vmin=log_scores.min(), vmax=log_scores.max(),
           ax=ax, show=False)

# Unreactive cells are drawn last, in silver.
is_unreactive = adata_d.obs['reactivity']=='unreactive'
plot = sc.pl.umap(adata_d[is_unreactive], color='reactivity', palette=['silver'], s=marker_size,
                  ax=ax, show=False)
plot.legend().remove()

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/umap_ifngEC_woNew.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig(f'{path_figs}/avidity/umap_ifngEC_woNew.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# UMAP coloured by log10 IFNg EC50 for every scored cell (no clone filter).
# Requires the obs column 'log_score_IFNg_EC50' to exist already.
log_scores = adata.obs['log_score_IFNg_EC50']
marker_size = len(adata)/1200*2

fig, ax = plt.subplots(1, 1, figsize=(5.75, 5))
# Uncoloured embedding of all cells as backdrop.
sc.pl.umap(adata, ax=ax, show=False)

# No subsetting here: `adata_d` is simply another name for the full object.
adata_d = adata

has_score = adata_d.obs['score_IFNg_EC50'].notna()
sc.pl.umap(adata_d[has_score], color='log_score_IFNg_EC50', cmap='rocket_r', s=marker_size,
           vmin=log_scores.min(), vmax=log_scores.max(),
           ax=ax, show=False)

# Unreactive cells are drawn last, in silver.
is_unreactive = adata_d.obs['reactivity']=='unreactive'
plot = sc.pl.umap(adata_d[is_unreactive], color='reactivity', palette=['silver'], s=marker_size,
                  ax=ax, show=False)
plot.legend().remove()

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/umap_ifngEC.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig(f'{path_figs}/avidity/umap_ifngEC.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# One UMAP per listed obs column, showing only the cells that have a value for it.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'

for gene in ['log_score_IFNg_EC50']:
    adata_tmp = adata[(adata.obs[gene].notna())]
    # Sparse selections (< 500 cells) get larger dots.
    size = 15 if len(adata_tmp) < 500 else 5

    # Backdrop first, then the coloured cells on the same axes.
    plot = sc.pl.umap(adata, show=False)
    plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap,
                      s=len(adata)/12000*size, ax=plot, show=False)

    plt.tight_layout()
    plt.savefig(f'../../figures/dextramer/paper/umap_{gene}.pdf', bbox_inches='tight', dpi=dpi)
    plt.savefig(f'../../figures/dextramer/paper/umap_{gene}.png', bbox_inches='tight', dpi=dpi)
    plt.show()

In [None]:
# Pooled avidity UMAP: every cell with a reactivity call, coloured by `gene`;
# unreactive cells are overdrawn in silver.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

dot_size = len(adata)/12000*size
# Colour limits span the score range of the whole data set.
score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())

adata_tmp = adata[adata.obs['reactivity'].notna()]
is_unreactive = adata_tmp.obs['reactivity']=='unreactive'

# Layers: backdrop -> score -> unreactive cells.
plot = sc.pl.umap(adata, show=False)
plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap, s=dot_size,
                  ax=plot, show=False, **score_limits)
plot = sc.pl.umap(adata_tmp[is_unreactive], color='reactivity', palette=['silver'],
                  s=dot_size, ax=plot, show=False)
plot.legend().remove()
plot.set_title('LTD-YLQ all times')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Pooled avidity UMAP without donor 'HIM': cells with a reactivity call coloured
# by `gene`; unreactive cells are overdrawn in silver.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

dot_size = len(adata)/12000*size
# Colour limits span the score range of the whole data set (including 'HIM').
score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())

adata_tmp = adata[adata.obs['reactivity'].notna()]
not_him = adata_tmp.obs['donor']!='HIM'
is_unreactive = adata_tmp.obs['reactivity']=='unreactive'

# Layers: backdrop -> score -> unreactive cells.
plot = sc.pl.umap(adata, show=False)
plot = sc.pl.umap(adata_tmp[not_him], color=gene, color_map=cmap, s=dot_size,
                  ax=plot, show=False, **score_limits)
plot = sc.pl.umap(adata_tmp[is_unreactive & not_him], color='reactivity', palette=['silver'],
                  s=dot_size, ax=plot, show=False)
plot.legend().remove()
plot.set_title('LTD-YLQ all times')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled_woHIM.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled_woHIM.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Pooled avidity UMAP without the clones in `new_cts`: cells with a reactivity
# call coloured by `gene`; unreactive cells are overdrawn in silver.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

dot_size = len(adata)/12000*size
# Colour limits span the score range of the whole data set.
score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())

has_call = adata.obs['reactivity'].notna()
known_clone = ~adata.obs['clone_id'].isin(new_cts)
adata_tmp = adata[has_call & known_clone]
is_unreactive = adata_tmp.obs['reactivity']=='unreactive'

# Layers: backdrop -> score -> unreactive cells.
plot = sc.pl.umap(adata, show=False)
plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap, s=dot_size,
                  ax=plot, show=False, **score_limits)
plot = sc.pl.umap(adata_tmp[is_unreactive], color='reactivity', palette=['silver'],
                  s=dot_size, ax=plot, show=False)
plot.legend().remove()
plot.set_title('LTD-YLQ all times - wo New clones')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled_woNewClones.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled_woNewClones.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Pooled avidity UMAP without the clones in `new_cts` and without donor 'HIM':
# cells with a reactivity call coloured by `gene`; unreactive cells in silver.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

dot_size = len(adata)/12000*size
# Colour limits span the score range of the whole data set.
score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())

has_call = adata.obs['reactivity'].notna()
known_clone = ~adata.obs['clone_id'].isin(new_cts)
adata_tmp = adata[has_call & known_clone]
not_him = adata_tmp.obs['donor']!='HIM'
is_unreactive = adata_tmp.obs['reactivity']=='unreactive'

# Layers: backdrop -> score -> unreactive cells.
plot = sc.pl.umap(adata, show=False)
plot = sc.pl.umap(adata_tmp[not_him], color=gene, color_map=cmap, s=dot_size,
                  ax=plot, show=False, **score_limits)
plot = sc.pl.umap(adata_tmp[is_unreactive & not_him], color='reactivity', palette=['silver'],
                  s=dot_size, ax=plot, show=False)
plot.legend().remove()
plot.set_title('LTD-YLQ all times - wo New clones / HIM')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled_woNewClones_woHIM.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_pooled_woNewClones_woHIM.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Avidity UMAP for donor 'HIM' only: cells with a reactivity call coloured by
# `gene`; unreactive cells are overdrawn in silver.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

dot_size = len(adata)/12000*size
# Colour limits span the score range of the whole data set, not just 'HIM'.
score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())

has_call = adata.obs['reactivity'].notna()
is_him = adata.obs['donor']=='HIM'
adata_tmp = adata[has_call & is_him]
is_unreactive = adata_tmp.obs['reactivity']=='unreactive'

# Layers: backdrop -> score -> unreactive cells.
plot = sc.pl.umap(adata, show=False)
plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap, s=dot_size,
                  ax=plot, show=False, **score_limits)
plot = sc.pl.umap(adata_tmp[is_unreactive], color='reactivity', palette=['silver'],
                  s=dot_size, ax=plot, show=False)
plot.legend().remove()
plot.set_title('HIM')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_him.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_him.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Avidity UMAP restricted to the clones in `new_cts`, coloured by `gene`;
# unreactive cells are overdrawn in silver. No reactivity filter is applied here.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

dot_size = len(adata)/12000*size
# Colour limits span the score range of the whole data set.
score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())

adata_tmp = adata[adata.obs['clone_id'].isin(new_cts)]
is_unreactive = adata_tmp.obs['reactivity']=='unreactive'

# Layers: backdrop -> score -> unreactive cells.
plot = sc.pl.umap(adata, show=False)
plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap, s=dot_size,
                  ax=plot, show=False, **score_limits)
plot = sc.pl.umap(adata_tmp[is_unreactive], color='reactivity', palette=['silver'],
                  s=dot_size, ax=plot, show=False)
plot.legend().remove()
plot.set_title('New Clones')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_newCTs.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_newCTs.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Row of UMAPs (one per time point) for LTDEMIAQY-binding cells, coloured by
# `gene` on a scale limited to the LTDEMIAQY-binding cells.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

binds_ltd = adata.obs['binding_ct']=='LTDEMIAQY'
# Shared colour limits, taken from all LTDEMIAQY-binding cells.
ltd_scores = adata[binds_ltd].obs[gene]
score_limits = dict(vmin=ltd_scores.min(), vmax=ltd_scores.max())
dot_size = len(adata)/12000*size
# Time-independent part of the selection.
selectable = adata.obs['reactivity'].notna() & (~adata.obs['clone_id'].isin(new_cts)) & binds_ltd

fig, axes = plt.subplots(1, len(time_order), figsize=(5*len(time_order), 5))

for i, t in enumerate(time_order):
    ax = axes[i]
    adata_tmp = adata[selectable & (adata.obs['time']==t)]

    # Layers: backdrop -> score -> unreactive cells (order matters for ax.collections).
    plot = sc.pl.umap(adata, show=False, ax=ax)
    plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap, s=dot_size,
                      ax=plot, show=False, **score_limits)
    is_unreactive = adata_tmp.obs['reactivity']=='unreactive'
    plot = sc.pl.umap(adata_tmp[is_unreactive], color='reactivity', palette=['silver'],
                      s=dot_size, ax=plot, show=False)
    plot.legend().remove()

    plot.set_title(t)
    plot.set_xlabel(None)
    plot.set_ylabel(None)

    # Only the panel with index 7 keeps the colour bar of the score layer.
    if i != 7:
        ax.collections[1].colorbar.remove()

fig.suptitle('LTD longitudinal')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_LTD_longitudinal.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_LTD_longitudinal.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Row of UMAPs (one per time point) for LTDEMIAQY-binding cells, coloured by
# `gene` on the score range of the whole data set ("full scale").
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())
dot_size = len(adata)/12000*size
# Time-independent part of the selection.
selectable = (adata.obs['reactivity'].notna()
              & (~adata.obs['clone_id'].isin(new_cts))
              & (adata.obs['binding_ct']=='LTDEMIAQY'))

fig, axes = plt.subplots(1, len(time_order), figsize=(5*len(time_order), 5))

for i, t in enumerate(time_order):
    ax = axes[i]
    adata_tmp = adata[selectable & (adata.obs['time']==t)]

    # Layers: backdrop -> score -> unreactive cells (order matters for ax.collections).
    plot = sc.pl.umap(adata, show=False, ax=ax)
    plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap, s=dot_size,
                      ax=plot, show=False, **score_limits)
    is_unreactive = adata_tmp.obs['reactivity']=='unreactive'
    plot = sc.pl.umap(adata_tmp[is_unreactive], color='reactivity', palette=['silver'],
                      s=dot_size, ax=plot, show=False)
    plot.legend().remove()

    plot.set_title(t)
    plot.set_xlabel(None)
    plot.set_ylabel(None)

    # Only the panel with index 7 keeps the colour bar of the score layer.
    if i != 7:
        ax.collections[1].colorbar.remove()

fig.suptitle('LTD longitudinal')

plt.tight_layout()
plt.savefig('../../figures/dextramer/paper/umap_avidity_LTD_longitudinal_fullScale.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig('../../figures/dextramer/paper/umap_avidity_LTD_longitudinal_fullScale.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# One stand-alone UMAP figure per time point for LTDEMIAQY-binding cells,
# coloured by `gene` on the score range of the whole data set.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25
gene = 'log_score_IFNg_EC50'

score_limits = dict(vmin=adata.obs[gene].min(), vmax=adata.obs[gene].max())
dot_size = len(adata)/12000*size
# Time-independent part of the selection.
selectable = (adata.obs['reactivity'].notna()
              & (~adata.obs['clone_id'].isin(new_cts))
              & (adata.obs['binding_ct']=='LTDEMIAQY'))

for i, t in enumerate(time_order):
    # The figure with index 7 keeps its colour bar and is therefore slightly wider.
    figsize = (5.25, 5) if i == 7 else (5, 5)
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    adata_tmp = adata[selectable & (adata.obs['time']==t)]

    # Layers: backdrop -> score -> silver overlay (order matters for ax.collections).
    plot = sc.pl.umap(adata, show=False, ax=ax)
    plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap, s=dot_size,
                      ax=plot, show=False, **score_limits)
    # The silver overlay covers unreactive cells and, additionally, clone '1276.0'.
    silver_cells = (adata_tmp.obs['reactivity']=='unreactive') | (adata_tmp.obs['clone_id']=='1276.0')
    plot = sc.pl.umap(adata_tmp[silver_cells], color='reactivity', palette=['silver'],
                      s=dot_size, ax=plot, show=False)
    plot.legend().remove()

    plot.set_title(t)
    plot.set_xlabel(None)
    plot.set_ylabel(None)

    if i != 7:
        ax.collections[1].colorbar.remove()

    plt.tight_layout()
    plt.savefig(f'../../figures/dextramer/paper/umap_avidity_LTD_longitudinal_fullScale_{t}.pdf', bbox_inches='tight', dpi=dpi)
    plt.savefig(f'../../figures/dextramer/paper/umap_avidity_LTD_longitudinal_fullScale_{t}.png', bbox_inches='tight', dpi=dpi)
    plt.show()

In [None]:
# Row of UMAPs (one per time point) for YLQPRTFLL-binding cells, coloured by
# `gene` on a scale limited to the YLQPRTFLL-binding cells.
# The output file carries no 'fullScale' suffix, so the colour limits are the
# epitope-specific ones, mirroring the LTD figure of the same name pattern.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25

gene = 'log_score_IFNg_EC50'

binds_ylq = adata.obs['binding_ct']=='YLQPRTFLL'
# Loop-invariant values: shared colour limits, dot size and the
# time-independent part of the cell selection.
ylq_scores = adata.obs.loc[binds_ylq, gene]
score_min, score_max = ylq_scores.min(), ylq_scores.max()
dot_size = len(adata)/12000*size
selectable = adata.obs['reactivity'].notna() & (~adata.obs['clone_id'].isin(new_cts)) & binds_ylq
last_panel = len(time_order) - 1

fig, axes = plt.subplots(1, len(time_order), figsize=(5*len(time_order), 5))

for i, t in enumerate(time_order):
    ax = axes[i]
    adata_tmp = adata[selectable & (adata.obs['time']==t)]

    # Layers: backdrop -> score -> unreactive cells (order matters for ax.collections).
    plot = sc.pl.umap(adata, show=False, ax=ax)
    plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap,
                      show=False, s=dot_size, ax=plot,
                      vmin=score_min, vmax=score_max)
    plot = sc.pl.umap(adata_tmp[adata_tmp.obs['reactivity']=='unreactive'], color='reactivity',
                      s=dot_size, palette=['silver'],
                      ax=plot, show=False)
    plot.legend().remove()

    plot.set_title(t)
    plot.set_xlabel(None)
    plot.set_ylabel(None)

    # Only the last panel keeps the colour bar of the score layer.
    if i != last_panel:
        ax.collections[1].colorbar.remove()

fig.suptitle('YLQ longitudinal')

plt.tight_layout()
plt.savefig(f'../../figures/dextramer/paper/umap_avidity_YLQ_longitudinal.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig(f'../../figures/dextramer/paper/umap_avidity_YLQ_longitudinal.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# Row of UMAPs (one per time point) for YLQPRTFLL-binding cells, coloured by
# `gene` on the score range of the whole data set.
# The output file is named '..._fullScale', so the colour limits come from all
# cells, mirroring the LTD 'fullScale' figure.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25

gene = 'log_score_IFNg_EC50'

# Loop-invariant values: shared colour limits, dot size and the
# time-independent part of the cell selection.
score_min, score_max = adata.obs[gene].min(), adata.obs[gene].max()
dot_size = len(adata)/12000*size
selectable = (adata.obs['reactivity'].notna()
              & (~adata.obs['clone_id'].isin(new_cts))
              & (adata.obs['binding_ct']=='YLQPRTFLL'))
last_panel = len(time_order) - 1

fig, axes = plt.subplots(1, len(time_order), figsize=(5*len(time_order), 5))

for i, t in enumerate(time_order):
    ax = axes[i]
    adata_tmp = adata[selectable & (adata.obs['time']==t)]

    # Layers: backdrop -> score -> unreactive cells (order matters for ax.collections).
    plot = sc.pl.umap(adata, show=False, ax=ax)
    plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap,
                      show=False, s=dot_size, ax=plot,
                      vmin=score_min, vmax=score_max)
    plot = sc.pl.umap(adata_tmp[adata_tmp.obs['reactivity']=='unreactive'], color='reactivity',
                      s=dot_size, palette=['silver'],
                      ax=plot, show=False)
    plot.legend().remove()

    plot.set_title(t)
    plot.set_xlabel(None)
    plot.set_ylabel(None)

    # Only the last panel keeps the colour bar of the score layer.
    if i != last_panel:
        ax.collections[1].colorbar.remove()

fig.suptitle('YLQ longitudinal')

plt.tight_layout()
plt.savefig(f'../../figures/dextramer/paper/umap_avidity_YLQ_longitudinal_fullScale.pdf', bbox_inches='tight', dpi=dpi)
plt.savefig(f'../../figures/dextramer/paper/umap_avidity_YLQ_longitudinal_fullScale.png', bbox_inches='tight', dpi=dpi)
plt.show()

In [None]:
# One stand-alone UMAP figure per time point for YLQPRTFLL-binding cells,
# coloured by `gene` on the score range of the whole data set.
# The output files are named '..._fullScale_<time>', so the colour limits come
# from all cells, as in the per-time LTD figures.
rcParams['figure.figsize'] = (5.5, 5)
cmap = 'RdYlBu'
size = 25

gene = 'log_score_IFNg_EC50'

# Loop-invariant values: shared colour limits, dot size and the
# time-independent part of the cell selection.
score_min, score_max = adata.obs[gene].min(), adata.obs[gene].max()
dot_size = len(adata)/12000*size
selectable = (adata.obs['reactivity'].notna()
              & (~adata.obs['clone_id'].isin(new_cts))
              & (adata.obs['binding_ct']=='YLQPRTFLL'))
last_panel = len(time_order) - 1

for i, t in enumerate(time_order):
    # The last figure keeps its colour bar and is therefore slightly wider.
    fig, ax = plt.subplots(1, 1, figsize=(5, 5) if i != last_panel else (5.25, 5))
    adata_tmp = adata[selectable & (adata.obs['time']==t)]

    # Layers: backdrop -> score -> unreactive cells (order matters for ax.collections).
    plot = sc.pl.umap(adata, show=False, ax=ax)
    plot = sc.pl.umap(adata_tmp, color=gene, color_map=cmap,
                      show=False, s=dot_size, ax=plot,
                      vmin=score_min, vmax=score_max)
    plot = sc.pl.umap(adata_tmp[adata_tmp.obs['reactivity']=='unreactive'], color='reactivity',
                      s=dot_size, palette=['silver'],
                      ax=plot, show=False)
    plot.legend().remove()

    plot.set_title(t)
    plot.set_xlabel(None)
    plot.set_ylabel(None)

    if i != last_panel:
        ax.collections[1].colorbar.remove()

    plt.tight_layout()
    plt.savefig(f'../../figures/dextramer/paper/umap_avidity_YLQ_longitudinal_fullScale_{t}.pdf', bbox_inches='tight', dpi=dpi)
    plt.savefig(f'../../figures/dextramer/paper/umap_avidity_YLQ_longitudinal_fullScale_{t}.png', bbox_inches='tight', dpi=dpi)
    plt.show()
    # Release the figure when the backend does not do so on show().
    plt.close(fig)

## Genes

In [None]:
# Strip plots of gene expression / signature scores against each level column,
# per time point (plus all time points pooled) and donor group.
rcParams['figure.figsize'] = (6, 4)

# Entries containing 'score' are read from adata.obs, all others from adata.X.
features = ['IFNG', 'MKI67',
            'IFN Response_score', 'Proliferation_score', 'CD8 Cytotoxic_score', 'CD8 Cytokine_score'
           ] + kegg_scores

for level in levels:
    for time in ['allTime'] + time_order:
        for donors in [['A04', 'A08', 'A15'], ['A29'], ['HIM']]:
            # The cell selection does not depend on the plotted feature, so it is built once.
            adata_tmp = adata[adata.obs[level].notna()
                             & adata.obs['donor'].isin(donors)
                             & (~adata.obs['clone_id'].isin(new_cts))]
            if time != 'allTime':
                adata_tmp = adata_tmp[adata_tmp.obs['time']==time]
            if len(adata_tmp) == 0:
                continue

            for gene in features:
                if 'score' in gene:
                    gene_vals = adata_tmp.obs[gene].values.reshape(-1)
                else:
                    # `.A` is no longer available on SciPy sparse matrices; densify
                    # explicitly and accept an already dense X as well.
                    expr = adata_tmp[:, gene].X
                    if sparse.issparse(expr):
                        expr = expr.toarray()
                    gene_vals = np.asarray(expr).reshape(-1)
                df_tmp = pd.DataFrame({
                    level: adata_tmp.obs[level].tolist(),
                    gene: gene_vals})

                # Explicit figure per plot so consecutive plots never share axes.
                fig, ax = plt.subplots()
                plot = sb.stripplot(data=df_tmp, y=gene, x=level, palette=palette_level,
                                    order=['low', 'mid', 'high'], ax=ax)
                plot.set_title(','.join(donors) + ' ' + time)
                plt.tight_layout()
                plt.savefig(f'{path_figs}/avidity/scores/{gene}_over_{level}_{",".join(donors)}_{time}.pdf',
                            bbox_inches='tight', dpi=300,)
                plt.show()
                plt.close(fig)

## Expansion level

In [None]:
# Expansion level per cell, derived from the donor-wise clone size of clones
# that bind one of the two epitopes:
#   clone_size_donor == 1 -> 'low expanding', 2-4 -> 'mid expanding', > 4 -> 'high expanding'.
# Cells of other clones keep NaN.
# The column is created with object dtype so that the string labels can be
# written without upcasting a float column.
adata.obs['level_exp'] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
for donor in ['A04', 'A08', 'A15', 'A29', 'HIM']:
    is_donor = adata.obs['donor']==donor
    # One row per (clone, size) among the epitope-binding cells of this donor.
    cts_count = adata[
        adata.obs['binding_ct'].isin(['LTDEMIAQY', 'YLQPRTFLL'])
        & is_donor
    ].obs[['clone_id', 'clone_size_donor']].drop_duplicates().reset_index(drop=True)
    # Drop the placeholder id 'nan' and two explicitly excluded clones.
    cts_count = cts_count[cts_count['clone_id']!='nan']
    cts_count = cts_count[~cts_count['clone_id'].isin(['11964.0', '8191.0'])]

    clone_size = cts_count['clone_size_donor']
    cts_high = cts_count[clone_size>4]['clone_id'].values.tolist()
    cts_low = cts_count[clone_size==1]['clone_id'].values.tolist()
    cts_mid = cts_count[(clone_size>1) & (clone_size<5)]['clone_id'].values.tolist()

    # Labels are written to every cell of the donor that carries such a clone,
    # whether or not the individual cell binds an epitope.
    adata.obs.loc[is_donor & (adata.obs['clone_id'].isin(cts_high)), 'level_exp'] = 'high expanding'
    adata.obs.loc[is_donor & (adata.obs['clone_id'].isin(cts_mid)), 'level_exp'] = 'mid expanding'
    adata.obs.loc[is_donor & (adata.obs['clone_id'].isin(cts_low)), 'level_exp'] = 'low expanding'
adata.obs['level_exp'].value_counts()

In [None]:
# Expansion level per cell based on the number of cells per clone at time point
# 'T1' (epitope-binding cells only):
#   1 cell -> 'low expanding', 2-4 -> 'mid expanding', > 4 -> 'high expanding'.
# Epitope-binding cells whose clone gets none of these labels are 'non expanding';
# all remaining cells keep NaN.
# The column is created with object dtype so that the string labels can be
# written without upcasting a float column.
adata.obs['level_expT1'] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
for donor in ['A04', 'A08', 'A15', 'A29', 'HIM']:
    is_donor = adata.obs['donor']==donor
    binds_epitope = adata.obs['binding_ct'].isin(['LTDEMIAQY', 'YLQPRTFLL'])

    # Cells per clone at T1 for this donor, ignoring the placeholder id 'nan'.
    cts_count = adata[
        binds_epitope
        & is_donor
        & (adata.obs['clone_id']!='nan')
        & (adata.obs['time']=='T1')
    ].obs['clone_id'].value_counts()
    cts_count = cts_count[~cts_count.index.isin(['11964.0', '8191.0'])]

    cts_high = cts_count[cts_count>4].index.tolist()
    cts_low = cts_count[cts_count==1].index.values.tolist()
    cts_mid = cts_count[(cts_count>1)
                        & (cts_count<5)].index.values.tolist()

    # Default for epitope-binding cells; overwritten below for clones seen at T1.
    # NOTE(review): cells with clone_id 'nan' are not excluded here and therefore
    # end up as 'non expanding' -- confirm that this is intended.
    adata.obs.loc[is_donor & binds_epitope
                  & (~adata.obs['clone_id'].isin(['11964.0', '8191.0'])),
                  'level_expT1'] = 'non expanding'
    adata.obs.loc[is_donor & (adata.obs['clone_id'].isin(cts_low)), 'level_expT1'] = 'low expanding'
    adata.obs.loc[is_donor & (adata.obs['clone_id'].isin(cts_high)), 'level_expT1'] = 'high expanding'
    adata.obs.loc[is_donor & (adata.obs['clone_id'].isin(cts_mid)), 'level_expT1'] = 'mid expanding'

adata.obs['level_expT1'].value_counts()

In [None]:
# obs columns holding the expansion levels; iterated by the cells below.
levels_exp = ['level_exp', 'level_expT1']

### DEG 

In [None]:
def _deg_table(adata_ranked, groupby):
    """Collect gene names, scores and adjusted p-values of a `rank_genes_groups` run into one table.

    One triple of columns ('<group>_gene', '<group>_scores', '<group>_pValsAdj')
    is produced per value of `adata_ranked.obs[groupby]`; columns are sorted by name.
    """
    ranked = adata_ranked.uns['rank_genes_groups']
    columns = {}
    for cluster in adata_ranked.obs[groupby].unique():
        columns[f'{cluster}_gene'] = ranked['names'][cluster].tolist()
        columns[f'{cluster}_scores'] = ranked['scores'][cluster].tolist()
        columns[f'{cluster}_pValsAdj'] = ranked['pvals_adj'][cluster].tolist()
    table = pd.DataFrame(columns)
    return table[sorted(table.columns.tolist())]


# The HVG selection does not depend on the level column, so it is computed once.
sc.pp.highly_variable_genes(adata, n_top_genes=5000)

for level in levels_exp:
    for donors in [['A04', 'A08', 'A15'], ['HIM'], ['A29']]:
        if 'HIM' in donors and level == 'level_expT1':
            continue

        # Explicit copy: rank_genes_groups writes its result into `.uns`, which
        # would otherwise force an implicit copy of the view.
        # NOTE(review): rank_genes_groups uses `adata.raw` by default when it is
        # set -- confirm that the HVG restriction takes effect as intended.
        keep_cells = adata.obs['donor'].isin(donors) & adata.obs[level].notna()
        adata_tmp = adata[keep_cells, adata.var['highly_variable']].copy()
        sc.tl.rank_genes_groups(adata_tmp, groupby=level)

        df_degs = _deg_table(adata_tmp, level)
        df_degs.to_csv(f'../../results/dextramer/CD8/DEGs/deg_gex_{",".join(donors)}_{level}.csv')

        # Same test on the CLR-normalised surface-protein values stored in obs;
        # cells without a value in the first CITE column are dropped.
        adata_cite = ann.AnnData(X=adata_tmp.obs[[f'clr_{el}' for el in cite_ids]
                                            ].values, obs=adata_tmp.obs[[level] + cite_ids.tolist()])
        adata_cite.var_names = cite_ids
        adata_cite = adata_cite[~adata_cite.obs[cite_ids[0]].isna()].copy()

        # Best effort: a failing CITE test must not abort the remaining
        # combinations, but the skipped combination is reported.
        try:
            sc.tl.rank_genes_groups(adata_cite, groupby=level)
            df_degs = _deg_table(adata_cite, level)
            df_degs.to_csv(f'../../results/dextramer/CD8/DEGs/deg_cite_{",".join(donors)}_{level}.csv')
        except Exception as e:
            print(f'CITE DEG skipped for {",".join(donors)} / {level}: {e}')


### UMAP

In [None]:
# Colour per expansion level, picked by index from seaborn's 'muted' palette.
muted_colors = sb.color_palette('muted')
palette_level_exp = {
    f'{name} expanding': muted_colors[idx]
    for name, idx in [('high', 2), ('mid', 0), ('low', 3), ('non', 8)]
}

In [None]:
# Grid of UMAPs per expansion-level column: rows are donor groups (`cats`),
# columns are the time points of `time_order`.
cats = [['A04'], ['A08'], ['A15'], ['A29'], ['HIM'], ['A04', 'A08', 'A15']] 

for level in levels_exp:
    fig, axes = plt.subplots(len(cats), len(time_order), figsize=(5*len(time_order), 5*len(cats)))

    for j, donors in enumerate(cats):
        for i, time in enumerate(time_order):
            ax = axes[j][i]
            # Uncoloured embedding of all cells as backdrop.
            sc.pl.umap(adata, ax=ax, show=False)

            # Cells of this donor group and time point that have a value for `level`.
            adata_tmp = adata[(adata.obs['donor'].isin(donors))
                              & (adata.obs['time']==time) 
                              & (adata.obs[level].notna())]

            # Panels without matching cells only show the backdrop.
            if len(adata_tmp) > 0:
                sc.pl.umap(adata_tmp, color=level, ax=ax, show=False, 
                           size=len(adata)/1200*5, palette=palette_level_exp,
                          )
                #if i != 4:
                    #ax.collections[1].colorbar.remove()
            # Labels/titles are cleared here and re-set for the first row/column below.
            ax.set_ylabel(None)
            ax.set_xlabel(None)
            ax.set_title(None)  
            # NOTE(review): 7 is the index of the last entry of `time_order` and 4 the
            # index of ['HIM'] in `cats`. With `and`, the legend is kept on every panel
            # of the last column AND every panel of the 'HIM' row -- confirm that this
            # is intended. `ax.legend()` builds a legend that is removed right away.
            if i != 7 and j != 4:
                ax.legend().remove()

    # Column headers: time point.
    for i, time in enumerate(time_order):
        axes[0][i].set_title(time)

    # Row labels: donor group.
    for j, donors in enumerate(cats):
        axes[j][0].set_ylabel(','.join(donors))

    fig.suptitle(level)

    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/umap_{level}_over_donorTime.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/umap_{level}_over_donorTime.png', bbox_inches='tight', dpi=300,)
    plt.show()

### Scores

In [None]:
# Strip plots of gene expression / signature scores against each expansion-level
# column, per time point (plus all time points pooled) and donor group.
rcParams['figure.figsize'] = (8, 4)

# Entries containing 'score' are read from adata.obs, all others from adata.X.
features = ['IFNG', 'MKI67',
            'IFN Response_score', 'Proliferation_score', 'CD8 Cytotoxic_score', 'CD8 Cytokine_score'
           ] + kegg_scores

for level in levels_exp:
    # x-axis categories: fixed order, limited to labels present in this column.
    order = [el for el in ['high expanding', 'mid expanding', 'low expanding', 'non expanding']
             if el in adata.obs[level].values]

    for time in ['allTime'] + time_order:
        for donors in [['A04', 'A08', 'A15'], ['A29'], ['HIM']]:
            if level == 'level_expT1' and 'HIM' in donors:
                continue

            # The cell selection does not depend on the plotted feature, so it is built once.
            adata_tmp = adata[adata.obs[level].notna()
                             & adata.obs['donor'].isin(donors)]
            if time != 'allTime':
                adata_tmp = adata_tmp[adata_tmp.obs['time']==time]
            if len(adata_tmp) == 0:
                continue

            for gene in features:
                if 'score' in gene:
                    gene_vals = adata_tmp.obs[gene].values.reshape(-1)
                else:
                    # `.A` is no longer available on SciPy sparse matrices; densify
                    # explicitly and accept an already dense X as well.
                    expr = adata_tmp[:, gene].X
                    if sparse.issparse(expr):
                        expr = expr.toarray()
                    gene_vals = np.asarray(expr).reshape(-1)
                df_tmp = pd.DataFrame({
                    level: adata_tmp.obs[level].tolist(),
                    gene: gene_vals})

                # Explicit figure per plot so consecutive plots never share axes.
                fig, ax = plt.subplots()
                plot = sb.stripplot(data=df_tmp, y=gene, x=level, palette=palette_level_exp,
                                    order=order, ax=ax)
                plot.set_title(','.join(donors) + ' ' + time)
                plt.tight_layout()
                plt.savefig(f'{path_figs}/avidity/scores_exp/{gene}_over_{level}_{",".join(donors)}_{time}.pdf',
                            bbox_inches='tight', dpi=300,)
                plt.show()
                plt.close(fig)

## Avidity vs Cluster 3

In [None]:
def _fraction_in_cluster_by_rank(obs, ranked_clones, cluster):
    """Fraction of cells in ``cluster`` along a clonotype ranking.

    Parameters
    ----------
    obs : per-cell table with 'clone_id' and 'leiden_CD8' columns.
    ranked_clones : clone ids in ranking order.
    cluster : Leiden cluster label to count.

    Returns
    -------
    (fractions_top, fractions_ct) : two lists aligned with ``ranked_clones``.
        ``fractions_top[i]`` is the fraction over all cells of the first i+1 clonotypes,
        ``fractions_ct[i]`` the fraction over the cells of clonotype i alone.
    """
    in_cluster = (obs['leiden_CD8'] == cluster).values
    # Running membership mask: avoids building a new AnnData view for every prefix.
    in_top = np.zeros(len(obs), dtype=bool)
    fractions_top, fractions_ct = [], []
    for ct in ranked_clones:
        is_ct = (obs['clone_id'] == ct).values
        in_top |= is_ct
        fractions_top.append(in_cluster[in_top].sum() / in_top.sum())
        fractions_ct.append(in_cluster[is_ct].sum() / is_ct.sum())
    return fractions_top, fractions_ct


# One row per clonotype with a measured score (donor HIM excluded), sorted ascending by
# log_score_IFNg_EC50; the row position is the clonotype's rank ("top X").
df_tmp = adata[adata.obs['log_score_IFNg_EC50'].notna()
              & (adata.obs['donor']!='HIM')
              ].obs[['clone_id', 'log_score_IFNg_EC50']].drop_duplicates()
df_tmp = df_tmp.sort_values('log_score_IFNg_EC50')
df_tmp = df_tmp.set_index('clone_id')

# Cluster 8 is only needed for the line plot, so it lives in its own frame.
fractions_8, fractions_8_ct = _fraction_in_cluster_by_rank(adata.obs, df_tmp.index, '8')
df_cluster8 = df_tmp.copy()
df_cluster8['fraction_8'] = fractions_8
df_cluster8['fraction_8_ct'] = fractions_8_ct
df_cluster8['top_x'] = range(1, len(df_cluster8)+1)

plot = sb.lineplot(data=df_cluster8, y='fraction_8', x='top_x', label='Cluster 8', color='tab:blue')

fractions_top, fraction_ct = _fraction_in_cluster_by_rank(adata.obs, df_tmp.index, '3')
df_tmp['fraction_3'] = fractions_top
df_tmp['fraction_3_ct'] = fraction_ct
df_tmp['top_x'] = range(1, len(df_tmp)+1)

plot = sb.lineplot(data=df_tmp, y='fraction_3', x='top_x', label='Cluster 3', color='tab:orange')

sb.despine(ax=plot)
plot.grid(False)
plot.set_ylabel('Fraction per cluster')
plot.set_xlabel('in Top X Clonotypes')
plot.set_ylim((0, plot.get_ylim()[1]))

# Show "Cluster 3" before "Cluster 8" in the legend (the lines were drawn 8 first).
handles, labels = plot.get_legend_handles_labels()
order = [1, 0]
plot.legend([handles[idx] for idx in order], [labels[idx] for idx in order])

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/avidityRankedClones_fraction38.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/avidityRankedClones_fraction38.png', bbox_inches='tight', dpi=300,)
plt.show()

# Group the ranked clonotypes into consecutive bins of 10 (labels 0.1, 0.2, ...).
df_tmp['bin'] = ((df_tmp['top_x']-1) // 10 + 1)/10

plot = sb.stripplot(data=df_tmp, y='fraction_3_ct', x='bin', color='firebrick')
plot = sb.boxplot(data=df_tmp, y='fraction_3_ct', x='bin', color='white')
sb.despine(ax=plot)
plot.grid(False)
plot.set_ylabel('Fraction in Cluster 3')
plot.set_xlabel('Avidity bins')

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/avidityBinnedClones_fraction3_ct.pdf', bbox_inches='tight', dpi=300,)
plt.savefig(f'{path_figs}/avidity/avidityBinnedClones_fraction3_ct.png', bbox_inches='tight', dpi=300,)
plt.show()

In [None]:
rcParams['figure.figsize'] = (5, 5)
# One row per clonotype with a measured score (donor HIM excluded), sorted ascending by
# log_score_IFNg_EC50; the row position is the clonotype's rank ("top X").
df_tmp = adata[adata.obs['log_score_IFNg_EC50'].notna()
              & (adata.obs['donor']!='HIM')
              ].obs[['clone_id', 'log_score_IFNg_EC50']].drop_duplicates()
df_tmp = df_tmp.sort_values('log_score_IFNg_EC50')
df_tmp = df_tmp.set_index('clone_id')

# Clusters 7, 9, 11 and 12 are not drawn; their cells still count in the denominator,
# so the stacked fractions need not reach 1.
clusters_plotted = [cluster for cluster in adata.obs['leiden_CD8'].unique()
                    if cluster not in ['7', '9', '11', '12']]
in_cluster = {cluster: (adata.obs['leiden_CD8'] == cluster).values for cluster in clusters_plotted}
fractions_top = {cluster: [] for cluster in clusters_plotted}

# Grow one membership mask clonotype by clonotype instead of slicing `adata` for every
# (cluster, prefix) pair.
in_top = np.zeros(len(adata.obs), dtype=bool)
for ct in df_tmp.index:
    in_top |= (adata.obs['clone_id'] == ct).values
    n_top = in_top.sum()
    for cluster in clusters_plotted:
        fractions_top[cluster].append(in_cluster[cluster][in_top].sum() / n_top)

for cluster in clusters_plotted:
    df_tmp[f'fraction_{cluster}'] = fractions_top[cluster]
df_tmp['top_x'] = range(1, len(df_tmp)+1)
df_tmp_cum = df_tmp[[f'fraction_{el}' for el in range(0, 13) if f'fraction_{el}' in df_tmp.columns] + ['top_x']].copy()

# A zero baseline column in front lets every band be drawn between two adjacent columns.
df_tmp_cum['-1'] = 0.0
df_tmp_cum = df_tmp_cum[['-1'] + df_tmp_cum.columns[:-1].tolist()]

# Running sum across the fraction columns: each column becomes the upper edge of its band.
for i, col in enumerate(df_tmp_cum.columns[1:-1]):
    df_tmp_cum[col] = df_tmp_cum[col] + df_tmp_cum[df_tmp_cum.columns[i]]
df_tmp_cum.columns = [el if not 'fraction' in el else el.split('_')[1] for el in df_tmp_cum.columns]


fig, plot = plt.subplots(1, 1, figsize=(5,5))
for i, col in enumerate(df_tmp_cum.columns[1:-1]):
    x = df_tmp_cum['top_x']
    y1 = df_tmp_cum[df_tmp_cum.columns[i]]
    y2 = df_tmp_cum[col]
    plot.fill_between(x, y1, y2, interpolate=True, color=colors_leiden[col], alpha=1, label=col)
sb.despine(ax=plot)
plot.grid(False)
plot.legend(bbox_to_anchor=(1, 0.5), loc='center left', borderaxespad=0)
plot.set_ylabel('Fraction per cluster')
plot.set_xlabel('in Top X Clonotypes')
plot.set_ylim((0, plot.get_ylim()[1]))
plot.set_xlim((1, plot.get_xlim()[1]))

plt.tight_layout()
plt.savefig(f'{path_figs}/avidity/avidityRankedClones_fractionArea.pdf', bbox_inches='tight', dpi=dpi,)
plt.savefig(f'{path_figs}/avidity/avidityRankedClones_fractionArea.png', bbox_inches='tight', dpi=dpi,)
plt.show()

## IFN_seumois

In [None]:
# Bar plot of the `ifn_seumois` column per Leiden cluster (seaborn's default bar height is
# the mean), coloured with the palette stored in adata.uns.
rcParams['figure.figsize'] = (8, 4)
sb.barplot(data=adata.obs, x='leiden_CD8', y='ifn_seumois', palette=adata.uns['leiden_CD8_colors'])
plt.show()

In [None]:
# MKI67 expression per Leiden cluster, donor HIM excluded.
df_tmp = adata.obs.copy()
# `.toarray()` replaces the `.A` shortcut (removed from recent SciPy sparse matrices);
# a dense X is passed through unchanged.
mki67 = adata[:, ['MKI67']].X
mki67 = mki67.toarray() if sparse.issparse(mki67) else np.asarray(mki67)
df_tmp['MKI67'] = mki67.reshape(-1)
df_tmp = df_tmp[df_tmp['donor']!='HIM']
sb.barplot(data=df_tmp, y='MKI67', x='leiden_CD8', palette=adata.uns['leiden_CD8_colors'])
plt.show()

In [None]:
# Show the four junction amino-acid columns (VJ 1/2 and VDJ 1/2) for every cell of clonotype 1690.0.
adata[adata.obs['clone_id']=='1690.0'].obs[['IR_VJ_1_junction_aa', 'IR_VJ_2_junction_aa',
                                           'IR_VDJ_1_junction_aa', 'IR_VDJ_2_junction_aa',]]

In [None]:
# UMAP coloured by Leiden cluster and by the `ifn_seumois` column, one panel each.
rcParams['figure.figsize'] = (5, 5)
sc.pl.umap(adata, color=['leiden_CD8', 'ifn_seumois'])

## Detection Pattern over time

In [None]:
from matplotlib.lines import Line2D

In [None]:
# Donor groups; each group gets its own figure with one UMAP panel per time point.
cats = [['A04', 'A08', 'A15'], ['A29']] 

palette_detect = sb.color_palette('muted')
palette_detect = {
    'persisting': palette_detect[2],
    'early': palette_detect[0],
    'late': palette_detect[3],
    'non-reactive': palette_detect[4],
    'not-expressed': palette_detect[5],
}

for donors in cats:
    fig, axes = plt.subplots(1, len(time_order_wo_X), figsize=(5*len(time_order_wo_X), 5))
    for ax, time in zip(axes, time_order_wo_X):
        # All cells as an uncoloured background layer.
        sc.pl.umap(adata, ax=ax, show=False)

        adata_tmp = adata[(adata.obs['donor'].isin(donors))
                          & (adata.obs['time']==time) 
                          & (adata.obs['detection_pattern'].notna())]

        if len(adata_tmp) > 0:
            sc.pl.umap(adata_tmp, color='detection_pattern', ax=ax, show=False, 
                       size=len(adata)/1200*3, palette=palette_detect,
                      )
        ax.set_ylabel(None)
        ax.set_xlabel(None)
        ax.set_title(time)
        # Every per-panel legend is dropped (the former `i != 7 and j != 4` guard was always
        # true for this 1-row layout); one shared legend is added below.
        ax.legend().remove()

    # Shared legend on the last panel, built from the palette so all categories are listed
    # even if some are absent at the last time point.
    handles = [Line2D([0], [0], marker='o', color='w', label=label, 
                  markersize=10, markerfacecolor=color) for label, color in palette_detect.items()]
    axes[-1].legend(handles=handles)

    fig.suptitle(','.join(donors))

    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/umap_detectionPattern_over_time_{"".join(donors)}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/umap_detectionPattern_over_time_{"".join(donors)}.png', bbox_inches='tight', dpi=300,)
    plt.show()

### Full detection pattern

In [None]:
# Presence/absence table: one row per clonotype, one boolean column per time point
# (cells whose clone_id is the string 'nan' are ignored).
obs_with_clone = adata[adata.obs['clone_id']!='nan'].obs
cells_per_time = obs_with_clone.groupby('clone_id')['time'].value_counts().unstack()
cts_occurance = (cells_per_time > 0)[time_order_wo_X]

# Early = seen at any of P1/S1-S3, late = seen at any of T1-T3.
seen_early = cts_occurance[['P1', 'S1', 'S2', 'S3']].any(axis=1)
seen_late = cts_occurance[['T1', 'T2', 'T3']].any(axis=1)
cts_occurance['occurs_early'] = seen_early
cts_occurance['occurs_late'] = seen_late

# Mutually exclusive detection patterns derived from the two flags.
cts_occurance['early'] = seen_early & ~seen_late
cts_occurance['late'] = ~seen_early & seen_late
cts_occurance['persisting'] = seen_early & seen_late
cts_occurance

In [None]:
# Start from an all-missing *object* column: writing strings into a float64 NaN column is
# deprecated in recent pandas and only works through an implicit upcast.
adata.obs['detection_pattern_full'] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
for p in ['early', 'late', 'persisting']:
    cts_pattern = cts_occurance[cts_occurance[p]].index
    adata.obs.loc[adata.obs['clone_id'].isin(cts_pattern), 'detection_pattern_full'] = p

# Cells without dextramer binding get no detection pattern.
adata.obs.loc[adata.obs['binding_ct']=='No binding', 'detection_pattern_full'] = np.nan
adata.obs['detection_pattern_full'].value_counts()

In [None]:
# Donor groups; each group gets its own figure with one UMAP panel per time point.
cats = [['A04', 'A08', 'A15'], ['A29']] 

palette_detect = sb.color_palette('muted')
palette_detect = {
    'persisting': palette_detect[2],
    'early': palette_detect[0],
    'late': palette_detect[3],
}

for donors in cats:
    # Only cells binding the epitope chosen for this donor group are highlighted.
    epitope = 'YLQPRTFLL' if 'A29' in donors else 'LTDEMIAQY' 
    fig, axes = plt.subplots(1, len(time_order_wo_X), figsize=(5*len(time_order_wo_X), 5))
    for ax, time in zip(axes, time_order_wo_X):
        # All cells as an uncoloured background layer.
        sc.pl.umap(adata, ax=ax, show=False)

        adata_tmp = adata[(adata.obs['donor'].isin(donors))
                          & (adata.obs['time']==time) 
                          & (adata.obs['detection_pattern_full'].notna())
                         & (adata.obs['binding_ct']==epitope)]

        if len(adata_tmp) > 0:
            sc.pl.umap(adata_tmp, color='detection_pattern_full', ax=ax, show=False, 
                       size=len(adata)/1200*3, palette=palette_detect,
                      )
        ax.set_ylabel(None)
        ax.set_xlabel(None)
        ax.set_title(time)
        # Every per-panel legend is dropped (the former `i != 7 and j != 4` guard was always
        # true for this 1-row layout); one shared legend is added below.
        ax.legend().remove()

    # Shared legend on the last panel, built from the palette so all categories are listed.
    handles = [Line2D([0], [0], marker='o', color='w', label=label, 
                  markersize=10, markerfacecolor=color) for label, color in palette_detect.items()]
    axes[-1].legend(handles=handles)

    fig.suptitle(','.join(donors))

    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/umap_detectionPattern_full_over_time_{"".join(donors)}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/umap_detectionPattern_full_over_time_{"".join(donors)}.png', bbox_inches='tight', dpi=300,)
    plt.show()

## DEG

In [None]:
def _deg_table(adata_deg, groupby):
    """Collect the rank_genes_groups result of ``adata_deg`` into one wide table.

    For every group of ``adata_deg.obs[groupby]`` the table holds the ranked names, scores and
    adjusted p-values in columns ``<group>_gene``, ``<group>_scores`` and ``<group>_pValsAdj``;
    columns are sorted alphabetically.
    """
    ranked = adata_deg.uns['rank_genes_groups']
    dict_deg = {}
    for cluster in adata_deg.obs[groupby].unique():
        dict_deg[f'{cluster}_gene'] = ranked['names'][cluster].tolist()
        dict_deg[f'{cluster}_scores'] = ranked['scores'][cluster].tolist()
        dict_deg[f'{cluster}_pValsAdj'] = ranked['pvals_adj'][cluster].tolist()
    df_degs = pd.DataFrame(dict_deg)
    return df_degs[sorted(df_degs.columns.tolist())]


# The highly variable genes depend only on `adata`, not on the level: select them once.
sc.pp.highly_variable_genes(adata, n_top_genes=5000)

for level in levels:
    for woHIM in ['allDonors', 'woHIM']:
        adata_tmp = adata[:, adata.var['highly_variable']]
        if woHIM == 'woHIM':
            adata_tmp = adata_tmp[adata_tmp.obs['donor']!='HIM']
        # Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the
        # view into a copy implicitly (with a warning).
        adata_tmp = adata_tmp[adata_tmp.obs[level].notna()].copy()
        sc.tl.rank_genes_groups(adata_tmp, groupby=level)

        df_degs = _deg_table(adata_tmp, level)
        df_degs.to_csv(f'../../results/dextramer/CD8/DEGs/deg_gex_{woHIM}_{level}.csv')


        # Same test on the CLR-transformed surface-protein columns stored in .obs, restricted
        # to cells where the first CITE column is not missing.
        adata_cite = ann.AnnData(X=adata_tmp.obs[[f'clr_{el}' for el in cite_ids]
                                            ].values, obs=adata_tmp.obs[[level] + cite_ids.tolist()])
        adata_cite.var_names = cite_ids
        adata_cite = adata_cite[~adata_cite.obs[cite_ids[0]].isna()].copy()

        try:
            sc.tl.rank_genes_groups(adata_cite, groupby=level)

            df_degs = _deg_table(adata_cite, level)
            df_degs.to_csv(f'../../results/dextramer/CD8/DEGs/deg_cite_{woHIM}_{level}.csv')
        except Exception as e:
            # Best effort: keep going with the next level, but say which combination failed.
            print(f'CITE DEGs skipped for {woHIM}/{level}: {e}')


## DEGs

In [None]:
def rank_genes_groups_2_csv(adata_deg, title):
    """Write the stored rank_genes_groups result of ``adata_deg`` to a CSV file.

    For every group of the column the test was grouped by, four columns are written:
    ``<group>_gene``, ``<group>_scores``, ``<group>_pValsAdj`` and ``<group>_lfc``.
    Columns are sorted alphabetically and the file is saved as
    ``../../results/dextramer/CD8/DEGs/<title>.csv``.
    """
    # (key in the result, column suffix in the CSV)
    fields = (
        ('names', 'gene'),
        ('scores', 'scores'),
        ('pvals_adj', 'pValsAdj'),
        ('logfoldchanges', 'lfc'),
    )
    ranked = adata_deg.uns['rank_genes_groups']
    groupby = ranked['params']['groupby']

    table = pd.DataFrame({
        f'{cluster}_{suffix}': ranked[key][cluster].tolist()
        for cluster in adata_deg.obs[groupby].unique()
        for key, suffix in fields
    })
    table = table[sorted(table.columns.tolist())]
    table.to_csv(f'../../results/dextramer/CD8/DEGs/{title}.csv')

In [None]:
# Square default figure size for the plots created after this cell.
rcParams['figure.figsize'] = (5, 5)

In [None]:
# AnnData whose "genes" are the CLR-transformed surface-protein columns taken from adata.obs.
adata_cite = ann.AnnData(X=adata.obs[[f'clr_{el}' for el in cite_ids]
                                            ].values, obs=adata.obs)
adata_cite.var_names = cite_ids
# Keep cells where the first CITE column is not missing. The explicit copy makes the result
# a real object, so the .obs assignment below does not modify a view.
adata_cite = adata_cite[~adata_cite.obs[cite_ids[0]].isna()].copy()
# The raw CITE columns are redundant with X; drop them from the annotation.
adata_cite.obs = adata_cite.obs[[el for el in adata.obs.columns if el not in cite_ids]]

### Cite top5 IFNG

In [None]:
# Point colours of the volcano plot: grey = not significant, blue = significantly lower,
# red = significantly higher in the tested group.
palette_vulcano = {
    'not_sig': 'silver',
    'left': 'dodgerblue',
    'right': 'firebrick'
}

def plot_vulcano(df_degs, cat):
    """Draw a volcano plot for group ``cat`` of a rank_genes_groups result.

    ``df_degs`` is the stored result (indexed by 'names', 'logfoldchanges' and 'pvals_adj',
    each indexed by group). Adjusted p-values are floored at 1e-11 before the -log10
    transform. A feature is coloured as significant when its adjusted p-value is at most
    0.05 and its absolute log fold change is at least 1.5. The plot is drawn on the current
    axes; the table used for plotting is returned.
    """
    volcano = pd.DataFrame(
        np.column_stack([df_degs['logfoldchanges'][cat], df_degs['pvals_adj'][cat]]),
        columns=['logfoldchange', 'pvals_adj'],
        index=df_degs['names'][cat],
    )
    volcano['pvals_adj'] = volcano['pvals_adj'].clip(lower=1e-11)
    volcano['log_padj'] = - np.log10(volcano['pvals_adj'])

    lfc = volcano['logfoldchange']
    not_significant = (volcano['pvals_adj'] > 0.05) | (lfc.abs() < 1.5)
    volcano['color'] = np.select([not_significant, lfc < 0], ['not_sig', 'left'], default='right')

    axis = sb.scatterplot(data=volcano, x='logfoldchange', y='log_padj', hue='color',
                          palette=palette_vulcano, s=15, edgecolor='none')
    axis.legend().remove()
    sb.despine(ax=axis)
    axis.grid(False)
    axis.set_ylabel('-log$_{10}$(p-value$_{adj}$)')
    axis.set_xlabel('log$_{2}$(Fold-Change)')
    axis.set_title(f'{cat} vs Rest')
    return volcano

In [None]:
# Surface-protein markers per avidity level (top-5 grouping), cells without a level removed.
# Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the view
# into a copy implicitly (with a warning).
adata_cite_tmp = adata_cite[adata_cite.obs['level_IFNg_EC50_top5'].notna()].copy()
sc.tl.rank_genes_groups(adata_cite_tmp, groupby='level_IFNg_EC50_top5')
rank_genes_groups_2_csv(adata_cite_tmp, 'deg_cite_level_IFNg_EC50_top5')

for group in ['high', 'low', 'mid']:
    plot_vulcano(adata_cite_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_cite_ifng_top5_{group}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_cite_ifng_top5_{group}.png', bbox_inches='tight', dpi=300,)
    plt.show()
adata_cite_tmp.obs['level_IFNg_EC50_top5'].value_counts()

### Cite IFNG PopTop5

In [None]:
# Surface-protein markers per avidity level (population top-5 grouping).
# Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the view
# into a copy implicitly (with a warning).
adata_cite_tmp = adata_cite[adata_cite.obs['level_IFNg_EC50_population_top5'].notna()].copy()
sc.tl.rank_genes_groups(adata_cite_tmp, groupby='level_IFNg_EC50_population_top5')
rank_genes_groups_2_csv(adata_cite_tmp, 'deg_cite_level_IFNg_EC50_popTop5')

for group in ['high', 'low', 'mid']:
    plot_vulcano(adata_cite_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_cite_ifng_popTop5_{group}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_cite_ifng_popTop5_{group}.png', bbox_inches='tight', dpi=300,)
    plt.show()
    
adata_cite_tmp.obs['level_IFNg_EC50_population_top5'].value_counts()

### DEG IFNG Top5

In [None]:
# Flag the 5000 most variable genes in adata.var['highly_variable']; the gene-expression
# DEG cells in this section subset on that column.
sc.pp.highly_variable_genes(adata, n_top_genes=5000)

In [None]:
# Gene markers per avidity level (top-5 grouping) among the highly variable genes.
# Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the view
# into a copy implicitly (with a warning).
adata_tmp = adata[adata.obs['level_IFNg_EC50_top5'].notna(), adata.var['highly_variable']].copy()
sc.tl.rank_genes_groups(adata_tmp, groupby='level_IFNg_EC50_top5')
rank_genes_groups_2_csv(adata_tmp, 'deg_gex_level_IFNg_EC50_top5')

for group in ['high', 'low', 'mid']:
    plot_vulcano(adata_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_Top5_{group}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_Top5_{group}.png', bbox_inches='tight', dpi=300,)
    plt.show()
adata_tmp.obs['level_IFNg_EC50_top5'].value_counts()

### DEG IFNG Top5 - high vs low

In [None]:
# Direct comparison of the 'high' and 'low' avidity levels (top-5 grouping).
# Explicit copy: the .obs assignment and rank_genes_groups below both modify the object,
# which a view only allows through an implicit copy (with a warning).
adata_tmp = adata[adata.obs['level_IFNg_EC50_top5'].isin(['high', 'low']), adata.var['highly_variable']].copy()
# Plain strings, so only the two remaining levels are treated as groups.
adata_tmp.obs['level_IFNg_EC50_top5'] = adata_tmp.obs['level_IFNg_EC50_top5'].astype(str)
sc.tl.rank_genes_groups(adata_tmp, groupby='level_IFNg_EC50_top5')
rank_genes_groups_2_csv(adata_tmp, 'deg_gex_level_IFNg_EC50_top5_highLow')

for group in ['high', 'low']:
    plot_vulcano(adata_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_Top5_{group}_highLow.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_Top5_{group}_highLow.png', bbox_inches='tight', dpi=300,)
    plt.show()
adata_tmp.obs['level_IFNg_EC50_top5'].value_counts()

### DEG IFNG PopTop5

In [None]:
# Gene markers per avidity level (population top-5 grouping) among the highly variable genes.
# Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the view
# into a copy implicitly (with a warning).
adata_tmp = adata[adata.obs['level_IFNg_EC50_population_top5'].notna(), adata.var['highly_variable']].copy()
sc.tl.rank_genes_groups(adata_tmp, groupby='level_IFNg_EC50_population_top5')
rank_genes_groups_2_csv(adata_tmp, 'deg_gex_level_IFNg_EC50_popTop5')

for group in ['high', 'mid', 'low']:
    plot_vulcano(adata_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_popTop5_{group}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_popTop5_{group}.png', bbox_inches='tight', dpi=300,)
    plt.show()
adata_tmp.obs['level_IFNg_EC50_population_top5'].value_counts()

### DEG IFNG PopTop5 - high vs low

In [None]:
# Direct comparison of the 'high' and 'low' avidity levels (population top-5 grouping).
# Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the view
# into a copy implicitly (with a warning).
adata_tmp = adata[adata.obs['level_IFNg_EC50_population_top5'].isin(['high', 'low']), adata.var['highly_variable']].copy()
sc.tl.rank_genes_groups(adata_tmp, groupby='level_IFNg_EC50_population_top5')
rank_genes_groups_2_csv(adata_tmp, 'deg_gex_level_IFNg_EC50_popTop5_highLow')

for group in ['high', 'low']:
    plot_vulcano(adata_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_popTop5_{group}_highLow.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_gex_ifng_popTop5_{group}_highLow.png', bbox_inches='tight', dpi=300,)
    plt.show()
adata_tmp.obs['level_IFNg_EC50_population_top5'].value_counts()

### DEG expanded in T1 

In [None]:
### DEG expanded in T1 - every expansion level vs rest

# Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the view
# into a copy implicitly (with a warning).
adata_tmp = adata[adata.obs['level_expT1'].notna(), adata.var['highly_variable']].copy()
sc.tl.rank_genes_groups(adata_tmp, groupby='level_expT1')
rank_genes_groups_2_csv(adata_tmp, 'deg_gex_t1_expansion')

for group in ['non expanding', 'high expanding', 'low expanding', 'mid expanding']:
    plot_vulcano(adata_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_gex_t1_expansion_{group}.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_gex_t1_expansion_{group}.png', bbox_inches='tight', dpi=300,)
    plt.show()

adata_tmp.obs['level_expT1'].value_counts()

### DEG expanded in T1 - high vs low

In [None]:
# Direct comparison of 'high expanding' and 'low expanding' clonotypes.
# Explicit copy: rank_genes_groups writes into .uns, which would otherwise turn the view
# into a copy implicitly (with a warning).
adata_tmp = adata[adata.obs['level_expT1'].isin(['high expanding', 'low expanding']), adata.var['highly_variable']].copy()
sc.tl.rank_genes_groups(adata_tmp, groupby='level_expT1')
rank_genes_groups_2_csv(adata_tmp, 'deg_gex_t1_expansion_highLow')

for group in ['high expanding', 'low expanding']:
    plot_vulcano(adata_tmp.uns['rank_genes_groups'], group)
    
    plt.tight_layout()
    plt.savefig(f'{path_figs}/avidity/deg_gex_t1_expansion_{group}_highLow.pdf', bbox_inches='tight', dpi=300,)
    plt.savefig(f'{path_figs}/avidity/deg_gex_t1_expansion_{group}_highLow.png', bbox_inches='tight', dpi=300,)
    plt.show()

adata_tmp.obs['level_expT1'].value_counts()

## TCRdist

In [None]:
import sys
sys.path.append('..')

import utils.tcrdist as tcrdist

In [None]:
# Distinct clonotype rows for the TCRdist input: cells with a clonotype (clone_id is not the
# string 'nan') that bind a dextramer.
tcr_columns = ['clone_id', 'clonotype_sequence', 'v_genes', 'j_genes', 'binding_ct']
is_binding_clone = (adata.obs['clone_id']!='nan') & (adata.obs['binding_ct']!='No binding')
df_all_cts = adata.obs.loc[is_binding_clone, tcr_columns].drop_duplicates()

# 'clonotype_sequence' and 'v_genes' each hold two space-separated entries, stored here as
# the a/b columns.
cdr3_parts = df_all_cts['clonotype_sequence'].str.split(' ', expand=True)
v_gene_parts = df_all_cts['v_genes'].str.split(' ', expand=True)
df_all_cts[['cdr3_a_aa', 'cdr3_b_aa']] = cdr3_parts
df_all_cts[['v_a_gene', 'v_b_gene']] = v_gene_parts
# NOTE(review): 'count' is filled with the clone id, not a cell count - confirm this is what
# utils.tcrdist expects.
df_all_cts['count'] = df_all_cts['clone_id']

# Append the allele suffix '*01' to both V gene names.
for v_col in ('v_a_gene', 'v_b_gene'):
    df_all_cts[v_col] = df_all_cts[v_col] + '*01'

In [None]:
# Per-clonotype annotation table, built by outer-joining one piece of information at a time
# on the clone id; it is passed as `annotation` when the TCRdist graphs are built below.
anno = pd.DataFrame(index=df_all_cts['clone_id'])

anno = pd.concat([anno, # binding_ct
                adata[adata.obs['binding_ct']!='No binding'
                     ].obs[['clone_id', 'binding_ct']].drop_duplicates().set_index('clone_id'),
                 ], axis=1, join='outer')

anno = pd.concat([anno, # total number cells
                  pd.DataFrame(adata.obs.groupby('clone_id')['clone_id_size'].max()),
                 ], axis=1, join='outer')

df_t1 = pd.DataFrame(adata[adata.obs['time']=='T1'].obs['clone_id'].value_counts())
df_t1.columns = ['clone_size_T1']
anno = pd.concat([anno, # number of cells at T1
                  df_t1], axis=1, join='outer')

anno = pd.concat([anno, # detection pattern
                adata[adata.obs['detection_pattern_full'].notna()
                     ].obs[['clone_id', 'detection_pattern_full']].drop_duplicates().set_index('clone_id'),
                 ], axis=1, join='outer')

anno = pd.concat([anno, # reactivity
                adata[adata.obs['reactivity'].notna()
                     ].obs[['clone_id', 'reactivity']].drop_duplicates().set_index('clone_id'),
                 ], axis=1, join='outer')

# `donor_2_avidity` is defined outside this section; presumably a mapping from donor to a
# table of avidity scores indexed by clone id - TODO confirm. Only the first row per index
# value is kept.
df_avidity_full = pd.concat(donor_2_avidity.values())[['score_IFNg_EC50']].drop_duplicates()
df_avidity_full = df_avidity_full[~df_avidity_full.index.duplicated(keep='first')]

anno = pd.concat([anno, # avidity
                  df_avidity_full], axis=1, join='outer')

anno = pd.concat([anno, # pseudotime
                  pd.DataFrame(adata.obs.groupby('clone_id')['dpt_pseudotime'].max()),
                 ], axis=1, join='outer')

# The outer joins may have added clonotypes that are not in df_all_cts; drop them again.
anno = anno[anno.index.isin(df_all_cts['clone_id'])]
# log10 versions of the size and score columns.
anno['log10_clone_id_size'] = np.log10(anno['clone_id_size'])
anno['log10_clone_size_T1'] = np.log10(anno['clone_size_T1'])
anno['log10_score_IFNg_EC50'] = np.log10(anno['score_IFNg_EC50'])
anno.head()

### DEX+ cells

In [None]:
# TCR distance graph over all dextramer-binding clonotypes, saved as a GEXF file.
# NOTE(review): 'both' presumably selects alpha and beta chain, and 120 looks like the
# distance threshold for an edge - confirm both in utils.tcrdist.
path_graph = '../../results/dextramer/CD8/graphs'
df_dists_both = tcrdist.get_tcr_distance(df_all_cts, 'both')
graph_dex = tcrdist.dists_to_graph(df_dists_both, 120, percentile=100, min_compenents=2, 
                                    annotation=anno, path_save=f'{path_graph}/paper_dex_positive.gexf')

In [None]:
# Value range of each numeric annotation over the clonotypes that are nodes of the graph.
cts_graph = list(graph_dex.nodes)
anno_graph = anno.loc[anno.index.isin(cts_graph)]
print('Column:\t\t\tMin\t-\tMax')
range_columns = ('log10_score_IFNg_EC50', 'dpt_pseudotime', 'log10_clone_id_size', 'log10_clone_size_T1')
for col in range_columns:
    col_values = anno_graph[col]
    print(f'{col}:\t{col_values.min()}\t-\t{col_values.max()}')

### LTD cells

In [None]:
# Clonotypes binding LTDEMIAQY and their annotation rows.
binds_ltd = df_all_cts['binding_ct']=='LTDEMIAQY'
df_ltd = df_all_cts.loc[binds_ltd].copy()
anno_ltd = anno.loc[anno.index.isin(df_ltd['clone_id'])].copy()

In [None]:
# TCR distance graph restricted to the LTDEMIAQY-binding clonotypes, saved as a GEXF file.
# Note that `graph_dex` is overwritten with this graph.
path_graph = '../../results/dextramer/CD8/graphs'
df_dists_both = tcrdist.get_tcr_distance(df_ltd, 'both')
graph_dex = tcrdist.dists_to_graph(df_dists_both, 120, percentile=100, min_compenents=1, 
                                    annotation=anno_ltd, path_save=f'{path_graph}/paper_ltd_positive.gexf')

### YLQ cells

In [None]:
# Clonotypes binding YLQPRTFLL and their annotation rows.
binds_ylq = df_all_cts['binding_ct']=='YLQPRTFLL'
df_ylq = df_all_cts.loc[binds_ylq].copy()
anno_ylq = anno.loc[anno.index.isin(df_ylq['clone_id'])].copy()

In [None]:
# TCR distance graph restricted to the YLQPRTFLL-binding clonotypes, saved as a GEXF file.
# Note that `graph_dex` is overwritten with this graph.
path_graph = '../../results/dextramer/CD8/graphs'
df_dists_both = tcrdist.get_tcr_distance(df_ylq, 'both')
graph_dex = tcrdist.dists_to_graph(df_dists_both, 120, percentile=100, min_compenents=1, 
                                    annotation=anno_ylq, path_save=f'{path_graph}/paper_ylq_positive.gexf')

In [None]:
# Number of distinct donors in the data set.
adata.obs['donor'].nunique()

## Tables Avidity Groups

In [None]:
# Hand-curated clone ids, grouped by the label used in the 'avidity_level_hand' column
# (initial vs. new set of tested clonotypes).
# NOTE(review): '772.0' is listed twice under 'high', and '11964.0' and '8191.0' appear under
# both 'high' and 'low'. The labelling cell that follows reads 'low' and 'non-reactive' but
# never the 'high' list.
initial_tested = {
    'high': ['646.0', '772.0', '99.0', '201.0', '718.0', '6012.0', '841.0', '10748.0', 
             '11195.0', '6319.0', '11155.0', '5918.0', '11949.0', '7100.0', '7808.0', 
             '832.0', '225.0', '11085.0', '297.0', '11751.0', '7287.0', '7734.0', '7156.0', 
             '11151.0', '8915.0', '11439.0', '6928.0', '6844.0', '10832.0', '6254.0', 
             '7094.0', '4160.0', '6808.0', '6265.0', '5469.0', '6302.0', '5251.0', '5458.0', 
             '5170.0', '8172.0', '8483.0', '3936.0', '11870.0', '8395.0', '6993.0', '7758.0',
             '7911.0', '386.0', '66.0', '772.0', '277.0', '11253.0', '509.0', '6743.0', '5245.0',
             '5576.0', '4545.0', '763.0', '684.0', '1735.0', '6755.0', '6548.0', '10813.0', 
             '545.0', '2722.0', '3155.0', '4386.0', '489.0', '317.0', '7894.0', '11964.0',
             '52.0', '5766.0', '691.0', '6467.0', '10749.0', '6688.0', '6579.0', '6140.0', 
             '3695.0', '9825.0', '323.0', '11445.0', '9961.0', '7411.0', '8191.0', '5226.0', 
             '2495.0', '272.0', '2058.0', '1650.0', '41.0', '219.0', '1361.0', '1701.0', '2997.0', 
             '475.0', '5546.0', '1276.0'],
    'low': ['11964.0', '8191.0'],
    'non-reactive': ['10623.0', '2123.0', '4350.0', '9973.0'],
}
# The six ids below are the same ones listed in `new_cts` at the top of the notebook.
new_tested = {
    'high': ['9130.0', '11251.0'],
    'non-reactive': ['15599.0', '18755.0', '19327.0', '20205.0'],
}

In [None]:
# Start from an all-missing *object* column: writing strings into a float64 NaN column is
# deprecated in recent pandas and only works through an implicit upcast.
adata.obs['avidity_level_hand'] = pd.Series(np.nan, index=adata.obs.index, dtype=object)
# Every cell with a measured score starts as 'initial_high'; the more specific labels below
# overwrite it, so the order of these assignments matters.
adata.obs.loc[adata.obs['score_IFNg_EC50'].notna(), 'avidity_level_hand'] = 'initial_high'
adata.obs.loc[adata.obs['clone_id'].isin(initial_tested['low']), 'avidity_level_hand'] = 'initial_low'
adata.obs.loc[adata.obs['clone_id'].isin(initial_tested['non-reactive']), 'avidity_level_hand'] = 'initial_non-reactive'
adata.obs.loc[adata.obs['clone_id'].isin(new_tested['high']), 'avidity_level_hand'] = 'new_high'
adata.obs.loc[adata.obs['clone_id'].isin(new_tested['non-reactive']), 'avidity_level_hand'] = 'new_non-reactive'

adata.obs['avidity_level_hand'].value_counts()

In [None]:
# Cells per hand-assigned avidity label within each Leiden cluster (missing combinations -> 0).
level_counts = adata.obs.groupby('leiden_CD8')['avidity_level_hand'].value_counts()
df_tmp = level_counts.unstack().fillna(0.0)
df_tmp.to_csv('../../results/dextramer/CD8/avidity_level_hand_by_leiden.csv')
df_tmp

In [None]:
# Same table as above, computed without the cells of donor HIM.
obs_wo_him = adata[adata.obs['donor']!='HIM'].obs
level_counts = obs_wo_him.groupby('leiden_CD8')['avidity_level_hand'].value_counts()
df_tmp = level_counts.unstack().fillna(0.0)
df_tmp.to_csv('../../results/dextramer/CD8/avidity_level_hand_by_leiden_woHIM.csv')
df_tmp

### Tables Dex+

In [None]:
# Number of cells per value of `binding_ct`, saved as CSV and displayed.
df_tmp = pd.DataFrame(adata.obs['binding_ct'].value_counts())
df_tmp.to_csv('../../results/dextramer/CD8/cellnumbers_binding.csv')
df_tmp

In [None]:
# Number of distinct clone ids per value of `binding_ct`, largest first.
pd.DataFrame(adata.obs.groupby('binding_ct')['clone_id'].nunique()).sort_values('clone_id', ascending=False)

In [None]:
# Number of cells per value of `binding_ct_unmatched`.
pd.DataFrame(adata.obs['binding_ct_unmatched'].value_counts())

In [None]:
# Number of distinct clone ids per value of `binding_ct_unmatched`, largest first.
pd.DataFrame(adata.obs.groupby('binding_ct_unmatched')['clone_id'].nunique()).sort_values('clone_id', ascending=False)

## Statistical Testing

In [None]:
import scipy.stats as stats

In [None]:
# Distinct (score, cluster, clonotype) rows: a clonotype contributes once to every cluster
# in which it has cells.
scored_obs = adata[adata.obs['score_IFNg_EC50'].notna()].obs
df_tmp = scored_obs[['score_IFNg_EC50', 'leiden_CD8', 'clone_id']].drop_duplicates()

# Mann-Whitney U test: scores in cluster 5 against the scores in all other clusters.
in_cluster5 = df_tmp['leiden_CD8']=='5'
values_cluster5 = df_tmp.loc[in_cluster5, 'score_IFNg_EC50'].values
values_rest = df_tmp.loc[~in_cluster5, 'score_IFNg_EC50'].values
stats.mannwhitneyu(values_cluster5, values_rest)

In [None]:
# Sample size of the cluster-5 group used in the test above.
values_cluster5.shape

In [None]:
# Sample size of the comparison group (all other clusters).
values_rest.shape

In [None]:
# Distinct (score, cluster, clonotype) rows: a clonotype contributes once to every cluster
# in which it has cells.
scored_obs = adata[adata.obs['score_IFNg_EC50'].notna()].obs
df_tmp = scored_obs[['score_IFNg_EC50', 'leiden_CD8', 'clone_id']].drop_duplicates()

# Mann-Whitney U test: scores in cluster 3 against the scores in all other clusters.
in_cluster3 = df_tmp['leiden_CD8']=='3'
values_cluster3 = df_tmp.loc[in_cluster3, 'score_IFNg_EC50'].values
values_rest = df_tmp.loc[~in_cluster3, 'score_IFNg_EC50'].values
stats.mannwhitneyu(values_cluster3, values_rest)

In [None]:
# Overall mean score (printed) followed by the mean score per Leiden cluster (displayed).
print(df_tmp['score_IFNg_EC50'].mean())
df_tmp.groupby('leiden_CD8')['score_IFNg_EC50'].mean()

In [None]:
# Display the table of distinct (score, cluster, clonotype) rows used in the tests above.
df_tmp

In [None]:
import statsmodels.stats.multitest as mt

In [None]:
# Distinct (log score, cluster, clonotype) rows.
log_scored_obs = adata[adata.obs['log_score_IFNg_EC50'].notna()].obs
df_tmp = log_scored_obs[['log_score_IFNg_EC50', 'leiden_CD8', 'clone_id']].drop_duplicates()

# For every cluster (vs. all other clusters): p-values of the Shapiro-Wilk normality test
# for both groups and of Levene's test for equal variances.
assumption_rows = []
for c in df_tmp['leiden_CD8'].unique():
    in_cluster = df_tmp['leiden_CD8']==c
    values_cluster = df_tmp.loc[in_cluster, 'log_score_IFNg_EC50'].values
    values_rest = df_tmp.loc[~in_cluster, 'log_score_IFNg_EC50'].values

    # Shapiro-Wilk is only run on groups with at least three values.
    if len(values_cluster)> 2:
        p_normal_group = stats.shapiro(values_cluster)[1]
    else:
        p_normal_group = np.nan
    p_normal_rest = stats.shapiro(values_rest)[1]
    p_levene = stats.levene(values_cluster, values_rest)[1]
    assumption_rows.append((c, p_normal_group, p_normal_rest, p_levene))

results = pd.DataFrame(assumption_rows, columns=[
    'leiden_CD8',
    'shapiro-wikl-normality_group',
    'shapiro-wikl-normality_background',
    'levene_homogeneity',
])
results

In [None]:
# Kernel density estimate of the log scores, as a visual check of their distribution.
sb.kdeplot(df_tmp['log_score_IFNg_EC50'])

The assumption of normality was violated for the background data, both for the log-transformed scores and for the untransformed scores (not shown). We therefore use the Mann–Whitney U test.

In [None]:
# Distinct (score, cluster, clonotype) rows.
scored_obs = adata[adata.obs['score_IFNg_EC50'].notna()].obs
df_tmp = scored_obs[['score_IFNg_EC50', 'leiden_CD8', 'clone_id']].drop_duplicates()

# Mann-Whitney U test of each selected cluster against all other clusters.
test_rows = []
for c in ['3', '5']:
    in_cluster = df_tmp['leiden_CD8']==c
    values_cluster = df_tmp.loc[in_cluster, 'score_IFNg_EC50'].values
    values_rest = df_tmp.loc[~in_cluster, 'score_IFNg_EC50'].values
    u_stat, pval = stats.mannwhitneyu(values_cluster, values_rest)
    test_rows.append((c, u_stat, pval))

results = pd.DataFrame(test_rows, columns=['leiden_CD8', 'u-stats', 'p-values'])
results = results[results['p-values'].notna()]
# Bonferroni correction over the tests that produced a p-value.
results['p-values_corr'] = mt.multipletests(results['p-values'].values, alpha=0.05, method='bonferroni')[1]
results

In [None]:
# Number of rows (distinct score/clonotype entries) per Leiden cluster in the tested table.
df_tmp['leiden_CD8'].value_counts()