In [None]:
import scanpy as sc
from glob import glob
import pandas as pd
import numpy as np
import seaborn as sns
import anndata
import scipy
import re
import os
import matplotlib
from vquest import *
from Bio import SeqIO
from matplotlib import pyplot as plt
from matplotlib import rcParams
from matplotlib.legend import Legend
%matplotlib inline
# ---- pandas display limits: show wide/long frames without truncation ----
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 200)
pd.options.display.max_seq_items = 2000

# ---- scanpy / matplotlib figure defaults ----
sc.set_figure_params(scanpy=True, dpi=300, dpi_save=300, frameon=True, vector_friendly=True, fontsize=12, 
                         color_map='Dark2', format='pdf', transparent=True, ipython_format='png2x')

rcParams.update({'font.size': 8})
rcParams.update({'font.family': 'Helvetica'})
# fonttype 42 embeds TrueType fonts in PDF/PS output instead of Type 3 fonts
rcParams['pdf.fonttype'] = 42
rcParams['ps.fonttype'] = 42

# Display the value of every top-level expression in a cell, not only the last one.
# Several cells below rely on this (bare `len(...)` / `adata` lines).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# `display`/`HTML` are imported from IPython.display: importing them from
# IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML

# Inject CSS so that code input and output are laid out side by side (50% / 50%)
# in the classic notebook interface.
display(HTML("""
<style>
#notebook-container {
    width: 100%
}
 
.code_cell {
   flex-direction: row !important;
}
 
.code_cell .input {
    width: 50%
}
 
.code_cell .output_wrapper {
    width: 50%
}
</style>
"""))

In [None]:
## generate initial TCR tables
##
## For every matching output folder under TCR_DIR: keep productive, non-'Multi'
## contigs, reduce to one TRA and one TRB contig per cell barcode (highest UMI
## count), pair them on barcode, flag MAIT clonotypes, write one table per sample
## and finally one combined table.

TCR_DIR = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/TCR/'
MAIT_INKT_EVIDENCE = 'TRA:gene+junction;TRB:gene'

## list of samples that did not have TCRs
no_tcr_pt = ['h39C']

## filter cellranger output and combine into one final table
## samples in this list get a zero inserted after the leading 'h' in the barcode
## prefix (e.g. h1A -> h01A) -- presumably to match the barcodes used in the
## expression data; verify against the h5ad obs names
short_pt_nums = ['h1A','h1C','h2A','h2B','h2C','h3A1','h3A2','h3B1','h3B2','h3C1','h3C2',
                 'h4A','h4B','h4C','h5A','h5B','h5C','h6A','h6B','h6C','h7A','h7B','h7C',
                 'h9A','h9B','h9C']

## columns kept in the final table: every field below, once per chain
test = ['cdr3','cdr3_nt','v_gene','d_gene','j_gene','c_gene','length','raw_clonotype_id','umis']
test2 = ['TRA_','TRB_']
cols = [x + y for x in test2 for y in test]

files = os.listdir(TCR_DIR)
files = [x for x in files if 'PembroRT' in x and 'TCR' in x]


def top_umi_contig_per_barcode(chain_df):
    """Return one row per barcode (index label): the contig with the most UMIs.

    Barcodes keep the order in which they first appear in ``chain_df``. Ties on
    ``umis`` are broken in favour of the contig listed first (stable sort).
    """
    ranked = chain_df.sort_values(by='umis', ascending=False, kind='stable')
    best = ranked[~ranked.index.duplicated(keep='first')]
    # restore first-appearance order of the barcodes
    return best.loc[chain_df.index.unique()]


## collect per-sample tables and concatenate once at the end; this also works
## when the first listed folder is skipped, and avoids DataFrame.append
## (removed in pandas 2.0)
sample_tables = []

for f in files:
    pt = f.split('_')[1]

    ## skip samples without TCR data before touching their files
    if pt in no_tcr_pt:
        continue

    df = pd.read_csv(os.path.join(TCR_DIR, f, 'outs', 'all_contig_annotations.csv'),header=0,index_col=0)
    clonotypes = pd.read_csv(os.path.join(TCR_DIR, f, 'outs', 'clonotypes.csv'),header=0,index_col=0)

    ## NOTE(review): this only fires when BOTH chains are absent; a sample missing
    ## just one chain also yields an empty paired table -- confirm whether `or`
    ## was intended
    check = df.chain.unique().tolist()
    if ('TRA' not in check) and ('TRB' not in check):
        print(pt+'_error with chain')

    df = df[(df.productive==True)&(df.chain!='Multi')]

    ## rebuild barcodes as <sample>_P_<barcode without the '-1' suffix>
    if pt in short_pt_nums:
        prefix = 'h0'+pt.split('h')[1]+'_P_'
    else:
        prefix = pt+'_P_'
    BC = [prefix+x.split('-1')[0] for x in df.index]
    df.index = BC

    TRA = df[(df.chain=='TRA')]
    TRAfiltered = top_umi_contig_per_barcode(TRA)

    TRB = df[(df.chain=='TRB')]
    TRBfiltered = top_umi_contig_per_barcode(TRB)

    TRAfiltered.columns = ['TRA_'+x for x in TRAfiltered.columns]
    TRBfiltered.columns = ['TRB_'+x for x in TRBfiltered.columns]

    ## inner join: only barcodes with both a TRA and a TRB contig are kept
    finaltable = TRAfiltered.merge(TRBfiltered,how='inner',left_index=True,right_index=True)
    ## explicit copy so the column assignments below never act on a slice
    finaltable = finaltable[cols].copy()

    if (clonotypes.inkt_evidence==MAIT_INKT_EVIDENCE).any():
        print('iNKT found')

    ## the literal string 'NaN' is written here; pandas parses it back as a
    ## missing value when the CSV is re-read
    finaltable['mait_evidence'] = 'NaN'
    mait_ids = clonotypes.index[clonotypes.mait_evidence==MAIT_INKT_EVIDENCE]
    if (len(mait_ids)>0):
        print('mait found')
        ## flag cells whose TRA and TRB contigs both belong to the same MAIT clonotype
        is_mait = (finaltable.TRA_raw_clonotype_id.isin(mait_ids)
                   & (finaltable.TRA_raw_clonotype_id==finaltable.TRB_raw_clonotype_id))
        finaltable.loc[is_mait,'mait_evidence'] = MAIT_INKT_EVIDENCE

    finaltable.to_csv(os.path.join(TCR_DIR, pt+'_filtered_tcr_table.csv'))
    sample_tables.append(finaltable)

    print(f)

if not sample_tables:
    raise RuntimeError('no TCR tables were generated - check TCR_DIR and the folder name filter')

combinedtable = pd.concat(sample_tables)
combinedtable.to_csv(os.path.join(TCR_DIR, 'combined_filtered_tcr_table.csv'))



## add celltype annotations, and filter to only T-cells
##
## Restrict the immune AnnData to the saved T-cell barcodes, left-join the
## combined TCR table onto obs by barcode, and save the obs rows that have a TCR.
ANALYSIS_DIR = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/'

celltype = 'tcell_filtered'
adata = sc.read_h5ad(ANALYSIS_DIR+'h5adfiles/PembroRT_immune_R100.h5ad')
## allow_pickle=True unpickles arbitrary objects -- only load barcode files you created yourself
BC = np.load(ANALYSIS_DIR+'scvi_outputs/'+celltype+'/barcodes.npy',allow_pickle=True)
adata = adata[list(BC)].copy()
## remove any previous combined_tcr annotation; errors='ignore' keeps the cell
## runnable when the h5ad does not carry that column
adata.obs = adata.obs.drop(columns=['combined_tcr'],errors='ignore')

TCR = pd.read_csv(ANALYSIS_DIR+'TCR/combined_filtered_tcr_table.csv',header=0,index_col=0)
## clonotype key: <TRA cdr3>_<TRB cdr3>
TCR['combined_tcr'] = [x+'_'+y for x,y in zip(TCR['TRA_cdr3'],TCR['TRB_cdr3'])]
## left join keeps every T cell; cells without a TCR get missing values
adata.obs = adata.obs.merge(TCR,right_index=True,left_index=True,how='left')
tmp = adata[(~pd.isnull(adata.obs.combined_tcr))].obs
tmp.to_csv(ANALYSIS_DIR+'TCR/TCRtable_annotation.csv')


In [None]:
## calculate TCR expansion - all Tcells together
##
## Within every (cohort, treatment) group, count how many cells share each
## combined_tcr and label a cell 'expanded' when its clonotype count exceeds
## EXPANSION_FACTOR x the median per-cell clonotype count of that group.
TCR_DIR = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/TCR/'
EXPANSION_FACTOR = 1.5

df = pd.read_csv(TCR_DIR+'TCRtable_annotation.csv',header=0,index_col=0)

pt = df.cohort.unique().tolist()
## rows whose treatment is not listed here are not carried into finaldf
tx = ['Base','PD1','RTPD1']

## collect the groups and concatenate once; this also works when the first
## cohort/treatment combination has no cells, and avoids DataFrame.append
## (removed in pandas 2.0)
group_tables = []
for p in pt:
    for t in tx:
        tmp = df[(df.cohort==p)&(df.treatment==t)].copy()
        if tmp.empty:
            continue

        ## per-cell size of the clonotype the cell belongs to
        counts = tmp.combined_tcr.value_counts()
        tmp['tcr_counts'] = tmp.combined_tcr.map(counts)

        thresh = EXPANSION_FACTOR*np.median(tmp.tcr_counts)
        print(str(thresh))

        tmp['medianExpansion'] = 'not_expanded'
        tmp.loc[(tmp.tcr_counts>thresh),'medianExpansion'] = 'expanded'

        group_tables.append(tmp)

if not group_tables:
    raise RuntimeError('no cohort/treatment group contained any cells - nothing to annotate')

finaldf = pd.concat(group_tables)

## sanity check: the two lengths differ if any rows had a treatment outside `tx`
len(df)
len(finaldf)

finaldf.to_csv(TCR_DIR+'TCRtable_annotation_expansion.csv')


In [None]:
## checking CD4/CD8 distribution within clonotypes
##
## Rebuild the T-cell AnnData with its saved UMAP embedding and leiden labels,
## attach the expansion annotation by barcode, and draw the two UMAPs.

ANALYSIS_DIR = '/Users/gouink/Documents/RTPD1Manuscript/Human/manuscript_review_analysis/'

## load celltype data
celltype = 'tcell_filtered'
SCVI_DIR = ANALYSIS_DIR + 'scvi_outputs/' + celltype + '/'

df = pd.read_csv(ANALYSIS_DIR + 'TCR/TCRtable_annotation_expansion.csv', index_col=0, header=0)

adata = sc.read_h5ad(ANALYSIS_DIR + 'h5adfiles/PembroRT_immune_R100.h5ad')

## keep only the saved T-cell barcodes
BC = np.load(SCVI_DIR + 'barcodes.npy', allow_pickle=True)
adata = adata[list(BC)].copy()
adata

## precomputed embedding and clustering, stored as arrays next to the barcodes
embFile = 'umap_n-0055_md-0.80_s-2.28.npy'
clustFile = 'scvi_cugraph_leiden_nbr100_res1.2.npy'
emb = np.load(SCVI_DIR + embFile)
clust = np.load(SCVI_DIR + clustFile)

adata.obsm['X_umap'] = emb
adata.obs['leiden'] = list(map(str, clust))
adata.obs['leiden'] = adata.obs['leiden'].astype('category')

## stash the data in .raw before normalising and log-transforming
adata.raw = adata
sc.pp.normalize_per_cell(adata, counts_per_cell_after=10000)
sc.pp.log1p(adata)

## bring over only the columns that obs does not already have
cols = df.columns[~df.columns.isin(adata.obs.columns)].tolist()
df = df[cols]

adata.obs = adata.obs.merge(df, left_index=True, right_index=True, how='left')

## point size shrinks as the number of cells grows
dotsize = 2 * (120000 / len(adata))
sc.pl.umap(
    adata,
    color=['leiden'],
    legend_loc='on data',
    size=dotsize,
    show=False,
)
sc.pl.umap(
    adata,
    color=['medianExpansion'],
    groups=['expanded','not_expanded'],
    palette=['tab:blue','tab:gray','tab:orange'],
    size=dotsize,
    show=False,
)