In [None]:
#import the library
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
from gprofiler import GProfiler
import seaborn as sns
import rpy2.rinterface_lib.callbacks
import logging
import scirpy
import anndata
from rpy2.robjects import pandas2ri
import anndata2ri

import importlib
import warnings
warnings.filterwarnings("ignore")

import pickle as pkl
from matplotlib.colors import LinearSegmentedColormap
from sccd45ra import cd45ra_infer

In [None]:
import h5py
from scipy.sparse import csr_matrix

# Open the HDF5 file
file_path = 'Synapse/NonEpithelial_Count_matrix.h5'
with h5py.File(file_path, 'r') as f:
    # Load the necessary datasets
    data = f['matrix/data'][()]
    indices = f['matrix/indices'][()]
    indptr = f['matrix/indptr'][()]
    shape = f['matrix/shape'][()]

In [None]:
# Read the feature (gene) names that label the variables of the count matrix.
with h5py.File(file_path, 'r') as f:
    genes = f['matrix/features/name'][()]
    # h5py hands string datasets back as bytes. Decode them directly rather than slicing the
    # "b'...'" repr, which returns a mangled name whenever the name itself contains a quote.
    gene_ID = [g.decode('utf-8') if isinstance(g, bytes) else str(g) for g in genes]

In [None]:
# Two-stop colormap used for gene-expression plots: light grey at 0, red at 1.
from matplotlib.colors import LinearSegmentedColormap
values = [0,1]
# RGB stops on a 0-255 scale (rescaled to 0-1 below).
# NOTE: this rebinds the name `colors`, which was imported above as the matplotlib.colors module.
colors = [(227, 227, 227), (255, 42, 18)]
norm = plt.Normalize(min(values), max(values))
# Pair each normalised position with its 0-1 RGB tuple, as from_list expects.
my_cmap = LinearSegmentedColormap.from_list(
    '', [(norm(value), tuple(np.array(color) / 255)) for value, color in zip(values, colors)])

In [None]:
def print_full(x):
    """Print ``x`` (a pandas Series/DataFrame) without row truncation.

    The ``display.max_rows`` option is raised to ``len(x)`` only for the duration of the
    print. Using ``option_context`` restores whatever value the caller had configured
    (not the pandas default) and does so even if printing raises.
    """
    with pd.option_context('display.max_rows', len(x)):
        print(x)

In [None]:

rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

pandas2ri.activate()
anndata2ri.activate()
%load_ext rpy2.ipython

In [None]:
plt.rcParams['figure.figsize']=(8,8)
sc.settings.verbosity = 3

sc.logging.print_versions()

In [None]:
%%R

.libPaths(.libPaths('R\\win-library\\4.3'))


library(scran)
library(Seurat)

In [None]:
my_palette = ['#0351A8','#8CB0E0','#D56D11','#FFBB78','#234E08','#53CB8B','#D30083','#CB788D','#4E195A','#C58CCF','#AA290F','#B03FD1','#E8BCCF','#64605F','#B2AD9A','#D2D30B','#D1BD4F','#06DCF2','#9EDAE5','#517219','#5B43CF','#D92F24','#FFD900','#002F33','#B8A3A3']

In [None]:
# Rebuild the sparse count matrix from the raw (data, indices, indptr) buffers read from the HDF5 file.
# The stored shape is passed in swapped order.
# NOTE(review): presumably the file holds a column-compressed features x cells matrix, so reading
# the same buffers as CSR with the shape reversed yields the cells x features transpose — confirm.
matrix = csr_matrix((data, indices, indptr), shape=[shape[1],shape[0]])

In [None]:
meta_data = pd.read_csv('Synapse/NonEpithelial_metadata.csv',index_col = 0)

In [None]:
import anndata
adata =anndata.AnnData(X = matrix)
adata.var_names = gene_ID
adata.obs_names = meta_data.index

In [None]:
adata.obs = meta_data

In [None]:
adata.write('Synapse/synapse.h5ad')

In [None]:
adata = sc.read_h5ad('Synapse/synapse.h5ad')

In [None]:
meta = pd.read_excel('Synapse/41588_2022_1100_MOESM3_ESM.xlsx',sheet_name='Supplementary Table 13')

In [None]:
# Remove the '-JSC' suffix so patient IDs match the keys of the supplementary table.
# Series.replace('-JSC', '') only replaces cells whose whole value equals '-JSC'; substring
# removal has to go through the .str accessor (regex=False treats the pattern literally).
adata.obs['patient.ID'] = adata.obs['patient.ID'].str.replace('-JSC', '', regex=False)

In [None]:
for col_name in meta.columns[4:-3]:
    dictionary = pd.Series(meta[col_name].astype(str).values, index=meta['patient.ID']).to_dict()
    adata.obs[col_name] = adata.obs['patient.ID'].replace(dictionary)

In [None]:
adata.obs['patient.ID'] = [i.replace('-JSC','') for i in adata.obs['patient.ID']]

In [None]:
for col_name in meta.columns[2:-3]:
    dictionary = pd.Series(meta[col_name].values, index=meta['patient.ID']).to_dict()
    adata.obs[col_name] = adata.obs['patient.ID'].replace(dictionary)

In [None]:
for i in adata.obs.columns:
    adata.obs[i] = adata.obs[i].astype(str)

In [None]:
adata.obs[['sample.ID', 'patient.ID', 'sample.origin','dataset','Gender', 'Age at recruitment', 'Group Stage', 'Stage TNM','Site']]

In [None]:
adata.obs['patient'] = adata.obs['patient.ID']
adata.obs['tissue'] = adata.obs['sample.origin']
adata.obs['gender'] = adata.obs['Gender'] 
adata.obs['age'] = adata.obs['Age at recruitment'] 
adata.obs['site'] = adata.obs['Site']
adata.obs['stage'] = adata.obs['Group Stage']

In [None]:
def _split_tnm(stage, missing='x'):
    """Split a TNM string such as 'T3N1M0' into its (T, N, M) components.

    T is the text between the first 'T' and the following 'N', N is the text between the
    first 'N' and the following 'M', and M is the text after the first 'M'. Any component
    whose marker letter is absent (e.g. a missing stage stored as 'nan') is returned as
    ``missing`` instead of raising IndexError.
    """
    after_t = stage.split('T')
    after_n = stage.split('N')
    after_m = stage.split('M')
    t_part = after_t[1].split('N')[0] if len(after_t) > 1 else missing
    n_part = after_n[1].split('M')[0] if len(after_n) > 1 else missing
    m_part = after_m[1] if len(after_m) > 1 else missing
    return t_part, n_part, m_part


# Build whole columns and assign them in one step. The previous per-row
# `adata.obs['TNM_M'][i] = ...` was a chained assignment using a positional integer on a
# label-indexed Series, which pandas does not guarantee to write back into adata.obs.
_tnm_parts = [_split_tnm(stage) for stage in adata.obs['Stage TNM']]
adata.obs['TNM_T'] = [parts[0] for parts in _tnm_parts]
adata.obs['TNM_N'] = [parts[1] for parts in _tnm_parts]
adata.obs['TNM_M'] = [parts[2] for parts in _tnm_parts]

In [None]:
adata.obs['gender'] = adata.obs['gender'].replace({'F':'Female','M':'Male'})

In [None]:
# Overview of the clinical/sample annotations on the UMAP embedding.
# NOTE(review): in notebook order this runs before normalisation, neighbours and sc.tl.umap,
# so on a fresh kernel there is presumably no UMAP embedding to plot yet — confirm and move
# this cell below the sc.tl.umap call.
sc.pl.umap(adata,color = ['patient','tissue','gender','age','site','TNM_T','TNM_N','TNM_M','stage'])

In [None]:
sc.pp.normalize_total(adata, key_added = 'normalization_factors')
sc.pp.log1p(adata)

In [None]:
sc.pp.highly_variable_genes(adata, flavor='seurat', n_top_genes=3000)
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(adata, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(adata, resolution = 0.8, key_added= 'leiden')                                                 

In [None]:
sc.tl.umap(adata)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.umap(adata, color =  ['leiden','TRGV4','CD3D','CD3E','CD3G'],size = 40, legend_loc = 'on data', palette  = my_palette, ncols = 1, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

In [None]:
sc.pl.umap(adata,color = ['leiden','RORC','IL17A','IL17F'],cmap = my_cmap, vmax = 3, size = 40, legend_loc = 'on data')

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['1']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden', ['5']), resolution = 0.4, key_added= 'leiden1')
sc.pl.tsne(potential_17, color=['leiden1','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden1', ['7']), resolution = 0.4, key_added= 'leiden2')
sc.pl.tsne(potential_17, color=['leiden2','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden2', ['2']), resolution = 0.5, key_added= 'leiden3')
sc.pl.tsne(potential_17, color=['leiden3','RORC','IL17A','IL17F'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden3' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
T17_1 = potential_17[potential_17.obs['leiden3'].isin(['2,0','2,1','2,3','2,4','5,2','7,1','7,2','7,3','12']),:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['9']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden', ['2']), resolution = 0.9, key_added= 'leiden1')
sc.pl.tsne(potential_17, color=['leiden1','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden1', ['8']), resolution = 0.4, key_added= 'leiden2')
sc.pl.tsne(potential_17, color=['leiden2','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden2', ['2']), resolution = 0.5, key_added= 'leiden3')
sc.pl.tsne(potential_17, color=['leiden3','RORC','IL17A','IL17F'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden2' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
T17_2 = potential_17[potential_17.obs['leiden2'].isin(['2,1','16']),:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['8']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
T17_3 = potential_17[potential_17.obs['leiden'].isin(['0']),:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['3']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden', ['1']), resolution = 0.9, key_added= 'leiden1')
sc.pl.tsne(potential_17, color=['leiden1','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden1' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
# NOTE(review): for this subset (parent cluster '3') only 'leiden' and 'leiden1' are computed in
# the cells above, so the 'leiden2' column used here does not appear to exist, and the label
# list is identical to the one used for T17_2. T17_4 is also not passed to the concatenate call
# that merges the other T17_* subsets. Looks like a copy-paste leftover — confirm the intended
# 'leiden1' labels or remove this cell.
T17_4 = potential_17[potential_17.obs['leiden2'].isin(['2,1','16']),:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['4']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden', ['7']), resolution = 0.5, key_added= 'leiden1')
sc.pl.tsne(potential_17, color=['leiden1','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden1', ['4']), resolution = 0.5, key_added= 'leiden2')
sc.pl.tsne(potential_17, color=['leiden2','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden2', ['9']), resolution = 0.5, key_added= 'leiden3')
sc.pl.tsne(potential_17, color=['leiden3','RORC','IL17A','IL17F'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden3', ['3']), resolution = 0.6, key_added= 'leiden4')
sc.pl.tsne(potential_17, color=['leiden4','RORC','IL17A','IL17F'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden4' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
# Keep the IL17/RORC-positive sub-clusters picked from the dotplot above (grouped by 'leiden4').
# Composite labels such as '3,0' or '7,1' only exist in the restrict_to-refined 'leiden4'
# column; filtering on the base 'leiden' column matched nothing and produced an empty subset.
T17_5 = potential_17[potential_17.obs['leiden4'].isin(['3,0','3,2','4,1','7,0','7,1','7,3','9,0','9,1']),:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['5']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 0.7, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden', ['3']), resolution = 1, key_added= 'leiden1')
sc.pl.tsne(potential_17, color=['leiden1','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden1', ['4']), resolution = 1, key_added= 'leiden2')
sc.pl.tsne(potential_17, color=['leiden2','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden1' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
T17_6 = potential_17[potential_17.obs['leiden1'].isin(['3,7','9']),:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['15']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden', ['3']), resolution = 0.5, key_added= 'leiden1')
sc.pl.tsne(potential_17, color=['leiden1','RORC','IL17A','IL17F'], size = 40, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden1', ['4']), resolution = 0.5, key_added= 'leiden2')
sc.pl.tsne(potential_17, color=['leiden2','RORC','IL17A','IL17F'], size = 20, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden2', ['5']), resolution = 0.5, key_added= 'leiden3')
sc.pl.tsne(potential_17, color=['leiden3','RORC','IL17A','IL17F'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden3', ['10']), resolution = 0.4, key_added= 'leiden4')
sc.pl.tsne(potential_17, color=['leiden4','RORC','IL17A','IL17F'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden4' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
# Keep the IL17/RORC-positive sub-clusters picked from the dotplot above (grouped by 'leiden4').
# Composite labels such as '3,1' or '10,2' only exist in the restrict_to-refined 'leiden4'
# column; filtering on the base 'leiden' column matched nothing and produced an empty subset.
T17_7 = potential_17[potential_17.obs['leiden4'].isin(['3,1','3,3','4,0','4,2','5,0','10,2','5,3','4,1']),:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['39']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 0.7, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
T17_8 = potential_17[potential_17.obs['leiden'].isin(['4'])==0,:]

In [None]:
potential_17 = adata[adata.obs['leiden'].isin(['6']),:]

In [None]:
del potential_17.uns

In [None]:
sc.pp.highly_variable_genes(potential_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(potential_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_17, resolution = 0.7, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(potential_17)

In [None]:
%matplotlib inline
sc.pl.tsne(potential_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(potential_17, restrict_to = ('leiden', ['12']), resolution = 0.3, key_added= 'leiden1')
sc.pl.tsne(potential_17, color=['leiden1','RORC','IL17A','IL17F'], size = 40, legend_loc = 'on data', cmap = my_cmap)

In [None]:
IL17_genesmask = [gene.startswith("IL17") and gene.startswith("IL17R")==0 for gene in potential_17.var_names]
IL17_genes = potential_17.var_names[IL17_genesmask]
IL17_exp_set ={
    'cytokine': IL17_genes, 'TF': 'RORC'
}
sc.pl.dotplot(potential_17,IL17_exp_set,groupby = 'leiden1' , vmax = 1, swap_axes = False, dot_min =0.1, dot_max =1,standard_scale = 'var')

In [None]:
T17_9 = potential_17[potential_17.obs['leiden1'].isin(['12,0','12,2'])==0,:]

In [None]:
# Merge the per-cluster Th17 candidate subsets into one object. 'original_cluster' records which
# subset (by position in the argument list) each cell came from; the outer join keeps all genes
# and fills missing entries with 0; index_unique=None leaves the cell barcodes unchanged.
# T17_4 is not part of this merge.
# NOTE(review): AnnData.concatenate is, as far as I know, deprecated in favour of
# anndata.concat — confirm against the installed anndata version before migrating.
adata_17 = T17_1.concatenate(T17_2,T17_3,T17_5,T17_6,T17_7,T17_8,T17_9, batch_key = 'original_cluster', join = 'outer',index_unique = None, fill_value=0)

In [None]:
sc.pp.highly_variable_genes(adata_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(adata_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(adata_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(adata_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(adata_17)

In [None]:
%matplotlib inline
sc.pl.tsne(adata_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,legend_loc = 'on data',cmap = my_cmap)

In [None]:
adata_17_refined = adata_17[adata_17.obs['leiden'].isin(['1','15','9','3','16','17','11'])]

In [None]:
%matplotlib inline
sc.pl.tsne(adata_17_refined, color=['leiden','RORC','IL17A','IL17F'],size = 40,legend_loc = 'on data',cmap = my_cmap)

In [None]:
adata_17_refined.write('synapse_T17.h5ad')

In [None]:
# NOTE(review): the full dataset was written to and read from 'Synapse/synapse.h5ad' earlier in
# this notebook, whereas this reads 'synapse.h5ad' from the working directory, and `adata_test`
# is not referenced by any later cell visible here. Presumably a leftover check — confirm the
# path or drop the cell.
adata_test = sc.read_h5ad('synapse.h5ad')

In [None]:
T_population = [str(i) for i in [1,3,4,5,8,9,10,15,39,45]]
T_adata = adata[adata.obs['leiden'].isin(T_population),:]

In [None]:
T_adata.write('T_adata.h5ad')

In [None]:
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
from gprofiler import GProfiler
import seaborn as sns
import rpy2.rinterface_lib.callbacks
import logging
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import  LogisticRegression
from keras.models import load_model
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from functools import partial
from sklearn.ensemble import StackingClassifier
from bayes_opt import BayesianOptimization
from rpy2.robjects import pandas2ri
from joblib import dump, load
import pickle
import anndata2ri
import anndata
from sklearn.metrics import precision_score, recall_score

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
from matplotlib.colors import LinearSegmentedColormap
values = [0,1]
colors = [(227, 227, 227), (255, 42, 18)]
norm = plt.Normalize(min(values), max(values))
my_cmap = LinearSegmentedColormap.from_list(
    '', [(norm(value), tuple(np.array(color) / 255)) for value, color in zip(values, colors)])

In [None]:
def print_full(x):
    """Print ``x`` (a pandas Series/DataFrame) without row truncation.

    The ``display.max_rows`` option is raised to ``len(x)`` only for the duration of the
    print. Using ``option_context`` restores whatever value the caller had configured
    (not the pandas default) and does so even if printing raises.
    """
    with pd.option_context('display.max_rows', len(x)):
        print(x)

In [None]:
my_palette = ['#0351A8','#8CB0E0','#D56D11','#FFBB78','#234E08','#53CB8B','#D30083','#CB788D','#4E195A','#C58CCF','#AA290F','#B03FD1','#E8BCCF','#64605F','#B2AD9A','#D2D30B','#D1BD4F','#06DCF2','#9EDAE5','#517219','#5B43CF','#D92F24','#FFD900','#002F33','#B8A3A3']

In [None]:
adata = sc.read_h5ad('T_adata.h5ad')

In [None]:
lr = load('best_LR.joblib')
features = pd.read_excel('best_LR_features.xlsx')[0].values.tolist()

In [None]:
SVM = load('best_SVM.joblib')

In [None]:
# Assemble the classifier input: one row per cell and one column per model feature (in the
# order given by `features`), filled from adata.X where the gene is present and with 0 otherwise.
_org_df = pd.DataFrame(index=adata.obs_names, columns=features)
# issparse covers both scipy sparse matrices and sparse arrays.
_x_is_sparse = sp.sparse.issparse(adata.X)

for feature in features:
    if feature in adata.var_names:
        column_data = adata.X[:, adata.var_names == feature]
        if _x_is_sparse:
            # .toarray() replaces the `.A` shorthand, which recent SciPy releases no longer provide.
            column_data = column_data.toarray()

        # The slice is (n_cells, 1); squeeze it to 1-D so it fits a single DataFrame column.
        _org_df[feature] = np.squeeze(column_data)
    else:
        _org_df[feature] = 0  # gene absent from this dataset: constant column of zeros

In [None]:
adata.obs['gd_predict'] = SVM.predict(_org_df)
adata.obs['gd_predict'] = adata.obs['gd_predict'].astype(str)

In [None]:
# Show the predicted labels on the t-SNE embedding.
# NOTE(review): sc.tl.tsne(adata) is only called in a later cell, so unless 'T_adata.h5ad'
# already carries a t-SNE embedding this presumably fails on a fresh kernel — confirm and move
# this cell below the sc.tl.tsne call.
sc.pl.tsne(adata,color = 'gd_predict')

In [None]:
# Drop the stored log1p metadata before recomputing highly variable genes.
# pop(..., None) keeps this cell re-runnable: `del` raised KeyError on a second execution or
# when the loaded file carried no 'log1p' entry.
adata.uns.pop('log1p', None)
sc.pp.highly_variable_genes(adata, flavor = 'seurat', n_top_genes=3000)
#adata.var['highly_variable'] = adata.var_names.isin(features)
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(adata, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(adata, resolution = 0.8, key_added= 'leiden')                                                 

In [None]:
# Discard the colour list saved for the previous 'leiden' labelling so the next plot assigns
# colours to the new clusters. pop(..., None) tolerates the key being absent (e.g. on re-run),
# where `del` raised KeyError.
adata.uns.pop('leiden_colors', None)

In [None]:
sc.tl.tsne(adata)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.tsne(adata, color = ['leiden','ZBTB16','IKZF2','TRGV4','CD3D','CD3E','CD3G','CD247','TRDC','CD4','CD8A','PDCD1','LGALS3','NCAM1'],size = 20, legend_loc = 'on data', palette  = my_palette, ncols = 4, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.tsne(adata, color = ['RORC'],size = 40, legend_loc = 'on data', ncols = 4, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

## Tedious procedure of gamma delta T identification

### temp checkpoints

In [None]:
prob_gd = adata[adata.obs['leiden'].isin(['7']),:]

In [None]:
sc.pp.highly_variable_genes(prob_gd, flavor = 'seurat', n_top_genes=3000)
#.var['highly_variable'] = adata.var_names.isin(features)
sc.pp.pca(prob_gd, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(prob_gd, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(prob_gd, resolution = 0.8, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(prob_gd)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.tsne(prob_gd, color = ['leiden','ZBTB16','IKZF2','TRGV4','CD3D','CD3G','CD3E',
                                   'CD247','IFNG','IL17A','TRDC','TRDV1','TRDV2','TRDV3','CD4','CD8A',
                                   'PDCD1','KLRK1','RORC','TRAV1-2'],size = 20, legend_loc = 'on data', palette  = my_palette, ncols = 4, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

In [None]:
gamma_genesmask = [gene.startswith("TRG") for gene in adata.var_names]
gamma_genes = adata.var_names[gamma_genesmask]
delta_genesmask = [gene.startswith("TRD") for gene in adata.var_names]
delta_genes = adata.var_names[delta_genesmask]

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden', ['4']), resolution = 0.9, key_added= 'leiden1')
sc.pl.tsne(prob_gd, color=['leiden1','CD4','CD8A','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden1', ['6']), resolution = 0.35, key_added= 'leiden2')
sc.pl.tsne(prob_gd, color=['leiden2','CD4','CD8A','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden2', ['3']), resolution = 0.35, key_added= 'leiden3')
sc.pl.tsne(prob_gd, color=['leiden3','CD4','CD8A','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden3', ['1']), resolution = 0.9, key_added= 'leiden4')
sc.pl.tsne(prob_gd, color=['leiden4','CD4','CD8A','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden4', ['4,1']), resolution = 0.5, key_added= 'leiden5')
sc.pl.tsne(prob_gd, color=['leiden5','CD4','CD8A','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
TCR_exp_set ={
"gamma": gamma_genes, "delta": delta_genes, "CD3S": ['CD3E','CD3D','CD3G']
}
sc.pl.dotplot(prob_gd,TCR_exp_set,groupby = 'leiden5' , vmax = 1, swap_axes = False, dot_min =0, dot_max =1,standard_scale = 'var')

In [None]:
# Keep the sub-clusters picked from the dotplot above, which is grouped by 'leiden5'.
# '4,1,0' and '4,1,1' are produced by the last restrict_to split of '4,1' and exist only in
# 'leiden5'; filtering on 'leiden4' silently dropped those cells. The other labels are
# identical in both columns.
gd1 = prob_gd[prob_gd.obs['leiden5'].isin(['1,8','4,0','4,1,0','4,1,1','4,3','4,4','4,5','4,6','4,7']),:]

In [None]:
sc.pl.tsne(gd1,color = ['leiden4','CD3D','CD3E','CD3G','CD247','TRDC'])

### Further sub-cluster the candidate gamma delta T cells (parent cluster 9)

In [None]:
prob_gd = adata[adata.obs['leiden'].isin(['9']),:]

In [None]:
sc.pp.highly_variable_genes(prob_gd, flavor = 'seurat', n_top_genes=3000)
#.var['highly_variable'] = adata.var_names.isin(features)
sc.pp.pca(prob_gd, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(prob_gd, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(prob_gd, resolution = 0.8, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(prob_gd)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
# Marker panel for this subset. 'CD3D', 'CD3E', 'CD3G' and 'CD247' were listed twice, which
# rendered four duplicate panels; each gene now appears once, in its original first position.
marker_panel = ['leiden','ZBTB16','IKZF2','TRDC','CD3D','CD3G','CD3E','CD247','IFNG','IL17A',
                'TRDV1','TRDV2','TRDV3','PDCD1','KLRK1','RORC','TRAV1-2']
fig = sc.pl.tsne(prob_gd, color = marker_panel, size = 20, legend_loc = 'on data', palette  = my_palette, ncols = 4, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
# Enlarge and embolden the axis labels and titles of every panel (colour-bar axes included).
for panel_ax in fig.get_axes():
    panel_ax.xaxis.label.set_fontsize(22)
    panel_ax.xaxis.label.set_fontweight('bold')
    panel_ax.yaxis.label.set_fontsize(22)
    panel_ax.yaxis.label.set_fontweight('bold')
    panel_ax.title.set_fontsize(30)
    panel_ax.title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden', ['6']), resolution = 0.4, key_added= 'leiden1')
sc.pl.tsne(prob_gd, color=['leiden1','CD3D','CD3E','CD3G','CD247','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden1', ['5']), resolution = 0.5, key_added= 'leiden2')
sc.pl.tsne(prob_gd, color=['leiden2','CD3D','CD3E','CD3G','CD247','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]
sc.tl.leiden(prob_gd, restrict_to = ('leiden2', ['3']), resolution = 0.5, key_added= 'leiden3')
sc.pl.tsne(prob_gd, color=['leiden3','CD3D','CD3E','CD3G','CD247','TRDC'], size = 30, legend_loc = 'on data', cmap = my_cmap)

In [None]:
TCR_exp_set ={
"gamma": gamma_genes, "delta": delta_genes, "CD3S": ['CD3E','CD3D','CD3G']
}



sc.pl.dotplot(prob_gd,TCR_exp_set,groupby = 'leiden3' , vmax = 1, swap_axes = False, dot_min =0, dot_max =1,standard_scale = 'var')

In [None]:
gd2 = prob_gd[prob_gd.obs['leiden3'].isin(['5,2','6,2','6,3']),:]

In [None]:
sc.pl.tsne(gd2,color = ['leiden3','CD3D','CD3E','CD3G','CD247','TRDC'])

### Further sub-cluster the candidate gamma delta T cells (parent cluster 4)

In [None]:
prob_gd = adata[adata.obs['leiden'].isin(['4']),:]

In [None]:
sc.pp.highly_variable_genes(prob_gd, flavor = 'seurat', n_top_genes=3000)
#.var['highly_variable'] = adata.var_names.isin(features)
sc.pp.pca(prob_gd, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(prob_gd, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(prob_gd, resolution = 0.8, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(prob_gd)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.tsne(prob_gd, color = ['leiden','ZBTB16','IKZF2','TRDC','CD3D','CD3G','CD3E',
                                   'CD247',],size = 20, legend_loc = 'on data', palette  = my_palette, ncols = 4, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

In [None]:
# Re-cluster only cluster '7' of 'leiden'; the refined labels go to obs['leiden1'].
plt.rcParams['figure.figsize'] = [8, 8]
sc.tl.leiden(
    prob_gd,
    resolution=0.5,
    restrict_to=('leiden', ['7']),
    key_added='leiden1',
)
sc.pl.tsne(
    prob_gd,
    color=['leiden1', 'CD3D', 'CD3E', 'CD3G', 'CD247', 'TRDC'],
    size=30,
    legend_loc='on data',
    cmap=my_cmap,
)

In [None]:
# Re-cluster only cluster '1' of 'leiden1'; the refined labels go to obs['leiden2'].
plt.rcParams['figure.figsize'] = [8, 8]
sc.tl.leiden(
    prob_gd,
    resolution=0.6,
    restrict_to=('leiden1', ['1']),
    key_added='leiden2',
)
sc.pl.tsne(
    prob_gd,
    color=['leiden2', 'CD3D', 'CD3E', 'CD3G', 'CD247', 'TRDC'],
    size=30,
    legend_loc='on data',
    cmap=my_cmap,
)

In [None]:
# Re-cluster only cluster '6' of 'leiden2'; the refined labels go to obs['leiden3'].
plt.rcParams['figure.figsize'] = [8, 8]
sc.tl.leiden(
    prob_gd,
    resolution=0.4,
    restrict_to=('leiden2', ['6']),
    key_added='leiden3',
)
sc.pl.tsne(
    prob_gd,
    color=['leiden3', 'CD3D', 'CD3E', 'CD3G', 'CD247', 'TRDC'],
    size=30,
    legend_loc='on data',
    cmap=my_cmap,
)

In [None]:
# Gene panels for the dot plot, keyed by panel name.
TCR_exp_set = dict(
    gamma=gamma_genes,
    delta=delta_genes,
    CD3S=['CD3E', 'CD3D', 'CD3G'],
)

# One row per 'leiden3' label; each gene is scaled to [0, 1] across groups.
sc.pl.dotplot(
    prob_gd,
    TCR_exp_set,
    groupby='leiden3',
    standard_scale='var',
    vmax=1,
    dot_min=0,
    dot_max=1,
    swap_axes=False,
)

In [None]:
# Keep only the cells labelled '1,3', '6,3', '7,1', '7,2' or '9' in 'leiden3'.
# NOTE(review): presumably the sub-clusters judged to be gd T cells from the plots above — confirm.
gd3 = prob_gd[prob_gd.obs['leiden3'].isin(['1,3','6,3','7,1','7,2','9']),:]

In [None]:
# t-SNE of the gd3 subset, coloured by 'leiden3' and the listed genes.
sc.pl.tsne(gd3,color = ['leiden3','CD3D','CD3E','CD3G','CD247','TRDC'])

### Further sub-cluster the putative gd T cells (cluster 14 of `adata`)

In [None]:
# Start a new sub-analysis from the cells in cluster '14' of adata's 'leiden' clustering.
# .copy() makes prob_gd an independent AnnData instead of a view of adata, so the in-place
# preprocessing in the following cells does not rely on an implicit view-to-copy conversion.
prob_gd = adata[adata.obs['leiden'].isin(['14']), :].copy()

In [None]:
# Flag 3000 highly variable genes (Seurat flavour), then compute 50 PCs on those genes.
sc.pp.highly_variable_genes(prob_gd, n_top_genes=3000, flavor='seurat')
sc.pp.pca(prob_gd, svd_solver='arpack', n_comps=50, use_highly_variable=True)

In [None]:
# Build the neighbour graph (15 neighbours, 50 PCs), then cluster it with Leiden into obs['leiden'].
sc.pp.neighbors(prob_gd, n_pcs=50, n_neighbors=15)
sc.tl.leiden(prob_gd, key_added='leiden', resolution=0.8)

In [None]:
# Compute the t-SNE embedding for prob_gd with scanpy's default settings.
sc.tl.tsne(prob_gd)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.tsne(prob_gd, color = ['leiden','ZBTB16','IKZF2','TRGV4','CD3D','CD3G','CD3E',
                                   'CD247','TRDC'],size = 50, legend_loc = 'on data', palette  = my_palette, ncols = 4, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

In [None]:
# Re-cluster only cluster '7' of 'leiden'; the refined labels go to obs['leiden1'].
plt.rcParams['figure.figsize'] = [8, 8]
sc.tl.leiden(
    prob_gd,
    resolution=0.3,
    restrict_to=('leiden', ['7']),
    key_added='leiden1',
)
sc.pl.tsne(
    prob_gd,
    color=['leiden1', 'CD3D', 'CD3E', 'CD3G', 'CD247', 'TRDC'],
    size=30,
    legend_loc='on data',
    cmap=my_cmap,
)

In [None]:
# Re-cluster only cluster '3' of 'leiden1'; the refined labels go to obs['leiden2'].
plt.rcParams['figure.figsize'] = [8, 8]
sc.tl.leiden(
    prob_gd,
    resolution=0.5,
    restrict_to=('leiden1', ['3']),
    key_added='leiden2',
)
sc.pl.tsne(
    prob_gd,
    color=['leiden2', 'CD3D', 'CD3E', 'CD3G', 'CD247', 'TRDC'],
    size=30,
    legend_loc='on data',
    cmap=my_cmap,
)

In [None]:
# Gene panels for the dot plot, keyed by panel name.
TCR_exp_set = dict(
    gamma=gamma_genes,
    delta=delta_genes,
    CD3S=['CD3E', 'CD3D', 'CD3G'],
)

# One row per 'leiden2' label; each gene is scaled to [0, 1] across groups.
sc.pl.dotplot(
    prob_gd,
    TCR_exp_set,
    groupby='leiden2',
    standard_scale='var',
    vmax=1,
    dot_min=0,
    dot_max=1,
    swap_axes=False,
)

In [None]:
# Keep only the cells labelled '3,2', '3,3' or '7,1' in 'leiden2'.
# NOTE(review): presumably the sub-clusters judged to be gd T cells from the plots above — confirm.
gd4 = prob_gd[prob_gd.obs['leiden2'].isin(['3,2','3,3','7,1']),:]

In [None]:
# t-SNE of the gd4 subset, coloured by 'leiden2' and the listed genes.
sc.pl.tsne(gd4,color = ['leiden2','CD3D','CD3E','CD3G','CD247','TRDC'])

### Candidate gamma-delta T cells: merge the selections from clusters 7, 9, 4 and 14

In [None]:
# Merge the four selections into one AnnData. obs['original_cluster'] records which input each
# cell came from ('7', '9', '4', '14', in argument order); genes are outer-joined and gaps filled with 0.
# NOTE(review): gd1 is defined outside this section — presumably the selection from cluster 7; confirm.
# NOTE(review): AnnData.concatenate is deprecated in recent anndata releases in favour of
# anndata.concat — consider migrating after checking that obs names and var columns stay the same.
adata_gd = gd1.concatenate(gd2,gd3,gd4, batch_key = 'original_cluster', batch_categories=['7','9','4','14'],join = 'outer',fill_value=0)

In [None]:
# Display the summary of the merged object.
adata_gd

In [None]:
# Flag 3000 highly variable genes (Seurat flavour), then compute 50 PCs on those genes.
sc.pp.highly_variable_genes(adata_gd, n_top_genes=3000, flavor='seurat')
sc.pp.pca(adata_gd, svd_solver='arpack', n_comps=50, use_highly_variable=True)

In [None]:
# Build the neighbour graph (15 neighbours, 50 PCs), then cluster it with Leiden into obs['leiden'].
sc.pp.neighbors(adata_gd, n_pcs=50, n_neighbors=15)
sc.tl.leiden(adata_gd, key_added='leiden', resolution=0.8)

In [None]:
# Compute the t-SNE embedding for the merged object with scanpy's default settings.
sc.tl.tsne(adata_gd)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.tsne(adata_gd, color = ['original_cluster','leiden','ZBTB16','IKZF2','TRGV4','CD3D','CD3G','CD3E',
                                   'CD247','IFNG','IL17A','TRDC','TRDV1','TRDV2','TRDV3','CD4',
                                   'CD8A','CD8B','PDCD1','KLRK1','RORC','TRDC','FOXP3','TRAV1-2'],size = 50, legend_loc = 'on data', palette  = my_palette, ncols = 4, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#plt.savefig('overall_map_withunmatched.png')

In [None]:
# Re-cluster only cluster '4' of 'leiden'; the refined labels go to obs['leiden1'].
plt.rcParams['figure.figsize'] = [8, 8]
sc.tl.leiden(
    adata_gd,
    resolution=0.3,
    restrict_to=('leiden', ['4']),
    key_added='leiden1',
)
sc.pl.tsne(
    adata_gd,
    color=['leiden1', 'CD3E', 'CD3G', 'CD3D', 'TRDC'],
    size=30,
    legend_loc='on data',
    cmap=my_cmap,
)

In [None]:
# Gene panels for the dot plot, keyed by panel name.
TCR_exp_set = dict(
    gamma=gamma_genes,
    delta=delta_genes,
    CD3S=['CD3E', 'CD3D', 'CD3G'],
)

# One row per 'leiden1' label; each gene is scaled to [0, 1] across groups.
sc.pl.dotplot(
    adata_gd,
    TCR_exp_set,
    groupby='leiden1',
    standard_scale='var',
    vmax=1,
    dot_min=0,
    dot_max=1,
    swap_axes=False,
)

In [None]:
# Drop the cells labelled '4,0' or '4,2' in 'leiden1'; every other cell is kept.
excluded_cells = adata_gd.obs['leiden1'].isin(['4,0', '4,2'])
adata_gd_filtered = adata_gd[~excluded_cells, :]

In [None]:
# Save the filtered object to 'synapse_gd.h5ad' (path relative to the working directory).
adata_gd_filtered.write('synapse_gd.h5ad')

## Do they show any sign of IL-17 secretion?

In [None]:
# Plot IL17A and RORC expression on the t-SNE of the filtered cells.
sc.pl.tsne(adata_gd_filtered,color = ['IL17A','RORC'], cmap = my_cmap)

In [None]:
# Load sheet 'Supplementary Table 13' of the supplementary workbook as the metadata table.
meta = pd.read_excel('Synapse/41588_2022_1100_MOESM3_ESM.xlsx',sheet_name='Supplementary Table 13')

In [None]:
# Remove the '-JSC' tag from every patient ID.
# Series.replace('-JSC', '') only swaps values that are exactly '-JSC', so it left the IDs
# untouched; substring removal needs the .str accessor (regex=False treats the pattern literally).
adata.obs['patient.ID'] = adata.obs['patient.ID'].str.replace('-JSC', '', regex=False)

In [None]:
# For each selected metadata column, build a patient-ID -> value (as text) lookup and use it to
# fill a same-named obs column; patient IDs without an entry in the lookup are left unchanged.
for col_name in meta.columns[4:-3]:
    values_as_text = meta[col_name].astype(str).values
    patient_lookup = pd.Series(values_as_text, index=meta['patient.ID']).to_dict()
    adata.obs[col_name] = adata.obs['patient.ID'].replace(patient_lookup)

In [None]:
# Reload the filtered object from 'synapse_gd.h5ad'.
adata_gd_filtered = sc.read_h5ad('synapse_gd.h5ad')

In [None]:
# Remove every occurrence of '-JSC' from each patient ID (element-wise str.replace).
adata_gd_filtered.obs['patient.ID'] = [i.replace('-JSC','') for i in adata_gd_filtered.obs['patient.ID']]

In [None]:
# For each selected metadata column, build a patient-ID -> value lookup and use it to fill a
# same-named obs column; patient IDs without an entry in the lookup are left unchanged.
for col_name in meta.columns[2:-3]:
    column_values = meta[col_name].values
    patient_lookup = pd.Series(column_values, index=meta['patient.ID']).to_dict()
    adata_gd_filtered.obs[col_name] = adata_gd_filtered.obs['patient.ID'].replace(patient_lookup)

In [None]:
%matplotlib inline
plt.rcParams['axes.linewidth'] = 2
fig = sc.pl.tsne(adata_gd_filtered, color = ['TRDV1','TRDV2','original_cluster','patient.ID','Site','Sidedness','sample.origin'],
                 size = 80, palette  = my_palette, 
                 ncols = 1, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
plt.savefig('Synapse/overall_map_withunmatched.png')

#shows patient's gd T specificity

In [None]:
# Cast every obs column to str, then write the object to 'synapse_gd_with_metadata.h5ad'.
for column_name, column in list(adata_gd_filtered.obs.items()):
    adata_gd_filtered.obs[column_name] = column.astype(str)
adata_gd_filtered.write('synapse_gd_with_metadata.h5ad')

In [None]:
# Reload the annotated object; from here on adata_gd refers to this file's contents.
adata_gd = sc.read_h5ad('synapse_gd_with_metadata.h5ad')

In [None]:
# Display the sample, patient and clinical columns of obs for inspection.
adata_gd.obs[['sample.ID', 'patient.ID', 'sample.origin','dataset','Gender', 'Age at recruitment', 'Group Stage', 'Stage TNM','Site']]

In [None]:
# Duplicate selected obs columns under short, lower-case names (new name -> source column).
obs_aliases = {
    'patient': 'patient.ID',
    'tissue': 'sample.origin',
    'gender': 'Gender',
    'age': 'Age at recruitment',
    'site': 'Site',
    'stage': 'Group Stage',
}
for alias, source_column in obs_aliases.items():
    adata_gd.obs[alias] = adata_gd.obs[source_column]

In [None]:
# Split the combined 'Stage TNM' string into its T, N and M components:
#   TNM_T - text between the first 'T' and the following 'N'
#   TNM_N - text between the first 'N' and the following 'M' (or the end of the string)
#   TNM_M - text after the first 'M', or 'x' when the string contains no 'M'
# Each column is built with one assignment. The previous obs['TNM_M'][i] = ... form was
# chained assignment with an integer position on a string-labelled index, which pandas may
# apply to a temporary copy (and ignores under copy-on-write).
stage_tnm = adata_gd.obs['Stage TNM']
adata_gd.obs['TNM_T'] = [stage.split('T')[1].split('N')[0] for stage in stage_tnm]
adata_gd.obs['TNM_N'] = [stage.split('N')[1].split('M')[0] for stage in stage_tnm]
adata_gd.obs['TNM_M'] = [stage.split('M')[1] if 'M' in stage else 'x' for stage in stage_tnm]

In [None]:
# Spell out the gender codes: values equal to 'F' become 'Female' and 'M' become 'Male'.
adata_gd.obs['gender'] = adata_gd.obs['gender'].replace({'F':'Female','M':'Male'})

In [None]:
# t-SNE coloured by each of the renamed patient/clinical annotation columns.
sc.pl.tsne(adata_gd,color = ['patient','tissue','gender','age','site','TNM_T','TNM_N','TNM_M','stage'])

In [None]:
# Export the annotation columns (with the obs index) to 'synapse_gd.csv'.
adata_gd.obs[['patient','tissue','gender','age','site','TNM_T','TNM_N','TNM_M','stage']].to_csv('synapse_gd.csv')

In [None]:
# Save the annotated object to 'Synapse/Synapse_gd.h5ad'.
adata_gd.write('Synapse/Synapse_gd.h5ad')

In [None]:
# De-duplicate gene names in adata (in place) so they can serve as unique row names later.
adata.var_names_make_unique()

In [None]:
# Read the exported annotation table back in; the saved index comes back as an ordinary column.
gd_info = pd.read_csv('synapse_gd.csv')

In [None]:
# Derive the cell ID used in adata by dropping the last '-'-separated field of 'cell.ID'.
# NOTE(review): presumably that field is the batch suffix appended when the subsets were
# concatenated — confirm.
gd_info['new_cellID'] = ['-'.join(cell_id.split('-')[0:-1]) for cell_id in gd_info['cell.ID']]
# Index the table by cell ID as well: when the frame is passed to R its index becomes the
# row names, and Seurat's AddMetaData matches metadata rows to cells by row name. With the
# default 0..n-1 index no row would match a cell.
gd_info.index = gd_info['new_cellID'].to_numpy()

In [None]:
# Inputs for the R cell below: the expression matrix of the selected cells, transposed so that
# adata's variables are rows and cells are columns, plus the matching column and row labels.
counts = adata[gd_info['new_cellID'],:].X.T
colnames = gd_info['new_cellID']
rownames = adata.var_names

In [None]:
%%R -i colnames -i rownames -i counts 

# Label the imported matrix: columns from `colnames`, rows from `rownames`.
colnames(counts) = colnames
rownames(counts) = rownames

# Build a Seurat object from the labelled matrix; min.cells = 0 and min.features = 0 disable filtering.
srat <- CreateSeuratObject(counts = counts, project = "synapse", min.cells = 0, min.features = 0, assay = "RNA")

In [None]:
%%R -i gd_info
# Attach the imported gd_info table as cell metadata, then save the object to 'synapse_gd.rds'.
srat = AddMetaData(srat, metadata = gd_info)
saveRDS(srat, 'synapse_gd.rds')

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Two-stop colormap: light grey (227, 227, 227) at 0 blending to red (255, 42, 18) at 1.
values = [0, 1]
colors = [(227, 227, 227), (255, 42, 18)]
norm = plt.Normalize(min(values), max(values))

# Pair each normalised position with its RGB colour rescaled from 0-255 to 0-1.
color_stops = []
for value, color in zip(values, colors):
    rgb_fraction = tuple(np.array(color) / 255)
    color_stops.append((norm(value), rgb_fraction))
my_cmap = LinearSegmentedColormap.from_list('', color_stops)

In [None]:
# Reload the three saved objects from disk; this rebinds adata and adata_gd.
adata = sc.read_h5ad('synapse.h5ad')
adata_17 = sc.read_h5ad('synapse_T17.h5ad')
# Note: this is 'synapse_gd.h5ad', not the 'Synapse/Synapse_gd.h5ad' file written earlier.
adata_gd = sc.read_h5ad('synapse_gd.h5ad')

In [None]:
# Flag the cells that are also present in adata_17: '1' when the obs name occurs in
# adata_17.obs_names, otherwise '0'.
# Built in one assignment instead of chained indexing (obs[col][mask] = ...), which pandas
# may apply to a temporary copy and which is ignored under copy-on-write.
adata.obs['IL17 secreting selected'] = np.where(
    adata.obs_names.isin(adata_17.obs_names), '1', '0'
)

In [None]:
# Drop the last '-'-separated field from every obs name.
# NOTE(review): presumably the batch suffix appended by concatenate, so that the names match adata — confirm.
adata_gd.obs_names= ['-'.join(i.split('-')[0:-1]) for i in adata_gd.obs_names]

In [None]:
# Flag the cells that are also present in adata_gd: '1' when the obs name occurs in
# adata_gd.obs_names, otherwise '0'.
# Built in one assignment instead of chained indexing (obs[col][mask] = ...), which pandas
# may apply to a temporary copy and which is ignored under copy-on-write.
adata.obs['gdT selected'] = np.where(
    adata.obs_names.isin(adata_gd.obs_names), '1', '0'
)

In [None]:
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8, 8]
# UMAP of all cells: Leiden labels, the listed genes and the IL17 selection flag.
fig = sc.pl.umap(
    adata,
    color=['leiden', 'RORC', 'IL17A', 'IL17F', 'IL17 secreting selected'],
    size=1,
    ncols=2,
    vmax=4,
    palette=my_palette,
    cmap=my_cmap,
    legend_loc='on data',
    legend_fontsize='large',
    return_fig=True,
)
# Enlarge and embolden the title and axis labels of every axes in the figure.
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
#fig.savefig('17_selected.png',dpi = 300,bbox_inches='tight')

In [None]:
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8, 8]
# UMAP of all cells: the listed genes and the IL17 selection flag, drawn with a
# two-colour (grey / red) categorical palette.
fig = sc.pl.umap(
    adata,
    color=['RORC', 'IL17A', 'IL17F', 'IL17 secreting selected'],
    size=1,
    ncols=2,
    vmax=4,
    palette=['#E3E3E3', '#FF2A12'],
    cmap=my_cmap,
    legend_fontsize='large',
    return_fig=True,
)
# Enlarge and embolden the title and axis labels of every axes in the figure.
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
fig.savefig('17_selected.png', dpi=300, bbox_inches='tight')

In [None]:
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8, 8]
# UMAP of all cells: the listed genes and the gdT selection flag, drawn with a
# two-colour (grey / red) categorical palette.
fig = sc.pl.umap(
    adata,
    color=['CD3E', 'CD3D', 'CD3G', 'CD247', 'TRDC', 'gdT selected'],
    size=1,
    ncols=2,
    palette=['#E3E3E3', '#FF2A12'],
    cmap=my_cmap,
    legend_fontsize='large',
    return_fig=True,
)
# Enlarge and embolden the title and axis labels of every axes in the figure.
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
fig.savefig('gd_selected.png', dpi=300, bbox_inches='tight')

In [None]:
# De-duplicate gene names in adata_17 (in place) so they can serve as unique row names later.
adata_17.var_names_make_unique()

In [None]:
# Inputs for the R cell below: adata_17's expression matrix transposed so that variables are
# rows and cells are columns, plus the matching column and row labels.
counts = adata_17.X.T
colnames = adata_17.obs_names
rownames = adata_17.var_names

In [None]:
# Cell-level annotations to attach to the Seurat object.
# Note: no copy is made here — presumably `metadata` refers to the same table as adata_17.obs.
metadata = adata_17.obs

In [None]:
# Add a constant 'orig.ident' column with the value 'synapse' for every cell.
metadata['orig.ident'] = 'synapse'

In [None]:
%%R -i colnames -i rownames -i counts 

# Label the imported matrix: columns from `colnames`, rows from `rownames`.
colnames(counts) = colnames
rownames(counts) = rownames

# Build a Seurat object from the labelled matrix; min.cells = 0 and min.features = 0 disable filtering.
srat <- CreateSeuratObject(counts = counts, project = "synapse", min.cells = 0, min.features = 0, assay = "RNA")

In [None]:
%%R -i metadata
# Attach the imported metadata table as cell metadata, then save the object to 'synapse_T17.rds'.
srat = AddMetaData(srat, metadata = metadata)
saveRDS(srat, 'synapse_T17.rds')