In [None]:
#import the library
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
from gprofiler import GProfiler
import seaborn as sns
import logging
import os
import importlib
import warnings
warnings.filterwarnings("ignore")
import pickle as pkl
import anndata
from matplotlib.colors import LinearSegmentedColormap

In [None]:
def print_full(x):
    """Print ``x`` with every row visible, then restore the row limit.

    Args:
        x: A sized pandas object (Series / DataFrame). ``len(x)`` is used as
            the temporary ``display.max_rows`` value.

    The ``display.max_rows`` value that was active before the call is put
    back afterwards, even if printing raises. It is not reset to the pandas
    default, so a limit the caller configured earlier is not lost.
    """
    # option_context restores the previous value on exit, including on error.
    with pd.option_context('display.max_rows', len(x)):
        print(x)

In [None]:
# Categorical palette intended to be colour-blind friendly (25 colours).
my_palette = [
    '#0351A8', '#8CB0E0', '#D56D11', '#FFBB78', '#234E08',
    '#53CB8B', '#D30083', '#CB788D', '#4E195A', '#C58CCF',
    '#AA290F', '#B03FD1', '#E8BCCF', '#64605F', '#B2AD9A',
    '#D2D30B', '#D1BD4F', '#06DCF2', '#9EDAE5', '#517219',
    '#5B43CF', '#D92F24', '#FFD900', '#002F33', '#B8A3A3',
]
# Alternative palette, kept for reference:
#my_palette = ['#e6194b', '#3cb44b', '#f58231', '#4363d8', '#ffe119', '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#ffffff', '#000000']
# Six-colour palette (name suggests it is meant for the `stage` annotation).
stage_palette_1 = [
    '#e6194b', '#3cb44b', '#f58231',
    '#4363d8', '#808080', '#ffe119',
]

In [None]:
# Two-stop colormap running from light grey (low) to red (high); used as
# `cmap=my_cmap` for continuous colouring in the plots below.
from matplotlib.colors import LinearSegmentedColormap

values = [0, 1]
# NOTE(review): `colors` rebinds the name imported by `from matplotlib import colors`.
# The 0-255 RGB endpoints are rescaled to the 0-1 range matplotlib expects.
colors = [tuple(np.array(rgb) / 255) for rgb in [(227, 227, 227), (255, 42, 18)]]
my_cmap = LinearSegmentedColormap.from_list('', colors)

In [None]:
# Ignore R warning messages
#Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Automatically convert rpy2 outputs to pandas dataframes
pandas2ri.activate()
anndata2ri.activate()
%load_ext rpy2.ipython

In [None]:
# Default figure size for the notebook, verbose scanpy logging, and a dump of
# package versions for reproducibility.
plt.rcParams.update({'figure.figsize': (8, 8)})
sc.settings.verbosity = 3
#sc.set_figure_params(dpi=200, dpi_save=300)
sc.logging.print_versions()

In [None]:
%%R
# Point R at the project-local package library before attaching anything.
# NOTE(review): `.libPaths()` is called twice (the inner call's return value is
# passed to the outer call) — a single call is probably enough; confirm.
.libPaths(.libPaths('R\\win-library\\4.3'))

# Load all the R libraries we will be using in the notebook.
# NOTE(review): attach order matters because later packages mask earlier ones
# (e.g. monocle vs monocle3, plyr vs dplyr) — keep this order unless verified.
library(scran)
library(Seurat)
library(RColorBrewer)
library(slingshot)
library(monocle)
library(gam)
library(ggplot2)
library(plyr)
library(MAST)
library(clusterExperiment)
library(monocle3)
library(SeuratWrappers)
library(magrittr)
library(dplyr)  

In [None]:
# Load the integrated AnnData object from disk.
adata = sc.read_h5ad('gd_integrated.h5ad')

In [None]:
# List the data sources (categories of `orig.ident`).
adata.obs['orig.ident'].cat.categories

In [None]:
# Build the neighbourhood graph from the first 50 principal components.
sc.pp.neighbors(adata, n_pcs=50)

In [None]:
# Base Leiden clustering; labels are stored in adata.obs['leiden'].
sc.tl.leiden(adata, key_added='leiden', resolution=0.8)

In [None]:
# PCA scatter coloured by cluster and CD4, with cluster labels drawn on the points.
sc.pl.pca(
    adata,
    color=['leiden', 'CD4'],
    legend_loc='on data',
)

In [None]:
%matplotlib inline
sc.tl.tsne(adata)
#plt.close()
plt.rcParams['axes.linewidth'] = 2
sc.pl.tsne(adata, color=['leiden','CD3E','CD3G','CD3D','TRDC','ICOS','CD4','CD8A','CD8B','FOXP3','IFNG'],legend_loc = 'on data',cmap = my_cmap)

In [None]:
# t-SNE coloured by cluster, T-cell markers and sample metadata.
sc.pl.tsne(
    adata,
    color=['leiden', 'CD3E', 'CD3G', 'CD3D', 'TRDC',
           'orig.ident', 'gender', 'tissue'],
    cmap=my_cmap,
)

In [None]:
# Save the data. At this step the basic processing is done.
# NOTE(review): this writes to the same path the notebook loads from, so the
# input file is overwritten — confirm that is intended.
adata.write('gd_integrated.h5ad')

In [None]:
plt.close()
# t-SNE coloured by `age`, with enlarged bold axis labels and titles.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['age'],
    size=40, ncols=1, return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')

In [None]:
plt.close()
# t-SNE coloured by `stage` and `TNM_T` with a fixed categorical palette.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['stage', 'TNM_T'],
    size=40, ncols=1,
    palette=['#e6194b', '#3cb44b', '#f58231', '#4363d8',
             '#808080', '#ab986c', '#46f0f0', '#f032e6'],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')

In [None]:
plt.close()
# t-SNE coloured by `TNM_N`.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['TNM_N'],
    size=30, ncols=1, cmap=my_cmap,
    palette=['#e6194b', '#3cb44b', '#f58231', '#808080', '#ab986c', '#ffe119'],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')

In [None]:
plt.close()
# t-SNE coloured by `TNM_M`.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['TNM_M'],
    size=30, ncols=1, cmap=my_cmap,
    palette=['#e6194b', '#3cb44b', '#808080', '#ab986c'],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')

In [None]:
plt.close()
# t-SNE coloured by data source, gender and tissue, one panel per row.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['orig.ident', 'gender', 'tissue'],
    size=30, ncols=1, cmap=my_cmap, palette=my_palette,
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')

In [None]:
plt.close()
# t-SNE coloured by `site` with an 11-colour palette.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['site'],
    size=50, ncols=1, cmap=my_cmap,
    palette=["#99c969", "#4da388", "#5a788f", "#6793ea", "#3750c2",
             "#2d2dae", "#7f5ad6", "#8f1fd0", "#69673d", "#ba52a5", '#AB1717'],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')

In [None]:
plt.close()
# t-SNE coloured by `stage` with a five-colour palette.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['stage'],
    size=50, ncols=1, cmap=my_cmap,
    palette=["#1ad10a", "#a58c1d", "#e16b1b", "#ab1717", "#808080"],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')

In [None]:
plt.close()
# t-SNE panels for the CD3 complex genes and TRDC; the figure is written to disk.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['CD3E', 'CD3G', 'CD3D', 'CD247', 'TRDC'],
    size=40, ncols=5, cmap=my_cmap,
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
fig.savefig('gd_integration_identity_tsne.png', dpi=300, bbox_inches='tight')

In [None]:
%matplotlib inline
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8,8]
fig = sc.pl.tsne(adata, color=['leiden','IL7R','KLRC1','CCL4','GZMA','IFNG','KLRB1','HSPA6','KRT8','XCL1','CCR7','CX3CR1',
                              'KLF2','CTLA4','CCR7','SELL','PHGR1','AQP3','GNLY','IGHA1'],
                 size =40, ncols = 5, cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
#fig.savefig('gd_integration_identity_tsne.png',dpi = 300,bbox_inches='tight')

In [None]:
# Reset matplotlib to its default rcParams for this dot plot.
# The custom font/linewidth update that used to precede this line had no
# effect because the reset overwrote it immediately, so it was removed.
plt.rcParams.update(plt.rcParamsDefault)


# Marker-gene dot plot per Leiden cluster; each gene is scaled to [0, 1]
# (standard_scale='var') and the figure is saved by scanpy.
# NOTE(review): 'S1PR1' and 'CCL4' each appear twice in the gene list.
sc.pl.dotplot(adata,['CD4','CD8A','CD8B','CCR7', 'SELL', 'CD27', 'CD28', 'IL7R', 'CD44','CD38','HLA-DRB1', 'KLRG1', 'IL2',
       'IL2RA', 'IL2RB', 'CD69','S1PR1','KLF2', 'ITGAE', 'ITGA1','ITGB2','S1PR1','CCL4', 'GZMK', 'CD101', 'CX3CR1',
       'TCF7', 'LEF1', 'PRDM1', 'TBX21', 'EOMES', 'BACH2', 'GZMB', 'PRF1',
       'FAS', 'FASLG', 'TNF', 'IFNG', 'NKG7', 'CCL4', 'XCL1',
       'XCL2', 'STAT3', 'CD40LG', 'TRAV1-2', 'PDCD1', 'HAVCR2', 'LAG3',
       'MKI67', 'TFRC', 'RORC', 'RORA', 'STAT1', 'STAT4', 'STAT5A',
       'STAT6', 'RUNX1', 'RUNX3',  'CCR4', 'CCR5', 'CCL5','CCR6', 'CCR8',
       'CCR10','CXCR3',  'CXCR4', 'CXCR5', 'CXCR6', 'IL4',  'IL10', 'IL13', 'IL17A',
       'IL6R', 'IL12RB2', 'IL15RA', 'IL17RB', 'IL18R1',
       'IL21R', 'IL23R', 'IL27RA', 'KLRD1', 'KLRK1', 'TNFRSF8', 'GZMA',
       'LTA', 'IFNGR2', 'HLA-DRA', 'SLC3A2', 'CTLA4', 'FOXP3', 'SMAD3',
       'AHR', 'ENTPD1', 'NT5E', 'TGFB1', 'ITGA2', 'BCL6', 'MAF', 'BTLA',
       'ICOS', 'DPP4', 'GATA3', 'CCL20', 'IRF4', 'BATF',
       'ZBTB16', 'TNFSF8', 'IKZF2'],groupby = 'leiden' , vmax = 1, 
              swap_axes = False ,dot_min = 0.01,standard_scale = 'var', save= 'Inference.png')

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '2' of `leiden`; the result is stored as `leiden2`.
# Revisit the DE section to check that this sub-clustering makes sense.
sc.tl.leiden(
    adata,
    restrict_to=('leiden', ['2']),
    resolution=0.3,
    key_added='leiden2',
)
sc.pl.tsne(adata, color=['leiden2', 'CCL4'], size=30, legend_loc='on data', cmap=my_cmap)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '7' of `leiden2`; the result is stored as `leiden3`.
# Revisit the DE section to check that this sub-clustering makes sense.
sc.tl.leiden(
    adata,
    restrict_to=('leiden2', ['7']),
    resolution=0.3,
    key_added='leiden3',
)
sc.pl.tsne(adata, color=['leiden3', 'CCL4'], size=30, legend_loc='on data', cmap=my_cmap)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '14' of `leiden3`; the result is stored as `leiden4`.
# Revisit the DE section to check that this sub-clustering makes sense.
sc.tl.leiden(
    adata,
    restrict_to=('leiden3', ['14']),
    resolution=0.3,
    key_added='leiden4',
)
sc.pl.tsne(adata, color=['leiden4', 'IFNG'], size=30, legend_loc='on data', cmap=my_cmap)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '0' of `leiden4`; the result is stored as `leiden5`.
# Revisit the DE section to check that this sub-clustering makes sense.
sc.tl.leiden(
    adata,
    restrict_to=('leiden4', ['0']),
    resolution=0.4,
    key_added='leiden5',
)
sc.pl.tsne(adata, color=['leiden5', 'JUNB'], size=30, legend_loc='on data', cmap=my_cmap)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster the '14,0' group of `leiden5`; the result is stored as `leiden6`.
# Revisit the DE section to check that this sub-clustering makes sense.
sc.tl.leiden(
    adata,
    restrict_to=('leiden5', ['14,0']),
    resolution=0.3,
    key_added='leiden6',
)
sc.pl.tsne(adata, color=['leiden6', 'CD19', 'CD3D'], size=30)#, legend_loc = 'on data', cmap = my_cmap)

In [None]:
# Renumber the `leiden5` groups as consecutive integer labels (stored as strings).
_group_number = adata.obs.groupby('leiden5').ngroup()
adata.obs['cluster_blind'] = _group_number.astype('str')

In [None]:
def DE_to_df(_adata, rank_key, _padj_thresh=0.05, _logfc_thresh=1):
    """Collect significantly up- and down-regulated genes per group.

    Reads the ``names``, ``logfoldchanges`` and ``pvals_adj`` record arrays
    stored under ``_adata.uns[rank_key]`` and, for every group (one field per
    group), keeps genes with adjusted p-value ``<= _padj_thresh`` and log fold
    change ``>= _logfc_thresh`` (up) or ``<= -_logfc_thresh`` (down). Genes
    whose name starts with ``RPL``, ``RPS`` or ``MT-`` are removed, and the
    remainder is ordered by decreasing absolute log fold change.

    Args:
        _adata: Object exposing ``.uns[rank_key]`` with the three record arrays.
        rank_key: Key in ``_adata.uns`` holding the ranking result.
        _padj_thresh: Maximum adjusted p-value to keep a gene.
        _logfc_thresh: Minimum absolute log fold change to keep a gene.

    Returns:
        ``(up, down)``: two DataFrames with one column per group (in the order
        of the record-array fields) containing gene names, padded with NaN.
    """
    _result = _adata.uns[rank_key]

    def process_genes(i, direction):
        """Return the filtered, sorted gene table for group ``i``."""
        _lfc = _result['logfoldchanges'][i].astype('double')
        _padj = _result['pvals_adj'][i].astype('double')
        _names = _result['names'][i].astype('str')

        if direction == "up":
            _log2foldmask = _lfc >= _logfc_thresh
        else:
            _log2foldmask = _lfc <= -_logfc_thresh
        _keep = _log2foldmask & (_padj <= _padj_thresh)

        _additional = pd.DataFrame({
            i: _names[_keep],
            'logfoldchanges_'+i: np.abs(_lfc[_keep]),
        })
        # Build the ribosomal/mitochondrial mask and apply it BEFORE sorting.
        # Applying a pre-sort positional mask to the sorted frame (as the old
        # code did) removes the wrong rows.
        _drop_id = np.array(
            [gene.startswith(('RPL', 'RPS', 'MT-')) for gene in _additional[i]],
            dtype=bool,
        )
        _additional = _additional.loc[~_drop_id]
        _additional = _additional.sort_values(by='logfoldchanges_'+i, ascending=False)
        return _additional.reset_index(drop=True)

    # Iterate the fields in their stored order; a set() would make the column
    # order vary from run to run.
    _groups = list(_result['pvals_adj'].dtype.names)
    if not _groups:
        return pd.DataFrame(), pd.DataFrame()

    _pass_genes_up = pd.concat([process_genes(i, "up")[i] for i in _groups], axis=1)
    _pass_genes_down = pd.concat([process_genes(i, "down")[i] for i in _groups], axis=1)
    return _pass_genes_up, _pass_genes_down

In [None]:
# Wilcoxon rank-sum DE test for every `cluster_blind` group; results go to adata.uns['rank'].
sc.tl.rank_genes_groups(
    adata,
    groupby='cluster_blind',
    method='wilcoxon',
    key_added='rank',
)

In [None]:
# Per-cluster up/down gene tables at adjusted p <= 0.01 and |logFC| >= 2.
pass_genes_up, pass_genes_down = DE_to_df(
    adata, rank_key='rank', _padj_thresh=0.01, _logfc_thresh=2
)

In [None]:
# Drop clusters with no up-regulated genes, then order columns by numeric cluster id.
DE_list = pass_genes_up.dropna(axis=1, how='all')
_numeric_order = DE_list.columns.astype(int).sort_values()
DE_list = DE_list[_numeric_order.astype(str)]

In [None]:
plt.rcParams.update({'font.size': 15, 'font.weight': 'heavy','axes.linewidth':2})
#plt.rcParams.update(plt.rcParamsDefault)

# Select TCR gamma / delta genes by name prefix.
gamma_genesmask = [gene.startswith(("TRGV", "TRGJ", "TRGC")) for gene in adata.var_names]
gamma_genes = adata.var_names[gamma_genesmask]
delta_genesmask = [gene.startswith(("TRDV", "TRDC")) for gene in adata.var_names]
delta_genes = adata.var_names[delta_genesmask]

# Gene groups shown in the dot plot.
TCR_exp_set = {
    "gamma": gamma_genes,
    "delta": delta_genes,
    "T": ['CD3D','CD3E','CD3G','CD247','CD19'],
}

sc.pl.dotplot(
    adata, TCR_exp_set,
    groupby='cluster_blind',
    vmax=1, swap_axes=True, figsize=(8,10),
    dot_min=0, dot_max=0.8,
    standard_scale='var',
    save='gd_identity.png',
)

In [None]:
# Keep only carcinoma and normal tissue cells.
# .copy() makes this an independent AnnData: highly_variable_genes writes into
# `.var`, and writing into a view of `adata` relies on an implicit copy.
adata_tissue = adata[adata.obs['tissue'].isin(['Carcinoma','Normal']),:].copy()
# Flag the 5000 most variable genes (Seurat flavour).
# NOTE(review): presumably adata.X is log-normalised at this point — confirm.
sc.pp.highly_variable_genes(adata_tissue, flavor='seurat', n_top_genes = 5000)

In [None]:
counts = adata_tissue.layers['counts'].T[adata_tissue.var['highly_variable'],:]
tissue = adata_tissue.obs['tissue'].astype('str')
colnames = adata_tissue.obs_names
rownames = adata_tissue.var_names[adata_tissue.var['highly_variable']]

In [None]:
%%R -i counts -i colnames -i rownames -i tissue
colnames(counts) = colnames
rownames(counts) = rownames

srat <- CreateSeuratObject(counts = counts, project = "gd", min.cells = 0, min.features = 0, assay = "RNA")
srat[['tissue']] = tissue
saveRDS(srat, file = "gd_for_DEG.rds")

In [None]:
%%R
# Convert the Seurat object to a cell_data_set, estimate size factors, and fit
# one model per gene with `gender` as the only covariate.
# NOTE(review): `as.cell_data_set` presumably comes from SeuratWrappers and
# `fit_models` / `coefficient_table` from monocle3 — confirm.
# NOTE(review): the formula requires a `gender` column in the cell metadata of
# `srat` — verify it was attached before conversion.
cds <- as.cell_data_set(srat)
cds <- estimate_size_factors(cds)
# Fill in gene_short_name directly in the row metadata, using the RNA assay's row names.
cds@rowRanges@elementMetadata@listData[["gene_short_name"]] <- rownames(srat[["RNA"]])

gene_fits <- fit_models(cds, model_formula_str = "~gender", cores = 6)
fit_coefs <- coefficient_table(gene_fits)

In [None]:
%%R
# Keep the `genderMale` coefficient for every gene (adjust the term name to
# test another condition) and keep only the columns used downstream.
terms <- fit_coefs %>%
  filter(term == "genderMale") %>%
  select(gene_short_name, term, q_value, normalized_effect)

# Store results
write.csv(terms, 'gd_gender_DEG.csv')

In [None]:
# Export the R variable `terms` into the Python namespace.
# NOTE(review): the next cell rebinds `terms` from the CSV, so this may be redundant.
%R -o terms

In [None]:
# Load the per-gene gender coefficients and flag genes that are both
# significant (q <= 0.05) and have |normalized effect| >= fc_thresh.
terms = pd.read_csv('gd_gender_DEG.csv')
terms['-logQ'] = -np.log10(terms['q_value'])
terms = terms[terms['-logQ'] > 0]
fc_thresh = 1
_is_significant = np.array(terms['-logQ'] >= -np.log10(0.05))
_is_large_effect = np.array(abs(terms['normalized_effect']) >= fc_thresh)
lowqval_de = terms[_is_significant & _is_large_effect]

In [None]:
##### Volcano plot of results
from adjustText import adjust_text

# `terms` is a filtered slice here; assign() returns a new frame, so adding the
# column does not write into that slice.
terms = terms.assign(**{'-logQ': -np.log10(terms['q_value'])})
terms = terms[terms['-logQ']>0]
fc_thresh = 1
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold','axes.linewidth':2})
# Significant genes: q <= 0.05 and |normalized effect| >= fc_thresh.
lowqval_de = terms[np.array(terms['-logQ']>=-np.log10(0.05)) & np.array(abs(terms['normalized_effect'])>=fc_thresh)]
# Everything else (non-significant or small effect).
other_de = terms[np.array(terms['-logQ']<-np.log10(0.05)) | np.array(abs(terms['normalized_effect'])<fc_thresh)]

fig, ax = plt.subplots()
# x / y are passed by keyword (positional x, y is rejected by recent seaborn
# releases) and the target axes is given explicitly.
sb.regplot(x=other_de['normalized_effect'], y=other_de['-logQ'], fit_reg=False,
           scatter_kws={'s':50, 'edgecolors': 'black', 'linewidths': 0.5}, ax=ax)
sb.regplot(x=lowqval_de['normalized_effect'], y=lowqval_de['-logQ'], fit_reg=False,
           scatter_kws={'s':50,'edgecolors': 'black', 'linewidths': 0.5}, ax=ax)
ax.set_xlabel("Normalized log2 FC", fontsize=30)
ax.set_ylabel("-log Q-value", fontsize=30)
ax.tick_params(labelsize=15)
ax.set_title('Male vs Female',size = 30)

# Label names and positions (labels sit slightly above their points).
x = [i for i in lowqval_de['normalized_effect']]
y = [i*1.02 for i in lowqval_de['-logQ']]
labels = lowqval_de['gene_short_name']
# Show only some labels to avoid overcrowding the figure: skip genes whose name
# starts with 'RP' or 'MT', and genes that have both a modest effect
# (< 1.75 * fc_thresh) and -logQ < 150.
to_keep = np.where((np.array(lowqval_de['gene_short_name'].str.startswith('RP'))
                    |np.array(lowqval_de['gene_short_name'].str.startswith('MT'))
                     |(np.array(abs(lowqval_de['normalized_effect'])<fc_thresh*1.75)
                           &np.array(lowqval_de['-logQ']<150))
                   )==0)

labels = labels.iloc[to_keep[0]]
texts = []
for i,txt in enumerate(labels):
    texts.append(ax.text(x[to_keep[0][i]], y[to_keep[0][i]], txt,size = 11, color= '#904800', weight = 'bold'))

# Nudge labels apart so they do not overlap.
adjust_text(texts)

# Threshold guides: q = 0.05 and +/- fc_thresh.
plt.ylim([-0.01,350])
plt.axhline(y=-np.log10(0.05), color="orange", linestyle="--")
plt.axvline(x = -fc_thresh, color="orange", linestyle="--")
plt.axvline(x = fc_thresh, color="orange", linestyle="--")
plt.savefig('gd_gender_DE.png')
#plt.show()

In [None]:
plt.close()
# Embedding coloured by `tissue`, saved to disk.
# NOTE(review): this draws the UMAP embedding but saves to a file named
# '..._tsne.png' — confirm which embedding is intended.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.umap(
    adata, color=['tissue'],
    size=30, ncols=1, cmap=my_cmap,
    palette=["#c71b18", "#18b400", "#ebde28", "#ed9e3c"],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
fig.savefig('gd_integration_metadata_tsne.png', dpi=300, bbox_inches='tight')

In [None]:
plt.close()
# t-SNE coloured by `site` (palette variant with grey as the ninth colour).
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['site'],
    size=50, ncols=1, cmap=my_cmap,
    palette=["#99c969", "#4da388", "#5a788f", "#6793ea", "#3750c2",
             "#2d2dae", "#7f5ad6", "#8f1fd0", "#808080", "#ba52a5", '#AB1717'],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
#fig.savefig('gd_integration_site_tsne.png',dpi = 300,bbox_inches='tight')

In [None]:
plt.close()
# t-SNE coloured by `stage`.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.tsne(
    adata, color=['stage'],
    size=50, ncols=1, cmap=my_cmap,
    palette=["#1ad10a", "#a58c1d", "#e16b1b", "#ab1717", "#808080"],
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
#fig.savefig('gd_integration_stage_tsne.png',dpi = 300,bbox_inches='tight')

In [None]:
# t-SNE coloured by `leiden1` and T-cell marker genes.
# NOTE(review): no cell shown here creates 'leiden1' — presumably it is stored
# in the loaded h5ad; confirm.
sc.pl.tsne(
    adata,
    color=['leiden1', 'CD3D', 'CD3E', 'CD3G', 'CD247', 'TRDC', 'CCR7',
           'SELL', 'IL7R', 'CD4', 'CD8A', 'ICOS', 'KLRG1'],
    legend_loc='on data',
    cmap=my_cmap,
    size=30,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '9' of `leiden3`. The result is written to `leiden4`,
# replacing any existing column of that name.
sc.tl.leiden(
    adata,
    restrict_to=('leiden3', ['9']),
    resolution=0.4,
    key_added='leiden4',
)
sc.pl.tsne(
    adata,
    color=['leiden4', 'CD3E', 'CD3G', 'CD3D', 'TRDC', 'RORC', 'IL17A', 'IL17F'],
    size=30, legend_loc='on data', cmap=my_cmap,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '1' of `leiden4`; the result is stored as `leiden5`.
sc.tl.leiden(
    adata,
    restrict_to=('leiden4', ['1']),
    resolution=0.5,
    key_added='leiden5',
)
sc.pl.tsne(
    adata, color=['leiden5', 'CD4', 'CD8A'],
    size=30, legend_loc='on data', cmap=my_cmap, vmax=4,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '10' of `leiden5`; the result is stored as `leiden6`.
sc.tl.leiden(
    adata,
    restrict_to=('leiden5', ['10']),
    resolution=0.2,
    key_added='leiden6',
)
sc.pl.tsne(
    adata,
    color=['leiden6', 'CD3E', 'CD3G', 'CD3D', 'CXCR5', 'RORC', 'IL17A', 'IL17F'],
    size=30, legend_loc='on data', cmap=my_cmap,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '6' of `leiden6`; the result is stored as `leiden7`.
sc.tl.leiden(
    adata,
    restrict_to=('leiden6', ['6']),
    resolution=0.3,
    key_added='leiden7',
)
sc.pl.tsne(
    adata,
    color=['leiden7', 'CD3E', 'CD3G', 'CD3D', 'TRDC', 'RORC', 'IL17A',
           'IL17F', 'CD4', 'CD8A', 'CD19'],
    size=30, legend_loc='on data', cmap=my_cmap,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '8' of `leiden7`; the result is stored as `leiden8`.
sc.tl.leiden(
    adata,
    restrict_to=('leiden7', ['8']),
    resolution=0.5,
    key_added='leiden8',
)
sc.pl.tsne(
    adata, color=['leiden8', 'CD4', 'CD8A'],
    size=30, legend_loc='on data', cmap=my_cmap, vmax=4,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster the '2,0' group of `leiden8`; the result is stored as `leiden9`.
sc.tl.leiden(
    adata,
    restrict_to=('leiden8', ['2,0']),
    resolution=0.3,
    key_added='leiden9',
)
sc.pl.tsne(
    adata, color=['leiden9', 'CD4', 'CD8A'],
    size=30, legend_loc='on data', cmap=my_cmap, vmax=4,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '4' of `leiden9`; the result is stored as `leiden10`.
sc.tl.leiden(
    adata,
    restrict_to=('leiden9', ['4']),
    resolution=0.3,
    key_added='leiden10',
)
sc.pl.tsne(
    adata, color=['leiden10', 'CD4', 'CD8A'],
    size=30, legend_loc='on data', cmap=my_cmap, vmax=4,
)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 8]})
# Sub-cluster cluster '11' of `leiden10`; the result is stored as `leiden11`.
sc.tl.leiden(
    adata,
    restrict_to=('leiden10', ['11']),
    resolution=0.2,
    key_added='leiden11',
)
sc.pl.tsne(
    adata, color=['leiden11', 'CD4', 'CD8A'],
    size=30, legend_loc='on data', cmap=my_cmap, vmax=4,
)

In [None]:
# Renumber the `leiden11` groups as consecutive integer labels (stored as strings).
_group_number = adata.obs.groupby('leiden11').ngroup()
adata.obs['cluster_blind'] = _group_number.astype('str')

In [None]:
# t-SNE coloured by the renumbered clusters, metadata and marker genes.
# NOTE(review): 'cell type' is only assigned in a later cell of this notebook;
# on a fresh top-to-bottom run it must already be present in the loaded file — confirm.
sc.pl.tsne(
    adata,
    color=['cluster_blind', 'tissue', 'CTLA4', 'cell type', 'GZMH', 'PDCD1',
           'ENTPD1', 'GZMB', 'KLRG1', 'ITGA1', 'ITGAE', 'EPCAM'],
    legend_loc='on data',
    size=40,
    cmap=my_cmap,
)

In [None]:
plt.rcParams.update({'font.size': 15, 'font.weight': 'heavy','axes.linewidth':2})
#plt.rcParams.update(plt.rcParamsDefault)

# Marker panel used to annotate the `cluster_blind` groups.
# (IL9 was checked as well: nothing.)
gene_list = [
    'CD3E','CD3G','CD3D','TRDC','TRDV1','TRDV2','TRDV3','CD4','CD8A','CD8B','CD28','CCR7','SELL','IL7R','TCF7','LEF1','S1PR1','KLF2',
    'ITGAE','ITGA1','ITGB2','CD69','KLRG1','KLRB1','KLRC1','KLRC2','KLRC3','KLRD1','KLRF1','KLRK1','GZMA','GZMB','GZMK','GZMH','CCL4','NKG7','PRF1',
    'XCL1','CX3CR1','FCGR3A','TNF','IFNG','IL2','IL2RA','FOXP3','ENTPD1','PDCD1','CTLA4','LAG3','HAVCR2','TIGIT','BTLA','IL10','CXCR5','CXCL13','ZBTB16','IKZF2','RORC','IL17A',
    'MUC2','NCR1','NCR2','NCR3','GNLY','KIT','ZNF683','PRDM1','TBX21','ID3','BCL6','BCL2','LAYN','SLAMF6','CD200','MKI67','CD244',
]
sc.pl.dotplot(
    adata, gene_list,
    groupby='cluster_blind',
    vmax=1, swap_axes=True, dot_min=0.15,
    standard_scale='var', figsize=(10,15),
    save='inference_dot_gd.png',
)

In [None]:
# Map every `cluster_blind` id to a cell-type label.
# The mapping is applied in one vectorised pass, instead of replacing the whole
# `adata.obs` frame once per cluster; the resulting column is the same.
poised_effector_like_1 = [str(i) for i in (7, 8, 15, 20, 22, 23)]
poised_effector_like_2 = [str(i) for i in (12, 10, 28)]
tissue_resident_IL7Rneg_like = [str(i) for i in (11, 4, 5, 9, 13, 21, 29, 24)]
Tpex = [str(i) for i in (16, 17, 18)]
Tex2 = ['26','27']
Tex1 = ['33']

# cluster id -> label; the id lists above do not overlap.
_label_by_cluster = {}
for _label, _cluster_ids in [
    ('Poised Teff 1', poised_effector_like_1),
    ('Poised Teff 2', poised_effector_like_2),
    ('IL7R- TRM', tissue_resident_IL7Rneg_like),
    ('Tex1', Tex1),
    ('Tex2', Tex2),
    ('Tpex', Tpex),
]:
    _label_by_cluster.update(dict.fromkeys(_cluster_ids, _label))

# Any cluster not listed above falls back to 'IL7R+ TRM'.
adata.obs['cell type'] = (
    adata.obs['cluster_blind']
    .astype('str')
    .map(lambda c_id: _label_by_cluster.get(c_id, 'IL7R+ TRM'))
    .astype('category')
)

In [None]:
# Fix the display order of the cell-type categories.
# reorder_categories requires that exactly these labels are present.
ordered_celltype = [
    'Poised Teff 1', 'Poised Teff 2',
    'IL7R+ TRM', 'IL7R- TRM',
    'Tpex', 'Tex1', 'Tex2',
]
_cell_type = adata.obs['cell type'].astype('category')
adata.obs['cell type'] = _cell_type
adata.obs['cell type'] = adata.obs['cell type'].cat.reorder_categories(ordered_celltype)

In [None]:
# Seven-colour blue/violet palette (one colour per cell type label).
palette = [
    "#A7E1F1", "#4c9bd4", "#3B749C", "#1452a3",
    '#6E51E5', "#132876", '#4539A3',
]

In [None]:
# PAGA connectivity between cell types, then plot the abstracted graph.
sc.tl.paga(adata, groups='cell type')
sc.pl.paga(
    adata,
    threshold=0.25,
    labels=None,
    fontsize=10,
    fontoutline=3,
    node_size_scale=3,
    node_size_power=1,
    random_state=0,
)

In [None]:
# UMAP initialised from the PAGA layout.
sc.tl.umap(adata, init_pos='paga', min_dist=0.001, spread=1.5)

In [None]:
plt.close()
# UMAP coloured by cell type, saved to disk.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.umap(
    adata, color=['cell type'],
    size=30, ncols=5, cmap=my_cmap,
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
fig.savefig('gd_integration_metadata_umap.png', dpi=300, bbox_inches='tight')

In [None]:
plt.close()
# UMAP coloured by data source, saved to disk.
plt.rcParams.update({'axes.linewidth': 2, 'figure.figsize': [8, 8]})
fig = sc.pl.umap(
    adata, color=['orig.ident'],
    size=30, ncols=5, cmap=my_cmap,
    return_fig=True, legend_fontsize='large',
)
ax = fig.get_axes()
for panel in ax:
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
fig.savefig('gd_integration_sources_metadata_umap.png', dpi=300, bbox_inches='tight')

In [None]:
def DE_to_df(_adata, rank_key, _padj_thresh=0.05, _logfc_thresh=1):
    """Collect significantly up- and down-regulated genes per group.

    Reads the ``names``, ``logfoldchanges`` and ``pvals_adj`` record arrays
    stored under ``_adata.uns[rank_key]`` and, for every group (one field per
    group), keeps genes with adjusted p-value ``<= _padj_thresh`` and log fold
    change ``>= _logfc_thresh`` (up) or ``<= -_logfc_thresh`` (down). Genes
    whose name starts with ``RPL``, ``RPS`` or ``MT-`` are removed, and the
    remainder is ordered by decreasing absolute log fold change.

    Args:
        _adata: Object exposing ``.uns[rank_key]`` with the three record arrays.
        rank_key: Key in ``_adata.uns`` holding the ranking result.
        _padj_thresh: Maximum adjusted p-value to keep a gene.
        _logfc_thresh: Minimum absolute log fold change to keep a gene.

    Returns:
        ``(up, down)``: two DataFrames with one column per group (in the order
        of the record-array fields) containing gene names, padded with NaN.
    """
    _result = _adata.uns[rank_key]

    def process_genes(i, direction):
        """Return the filtered, sorted gene table for group ``i``."""
        _lfc = _result['logfoldchanges'][i].astype('double')
        _padj = _result['pvals_adj'][i].astype('double')
        _names = _result['names'][i].astype('str')

        if direction == "up":
            _log2foldmask = _lfc >= _logfc_thresh
        else:
            _log2foldmask = _lfc <= -_logfc_thresh
        _keep = _log2foldmask & (_padj <= _padj_thresh)

        _additional = pd.DataFrame({
            i: _names[_keep],
            'logfoldchanges_'+i: np.abs(_lfc[_keep]),
        })
        # Build the ribosomal/mitochondrial mask and apply it BEFORE sorting.
        # Applying a pre-sort positional mask to the sorted frame (as the old
        # code did) removes the wrong rows.
        _drop_id = np.array(
            [gene.startswith(('RPL', 'RPS', 'MT-')) for gene in _additional[i]],
            dtype=bool,
        )
        _additional = _additional.loc[~_drop_id]
        _additional = _additional.sort_values(by='logfoldchanges_'+i, ascending=False)
        return _additional.reset_index(drop=True)

    # Iterate the fields in their stored order; a set() would make the column
    # order vary from run to run.
    _groups = list(_result['pvals_adj'].dtype.names)
    if not _groups:
        return pd.DataFrame(), pd.DataFrame()

    _pass_genes_up = pd.concat([process_genes(i, "up")[i] for i in _groups], axis=1)
    _pass_genes_down = pd.concat([process_genes(i, "down")[i] for i in _groups], axis=1)
    return _pass_genes_up, _pass_genes_down

In [None]:
# Wilcoxon rank-sum DE test for every cell type; results go to adata.uns['rank'].
sc.tl.rank_genes_groups(
    adata,
    groupby='cell type',
    method='wilcoxon',
    key_added='rank',
)

In [None]:
# Per-cell-type up/down gene tables at adjusted p <= 0.01 and |logFC| >= 2.
pass_genes_up, pass_genes_down = DE_to_df(
    adata, rank_key='rank', _padj_thresh=0.01, _logfc_thresh=2
)

In [None]:
# Same test restricted to the Tex1 and Tex2 groups; results go to adata.uns['rank_ex'].
sc.tl.rank_genes_groups(
    adata,
    groupby='cell type',
    groups=['Tex1', 'Tex2'],
    method='wilcoxon',
    key_added='rank_ex',
)

In [None]:
# Tex1/Tex2 up/down gene tables at adjusted p <= 0.05 and |logFC| >= 2.
pass_genes_up, pass_genes_down = DE_to_df(
    adata, rank_key='rank_ex', _padj_thresh=0.05, _logfc_thresh=2
)

In [None]:
# Show the complete table of up-regulated genes without row truncation.
print_full(pass_genes_up)  #['Tex2'].dropna()

In [None]:
plt.rcParams.update({'font.size': 12, 'font.weight': 'heavy','axes.linewidth':2})
#plt.rcParams.update(plt.rcParamsDefault)

# Marker panel shown per annotated cell type.
# (IL9 was checked as well: nothing.)
# NOTE(review): the save name matches the one used for the `cluster_blind` dot
# plot, so that figure file is overwritten — confirm.
gene_list = [
    'CD3E','CD3G','CD3D','CD247','TRDC','CD4','CD8A','CD8B','CCR7','SELL','IL7R','TCF7','S1PR1','KLF2',
    'ITGAE','ITGA1','ITGB2','KLRG1','KLRB1','KLRC1','KLRF1','KLRK1','GZMA','GZMB','GZMK','GZMH','CCL4','NKG7','PRF1',
    'XCL1','CX3CR1','FCGR3A','FGFBP2','HSPA6','BAG3','TNF','IFNG','IL2','IL2RA','FOXP3','ENTPD1','CD274','PDCD1','CTLA4','LAG3','HAVCR2','TIGIT',
    'CD244','BTLA','CXCR5','CXCL13','IKZF2','RORC','IL17A',
    'GNLY','KIT','ZNF683','PRDM1','TBX21','ID3','BCL6','MKI67','FCER1G','TNFRSF9','TNFSF10','MYO7A','HLA-DRA','HLA-DRB5',
]
sc.pl.dotplot(
    adata, gene_list,
    groupby='cell type',
    vmax=1, swap_axes=True, dot_min=0.12,
    standard_scale='var', figsize=(4,15),
    save='inference_dot_gd.png',
)

In [None]:
# obs columns whose composition is broken down in the bar charts below.
print_keys = [
    'gender',
    'tissue',
    'site',
    'stage',
    'cell type',
]

In [None]:
plt.rcParams['figure.figsize'] = [8,10]
plt.rcParams.update({'font.size': 16, 'font.weight': 'bold','axes.linewidth':4})
# For every annotation in print_keys, draw a stacked bar chart of its
# composition (fractions) within each tissue and save it to disk.
# Tissues are taken in category order so the bars come out in the same order on
# every run; iterating a set() made that order arbitrary.
_tissue_levels = adata.obs['tissue'].cat.remove_unused_categories().cat.categories
for key in print_keys:
    cat_order = adata.obs[key].values.categories
    # One normalised value_counts Series per tissue, named after the tissue so
    # the column label does not depend on pandas' default column name.
    _fractions = [
        adata.obs.loc[adata.obs['tissue'] == i, key]
        .value_counts(normalize = True)
        .rename(i)
        for i in _tissue_levels
    ]
    expand_subtype = pd.concat(_fractions, ignore_index=False, axis=1)
    expand_subtype = expand_subtype.reindex(index =cat_order)
    # Colours come from the palette scanpy stored for this annotation.
    expand_subtype.transpose().plot.bar(stacked=True,color = adata.uns[key+'_colors'],fontsize = 30 ).legend(loc='center left',bbox_to_anchor=(1.0, 0.5),fontsize = 15)
    plt.title(label =key+' breakdown', fontsize = 20)
    plt.tight_layout()
    plt.savefig(key+' breakdown in tissue gd reduced.png',dpi = 300)

In [None]:
plt.rcParams['figure.figsize'] = [8,7.2]

# For every annotation in print_keys, draw a stacked bar chart of its
# composition (fractions) within each data source (`orig.ident`) and save it.
# Sources are taken in category order so the bars come out in the same order on
# every run; iterating a set() made that order arbitrary.
_source_levels = adata.obs['orig.ident'].cat.remove_unused_categories().cat.categories
for key in print_keys:
    cat_order = adata.obs[key].values.categories
    # One normalised value_counts Series per source, named after the source so
    # the column label does not depend on pandas' default column name.
    _fractions = [
        adata.obs.loc[adata.obs['orig.ident'] == i, key]
        .value_counts(normalize = True)
        .rename(i)
        for i in _source_levels
    ]
    expand_subtype = pd.concat(_fractions, ignore_index=False, axis=1)
    expand_subtype = expand_subtype.reindex(index =cat_order)
    # Colours come from the palette scanpy stored for this annotation.
    expand_subtype.transpose().plot.bar(stacked=True,color = adata.uns[key+'_colors'],fontsize = 20 ).legend(loc='center left',bbox_to_anchor=(1.0, 0.5),fontsize = 15)
    plt.title(label =key+' breakdown', fontsize = 20)
    plt.tight_layout()
    plt.savefig(key+' breakdown in patients gd reduced.png',dpi = 300)

In [None]:
plt.rcParams.update({'figure.figsize': [8, 7.2]})

# Pie chart of the fraction of cells per `site`, in category order.
ID_tot = adata.obs['site'].values.categories
pdtemp = pd.DataFrame(adata.obs['site'].value_counts(normalize = True))
#pdtemp = pdtemp.rename({'proportion': i}, axis='columns')
pdtemp = pdtemp.reindex(index =ID_tot)

_fractions = [row[0] for row in pdtemp.values]
wedges, texts, autotexts = plt.pie(
    _fractions,
    colors=adata.uns['site_colors'],
    autopct='%1.2f%%',
    radius=1,
)
# Move the percentage text of the 4th and 5th wedges outwards (radius 0.8)
# along each wedge's mid-angle.
for i, (patch, text, autotext) in enumerate(zip(wedges, texts, autotexts)):
    if i not in (3, 4):
        continue
    ang = (patch.theta2 + patch.theta1) / 2
    x = 0.8 * np.cos(np.deg2rad(ang))
    y = 0.8 * np.sin(np.deg2rad(ang))
    autotext.set_position((x, y))
plt.legend(
    wedges, ID_tot,
    loc="center left",
    bbox_to_anchor=(1, 0, 0.5, 1),
    fontsize=15,
)
plt.title(label = 'Sites', fontsize = 30)
plt.setp(autotexts, size=15, weight="bold", color = 'w')
plt.tight_layout()
plt.savefig('site_pie_gd_1.png', dpi=300, bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [8,7.2]

# Pie chart of the fraction of cells per `tissue` category, wedges in category order.
ID_tot = adata.obs['tissue'].values.categories
pdtemp = pd.DataFrame(adata.obs['tissue'].value_counts(normalize = True)).reindex(index =ID_tot)
wedges, texts, autotexts = plt.pie(
    [row[0] for row in pdtemp.values],
    colors = adata.uns['tissue_colors'],
    autopct='%1.2f%%',
    radius = 1.2,
)

plt.legend(wedges, ID_tot, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize = 15)
plt.title(label = 'Tissue Type', fontsize = 30)
plt.setp(autotexts, size=25, weight="bold")
plt.tight_layout()
plt.savefig('tissue_pie_gd reduced.png',dpi = 300,bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [8,7.2]

# Pie chart of the fraction of cells per `stage` category, wedges in category order.
ID_tot = adata.obs['stage'].values.categories
pdtemp = pd.DataFrame(adata.obs['stage'].value_counts(normalize = True)).reindex(index =ID_tot)
wedges, texts, autotexts = plt.pie(
    [row[0] for row in pdtemp.values],
    colors = adata.uns['stage_colors'],
    autopct='%1.2f%%',
    radius = 1,
)

# Wedge index -> distance from the centre at which its percentage label is re-placed.
label_radius = {1: 0.7, 2: 0.6}
for idx, (wedge, pct_label) in enumerate(zip(wedges, autotexts)):
    if idx not in label_radius:
        continue
    mid_angle = np.deg2rad((wedge.theta2 + wedge.theta1) / 2)
    pct_label.set_position((label_radius[idx] * np.cos(mid_angle),
                            label_radius[idx] * np.sin(mid_angle)))

plt.legend(wedges, ID_tot, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize = 15)
plt.title(label = 'Stage', fontsize = 30)
plt.setp(autotexts, size=25, weight="bold", color = 'w')
plt.tight_layout()
plt.savefig('stage_pie_gd reduced.png',dpi = 300,bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [8,7.2]

# Pie chart of the fraction of cells per `gender` category, wedges in category order.
ID_tot = adata.obs['gender'].values.categories
pdtemp = pd.DataFrame(adata.obs['gender'].value_counts(normalize = True)).reindex(index =ID_tot)
wedges, texts, autotexts = plt.pie(
    [row[0] for row in pdtemp.values],
    colors = adata.uns['gender_colors'],
    autopct='%1.2f%%',
    radius = 1,
)

# Wedge index -> distance from the centre at which its percentage label is re-placed.
# An index with no matching wedge is simply never reached.
label_radius = {1: 0.7, 2: 0.6}
for idx, (wedge, pct_label) in enumerate(zip(wedges, autotexts)):
    if idx not in label_radius:
        continue
    mid_angle = np.deg2rad((wedge.theta2 + wedge.theta1) / 2)
    pct_label.set_position((label_radius[idx] * np.cos(mid_angle),
                            label_radius[idx] * np.sin(mid_angle)))

plt.legend(wedges, ID_tot, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize = 15)
plt.title(label = 'Gender', fontsize = 30)
plt.setp(autotexts, size=20, weight="bold", color = 'w')
plt.tight_layout()
plt.savefig('gender_pie_gd reduced.png',dpi = 300,bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [8,7.2]

# Pie chart of the fraction of cells per `orig.ident` category, wedges in category order.
ID_tot = adata.obs['orig.ident'].values.categories
pdtemp = pd.DataFrame(adata.obs['orig.ident'].value_counts(normalize = True)).reindex(index =ID_tot)
wedges, texts, autotexts = plt.pie(
    [row[0] for row in pdtemp.values],
    colors = adata.uns['orig.ident_colors'],
    autopct='%1.2f%%',
    radius = 1,
)

# Wedge index -> (x scale, y scale) applied to the unit vector along the wedge's
# mid-angle when re-placing its percentage label. Wedge 6 uses different x and y scales.
label_scale = {4: (0.7, 0.7), 5: (0.8, 0.8), 6: (1, 0.95)}
for idx, (wedge, pct_label) in enumerate(zip(wedges, autotexts)):
    if idx not in label_scale:
        continue
    x_scale, y_scale = label_scale[idx]
    mid_angle = np.deg2rad((wedge.theta2 + wedge.theta1) / 2)
    pct_label.set_position((x_scale * np.cos(mid_angle), y_scale * np.sin(mid_angle)))

plt.legend(wedges, ID_tot, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize = 15)
plt.title(label = 'Data Sources', fontsize = 30)
plt.setp(autotexts, size=15, weight="bold",color = 'w')
plt.tight_layout()
plt.savefig('orig.ident_pie_gd_reduced2.png',dpi = 300,bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [8,7.2]

# Pie chart of the fraction of cells per `cell type` category, wedges in category order.
ID_tot = adata.obs['cell type'].values.categories
pdtemp = pd.DataFrame(adata.obs['cell type'].value_counts(normalize = True)).reindex(index =ID_tot)
wedges, texts, autotexts = plt.pie(
    [row[0] for row in pdtemp.values],
    colors = adata.uns['cell type_colors'],
    autopct='%1.2f%%',
    radius = 1,
)

# Wedge index -> distance from the centre at which its percentage label is re-placed.
label_radius = {7: 0.8, 6: 0.6}
for idx, (wedge, pct_label) in enumerate(zip(wedges, autotexts)):
    if idx not in label_radius:
        continue
    mid_angle = np.deg2rad((wedge.theta2 + wedge.theta1) / 2)
    pct_label.set_position((label_radius[idx] * np.cos(mid_angle),
                            label_radius[idx] * np.sin(mid_angle)))

plt.legend(wedges, ID_tot, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1), fontsize = 15)
plt.title(label = 'cell type', fontsize = 30)
plt.setp(autotexts, size=20, weight="bold",color = 'w')
plt.tight_layout()
plt.savefig('celltype_pie_gd_reduced.png',dpi = 300,bbox_inches='tight')

In [None]:
plt.rcParams.update({'font.size': 15, 'font.weight': 'heavy','axes.linewidth':2})

# Group label -> genes, passed to sc.pl.dotplot as the variables to plot.
gene_list = {
    'IL': ['IL17A','IL17F'],
    'Regulators': ['RORC','RUNX1','IRF4','BATF','KLF4','STAT3'],
    'Sanchez et al': ['IL23R','CCR6','MAF','AHR','CLEC7A','RORA','SLAMF1','BLK','JAML','CXCR6','DPP4'],
    'Tan et al': ['CD9','LGALS3','IL2RA','SOX13','S100A6','IL18R1','ICOS'],
}

# Dot plot of the genes above, grouped by cell type, saved via scanpy's `save` argument.
sc.pl.dotplot(
    adata,
    gene_list,
    groupby = 'cell type',
    vmax = 1,
    swap_axes = True,
    dot_min = 0.1,
    standard_scale = 'var',
    figsize = (4,12),
    save = 'IL17_secretion_signature.png',
)
# IL9 was also checked: nothing.

In [None]:
# Carcinoma/Normal cells of the 'IL7R+ TRM' cell type. The subset is taken as an explicit
# copy because highly_variable_genes writes its results into .var, and writing into an
# AnnData view forces an implicit copy (with a warning) instead.
in_tissue = adata.obs['tissue'].isin(['Carcinoma','Normal'])
in_cell_type = adata.obs['cell type'].isin(['IL7R+ TRM'])
adata_tissue = adata[in_tissue & in_cell_type,:].copy()
sc.pp.highly_variable_genes(adata_tissue, flavor='seurat', n_top_genes = 5000)

In [None]:
# Variables handed to the following %%R cell, restricted to the highly variable genes.
hvg_mask = adata_tissue.var['highly_variable']
rownames = adata_tissue.var_names[hvg_mask]             # names of the selected genes
colnames = adata_tissue.obs_names                       # cell barcodes
tissue = adata_tissue.obs['tissue'].astype('str')       # per-cell tissue label
counts = adata_tissue.layers['counts'].T[hvg_mask,:]    # transposed 'counts' layer, selected genes only

In [None]:
%%R -i counts -i colnames -i rownames -i tissue
# Name the imported matrix, wrap it in a Seurat object and save it as an RDS file.
rownames(counts) <- rownames
colnames(counts) <- colnames

# min.cells / min.features = 0: no filtering is applied when the object is created.
srat <- CreateSeuratObject(counts = counts, project = "gd", assay = "RNA", min.cells = 0, min.features = 0)
srat[["tissue"]] <- tissue
saveRDS(srat, file = "gd_for_DEG.rds")

In [None]:
# Load the DEG table, add -log10(q), drop rows where that is not positive, pick the
# significant genes, then flip the sign of the effect column.
fc_thresh = 1
terms = pd.read_csv('gd_tissue_DEG.csv')
terms = terms.assign(**{'-logQ': -np.log10(terms['q_value'])})
terms = terms.loc[terms['-logQ']>0]

passes_q = terms['-logQ'] >= -np.log10(0.05)
passes_fc = abs(terms['normalized_effect']) >= fc_thresh
# Selected before the sign flip below; the selection uses |effect|, so the same rows qualify.
lowqval_de = terms.loc[np.array(passes_q) & np.array(passes_fc)]

terms = terms.assign(normalized_effect = terms['normalized_effect']*-1)

In [None]:
##### Volcano plot of results
# IL7R+ TRM, Carcinoma vs Normal: effect size against -log10(q) for the table held in
# `terms`, with a filtered subset of the significant genes labelled.
from adjustText import adjust_text

fc_thresh = 1                 # |normalized_effect| cut-off for the significant group
q_line = -np.log10(0.05)      # q = 0.05 on the -log10 axis
# Genes drawn with the boxed blue label; every other kept gene gets the plain brown one.
# A single list drives both passes, so no gene can be labelled twice.
highlight_genes = ['GZMB','GNLY','NKG7','GZMH','CD74','VCAM1','KLF2','CTLA4','ITGB2','IFNG',
                   'ZNF683','FCER1G','GZMK','KLRG1','FGFBP2','FCGR3A','IL7R','PDCD1','CCL4','KLRK1',
                   'CD81','CD82','CD83','NFKBIA','HAVCR2']

terms['-logQ'] = -np.log10(terms['q_value'])
terms = terms[terms['-logQ']>0]
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold','axes.linewidth':2})
lowqval_de = terms[np.array(terms['-logQ']>=q_line) & np.array(abs(terms['normalized_effect'])>=fc_thresh)]
# Everything that is not significant DE
other_de = terms[np.array(terms['-logQ']<q_line) | np.array(abs(terms['normalized_effect'])<fc_thresh)]

fig, ax = plt.subplots()
point_style = {'s':50, 'edgecolors': 'black', 'linewidths': 0.5}
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
for subset in (other_de, lowqval_de):
    sb.regplot(x=subset['normalized_effect'], y=subset['-logQ'], fit_reg=False,
               scatter_kws=dict(point_style), ax=ax)
ax.set_xlabel("Normalized log2 FC", fontsize=30)
ax.set_ylabel("-log Q-value", fontsize=30)
ax.tick_params(labelsize=15)
ax.set_title('IL7R+ TRM Carcinoma vs Normal',size = 25)

# Label names and positions (labels sit 2% above their point)
x = lowqval_de['normalized_effect'].tolist()
y = (lowqval_de['-logQ']*1.02).tolist()
labels = lowqval_de['gene_short_name']
effect = lowqval_de['normalized_effect'].to_numpy()
logq = lowqval_de['-logQ'].to_numpy()

# Show only some labels to avoid overcrowding the figure
hidden = (labels.str.startswith('RP', na=False).to_numpy()
          | labels.str.startswith('MT', na=False).to_numpy()
          | labels.str.startswith('IG', na=False).to_numpy()
          | ((effect < fc_thresh*1.5) & (effect > 0) & (logq < 50))
          | ((np.abs(effect) < fc_thresh*1.5) & (effect < 0) & (logq < 50))
          | (logq < 5) | (logq > 70))
to_keep = np.where(~hidden)
labels = labels.iloc[to_keep[0]]
kept = list(zip(to_keep[0], labels))

texts = []
# Plain labels first, highlighted ones second, so the boxed labels are added last.
for pos, txt in kept:
    if txt not in highlight_genes:
        texts.append(ax.text(x[pos], y[pos], txt,size = 11, color= '#904800', weight = 'bold'))
for pos, txt in kept:
    if txt in highlight_genes:
        texts.append(ax.text(x[pos], y[pos], txt,size = 15, color= '#0E46E2', weight = 'bold',
                             bbox=dict(boxstyle="Square,pad=0.1", fc="white", ec="b", lw=2, alpha = 0.8)))

plt.ylim([-0.01,90])
plt.xlim([-3,7.8])
adjust_text(texts,arrowprops=dict(arrowstyle='-', color='red'))

# Significance and fold-change guide lines
ax.axhline(y=q_line, color="orange", linestyle="--")
ax.axvline(x = -fc_thresh, color="orange", linestyle="--")
ax.axvline(x = fc_thresh, color="orange", linestyle="--")
plt.savefig('TRM_gd_tissue_DE.png')

In [None]:
# Carcinoma/Normal cells of the 'IL7R- TRM' cell type. The subset is taken as an explicit
# copy because highly_variable_genes writes its results into .var, and writing into an
# AnnData view forces an implicit copy (with a warning) instead.
in_tissue = adata.obs['tissue'].isin(['Carcinoma','Normal'])
in_cell_type = adata.obs['cell type'].isin(['IL7R- TRM'])
adata_tissue = adata[in_tissue & in_cell_type,:].copy()
sc.pp.highly_variable_genes(adata_tissue, flavor='seurat', n_top_genes = 5000)

In [None]:
# IFNG expression in the current subset, split by tissue.
sc.pl.violin(
    adata_tissue,
    keys = 'IFNG',
    groupby = 'tissue',
)

In [None]:
# Variables handed to the following %%R cell, restricted to the highly variable genes.
hvg_mask = adata_tissue.var['highly_variable']
rownames = adata_tissue.var_names[hvg_mask]             # names of the selected genes
colnames = adata_tissue.obs_names                       # cell barcodes
tissue = adata_tissue.obs['tissue'].astype('str')       # per-cell tissue label
counts = adata_tissue.layers['counts'].T[hvg_mask,:]    # transposed 'counts' layer, selected genes only

In [None]:
%%R -i counts -i colnames -i rownames -i tissue
# Name the imported matrix, wrap it in a Seurat object and save it as an RDS file.
rownames(counts) <- rownames
colnames(counts) <- colnames

# min.cells / min.features = 0: no filtering is applied when the object is created.
srat <- CreateSeuratObject(counts = counts, project = "gd", assay = "RNA", min.cells = 0, min.features = 0)
srat[["tissue"]] <- tissue
saveRDS(srat, file = "IL7RnegTRM_gd_for_DEG.rds")

In [None]:
# Load the DEG table, add -log10(q), drop rows where that is not positive, pick the
# significant genes, then flip the sign of the effect column.
# NOTE(review): sibling cells read '<name>_gd_tissue_DEG.csv'; confirm this file name.
fc_thresh = 1
terms = pd.read_csv('IL7RnegTRM_gd_for_DEG.csv')
terms = terms.assign(**{'-logQ': -np.log10(terms['q_value'])})
terms = terms.loc[terms['-logQ']>0]

passes_q = terms['-logQ'] >= -np.log10(0.05)
passes_fc = abs(terms['normalized_effect']) >= fc_thresh
# Selected before the sign flip below; the selection uses |effect|, so the same rows qualify.
lowqval_de = terms.loc[np.array(passes_q) & np.array(passes_fc)]

terms = terms.assign(normalized_effect = terms['normalized_effect']*-1)

In [None]:
##### Volcano plot of results
# IL7R- TRM, Carcinoma vs Normal: effect size against -log10(q) for the table held in
# `terms`, with a filtered subset of the significant genes labelled.
from adjustText import adjust_text

fc_thresh = 1                 # |normalized_effect| cut-off for the significant group
q_line = -np.log10(0.05)      # q = 0.05 on the -log10 axis
# Genes drawn with the boxed blue label; every other kept gene gets the plain brown one.
highlight_genes = ['GZMB','GNLY','NKG7','GZMH','CD74','VCAM1','KLF2','CTLA4','ITGB2','IFNG',
                   'ZNF683','FCER1G','GZMK','KLRG1','FGFBP2','FCGR3A','IL7R','PDCD1','CCL4','KLRK1',
                   'CD81','CD82','CD83']

terms['-logQ'] = -np.log10(terms['q_value'])
terms = terms[terms['-logQ']>0]
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold','axes.linewidth':2})
lowqval_de = terms[np.array(terms['-logQ']>=q_line) & np.array(abs(terms['normalized_effect'])>=fc_thresh)]
# Everything that is not significant DE
other_de = terms[np.array(terms['-logQ']<q_line) | np.array(abs(terms['normalized_effect'])<fc_thresh)]

fig, ax = plt.subplots()
point_style = {'s':50, 'edgecolors': 'black', 'linewidths': 0.5}
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
for subset in (other_de, lowqval_de):
    sb.regplot(x=subset['normalized_effect'], y=subset['-logQ'], fit_reg=False,
               scatter_kws=dict(point_style), ax=ax)
ax.set_xlabel("Normalized log2 FC", fontsize=30)
ax.set_ylabel("-log Q-value", fontsize=30)
ax.tick_params(labelsize=15)
ax.set_title('IL7R- TRM Carcinoma vs Normal',size = 30)

# Label names and positions (labels sit 2% above their point)
x = lowqval_de['normalized_effect'].tolist()
y = (lowqval_de['-logQ']*1.02).tolist()
labels = lowqval_de['gene_short_name']
effect = lowqval_de['normalized_effect'].to_numpy()
logq = lowqval_de['-logQ'].to_numpy()

# Show only some labels to avoid overcrowding the figure
hidden = (labels.str.startswith('RP', na=False).to_numpy()
          | labels.str.startswith('MT', na=False).to_numpy()
          | ((effect < fc_thresh*2) & (effect > 0) & (logq < 50))
          | ((np.abs(effect) < fc_thresh*1.5) & (effect < 0) & (logq < 50))
          | (logq < 20) | (logq > 200))
to_keep = np.where(~hidden)
labels = labels.iloc[to_keep[0]]

boxed_style = dict(size = 15, color= '#0E46E2', weight = 'bold',
                   bbox=dict(boxstyle="Square,pad=0.1", fc="white", ec="b", lw=2, alpha = 0.8))
plain_style = dict(size = 11, color= '#904800', weight = 'bold')
texts = []
for pos, txt in zip(to_keep[0], labels):
    style = boxed_style if txt in highlight_genes else plain_style
    texts.append(ax.text(x[pos], y[pos], txt, **style))

plt.ylim([-0.01,220])
adjust_text(texts,arrowprops=dict(arrowstyle='-', color='red'))

# Significance and fold-change guide lines
ax.axhline(y=q_line, color="orange", linestyle="--")
ax.axvline(x = -fc_thresh, color="orange", linestyle="--")
ax.axvline(x = fc_thresh, color="orange", linestyle="--")
plt.savefig('IL7RnegTRM_gd_tissue_DE.png')

In [None]:
# Carcinoma/Normal cells of the 'Tpex' cell type. The subset is taken as an explicit
# copy because highly_variable_genes writes its results into .var, and writing into an
# AnnData view forces an implicit copy (with a warning) instead.
in_tissue = adata.obs['tissue'].isin(['Carcinoma','Normal'])
in_cell_type = adata.obs['cell type'].isin(['Tpex'])
adata_tissue = adata[in_tissue & in_cell_type,:].copy()
sc.pp.highly_variable_genes(adata_tissue, flavor='seurat', n_top_genes = 5000)

In [None]:
# Variables handed to the following %%R cell, restricted to the highly variable genes.
hvg_mask = adata_tissue.var['highly_variable']
rownames = adata_tissue.var_names[hvg_mask]             # names of the selected genes
colnames = adata_tissue.obs_names                       # cell barcodes
tissue = adata_tissue.obs['tissue'].astype('str')       # per-cell tissue label
counts = adata_tissue.layers['counts'].T[hvg_mask,:]    # transposed 'counts' layer, selected genes only

In [None]:
%%R -i counts -i colnames -i rownames -i tissue
# Name the imported matrix, wrap it in a Seurat object and save it as an RDS file.
rownames(counts) <- rownames
colnames(counts) <- colnames

# min.cells / min.features = 0: no filtering is applied when the object is created.
srat <- CreateSeuratObject(counts = counts, project = "gd", assay = "RNA", min.cells = 0, min.features = 0)
srat[["tissue"]] <- tissue
saveRDS(srat, file = "Tpex_gd_for_DEG.rds")

In [None]:
# Load the DEG table, add -log10(q), drop rows where that is not positive, pick the
# significant genes, then flip the sign of the effect column.
fc_thresh = 1
terms = pd.read_csv('Tpex_gd_tissue_DEG.csv')
terms = terms.assign(**{'-logQ': -np.log10(terms['q_value'])})
terms = terms.loc[terms['-logQ']>0]

passes_q = terms['-logQ'] >= -np.log10(0.05)
passes_fc = abs(terms['normalized_effect']) >= fc_thresh
# Selected before the sign flip below; the selection uses |effect|, so the same rows qualify.
lowqval_de = terms.loc[np.array(passes_q) & np.array(passes_fc)]

terms = terms.assign(normalized_effect = terms['normalized_effect']*-1)

In [None]:
##### Volcano plot of results
# Tpex, Carcinoma vs Normal: effect size against -log10(q) for the table held in
# `terms`, with a filtered subset of the significant genes labelled.
from adjustText import adjust_text

fc_thresh = 1                 # |normalized_effect| cut-off for the significant group
q_line = -np.log10(0.05)      # q = 0.05 on the -log10 axis
# Genes drawn with the boxed blue label; every other kept gene gets the plain brown one.
highlight_genes = ['GZMB','GNLY','NKG7','GZMH','CD74','VCAM1','KLF2','CTLA4','ITGB2','IFNG',
                   'ZNF683','FCER1G','GZMK','KLRG1','FGFBP2','FCGR3A','IL7R','PDCD1','CCL4','KLRK1',
                   'CD81','CD82','CD83']

terms['-logQ'] = -np.log10(terms['q_value'])
terms = terms[terms['-logQ']>0]
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold','axes.linewidth':2})
lowqval_de = terms[np.array(terms['-logQ']>=q_line) & np.array(abs(terms['normalized_effect'])>=fc_thresh)]
# Everything that is not significant DE
other_de = terms[np.array(terms['-logQ']<q_line) | np.array(abs(terms['normalized_effect'])<fc_thresh)]

fig, ax = plt.subplots()
point_style = {'s':50, 'edgecolors': 'black', 'linewidths': 0.5}
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
for subset in (other_de, lowqval_de):
    sb.regplot(x=subset['normalized_effect'], y=subset['-logQ'], fit_reg=False,
               scatter_kws=dict(point_style), ax=ax)
ax.set_xlabel("Normalized log2 FC", fontsize=30)
ax.set_ylabel("-log Q-value", fontsize=30)
ax.tick_params(labelsize=15)
ax.set_title('Tpex Carcinoma vs Normal',size = 30)

# Label names and positions (labels sit 2% above their point)
x = lowqval_de['normalized_effect'].tolist()
y = (lowqval_de['-logQ']*1.02).tolist()
labels = lowqval_de['gene_short_name']
effect = lowqval_de['normalized_effect'].to_numpy()
logq = lowqval_de['-logQ'].to_numpy()

# Show only some labels to avoid overcrowding the figure
hidden = (labels.str.startswith('RP', na=False).to_numpy()
          | labels.str.startswith('MT', na=False).to_numpy()
          | ((effect < fc_thresh*2) & (effect > 0) & (logq < 50))
          | ((np.abs(effect) < fc_thresh*1.5) & (effect < 0) & (logq < 50))
          | (logq < 2) | (logq > 200))
to_keep = np.where(~hidden)
labels = labels.iloc[to_keep[0]]

boxed_style = dict(size = 15, color= '#0E46E2', weight = 'bold',
                   bbox=dict(boxstyle="Square,pad=0.1", fc="white", ec="b", lw=2, alpha = 0.8))
plain_style = dict(size = 11, color= '#904800', weight = 'bold')
texts = []
for pos, txt in zip(to_keep[0], labels):
    style = boxed_style if txt in highlight_genes else plain_style
    texts.append(ax.text(x[pos], y[pos], txt, **style))

plt.ylim([-0.01,30])
adjust_text(texts,arrowprops=dict(arrowstyle='-', color='red'))

# Significance and fold-change guide lines
ax.axhline(y=q_line, color="orange", linestyle="--")
ax.axvline(x = -fc_thresh, color="orange", linestyle="--")
ax.axvline(x = fc_thresh, color="orange", linestyle="--")
plt.savefig('Tpex_gd_tissue_DEG.png')

In [None]:
##### Volcano plot of results
# Volcano titled 'Tex1 Carcinoma vs Normal': effect size against -log10(q) for the table
# held in `terms`, with a filtered subset of the significant genes labelled.
# NOTE(review): this cell does not load a table itself; it plots whatever `terms`
# currently holds. Confirm a Tex1 DEG table is loaded before this cell runs.
from adjustText import adjust_text

fc_thresh = 1                 # |normalized_effect| cut-off for the significant group
q_line = -np.log10(0.05)      # q = 0.05 on the -log10 axis
# Genes drawn with the boxed blue label; every other kept gene gets the plain brown one.
highlight_genes = ['GZMB','GNLY','NKG7','GZMH','CD74','VCAM1','KLF2','CTLA4','ITGB2','IFNG',
                   'ZNF683','FCER1G','GZMK','KLRG1','FGFBP2','FCGR3A','IL7R','PDCD1','CCL4','KLRK1',
                   'CD81','CD82','CD83']

terms['-logQ'] = -np.log10(terms['q_value'])
terms = terms[terms['-logQ']>0]
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold','axes.linewidth':2})
lowqval_de = terms[np.array(terms['-logQ']>=q_line) & np.array(abs(terms['normalized_effect'])>=fc_thresh)]
# Everything that is not significant DE
other_de = terms[np.array(terms['-logQ']<q_line) | np.array(abs(terms['normalized_effect'])<fc_thresh)]

fig, ax = plt.subplots()
point_style = {'s':50, 'edgecolors': 'black', 'linewidths': 0.5}
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
for subset in (other_de, lowqval_de):
    sb.regplot(x=subset['normalized_effect'], y=subset['-logQ'], fit_reg=False,
               scatter_kws=dict(point_style), ax=ax)
ax.set_xlabel("Normalized log2 FC", fontsize=30)
ax.set_ylabel("-log Q-value", fontsize=30)
ax.tick_params(labelsize=15)
ax.set_title('Tex1 Carcinoma vs Normal',size = 30)

# Label names and positions (labels sit 2% above their point)
x = lowqval_de['normalized_effect'].tolist()
y = (lowqval_de['-logQ']*1.02).tolist()
labels = lowqval_de['gene_short_name']
effect = lowqval_de['normalized_effect'].to_numpy()
logq = lowqval_de['-logQ'].to_numpy()

# Show only some labels to avoid overcrowding the figure
hidden = (labels.str.startswith('RP', na=False).to_numpy()
          | labels.str.startswith('MT', na=False).to_numpy()
          | ((effect < fc_thresh*2) & (effect > 0) & (logq < 50))
          | ((np.abs(effect) < fc_thresh*1.5) & (effect < 0) & (logq < 50))
          | (logq < 2) | (logq > 200))
to_keep = np.where(~hidden)
labels = labels.iloc[to_keep[0]]

boxed_style = dict(size = 15, color= '#0E46E2', weight = 'bold',
                   bbox=dict(boxstyle="Square,pad=0.1", fc="white", ec="b", lw=2, alpha = 0.8))
plain_style = dict(size = 11, color= '#904800', weight = 'bold')
texts = []
for pos, txt in zip(to_keep[0], labels):
    style = boxed_style if txt in highlight_genes else plain_style
    texts.append(ax.text(x[pos], y[pos], txt, **style))

# The y-limit is fixed before adjust_text so label placement is computed against the
# final axes, as in the other volcano cells.
plt.ylim([-0.01,30])
adjust_text(texts,arrowprops=dict(arrowstyle='-', color='red'))

# Significance and fold-change guide lines
ax.axhline(y=q_line, color="orange", linestyle="--")
ax.axvline(x = -fc_thresh, color="orange", linestyle="--")
ax.axvline(x = fc_thresh, color="orange", linestyle="--")
plt.savefig('Tex1_gd_tissue_DEG.png')

In [None]:
# Carcinoma/Normal cells of the 'Poised Teff 1' cell type. The subset is taken as an
# explicit copy because highly_variable_genes writes its results into .var, and writing
# into an AnnData view forces an implicit copy (with a warning) instead.
in_tissue = adata.obs['tissue'].isin(['Carcinoma','Normal'])
in_cell_type = adata.obs['cell type'].isin(['Poised Teff 1'])
adata_tissue = adata[in_tissue & in_cell_type,:].copy()
sc.pp.highly_variable_genes(adata_tissue, flavor='seurat', n_top_genes = 5000)

In [None]:
# Variables handed to the following %%R cell, restricted to the highly variable genes.
hvg_mask = adata_tissue.var['highly_variable']
rownames = adata_tissue.var_names[hvg_mask]             # names of the selected genes
colnames = adata_tissue.obs_names                       # cell barcodes
tissue = adata_tissue.obs['tissue'].astype('str')       # per-cell tissue label
counts = adata_tissue.layers['counts'].T[hvg_mask,:]    # transposed 'counts' layer, selected genes only

In [None]:
%%R -i counts -i colnames -i rownames -i tissue
# Name the imported matrix, wrap it in a Seurat object and save it as an RDS file.
rownames(counts) <- rownames
colnames(counts) <- colnames

# min.cells / min.features = 0: no filtering is applied when the object is created.
srat <- CreateSeuratObject(counts = counts, project = "gd", assay = "RNA", min.cells = 0, min.features = 0)
srat[["tissue"]] <- tissue
saveRDS(srat, file = "Teff1_gd_for_DEG.rds")

In [None]:
# Load the DEG table, add -log10(q), drop rows where that is not positive, pick the
# significant genes, then flip the sign of the effect column.
fc_thresh = 1
terms = pd.read_csv('Teff1_gd_tissue_DEG.csv')
terms = terms.assign(**{'-logQ': -np.log10(terms['q_value'])})
terms = terms.loc[terms['-logQ']>0]

passes_q = terms['-logQ'] >= -np.log10(0.05)
passes_fc = abs(terms['normalized_effect']) >= fc_thresh
# Selected before the sign flip below; the selection uses |effect|, so the same rows qualify.
lowqval_de = terms.loc[np.array(passes_q) & np.array(passes_fc)]

terms = terms.assign(normalized_effect = terms['normalized_effect']*-1)

In [None]:
##### Volcano plot of results
# Teff1, Carcinoma vs Normal: effect size against -log10(q) for the table held in
# `terms`, with a filtered subset of the significant genes labelled.
from adjustText import adjust_text

fc_thresh = 1                 # |normalized_effect| cut-off for the significant group
q_line = -np.log10(0.05)      # q = 0.05 on the -log10 axis
# Genes drawn with the boxed blue label; every other kept gene gets the plain brown one.
highlight_genes = ['GZMB','GNLY','NKG7','GZMH','CD74','VCAM1','KLF2','CTLA4','ITGB2','IFNG',
                   'ZNF683','FCER1G','GZMK','KLRG1','FGFBP2','FCGR3A','IL7R','PDCD1','CCL4','KLRK1','HAVCR2','KLRC2',
                   'CD81','CD82','CD83']

terms['-logQ'] = -np.log10(terms['q_value'])
terms = terms[terms['-logQ']>0]
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold','axes.linewidth':2})
lowqval_de = terms[np.array(terms['-logQ']>=q_line) & np.array(abs(terms['normalized_effect'])>=fc_thresh)]
# Everything that is not significant DE
other_de = terms[np.array(terms['-logQ']<q_line) | np.array(abs(terms['normalized_effect'])<fc_thresh)]

fig, ax = plt.subplots()
point_style = {'s':50, 'edgecolors': 'black', 'linewidths': 0.5}
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
for subset in (other_de, lowqval_de):
    sb.regplot(x=subset['normalized_effect'], y=subset['-logQ'], fit_reg=False,
               scatter_kws=dict(point_style), ax=ax)
ax.set_xlabel("Normalized log2 FC", fontsize=30)
ax.set_ylabel("-log Q-value", fontsize=30)
ax.tick_params(labelsize=15)
ax.set_title('Teff1 Carcinoma vs Normal',size = 30)

# Label names and positions (labels sit 2% above their point)
x = lowqval_de['normalized_effect'].tolist()
y = (lowqval_de['-logQ']*1.02).tolist()
labels = lowqval_de['gene_short_name']
effect = lowqval_de['normalized_effect'].to_numpy()
logq = lowqval_de['-logQ'].to_numpy()

# Show only some labels to avoid overcrowding the figure
hidden = (labels.str.startswith('RP', na=False).to_numpy()
          | labels.str.startswith('MT', na=False).to_numpy()
          | ((effect < fc_thresh*1) & (effect > 0) & (logq < 50))
          | ((np.abs(effect) < fc_thresh*1.5) & (effect < 0) & (logq < 50))
          | (logq < 2) | (logq > 200))
to_keep = np.where(~hidden)
labels = labels.iloc[to_keep[0]]

boxed_style = dict(size = 15, color= '#0E46E2', weight = 'bold',
                   bbox=dict(boxstyle="Square,pad=0.1", fc="white", ec="b", lw=2, alpha = 0.8))
plain_style = dict(size = 11, color= '#904800', weight = 'bold')
texts = []
for pos, txt in zip(to_keep[0], labels):
    style = boxed_style if txt in highlight_genes else plain_style
    texts.append(ax.text(x[pos], y[pos], txt, **style))

plt.ylim([-0.01,30])
plt.xlim([-4,4])
adjust_text(texts,arrowprops=dict(arrowstyle='-', color='red'))

# Significance and fold-change guide lines
ax.axhline(y=q_line, color="orange", linestyle="--")
ax.axvline(x = -fc_thresh, color="orange", linestyle="--")
ax.axvline(x = fc_thresh, color="orange", linestyle="--")
plt.savefig('Teff1_gd_tissue_DEG.png')

In [None]:
# Carcinoma/Normal cells of the 'Poised Teff 2' cell type. The subset is taken as an
# explicit copy because highly_variable_genes writes its results into .var, and writing
# into an AnnData view forces an implicit copy (with a warning) instead.
in_tissue = adata.obs['tissue'].isin(['Carcinoma','Normal'])
in_cell_type = adata.obs['cell type'].isin(['Poised Teff 2'])
adata_tissue = adata[in_tissue & in_cell_type,:].copy()
sc.pp.highly_variable_genes(adata_tissue, flavor='seurat', n_top_genes = 5000)

In [None]:
# Variables handed to the following %%R cell, restricted to the highly variable genes.
hvg_mask = adata_tissue.var['highly_variable']
rownames = adata_tissue.var_names[hvg_mask]             # names of the selected genes
colnames = adata_tissue.obs_names                       # cell barcodes
tissue = adata_tissue.obs['tissue'].astype('str')       # per-cell tissue label
counts = adata_tissue.layers['counts'].T[hvg_mask,:]    # transposed 'counts' layer, selected genes only

In [None]:
%%R -i counts -i colnames -i rownames -i tissue
# Name the imported matrix, wrap it in a Seurat object and save it as an RDS file.
rownames(counts) <- rownames
colnames(counts) <- colnames

# min.cells / min.features = 0: no filtering is applied when the object is created.
srat <- CreateSeuratObject(counts = counts, project = "gd", assay = "RNA", min.cells = 0, min.features = 0)
srat[["tissue"]] <- tissue
saveRDS(srat, file = "Teff2_gd_for_DEG.rds")

In [None]:
# Load the DEG table, add -log10(q), drop rows where that is not positive, pick the
# significant genes, then flip the sign of the effect column.
fc_thresh = 1
terms = pd.read_csv('Teff2_gd_tissue_DEG.csv')
terms = terms.assign(**{'-logQ': -np.log10(terms['q_value'])})
terms = terms.loc[terms['-logQ']>0]

passes_q = terms['-logQ'] >= -np.log10(0.05)
passes_fc = abs(terms['normalized_effect']) >= fc_thresh
# Selected before the sign flip below; the selection uses |effect|, so the same rows qualify.
lowqval_de = terms.loc[np.array(passes_q) & np.array(passes_fc)]

terms = terms.assign(normalized_effect = terms['normalized_effect']*-1)

In [None]:
##### Volcano plot of results
# Teff2, Carcinoma vs Normal: effect size against -log10(q) for the table held in
# `terms`, with a filtered subset of the significant genes labelled.
from adjustText import adjust_text

fc_thresh = 1                 # |normalized_effect| cut-off for the significant group
q_line = -np.log10(0.05)      # q = 0.05 on the -log10 axis
# Genes drawn with the boxed blue label; every other kept gene gets the plain brown one.
highlight_genes = ['GZMB','GNLY','NKG7','GZMH','CD74','VCAM1','KLF2','CTLA4','ITGB2','IFNG',
                   'ZNF683','FCER1G','GZMK','KLRG1','FGFBP2','FCGR3A','IL7R','PDCD1','CCL4','KLRK1','CD44',
                   'CD81','CD82','CD83','CCL5']

terms['-logQ'] = -np.log10(terms['q_value'])
terms = terms[terms['-logQ']>0]
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams.update({'font.size': 14, 'font.weight': 'bold','axes.linewidth':2})
lowqval_de = terms[np.array(terms['-logQ']>=q_line) & np.array(abs(terms['normalized_effect'])>=fc_thresh)]
# Everything that is not significant DE
other_de = terms[np.array(terms['-logQ']<q_line) | np.array(abs(terms['normalized_effect'])<fc_thresh)]

fig, ax = plt.subplots()
point_style = {'s':50, 'edgecolors': 'black', 'linewidths': 0.5}
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
for subset in (other_de, lowqval_de):
    sb.regplot(x=subset['normalized_effect'], y=subset['-logQ'], fit_reg=False,
               scatter_kws=dict(point_style), ax=ax)
ax.set_xlabel("Normalized log2 FC", fontsize=30)
ax.set_ylabel("-log Q-value", fontsize=30)
ax.tick_params(labelsize=15)
ax.set_title('Teff2 Carcinoma vs Normal',size = 30)

# Label names and positions (labels sit 2% above their point)
x = lowqval_de['normalized_effect'].tolist()
y = (lowqval_de['-logQ']*1.02).tolist()
labels = lowqval_de['gene_short_name']
effect = lowqval_de['normalized_effect'].to_numpy()
logq = lowqval_de['-logQ'].to_numpy()

# Show only some labels to avoid overcrowding the figure
hidden = (labels.str.startswith('RP', na=False).to_numpy()
          | labels.str.startswith('MT', na=False).to_numpy()
          | labels.str.startswith('IG', na=False).to_numpy()
          | ((effect < fc_thresh*1) & (effect > 0) & (logq < 50))
          | ((np.abs(effect) < fc_thresh*2.5) & (effect < 0) & (logq < 50))
          | (logq < 2) | (logq > 20))
to_keep = np.where(~hidden)
labels = labels.iloc[to_keep[0]]

boxed_style = dict(size = 15, color= '#0E46E2', weight = 'bold',
                   bbox=dict(boxstyle="Square,pad=0.1", fc="white", ec="b", lw=2, alpha = 0.8))
plain_style = dict(size = 11, color= '#904800', weight = 'bold')
texts = []
for pos, txt in zip(to_keep[0], labels):
    style = boxed_style if txt in highlight_genes else plain_style
    texts.append(ax.text(x[pos], y[pos], txt, **style))

plt.ylim([-0.01,30])
plt.xlim([-5,4])

adjust_text(texts,min_arrow_len = 2,force_pull = (0.1,0.1), max_move = (5,5),arrowprops=dict(arrowstyle='-', color='red'))

# Significance and fold-change guide lines
ax.axhline(y=q_line, color="orange", linestyle="--")
ax.axvline(x = -fc_thresh, color="orange", linestyle="--")
ax.axvline(x = fc_thresh, color="orange", linestyle="--")
plt.savefig('Teff2_gd_tissue_DEG.png')