In [None]:
#import the library
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
from gprofiler import GProfiler
import seaborn as sns
import rpy2.rinterface_lib.callbacks
import logging
import tensorflow as tf
import scipy.sparse
import os

from rpy2.robjects import pandas2ri
import anndata2ri

import importlib
import warnings
warnings.filterwarnings("ignore")
import pickle as pkl
from matplotlib.colors import LinearSegmentedColormap

In [None]:
# Two-stop colormap used for expression plots: light grey at 0, red at 1.
# LinearSegmentedColormap is already imported in the notebook's import cell.
# The RGB stops live under their own name so that they do not rebind `colors`,
# which the import cell binds to the `matplotlib.colors` module.
values = [0,1]
cmap_rgb = [(227, 227, 227), (255, 42, 18)]  # one 0-255 RGB triple per stop
norm = plt.Normalize(min(values), max(values))  # maps the stops onto [0, 1]
my_cmap = LinearSegmentedColormap.from_list(
    '', [(norm(value), tuple(np.array(rgb) / 255)) for value, rgb in zip(values, cmap_rgb)])

In [None]:
# Only let rpy2 callback messages through at ERROR level.
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
# Switch on the pandas and AnnData converters for objects passed between Python and R.
pandas2ri.activate()
anndata2ri.activate()
# Load the rpy2 IPython extension that provides the %R / %%R magics used in later cells.
%load_ext rpy2.ipython

In [None]:
# Global defaults: 8x8 figures and scanpy verbosity level 3.
plt.rcParams.update({'figure.figsize': (8, 8)})
sc.settings.verbosity = 3
# Print the versions of scanpy and its dependencies into the cell output.
sc.logging.print_versions()

In [None]:
%%R
# Set the R library search path to 'win-library\\4.3'.
# NOTE(review): the path is relative, so it resolves against R's working directory,
# and the nested .libPaths() call looks redundant (the inner call already sets the
# path) -- confirm before simplifying.
.libPaths(.libPaths('win-library\\4.3'))

# Attach the R packages for this session. Of these, only Seurat functions
# (VariableFeatures, Loadings, JoinLayers) are called in the R cells shown here.
# NOTE(review): attach order decides which package wins for same-named functions
# (plyr is attached before dplyr, monocle before monocle3) -- keep the order unless verified.
library(scran)
library(Seurat)
library(RColorBrewer)
library(slingshot)
library(monocle)
library(gam)
library(ggplot2)
library(plyr)
library(MAST)
library(clusterExperiment)
library(monocle3)
library(SeuratWrappers)
library(magrittr)
library(dplyr)

In [None]:
%%R 
# Load the integrated Seurat object from disk and record its variable features.
srat_combined = readRDS('F:/CRC/GSE188711_LeftnRight/integrated_data.rds')
HVG = VariableFeatures(srat_combined)

In [None]:
# Scaled expression matrix (scale.data slot) of the "integrated" assay, kept in the R session as `mat`.
%R mat <- srat_combined@assays$integrated@scale.data

In [None]:
%%R
#srat_combined <- RunPCA(srat_combined)
# Use the PCA reduction already stored on the object (the RunPCA call above is disabled).
pca <- srat_combined[["pca"]]

# Get the total variance:
# the sum of the per-row variances of the scaled matrix.
total_variance <- sum(matrixStats::rowVars(mat))

# Squared PC standard deviations divided by the total variance give the
# fraction of variance attributed to each PC.
eigValues = (pca@stdev)^2  ## EigenValues
varExplained = eigValues / total_variance

# Feature loadings of the PCA reduction.
PCs = Loadings(srat_combined, reduction = "pca")

In [None]:
%%R -o logcounts -o counts
# Join the layers of the RNA assay, then export its `counts` and `data` layers to
# Python (-o) as `counts` and `logcounts`.
# NOTE(review): `data` is presumably the log-normalised matrix, as the name
# `logcounts` suggests -- confirm against how the object was built.
merged = JoinLayers(srat_combined@assays$RNA)
counts = merged@layers$counts
logcounts = merged@layers$data
#counts = srat_combined@assays$integrated@counts

In [None]:
%%R -o features -o HVG -o varExplained -o PCs -o mat -o obs_names -o PC_embeddings -o sample_origin
# Export row names (features) and column names (cells) of the joined RNA assay,
# plus the PCA results, the scaled matrix and the per-cell sample identity.
features = rownames(merged)
obs_names = colnames(merged)
# This replaces the HVG vector assigned earlier from VariableFeatures(): here it
# is every row name of the "integrated" assay.
HVG = rownames(srat_combined@assays$integrated)
PC_embeddings = srat_combined@reductions$pca@cell.embeddings
sample_origin = srat_combined@meta.data$orig.ident

In [None]:
import anndata

# The matrices exported from R have features as rows and cells as columns
# (rownames -> `features`, colnames -> `obs_names`), so each one is transposed
# to the cells x genes layout used here.
adata = anndata.AnnData(X=logcounts.T)
adata.obs_names = obs_names
adata.var_names = features

# Expression layers: raw counts and the matrix also used as X.
adata.layers['counts'] = counts.T
adata.layers['logcounts'] = logcounts.T

# Unstructured extras: the scaled matrix and the integrated-assay feature list.
adata.uns['scaled'] = mat.T
adata.uns['residuals_genes'] = list(HVG)

# PCA embedding computed in Seurat and the sample each cell came from.
adata.obsm['X_pca'] = PC_embeddings
adata.obs['sample_origin'] = sample_origin

In [None]:
# Per-cell QC metrics computed from the raw counts layer.
# A row sum is an (n, 1) np.matrix for scipy sparse matrices but a flat array for
# dense input or scipy sparse arrays; np.asarray(...).ravel() yields exactly one
# value per cell in every case, so the obs columns are always plain 1-D vectors.
counts_layer = adata.layers['counts']
adata.obs['n_counts'] = np.asarray(counts_layer.sum(axis=1)).ravel()
adata.obs['n_genes'] = np.asarray((counts_layer > 0).sum(axis=1)).ravel()
# Flag the genes listed in HVG (exported from R) and store the per-PC variance
# fractions under uns['pca']['variance_ratio'].
adata.var['highly_variable'] = adata.var_names.isin(HVG)
adata.uns['pca'] = {'variance_ratio': varExplained}

In [None]:
# Fraction of each cell's counts that comes from genes whose name starts with 'MT-'.
mt_gene_mask = [gene.startswith('MT-') for gene in adata.var_names]
# np.asarray(...).ravel() gives one total per cell whether the layer is a sparse
# matrix (row sum is an (n, 1) np.matrix), a sparse array or a dense array (row sum
# is already 1-D). Indexing the flattened result with [0] is only correct for the
# np.matrix case; for 1-D sums it would pick the first cell's total for every cell.
mt_counts = np.asarray(adata.layers['counts'][:, mt_gene_mask].sum(axis=1)).ravel()
adata.obs['mt_frac'] = mt_counts / adata.obs['n_counts']

In [None]:
# Fraction of each cell's counts that comes from genes whose name starts with 'RPL' or 'RPS'.
ribo_gene_mask = [gene.startswith(('RPL', 'RPS')) for gene in adata.var_names]
# np.asarray(...).ravel() gives one total per cell whether the layer is a sparse
# matrix (row sum is an (n, 1) np.matrix), a sparse array or a dense array (row sum
# is already 1-D). Indexing the flattened result with [0] is only correct for the
# np.matrix case; for 1-D sums it would pick the first cell's total for every cell.
ribo_counts = np.asarray(adata.layers['counts'][:, ribo_gene_mask].sum(axis=1)).ravel()
adata.obs['ribo_frac'] = ribo_counts / adata.obs['n_counts']

In [None]:
# Build the neighbour graph from 50 principal components.
# NOTE(review): presumably this picks up the Seurat embedding stored in
# obsm['X_pca'] earlier, since no PCA is computed in Python before this call -- confirm.
sc.pp.neighbors(adata, n_pcs = 50)

In [None]:
# Leiden clustering at resolution 0.8; labels are stored in obs['leiden'].
# NOTE(review): later cells hard-code cluster labels from this run (e.g. '2', '6',
# '15'), so changing the parameters here will silently change what they select.
sc.tl.leiden(adata, resolution = 0.8, key_added= 'leiden')

In [None]:
# PCA scatter coloured by cluster label and by CD4 expression.
sc.pl.pca(adata, color=['leiden','CD4'], legend_loc = 'on data')

In [None]:
%matplotlib inline
# Compute the t-SNE embedding, then plot clusters and T-cell marker genes on it.
sc.tl.tsne(adata)
#plt.close()
# Thicker axes frame for the panels below (the setting persists for later plots).
plt.rcParams['axes.linewidth'] = 2
sc.pl.tsne(adata, color=['leiden','CD3E','CD3G','CD3D','TRDC','ICOS','CD4','CD8A','CD8B','FOXP3','IFNG'],legend_loc = 'on data',cmap = my_cmap)

In [None]:
# Save the clustered object to disk.
adata.write('F:/CRC/GSE188711_LeftnRight/integrated.h5ad')

In [None]:
# Load the saved object back into `adata`.
# NOTE(review): this reads from 'F:/CRC/AA_Done/GSE188711_LeftnRight/' while the
# cell above writes to 'F:/CRC/GSE188711_LeftnRight/'. Presumably the file was moved
# by hand in between -- confirm, otherwise a fresh top-to-bottom run will not find it.
adata = sc.read_h5ad('F:/CRC/AA_Done/GSE188711_LeftnRight/integrated.h5ad')

In [None]:
# Per-sample clinical table; the 'GSM' column is matched against obs['sample_origin'].
bioinfo = pd.read_csv('F:/CRC/AA_Done/GSE188711_LeftnRight/bioinfo.csv')

# obs column -> bioinfo column it is filled from.
# NOTE(review): 'TNM_T' is filled from 'AJCC Stage', exactly like 'stage'. This
# looks like a copy-paste slip -- confirm which bioinfo column holds the T category.
clinical_columns = {
    'stage': 'AJCC Stage',
    'patient': 'Patient No.',
    'gender': 'Sex',
    'age': 'Age',
    'site': 'Location',
    'TNM_T': 'AJCC Stage',
    'TNM_N': 'LN Metastasis',
    'TNM_M': 'Distant Metastasis',
}

# Fail early, naming the offending samples, if a cell's sample is absent from the table.
unknown_samples = set(adata.obs['sample_origin']) - set(bioinfo['GSM'])
if unknown_samples:
    raise KeyError(f"samples missing from bioinfo['GSM']: {sorted(unknown_samples)}")

# Broadcast each per-sample value to every cell of that sample.
for obs_key, bioinfo_key in clinical_columns.items():
    lookup = dict(zip(bioinfo['GSM'], bioinfo[bioinfo_key]))
    adata.obs[obs_key] = [lookup[gsm] for gsm in adata.obs['sample_origin']]

# Every cell in this object gets the same tissue label.
adata.obs['tissue'] = 'CRC'
# Spell out the sex codes used in the table.
adata.obs['gender'] = adata.obs['gender'].replace({'F':'Female','M':'Male'})

In [None]:
sc.pl.tsne(adata,color = ['patient','tissue','gender','age','site','TNM_T','TNM_N','TNM_M','stage'])

In [None]:
sc.pl.tsne(adata,color = ['leiden','RORC','IL17A'], cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]

# Successive Leiden refinements. Each step re-clusters a single cluster of the
# previous labelling (restrict_to) and stores the result under a new obs key,
# so 'leiden4' carries all four splits.
# NOTE(review): the cluster labels are hard-coded from an earlier run; they only
# stay valid while the upstream clustering is unchanged.
# (parent key, cluster to split, resolution, new key)
il17_refinements = [
    ('leiden', '2', 0.6, 'leiden1'),
    ('leiden1', '6', 0.3, 'leiden2'),
    ('leiden2', '15', 0.3, 'leiden3'),
    ('leiden3', '18', 0.2, 'leiden4'),
]
for parent_key, cluster, resolution, new_key in il17_refinements:
    sc.tl.leiden(adata, restrict_to=(parent_key, [cluster]), resolution=resolution, key_added=new_key)
    # Show the refined labels next to the Th17-related markers after every step.
    sc.pl.tsne(adata, color=[new_key, 'RORC', 'IL17A', 'IL17F'], size=20, legend_loc='on data', cmap=my_cmap)

In [None]:
# Genes whose name starts with "IL17", leaving out those starting with "IL17R".
IL17_genesmask = [
    gene.startswith("IL17") and not gene.startswith("IL17R")
    for gene in adata.var_names
]
IL17_genes = adata.var_names[IL17_genesmask]
IL17_exp_set = {'cytokine': IL17_genes, 'TF': 'RORC'}
# Dot plot of the two gene groups across the 'leiden4' labels, scaled per gene.
sc.pl.dotplot(
    adata,
    IL17_exp_set,
    groupby='leiden4',
    standard_scale='var',
    vmax=1,
    dot_min=0.1,
    dot_max=1,
    swap_axes=False,
)

In [None]:
potential_17 = adata[adata.obs['leiden4'].isin(['2,1','2,4','6,0','15,2','18,2']),:]

In [None]:
potential_17.write('GSE188711_LeftnRight_T17.h5ad')

In [None]:
# Cells of clusters '6' and '15', re-analysed on their own in the following cells.
# Take an explicit copy: those cells write HVG flags, a new PCA, a neighbour graph
# and cluster labels into this object, and a view of `adata` should not be modified
# in place.
potential_gd = adata[adata.obs['leiden'].isin(['6','15']),:].copy()

In [None]:
# Select 3000 highly variable genes within the subset ('seurat' flavour) and compute
# a 50-component PCA restricted to them.
sc.pp.highly_variable_genes(potential_gd, flavor='seurat', n_top_genes=3000)
sc.pp.pca(potential_gd, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
# Neighbour graph (15 neighbours, 50 PCs) and Leiden clustering of the subset.
# key_added='leiden' replaces the labels the subset inherited from the full object.
sc.pp.neighbors(potential_gd, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(potential_gd, resolution = 1, key_added= 'leiden')                                                 

In [None]:
# t-SNE embedding of the subset.
sc.tl.tsne(potential_gd)

In [None]:
%matplotlib inline
# Subset clusters next to T-cell marker genes.
sc.pl.tsne(potential_gd, color=['leiden','CD3E','CD3G','CD3D','TRDC','ICOS','CD4','CD8A','CD8B','FOXP3'],cmap = my_cmap)

In [None]:
plt.rcParams['figure.figsize'] = [8,8]

# Successive Leiden refinements of the subset. Each step re-clusters a single
# cluster of the parent labelling (restrict_to) and stores the result under the
# given obs key.
# NOTE(review): the last two steps both start from 'leiden3' and both write
# 'leiden4', so the split of cluster '6' replaces the split of cluster '3'.
# Downstream selection uses 'leiden2' only -- confirm whether the final step was
# meant to build on 'leiden4' under a new key.
# (parent key, cluster to split, resolution, new key)
gd_refinements = [
    ('leiden', '1', 0.8, 'leiden1'),
    ('leiden1', '6', 0.6, 'leiden2'),
    ('leiden2', '10', 0.3, 'leiden3'),
    ('leiden3', '3', 0.5, 'leiden4'),
    ('leiden3', '6', 0.5, 'leiden4'),
]
for parent_key, cluster, resolution, new_key in gd_refinements:
    sc.tl.leiden(potential_gd, restrict_to=(parent_key, [cluster]), resolution=resolution, key_added=new_key)
    # Show the refined labels next to the CD3 and TRDC markers after every step.
    sc.pl.tsne(potential_gd, color=[new_key, 'CD3E', 'CD3G', 'CD3D', 'TRDC'], size=20, legend_loc='on data', cmap=my_cmap)

In [None]:
gamma_genesmask = [gene.startswith("TRG") for gene in potential_gd.var_names]
gamma_genes = potential_gd.var_names[gamma_genesmask]
delta_genesmask = [gene.startswith("TRD") for gene in potential_gd.var_names]
delta_genes = potential_gd.var_names[delta_genesmask]

In [None]:
TCR_exp_set ={
"gamma": gamma_genes, "delta": delta_genes, "CD3S": ['CD3E','CD3D','CD3G']
}
sc.pl.dotplot(potential_gd,TCR_exp_set,groupby = 'leiden2' , vmax = 1, swap_axes = False, dot_min =0, dot_max =1,standard_scale = 'var')

In [None]:
# Keep the cells whose 'leiden2' label is in this hand-picked list.
# NOTE(review): the labels come from an earlier clustering run; they are only
# valid while that clustering is reproduced exactly.
gd = potential_gd[potential_gd.obs['leiden2'].isin(['1,1','1,4','3','6,4','7']),:]

In [None]:
# Dot plot of the TCR_exp_set gene groups, now restricted to the selected cells.
sc.pl.dotplot(gd,TCR_exp_set,groupby = 'leiden2' , vmax = 1, swap_axes = False, dot_min =0, dot_max =1,standard_scale = 'var')

In [None]:
# Save the selected cells.
gd.write('F:/CRC/GSE188711_LeftnRight/GSE188711_LeftnRight_gd.h5ad')

In [None]:
# RORC and IL17A expression within the selected cells.
sc.pl.tsne(gd,color = ['RORC','IL17A'],cmap = my_cmap)

In [None]:
# Load the saved selection as `adata_gd`.
# NOTE(review): this reads from 'F:/CRC/AA_Done/GSE188711_LeftnRight/' while the
# write above targets 'F:/CRC/GSE188711_LeftnRight/'. Presumably the file was moved
# by hand -- confirm, otherwise a fresh run will not find it.
adata_gd = sc.read_h5ad('F:/CRC/AA_Done/GSE188711_LeftnRight/GSE188711_LeftnRight_gd.h5ad')

In [None]:
# Per-sample clinical table; the 'GSM' column is matched against obs['sample_origin'].
bioinfo = pd.read_csv('F:/CRC/AA_Done/GSE188711_LeftnRight/bioinfo.csv')

# obs column -> bioinfo column it is filled from.
# NOTE(review): 'TNM_T' is filled from 'AJCC Stage', exactly like 'stage'. This
# looks like a copy-paste slip -- confirm which bioinfo column holds the T category.
gd_clinical_columns = {
    'stage': 'AJCC Stage',
    'patient': 'Patient No.',
    'gender': 'Sex',
    'age': 'Age',
    'site': 'Location',
    'TNM_T': 'AJCC Stage',
    'TNM_N': 'LN Metastasis',
    'TNM_M': 'Distant Metastasis',
}

# Fail early, naming the offending samples, if a cell's sample is absent from the table.
unknown_samples = set(adata_gd.obs['sample_origin']) - set(bioinfo['GSM'])
if unknown_samples:
    raise KeyError(f"samples missing from bioinfo['GSM']: {sorted(unknown_samples)}")

# Broadcast each per-sample value to every cell of that sample.
for obs_key, bioinfo_key in gd_clinical_columns.items():
    lookup = dict(zip(bioinfo['GSM'], bioinfo[bioinfo_key]))
    adata_gd.obs[obs_key] = [lookup[gsm] for gsm in adata_gd.obs['sample_origin']]

# Every cell in this object gets the same tissue label.
adata_gd.obs['tissue'] = 'CRC'
# Spell out the sex codes used in the table.
adata_gd.obs['gender'] = adata_gd.obs['gender'].replace({'F':'Female','M':'Male'})

In [None]:
sc.pl.tsne(adata_gd,color = ['patient','tissue','gender','age','site','TNM_T','TNM_N','TNM_M','stage'])

In [None]:
adata_gd.obs[['patient','tissue','gender','age','site','TNM_T','TNM_N','TNM_M','stage']].to_csv('F:/CRC/AA_Done/GSE188711_LeftnRight_gd.csv')

In [None]:
adata_gd.write('F:/CRC/AA_Done/GSE188711_LeftnRight/GSE188711_LeftnRight_gd.h5ad')

In [None]:
# Two-stop colormap used for expression plots: light grey at 0, red at 1.
# LinearSegmentedColormap is already imported in the notebook's import cell.
# The RGB stops live under their own name so that they do not rebind `colors`,
# which the import cell binds to the `matplotlib.colors` module.
values = [0,1]
cmap_rgb = [(227, 227, 227), (255, 42, 18)]  # one 0-255 RGB triple per stop
norm = plt.Normalize(min(values), max(values))  # maps the stops onto [0, 1]
my_cmap = LinearSegmentedColormap.from_list(
    '', [(norm(value), tuple(np.array(rgb) / 255)) for value, rgb in zip(values, cmap_rgb)])

In [None]:
# Load the integrated object and the two saved subsets (IL17 candidates and gd selection).
# NOTE(review): these are relative paths, whereas the cells that write
# 'integrated.h5ad' and the gd file use absolute 'F:/CRC/...' paths -- confirm the
# working directory actually contains these files.
adata = sc.read_h5ad('integrated.h5ad')
adata_17 = sc.read_h5ad('GSE188711_LeftnRight_T17.h5ad')
adata_gd = sc.read_h5ad('GSE188711_LeftnRight_gd.h5ad')

In [None]:
# '1' for cells whose name also appears in adata_17, '0' for all others.
# Written as one vectorised assignment: a chained `obs[col][mask] = value` goes
# through an intermediate Series, which pandas warns about and which leaves the
# column untouched when copy-on-write is active.
adata.obs['IL17 secreting selected'] = np.where(
    adata.obs_names.isin(adata_17.obs_names), '1', '0')

In [None]:
# '1' for cells whose name also appears in adata_gd, '0' for all others.
# Written as one vectorised assignment: a chained `obs[col][mask] = value` goes
# through an intermediate Series, which pandas warns about and which leaves the
# column untouched when copy-on-write is active.
adata.obs['gdT selected'] = np.where(
    adata.obs_names.isin(adata_gd.obs_names), '1', '0')

In [None]:
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8,8]
# Marker expression plus the selection flag, two panels per row, returned as a Figure.
fig = sc.pl.tsne(
    adata,
    color=['RORC','IL17A','IL17F','IL17 secreting selected'],
    size=10,
    ncols=2,
    palette=['#E3E3E3', '#FF2A12'],
    cmap=my_cmap,
    legend_fontsize='large',
    return_fig=True,
)
# Enlarge and embolden the title and both axis labels on every axes of the figure.
for panel in fig.get_axes():
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
fig.savefig('17_selected.png', dpi=300, bbox_inches='tight')

In [None]:
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8,8]
# CD3/TRDC marker expression plus the selection flag, two panels per row, returned as a Figure.
fig = sc.pl.tsne(
    adata,
    color=['CD3E','CD3D','CD3G','CD247','TRDC','gdT selected'],
    size=10,
    ncols=2,
    palette=['#E3E3E3', '#FF2A12'],
    cmap=my_cmap,
    legend_fontsize='large',
    return_fig=True,
)
# Enlarge and embolden the title and both axis labels on every axes of the figure.
for panel in fig.get_axes():
    panel.title.set_fontsize(30)
    panel.title.set_fontweight('bold')
    for axis_label in (panel.xaxis.label, panel.yaxis.label):
        axis_label.set_fontsize(22)
        axis_label.set_fontweight('bold')
fig.savefig('gd_selected.png', dpi=300, bbox_inches='tight')