In [None]:
#import the library
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
from gprofiler import GProfiler
import seaborn as sns
import rpy2.rinterface_lib.callbacks
import logging
import tensorflow as tf
import os

from rpy2.robjects import pandas2ri
import anndata2ri

import importlib
import warnings
warnings.filterwarnings("ignore")
import pickle as pkl
from matplotlib.colors import LinearSegmentedColormap

In [None]:
def print_full(x):
    """Print ``x`` with pandas' row truncation lifted for this one call.

    ``display.max_rows`` is set to ``len(x)`` only while printing. The value
    that was active before the call is restored afterwards (also when
    ``print`` raises), instead of being reset to the pandas default.

    Parameters
    ----------
    x : object supporting ``len()`` and ``print()``, e.g. a pandas Series or
        DataFrame.
    """
    with pd.option_context('display.max_rows', len(x)):
        print(x)

In [None]:
# Colorblind-friendly palette of 25 hex colors.
my_palette = [
    '#0351A8', '#8CB0E0', '#D56D11', '#FFBB78', '#234E08',
    '#53CB8B', '#D30083', '#CB788D', '#4E195A', '#C58CCF',
    '#AA290F', '#B03FD1', '#E8BCCF', '#64605F', '#B2AD9A',
    '#D2D30B', '#D1BD4F', '#06DCF2', '#9EDAE5', '#517219',
    '#5B43CF', '#D92F24', '#FFD900', '#002F33', '#B8A3A3',
]

In [None]:
# Continuous colormap running from light grey (position 0) to red (position 1).
# The RGB anchors live in `cmap_rgb` rather than `colors`, because `colors` is
# already bound to the `matplotlib.colors` module by the imports at the top of
# the notebook and must not be overwritten with a list.
# `LinearSegmentedColormap` is likewise imported at the top of the notebook.
values = [0, 1]
cmap_rgb = [(227, 227, 227), (255, 42, 18)]  # 0-255 RGB triplets
norm = plt.Normalize(min(values), max(values))
my_cmap = LinearSegmentedColormap.from_list(
    '',
    # from_list expects (position, rgb) pairs with rgb channels scaled to 0-1.
    [(norm(value), tuple(np.array(rgb) / 255)) for value, rgb in zip(values, cmap_rgb)],
)

In [None]:
# Silence R warning messages by raising the rpy2 callback logger to ERROR.
# Note: comment this line out to get more verbose R output.
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Activate the pandas and AnnData converters for rpy2, then load the IPython
# extension that provides the %R / %%R magics used in later cells.
pandas2ri.activate()
anndata2ri.activate()
%load_ext rpy2.ipython

In [None]:
# Global display settings: 8x8 inch figures and verbose scanpy logging.
rcParams['figure.figsize'] = (8, 8)  # same rcParams object that plt.rcParams exposes
sc.settings.verbosity = 3
# Print the versions of the installed packages for the record.
sc.logging.print_versions()

In [None]:
%%R
# Point R at a user-specific package library, then load Seurat.
# NOTE(review): the Windows path below is specific to one machine; adjust or
# remove it when running elsewhere.
# NOTE(review): the nested .libPaths(.libPaths(...)) call looks redundant; a
# single .libPaths('<path>') presumably has the same effect - confirm.
.libPaths(.libPaths('C:\\Users\\16220\\AppData\\Local\\R\\win-library\\4.3'))
library(Seurat)

In [None]:
# Load the GSE108989 count table in one idempotent step:
#   * the second column of the file (index_col=1) becomes the row index,
#   * the 'geneID' column is dropped,
#   * the table is transposed; its rows are used as observations and its
#     columns as variables when the AnnData object is built.
# Doing the transpose here, instead of in a separate `raw_table = raw_table.T`
# cell, means re-running a cell can no longer flip the table back.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
raw_table = (
    pd.read_csv(
        'F:/CRC/AAA_USELESS/GSE108989/GSE108989_CRC.TCell.S11138.count.txt',
        sep='\t',
        index_col=1,
    )
    .drop(columns='geneID')
    .T
)

In [None]:
import anndata
adata = anndata.AnnData(X = raw_table.values)
adata.var_names = [str(i) for i in raw_table.columns]
adata.obs_names = raw_table.index

In [None]:
adata.var_names_make_unique()

In [None]:
# Subset `adata` (as it stands at this point of the notebook) to the cells kept
# in `adata_17`.
# `adata_17` is only created further down in this notebook, so on a fresh
# kernel it does not exist yet. In that case fall back to the copy the notebook
# itself saves as 'GSE108989_T17.h5ad' instead of failing with a NameError.
if 'adata_17' not in globals():
    adata_17 = sc.read_h5ad('GSE108989_T17.h5ad')
adata_17_raw = adata[adata_17.obs_names, :]

In [None]:
# Prepare the pieces handed over to R. The matrix is transposed so that its
# rows follow var_names and its columns follow obs_names.
# The three variable names must stay as they are: the %%R cell below imports
# them by name through its -i flags.
colnames = adata_17_raw.obs_names
rownames = adata_17_raw.var_names
counts = adata_17_raw.X.T

In [None]:
%%R -i colnames -i rownames -i counts
# Label the matrix and build a Seurat object from it without filtering
# (min.cells = 0, min.features = 0).
colnames(counts) = colnames
rownames(counts) = rownames

srat <- CreateSeuratObject(counts = counts, project = "GSE108989", min.cells = 0, min.features = 0, assay = "RNA")

In [None]:
# Save the Seurat object as an RDS file in the working directory.
%R saveRDS(srat, 'GSE108989_T17.rds')

In [None]:
# Per-cell QC metrics, stored as columns of adata.obs.
counts_per_cell = adata.X.sum(axis=1)          # total counts per cell
genes_per_cell = (adata.X > 0).sum(axis=1)     # number of genes with a non-zero count

adata.obs['n_counts'] = counts_per_cell
adata.obs['log_counts'] = np.log(adata.obs['n_counts'])
adata.obs['n_genes'] = genes_per_cell
# Complexity: log10(genes detected) relative to log10(total counts).
log10_genes = np.log10(adata.obs['n_genes'])
log10_counts = np.log10(adata.obs['n_counts'])
adata.obs['log10GenesPerUMI'] = log10_genes / log10_counts

In [None]:
%matplotlib inline
t1 = sc.pl.violin(adata, 'n_counts', size=2, log=True, cut=0)
t2 = sc.pl.violin(adata, 'n_genes')

In [None]:
sc.pp.normalize_total(adata, key_added = 'normalization_factors')
sc.pp.log1p(adata)

In [None]:
sc.pp.highly_variable_genes(adata, flavor='seurat', n_top_genes=3000)
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(adata, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(adata, resolution = 0.8, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(adata)

In [None]:
%matplotlib inline
sc.pl.tsne(adata, color=['leiden','CD3D','CD3E','CD3G','IFNG','RORC','IL17A','IL17F','CD4','CD8A'],cmap = my_cmap)

In [None]:
adata.write('GSE108989.h5ad')

In [None]:
bioinfo = pd.read_excel('bioinfo.xlsx',index_col = 0).T

In [None]:
ID = pd.read_csv('ID.tsv', sep = '\t', index_col = 0)

In [None]:
adata.obs['patient'] = ID.loc[adata.obs_names,:]['Patient_ID']

In [None]:
adata.obs['patient'] = [i.split('_')[1] for i in adata.obs['sample_origin']]

In [None]:
adata.obs['tissue'] = 'Carcinoma'

In [None]:
# Copy per-patient clinical fields from `bioinfo` onto every cell.
# Maps adata.obs column -> bioinfo column; one loop replaces the seven
# copy-pasted cells that differed only in these two names.
# NOTE(review): 'site' is filled from 'Histological type' - confirm that this
# is the intended source column.
clinical_columns = {
    'gender': 'Gender',
    'age': 'Age',
    'site': 'Histological type',
    'TNM_T': 'pTNM: T',
    'TNM_N': 'pTNM: N',
    'TNM_M': 'pTNM: M',
    'stage': 'Stage',
}

for obs_key, bioinfo_column in clinical_columns.items():
    # bioinfo is indexed by the same identifiers stored in adata.obs['patient'];
    # a patient missing from bioinfo raises KeyError, as before.
    per_patient = dict(zip(bioinfo.index, bioinfo[bioinfo_column]))
    adata.obs[obs_key] = [per_patient[patient] for patient in adata.obs['patient']]

In [None]:
sc.pl.tsne(adata,color = ['patient','tissue','gender','age','site','TNM_T','TNM_N','TNM_M'])

In [None]:
sc.pl.tsne(adata,color = ['leiden','RORC','IL17A'], cmap = my_cmap)

In [None]:
# Iterative sub-clustering. Each step re-clusters a single cluster of the
# previous labelling and stores the result under a new obs key, then shows the
# new labels next to RORC / IL17A / IL17F. One loop replaces the four
# copy-pasted cells that differed only in the values listed here:
#   (labelling to refine, cluster to split, Leiden resolution, new obs key)
subclustering_steps = [
    ('leiden',  '7', 0.3, 'leiden1'),
    ('leiden1', '6', 0.6, 'leiden2'),
    ('leiden2', '2', 0.6, 'leiden3'),
    ('leiden3', '0', 0.8, 'leiden4'),
]

plt.rcParams['figure.figsize'] = [8, 8]
for parent_key, cluster, resolution, new_key in subclustering_steps:
    sc.tl.leiden(
        adata,
        restrict_to=(parent_key, [cluster]),
        resolution=resolution,
        key_added=new_key,
    )
    sc.pl.tsne(
        adata,
        color=[new_key, 'RORC', 'IL17A', 'IL17F'],
        size=20,
        legend_loc='on data',
        cmap=my_cmap,
    )

In [None]:
# Genes whose name starts with "IL17" but not with "IL17R".
IL17_genesmask = [
    gene.startswith("IL17") and not gene.startswith("IL17R")
    for gene in adata.var_names
]
IL17_genes = adata.var_names[IL17_genesmask]

# Gene groups shown in the dot plot, one bracket per group.
IL17_exp_set = dict(cytokine=IL17_genes, TF='RORC')

sc.pl.dotplot(
    adata,
    IL17_exp_set,
    groupby='leiden4',
    standard_scale='var',
    vmax=1,
    dot_min=0.1,
    dot_max=1,
    swap_axes=False,
)

In [None]:
adata_17 = adata[adata.obs['leiden4'].isin(['0,0','0,1','0,2','0,6','0,7','2,5','2,6','6,0','6,1','7,2']),:]

In [None]:
del adata_17.uns

In [None]:
sc.pp.highly_variable_genes(adata_17, flavor='seurat', n_top_genes=3000)
sc.pp.pca(adata_17, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
sc.pp.neighbors(adata_17, n_neighbors = 15, n_pcs = 50)
sc.tl.leiden(adata_17, resolution = 1, key_added= 'leiden')                                                 

In [None]:
sc.tl.tsne(adata_17)

In [None]:
%matplotlib inline
sc.pl.tsne(adata_17, color=['leiden','RORC','IL17A','IL17F'],size = 40,cmap = my_cmap)

In [None]:
adata_17_refined = adata_17[adata_17.obs['leiden'].isin(['0'])==0,:]

In [None]:
adata_17.write('GSE108989_T17.h5ad')

In [None]:
adata_17 = sc.read_h5ad('GSE108989_T17.h5ad')

In [None]:
adata.write('GSE108989.h5ad')

In [None]:
# Re-create the grey-to-red colormap used by the figures below.
# The RGB anchors live in `cmap_rgb` rather than `colors`, because `colors` is
# already bound to the `matplotlib.colors` module by the imports at the top of
# the notebook and must not be overwritten with a list.
# `LinearSegmentedColormap` is likewise imported at the top of the notebook.
values = [0, 1]
cmap_rgb = [(227, 227, 227), (255, 42, 18)]  # 0-255 RGB triplets
norm = plt.Normalize(min(values), max(values))
my_cmap = LinearSegmentedColormap.from_list(
    '',
    # from_list expects (position, rgb) pairs with rgb channels scaled to 0-1.
    [(norm(value), tuple(np.array(rgb) / 255)) for value, rgb in zip(values, cmap_rgb)],
)

In [None]:
# Reload the saved IL17 subset from disk.
# NOTE(review): the two commented-out loads are kept as found. The second one
# would read the T17 file into `adata_gd`, which looks like a copy-paste slip -
# confirm which file `adata_gd` is meant to come from.
#adata = sc.read_h5ad('GSE108989.h5ad')
adata_17 = sc.read_h5ad('GSE108989_T17.h5ad')
#adata_gd = sc.read_h5ad('GSE108989_T17.h5ad')

In [None]:
adata.obs['IL17 secreting selected'] = '0'
adata.obs['IL17 secreting selected'][adata.obs_names.isin(adata_17.obs_names)] = '1'

In [None]:
adata.obs['gdT selected'] = '0'
adata.obs['gdT selected'][adata.obs_names.isin(adata_gd.obs_names)] = '1'

In [None]:
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8,8]
fig = sc.pl.tsne(adata, color=['RORC','IL17A','IL17F','IL17 secreting selected'],
                 size =20, ncols = 2, palette = ['#E3E3E3', '#FF2A12'], cmap = my_cmap, return_fig = True, legend_fontsize = 'large', vmax = 4)
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
fig.savefig('17_selected.png',dpi = 300,bbox_inches='tight') 

In [None]:
plt.close()
plt.rcParams['axes.linewidth'] = 2
plt.rcParams['figure.figsize'] = [8,12]
fig = sc.pl.tsne(adata, color=['CD3E','CD3D','CD3G','CD247','TRDC','gdT selected'],
                 size =20, ncols = 2, palette = ['#E3E3E3', '#FF2A12'], cmap = my_cmap, return_fig = True, legend_fontsize = 'large')
ax = fig.get_axes()
for i in range(0,len(ax)):
    ax[i].xaxis.label.set_fontsize(22)
    ax[i].xaxis.label.set_fontweight('bold')
    ax[i].yaxis.label.set_fontsize(22)
    ax[i].title.set_fontsize(30)
    ax[i].yaxis.label.set_fontweight('bold')
    ax[i].title.set_fontweight('bold')
fig.savefig('gd_selected.png',dpi = 300,bbox_inches='tight') 