# Gene expression & surface protein TIL 

## Load annotated adata

In [None]:
# Libraries
import anndata as ad
import matplotlib as plt
import numpy as np
import pandas as pd
import sc_atlas_helpers as ah
import scanpy as sc
from matplotlib.pyplot import rc_context
from scipy.stats import median_abs_deviation

In [None]:
from functools import partial

import altair as alt

In [None]:
import seaborn as sns

In [None]:
import decoupler as dc

In [None]:
# Results folder of this analysis step. The trailing slash matters: file names are
# appended directly to this string when reading the input object.
resDir = "/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/results/40_gex_surface_prot/"
# Input folder; currently the very same location as resDir.
inputDir = "/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/results/40_gex_surface_prot/"

In [None]:
adata = sc.read_h5ad(f"{resDir}adata_merged_til_annotated_new.h5ad")

### Functions

In [None]:
def is_outlier(adata, metric: str, nmads: int):
    """Flag observations whose QC metric lies far from the column median.

    A value is an outlier when it is more than ``nmads`` median absolute
    deviations (MADs) below or above the median of ``adata.obs[metric]``.

    Args:
        adata: Object exposing an ``obs`` table indexable by column name.
        metric: Name of the ``adata.obs`` column to evaluate.
        nmads: Number of MADs tolerated on either side of the median.

    Returns:
        Boolean mask aligned with ``adata.obs[metric]``; ``True`` marks an
        outlier. Values exactly on a bound are not flagged.
    """
    values = adata.obs[metric]
    # Compute centre and tolerated spread once instead of once per bound.
    center = np.median(values)
    spread = nmads * median_abs_deviation(values)
    return (values < center - spread) | (center + spread < values)

### QC clean

In [None]:
# Flag mitochondrial genes: gene names starting with the lowercase prefix "mt-"
adata.var["mt"] = adata.var_names.str.startswith("mt-")
# Flag ribosomal genes: gene names starting with "Rps" or "Rpl"
adata.var["ribo"] = adata.var_names.str.startswith(("Rps", "Rpl"))


In [None]:
# Compute QC metrics in place. qc_vars requests metrics for the "mt" and "ribo" gene
# flags, percent_top=[20] the share of counts in the top 20 genes, and log1p=True
# log1p-transformed variants; the outlier filters further down read these columns.
sc.pp.calculate_qc_metrics(
    adata, qc_vars=["mt", "ribo"], inplace=True, percent_top=[20], log1p=True
)
# Display the object summary.
adata

In [None]:
sc.pl.violin(adata, "pct_counts_mt")

In [None]:
# A cell is a general QC outlier when ANY of three metrics lies more than 5 MADs
# from its median: log1p total counts, log1p number of genes by counts, or the
# percentage of counts in the top 20 genes.
adata.obs["outlier"] = (
    is_outlier(adata, "log1p_total_counts", 5)
    | is_outlier(adata, "log1p_n_genes_by_counts", 5)
    | is_outlier(adata, "pct_counts_in_top_20_genes", 5)
)
# Tally flagged vs. unflagged cells.
adata.obs.outlier.value_counts()

In [None]:
# Mitochondrial outliers: pct_counts_mt more than 3 MADs from its median, or above
# the fixed cut-off of 1.5 (in the units of pct_counts_mt).
adata.obs["mt_outlier"] = is_outlier(adata, "pct_counts_mt", 3) | (
    adata.obs["pct_counts_mt"] > 1.5
)
# Tally flagged vs. unflagged cells.
adata.obs.mt_outlier.value_counts()

In [None]:
# Report the size before filtering.
print(f"Total number of cells: {adata.n_obs}")

# Keep only cells flagged by neither QC filter ("not A and not B" == "not (A or B)").
flagged = adata.obs["outlier"] | adata.obs["mt_outlier"]
adata = adata[~flagged].copy()

print(f"Number of cells after filtering of low quality cells: {adata.n_obs}")

In [None]:
sc.pl.violin(adata, "pct_counts_mt")

In [None]:
# Visualize
sc.pl.umap(adata, color='joint_leiden')

In [None]:
adata.obs.sample_id.value_counts()

### Split adata

In [None]:
# TIL1 subset: cells in joint Leiden clusters "2" and "5".
# Copy immediately: a later cell writes to .var of this object, and writing to an
# AnnData view forces an implicit copy and raises a modification warning.
adata_til1 = adata[adata.obs["joint_leiden"].isin(["2", "5"])].copy()

In [None]:
# TIL2 subset: cells in joint Leiden clusters "0", "1", "3", "4" and "6".
# Copy immediately: a later cell writes to .var of this object, and writing to an
# AnnData view forces an implicit copy and raises a modification warning.
adata_til2 = adata[adata.obs["joint_leiden"].isin(["0", "1", "3", "4", "6"])].copy()

In [None]:
# Flag ribosomal genes using the subset's own gene names, so the mask always lines up
# with adata_til2.var regardless of what happens to the parent object.
adata_til2.var["ribo"] = adata_til2.var_names.str.startswith(("Rps", "Rpl"))

In [None]:
# Flag ribosomal genes using the subset's own gene names, so the mask always lines up
# with adata_til1.var regardless of what happens to the parent object.
adata_til1.var["ribo"] = adata_til1.var_names.str.startswith(("Rps", "Rpl"))

In [None]:
# Drop ribosomal genes from the TIL1 subset (boolean negation instead of `== False`).
adata_til1 = adata_til1[:, ~adata_til1.var["ribo"]].copy()

In [None]:
# Drop ribosomal genes from the TIL2 subset (boolean negation instead of `== False`).
adata_til2 = adata_til2[:, ~adata_til2.var["ribo"]].copy()

### Working on TIL1 subset

In [None]:
# Re-cluster the TIL1 subset on its own: neighbourhood graph with 10 neighbours,
# Leiden clustering at resolution 0.32 (stored in obs["leiden"], used below), then UMAP.
# NOTE(review): n_iterations=-1 presumably lets Leiden run until it stops improving —
# confirm against the scanpy documentation.
sc.pp.neighbors(adata_til1, n_neighbors=10)
sc.tl.leiden(adata_til1, resolution=0.32, n_iterations=-1)
sc.tl.umap(adata_til1)

In [None]:
sc.pl.umap(adata_til1, color = "leiden")

In [None]:
# Leiden cluster id -> cell-type label for the TIL1 subset.
annotation_dict = {
    "2": "MPEC_Progenitor",
    "0": "MPEC_Intermediate",
    "1": "MPEC_Effector",
}

In [None]:
## Add cell type column based on annotation
# Each cell's obs['leiden'] cluster id is looked up in annotation_dict; an id that is
# missing from the dict raises KeyError instead of silently producing an empty label.
adata_til1.obs['cell_type'] = [annotation_dict[clust] for clust in adata_til1.obs['leiden']]

# Visualize the assigned cell types on the UMAP
sc.pl.umap(adata_til1, color='cell_type')

In [None]:
# Marker panel: group label -> marker genes. Passed as `var_names` to the dot plots.
# NOTE(review): the keys "SPEC" and "Sel-renewal" look like typos of "SLEC" and
# "Self-renewal"; they are kept verbatim because they are runtime labels.
markers = {
    "Early A.": ["Cd69"],
    "Late A.": ["Il2ra"],
    "Naive": ["Sell"],
    "E.Memory": ["Cd44"],
    "MPEC": ["Il7r"],  # Memory Precursor Effector Cells
    "SPEC": ["Klrg1", "Tbx21"],  # Short Lived Effector Cells
    "Exhaustion": ["Havcr2", "Entpd1", "Tox"],
    "Cytotoxic": ["Gzmb", "Gzmk", "Ifng"],
    "Sel-renewal": ["Cxcr3", "Casp3"],
}

In [None]:
adata_til1

In [None]:
# Dot plot of the marker panel per TIL1 cell type, in the explicit order given below.
sc.pl.dotplot(
    adata_til1,
    var_names=markers,
    groupby="cell_type",
    categories_order=[
        "MPEC_Progenitor",
        "MPEC_Intermediate",
        "MPEC_Effector",
    ],
    dendrogram=False,
    log=True,
)

In [None]:
# Flat list of the marker genes, in the same order as the grouped marker panel.
marker_list = [
    "Cd69",
    "Il2ra",
    "Sell",
    "Cd44",
    "Il7r",
    "Klrg1",
    "Tbx21",
    "Havcr2",
    "Entpd1",
    "Tox",
    "Gzmb",
    "Gzmk",
    "Ifng",
    "Cxcr3",
    "Casp3",
]

In [None]:
# UMAP of the TIL1 subset coloured by each gene in marker_list.
sc.pl.umap(
    adata_til1,
    color=marker_list,
    cmap="Reds",
    vmax="p99",
    add_outline=True,
    frameon=False,
)

### Working on TIL2 subset

In [None]:
# Re-cluster the TIL2 subset on its own: neighbourhood graph with 10 neighbours,
# Leiden clustering at resolution 0.5 (stored in obs["leiden"]), then UMAP.
# NOTE(review): n_iterations=-1 presumably lets Leiden run until it stops improving —
# confirm against the scanpy documentation.
sc.pp.neighbors(adata_til2, n_neighbors=10)
sc.tl.leiden(adata_til2, resolution=0.5,  n_iterations=-1)
sc.tl.umap(adata_til2)

In [None]:
sc.pl.umap(adata_til2, color = "leiden")

In [None]:
# Cluster id -> cell-type label for the TIL2 subset; several clusters share a label.
annotation_dict = {
    "0": "SLEC_Effector",
    "1": "SLEC_Effector",
    "2": "SLEC_Inf",
    "3": "SLEC_Terminal",
    "4": "SLEC_Plastic",
    "5": "SLEC_Terminal",
    "6": "SLEC_Progenitor",
    "7": "SLEC_Inf",
    "8": "SLEC_Intermediate",
    "9": "SLEC_Intermediate",
    "10": "SLEC_Terminal",
    "11": "SLEC_Terminal",
    "12": "SLEC_Terminal",
    "13": "SLEC_Terminal",
    "14": "SLEC_Terminal",
}

In [None]:
sc.pl.umap(adata_til2, color='rna_leiden')

In [None]:
## Add cell type column based on annotation
# NOTE(review): labels are looked up from obs['rna_leiden'], not from the obs['leiden']
# clustering computed on this subset a few cells earlier — confirm this is intended.
# A cluster id that is missing from annotation_dict raises KeyError.
adata_til2.obs['cell_type'] = [annotation_dict[clust] for clust in adata_til2.obs['rna_leiden']]

# Visualize the assigned cell types on the UMAP
sc.pl.umap(adata_til2, color='cell_type')

In [None]:
# Dot plot of the marker panel per TIL2 cell type, in the explicit order given below.
sc.pl.dotplot(
    adata_til2,
    var_names=markers,
    groupby="cell_type",
    categories_order=[
        "SLEC_Progenitor",
        "SLEC_Plastic",
        "SLEC_Intermediate",
        "SLEC_Inf",
        "SLEC_Effector",
        "SLEC_Terminal",
    ],
    dendrogram=False,
    log=True,
)

In [None]:
# Flat list of the marker genes, in the same order as the grouped marker panel.
marker_list = [
    "Cd69",
    "Il2ra",
    "Sell",
    "Cd44",
    "Il7r",
    "Klrg1",
    "Tbx21",
    "Havcr2",
    "Entpd1",
    "Tox",
    "Gzmb",
    "Gzmk",
    "Ifng",
    "Cxcr3",
    "Casp3",
]

In [None]:
# UMAP of the TIL2 subset coloured by each gene in marker_list.
sc.pl.umap(
    adata_til2,
    color=marker_list,
    cmap="Reds",
    vmax="p99",
    add_outline=True,
    frameon=False,
)

### Reintegrate annotated subsets

In [None]:
# Cast the existing cell_type column to plain strings before writing into it
# (presumably so labels that were not present before can be assigned — confirm).
adata.obs["cell_type"] = adata.obs["cell_type"].astype("str")
# Overwrite the label of every TIL2 cell, matched by obs index, with its refined annotation.
adata.obs.loc[adata_til2.obs.index, "cell_type"] = adata_til2.obs["cell_type"].astype("str")

In [None]:
# Cast the existing cell_type column to plain strings before writing into it
# (presumably so labels that were not present before can be assigned — confirm).
adata.obs["cell_type"] = adata.obs["cell_type"].astype("str")
# Overwrite the label of every TIL1 cell, matched by obs index, with its refined annotation.
adata.obs.loc[adata_til1.obs.index, "cell_type"] = adata_til1.obs["cell_type"].astype("str")

In [None]:
sc.pl.umap(adata, color='cell_type')#, legend_loc="on data",legend_fontsize=5)

In [None]:
adata.obs["cell_type2"] = adata.obs["cell_type"] 

In [None]:
set(adata.obs["cell_type2"])

In [None]:
set(adata.obs["cell_type"])

In [None]:
# Rename the remaining non-prefixed labels to the SLEC_* naming scheme.
# The result is assigned back to the column on purpose: calling
# replace(..., inplace=True) on adata.obs["cell_type2"] modifies an intermediate
# Series, which pandas warns about and which leaves the DataFrame unchanged
# under copy-on-write. No key below is also a value, so one mapping is equivalent
# to the previous sequence of single replacements.
slec_label_map = {
    'Plastic': 'SLEC_Plastic',
    'Tex_prog': 'SLEC_Progenitor',
    'Tex_int': 'SLEC_Intermediate',
    'Interferon_response': 'SLEC_Inf',
    'Tex_eff': 'SLEC_Effector',
    'Tex_term': 'SLEC_Terminal',
    'Zc3h7a': 'SLEC_Terminal',
}
adata.obs["cell_type2"] = adata.obs["cell_type2"].replace(slec_label_map)

In [None]:
sc.pl.umap(adata, color=['cell_type2'], legend_loc ="on data",sort_order=True, legend_fontsize=6)

In [None]:
# Marker panel: group label -> marker genes. Passed as `var_names` to the dot plots.
# NOTE(review): the keys "SPEC" and "Sel-renewal" look like typos of "SLEC" and
# "Self-renewal"; they are kept verbatim because they are runtime labels.
markers = {
    "Early A.": ["Cd69"],
    "Late A.": ["Il2ra"],
    "Naive": ["Sell"],
    "E.Memory": ["Cd44"],
    "MPEC": ["Il7r"],  # Memory Precursor Effector Cells
    "SPEC": ["Klrg1", "Tbx21"],  # Short Lived Effector Cells
    "Exhaustion": ["Havcr2", "Entpd1", "Tox"],
    "Cytotoxic": ["Gzmb", "Gzmk", "Ifng"],
    "Sel-renewal": ["Cxcr3", "Casp3"],
}

In [None]:
# Dot plot of the marker panel across all harmonised cell types (MPEC first, then SLEC).
sc.pl.dotplot(
    adata,
    var_names=markers,
    groupby="cell_type2",
    categories_order=[
        "MPEC_Progenitor",
        "MPEC_Intermediate",
        "MPEC_Effector",
        "SLEC_Progenitor",
        "SLEC_Plastic",
        "SLEC_Intermediate",
        "SLEC_Inf",
        "SLEC_Effector",
        "SLEC_Terminal",
    ],
    dendrogram=False,
    log=True,
)

In [None]:
# Flat list of the marker genes, in the same order as the grouped marker panel.
marker_list = [
    "Cd69",
    "Il2ra",
    "Sell",
    "Cd44",
    "Il7r",
    "Klrg1",
    "Tbx21",
    "Havcr2",
    "Entpd1",
    "Tox",
    "Gzmb",
    "Gzmk",
    "Ifng",
    "Cxcr3",
    "Casp3",
]

In [None]:
# UMAP of the full object coloured by each gene in marker_list.
sc.pl.umap(
    adata,
    color=marker_list,
    cmap="Reds",
    vmax="p99",
    add_outline=True,
    frameon=False,
)

### Save annotated TIL adata

In [None]:
#adata.write_h5ad("/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/results/adata_merged_til_slec_mpec_annotation_tryneighbors.h5ad", )

In [None]:
#adata= sc.read_h5ad("/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/results/adata_merged_til_slec_mpec_annotation_tryneighbors.h5ad")

## PAGA analysis

In [None]:
# Libraries
import anndata as ad
import matplotlib as plt
import matplotlib.pyplot as pl
import numpy as np
import pandas as pd
#import sc_atlas_helpers as ah
import scanpy as sc
from matplotlib.pyplot import rc_context
from scipy.stats import median_abs_deviation
from scipy.stats import mode
from collections import Counter

In [None]:
# Rebuild the neighbourhood graph on the full object with 7 neighbours and 20 PCs;
# the Leiden/Louvain clusterings and the first PAGA run in the following cells use it.
sc.pp.neighbors(adata, n_neighbors=7, n_pcs=20)

In [None]:
sc.tl.leiden(adata)

In [None]:
sc.tl.louvain(adata)

In [None]:
sc.pl.umap(adata, color=['leiden','louvain'], sort_order=True)

In [None]:
#adata.write_h5ad("adata_neighbors.h5ad")
#adata = sc.read_h5ad("adata_neighbors.h5ad")

In [None]:
sc.tl.paga(adata)

In [None]:
sc.pl.umap(adata, color=['leiden','louvain'], sort_order=True)

In [None]:
sc.pl.umap(adata, color=['cell_type','cell_type2'], sort_order=True)

In [None]:
sc.tl.diffmap(adata)

In [None]:
# Rebuild the neighbourhood graph on the "X_diffmap" representation (presumably the
# one written by sc.tl.diffmap in the previous cell) with 15 neighbours. This graph is
# what the PAGA and pseudotime steps below run on.
sc.pp.neighbors(adata, n_neighbors=15, use_rep='X_diffmap')

In [None]:
# Grouping used by the PAGA run and for picking the pseudotime root.
# NOTE(review): this copies cell_type, not the harmonised cell_type2 column —
# confirm which set of labels is intended here.
adata.obs['clusters'] = adata.obs['cell_type']

In [None]:
sc.tl.paga(adata, groups='clusters')

In [None]:
sc.pl.paga(adata, threshold=0.02, edge_width_scale=1, layout='fr', random_state=0)

In [None]:
pos = adata.uns['paga']['pos']

In [None]:
sc.pl.paga(adata, threshold=0.02, edge_width_scale=1, layout='fr', pos=pos)

In [None]:
sc.pl.umap(adata, color='clusters', legend_loc='on data')

In [None]:
# PAGA comparison figure; with show=False the axes are kept in `axs`.
axs = sc.pl.paga_compare(
    adata,
    threshold=0.02,
    title='',
    size=10,
    fontsize=8,
    legend_fontsize=8,
    right_margin=0.2,
    frameon=False,
    show=False,
)

In [None]:
# Root cell for the pseudotime computation: positional index of the first cell whose
# 'clusters' label is 'MPEC_Progenitor'. Raises IndexError if no cell carries that label.
adata.uns['iroot'] = np.flatnonzero(adata.obs['clusters'] == 'MPEC_Progenitor')[0]

In [None]:
sc.tl.dpt(adata)

In [None]:
# UMAP panels: harmonised cell types next to the diffusion pseudotime.
sc.pl.umap(
    adata,
    color=["cell_type2", "dpt_pseudotime"],
    cmap="viridis",
    vmax="p99",
    legend_fontsize=8,
)

In [None]:
## colon & til 

In [None]:
path = "/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/results/solo"

In [None]:
adata_solo = sc.read_h5ad(f"{path}/adata_nodoublet.h5ad")