In [None]:
import scirpy as ir

In [None]:
from glob import glob
import warnings
import tarfile

import anndata
import matplotlib.pyplot as plt
import pandas as pd
import scanpy as sc

In [None]:
import muon as mu

In [None]:

# Sample name -> per-sample metadata. The keys double as the directory names
# used to build the input paths below.
samples = {
    "10mix1": {"group": "10mix"},
    "10mix2": {"group": "10mix"},
    "11mix1": {"group": "11mix"},
    "11mix2": {"group": "11mix"},
    "GF1": {"group": "GF"},
    "GF2": {"group": "GF"},
}

# Directory that holds one sub-directory per sample. Defined once so the
# GEX and TCR reads cannot drift apart.
analyses_dir = "/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2019-10-29_sorted_cd8/analyses_icbi"

# Build one AnnData object per sample and modality, keyed by sample name
# (dicts rather than lists, so the keys are available when concatenating).
adatas_tcr = {}
adatas_gex = {}
for sample in samples:
    sample_outs = f"{analyses_dir}/{sample}/outs/per_sample_outs/{sample}"
    adata_gex = sc.read_10x_h5(f"{sample_outs}/count/sample_filtered_feature_bc_matrix.h5")
    adata_tcr = ir.io.read_10x_vdj(f"{sample_outs}/vdj_t/filtered_contig_annotations.csv")
    # concatenation only works with unique gene names
    adata_gex.var_names_make_unique()
    adatas_tcr[sample] = adata_tcr
    adatas_gex[sample] = adata_gex

In [None]:

# Metadata for each sample; the dict keys are also the sample directory names.
samples = {
    "10mix1": {"group": "10mix"}, "10mix2": {"group": "10mix"},
    "11mix1": {"group": "11mix"}, "11mix2": {"group": "11mix"},
    "GF1": {"group": "GF"}, "GF2": {"group": "GF"},
}

# Per-sample AnnData objects, keyed by sample name, one dict per modality.
adatas_tcr, adatas_gex = {}, {}
for sample, sample_meta in samples.items():
    # Gene-expression counts and TCR contig annotations of this sample.
    adata_gex = sc.read_10x_h5(f"/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2019-10-29_sorted_cd8/analyses_icbi/{sample}/outs/per_sample_outs/{sample}/count/sample_filtered_feature_bc_matrix.h5")
    adata_tcr = ir.io.read_10x_vdj(f"/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2019-10-29_sorted_cd8/analyses_icbi/{sample}/outs/per_sample_outs/{sample}/vdj_t/filtered_contig_annotations.csv")

    # Gene names must be unique before the samples can be concatenated.
    adata_gex.var_names_make_unique()

    adatas_tcr[sample], adatas_gex[sample] = adata_tcr, adata_gex

In [None]:
# Concatenate the per-sample objects of each modality. With `index_unique="_"`
# the dict key (sample name) is appended to every obs name after an underscore.
merged = {
    modality: anndata.concat(per_sample, index_unique="_")
    for modality, per_sample in (("gex", adatas_gex), ("airr", adatas_tcr))
}
adata_gex, adata_tcr = merged["gex"], merged["airr"]
mdata = mu.MuData(merged)

# Global metadata on `mdata.obs`: the sample is the part of the obs name after
# the underscore, the group is looked up in the `samples` table.
sample_of_cell = mdata.obs_names.to_series().str.split("_", expand=True)[1]
mdata.obs["sample"] = sample_of_cell
mdata.obs["group"] = mdata.obs["sample"].map(lambda name: samples[name]["group"])

In [None]:
# Basic QC on the gene-expression modality (filtered in place):
# first drop genes seen in fewer than 10 cells, then cells with fewer than 100 genes.
gex = mdata["gex"]
sc.pp.filter_genes(gex, min_cells=10)
sc.pp.filter_cells(gex, min_genes=100)

In [None]:
# Normalise, log-transform and embed the gene-expression modality.
gex = mdata["gex"]

# `sc.pp.normalize_per_cell` is deprecated in scanpy; `sc.pp.normalize_total` is its
# replacement. With no target sum given, each cell is scaled to the median total
# count. Unlike the old function it does not add an `n_counts` column to `.obs`.
sc.pp.normalize_total(gex)
sc.pp.log1p(gex)

# Flag the 5000 most variable genes; the PCA below is computed after this step.
sc.pp.highly_variable_genes(gex, flavor="cell_ranger", n_top_genes=5000)
sc.tl.pca(gex)
sc.pp.neighbors(gex)

In [None]:
# Compute the UMAP embedding for the "gex" modality (run after `sc.pp.neighbors`).
sc.tl.umap(mdata["gex"])

In [None]:
# Leiden clustering of the "gex" modality; presumably written to obs["leiden"],
# the key the UMAP plots further down colour by.
sc.tl.leiden(mdata["gex"])

In [None]:
# Show the per-gene annotation table of the "gex" modality as the cell output.
mdata["gex"].var

In [None]:
# Show the per-cell annotation table of the "gex" modality as the cell output.
mdata["gex"].obs

In [None]:
# Tag every GEX cell with its sample: the second "_"-separated field of the obs
# name (presumably "<barcode>_<sample>", as produced by the concatenation step).
gex_obs = mdata["gex"].obs
name_parts = gex_obs.index.str.split('_')
gex_obs['sample_id'] = name_parts.str[1]

In [None]:
def update_columns(row):
    """Set ``row['origin']`` from the row's sample id and return the row.

    Rows whose ``'sample_id'`` contains the substring ``"ICI"`` are labelled
    ``"til"``; every other row is labelled ``"colon"``. The row is modified
    in place and the same object is returned.
    """
    is_til = "ICI" in row['sample_id']
    row['origin'] = "til" if is_til else "colon"
    return row

# Label each GEX cell with its tissue of origin, using the same rule as
# `update_columns`: sample ids containing "ICI" are "til", all others "colon".
# Written as a single column assignment because the row-wise
# `DataFrame.apply(..., axis=1)` rebuilt the whole obs table row by row, which is
# slow and can lose categorical column dtypes (e.g. the Leiden labels).
gex_obs = mdata["gex"].obs
from_til = gex_obs["sample_id"].str.contains("ICI", regex=False)
gex_obs["origin"] = from_til.map({True: "til", False: "colon"})

In [None]:
# Refresh the MuData container; presumably needed so it reflects the filtering and
# the obs columns added to mdata["gex"] in the preceding cells.
mdata.update()

In [None]:
# UMAP of the "gex" modality coloured by the "leiden" column.
sc.pl.umap(mdata["gex"], color = "leiden")

In [None]:
# GEX UMAP, one panel per annotation: sample id and Leiden cluster.
embedding_colors = ["gex:sample_id",  "gex:leiden"]
mu.pl.embedding(mdata, basis="gex:umap", color=embedding_colors, ncols=3, wspace=0.7)


In [None]:
# Display the MuData object's summary as the cell output.
mdata

In [None]:
# Run scirpy's chain indexing on the data; done here before `ir.tl.chain_qc`.
ir.pp.index_chains(mdata)

In [None]:
# Receptor chain QC; presumably provides the "airr:receptor_subtype" column
# that the abundance plots below group by.
ir.tl.chain_qc(mdata)

In [None]:
# Abundance of receptor subtypes, split by GEX sample id. The return value is
# bound to `_` so it is not echoed as the cell output.
_ = ir.pl.group_abundance(mdata, groupby="airr:receptor_subtype", target_col="gex:sample_id")

In [None]:
# Abundance of receptor subtypes, split by the "origin" column of the GEX modality.
_ = ir.pl.group_abundance(mdata, groupby="airr:receptor_subtype", target_col="gex:origin")

In [None]:
# Metadata for the samples stored under the 2021-02-01_sorted_cd8_til directory.
# This rebinds `samples`, `adatas_tcr` and `adatas_gex` from the earlier cells.
samples = {
    "10mix-ICI1":   {"group": "10mix",   "condition": "ICI"},
    "10mix-ICI2":   {"group": "10mix",   "condition": "ICI"},
    "11mix-ICI1":   {"group": "11mix",   "condition": "ICI"},
    "11mix-ICI2":   {"group": "11mix",   "condition": "ICI"},
    "GF-ICI1":      {"group": "GF",      "condition": "No ICI"},
    "GF-ICI2":      {"group": "GF",      "condition": "No ICI"},
    "GF-ICI1-plus": {"group": "GF-plus", "condition": "ICI"},
    "GF-ICI2-plus": {"group": "GF-plus", "condition": "ICI"},
}

# Per-sample AnnData objects, keyed by sample name, one dict per modality.
adatas_tcr, adatas_gex = {}, {}
for sample, sample_meta in samples.items():
    # Gene-expression counts and TCR contig annotations of this sample.
    adata_gex = sc.read_10x_h5(f"/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2021-02-01_sorted_cd8_til/analyses_icbi/{sample}/outs/per_sample_outs/{sample}/count/sample_filtered_feature_bc_matrix.h5")
    adata_tcr = ir.io.read_10x_vdj(f"/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2021-02-01_sorted_cd8_til/analyses_icbi/{sample}/outs/per_sample_outs/{sample}/vdj_t/filtered_contig_annotations.csv")

    # Gene names must be unique before the samples can be concatenated.
    adata_gex.var_names_make_unique()

    adatas_tcr[sample], adatas_gex[sample] = adata_tcr, adata_gex

In [None]:
# Analysis directory of the 2019-10-29 run.
# NOTE(review): `path` is not referenced by any other cell visible here — confirm it is still needed.
path = "/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2019-10-29_sorted_cd8/analyses_icbi"

In [None]:
# NOTE(review): `adata` is not assigned by any earlier cell visible here, so on a
# fresh kernel this would raise NameError unless it is defined elsewhere — confirm
# which object is meant.
adata.var_names_make_unique()

In [None]:
# Shape of `adata_tcr`, i.e. whatever the most recently executed cell bound to that name.
adata_tcr.shape

In [None]:
# NOTE(review): relies on an `adata` variable that no earlier cell visible here assigns.
adata.shape

In [None]:
# Recompute PCA, neighbours and UMAP for the gene-expression modality.
gex = mdata["gex"]

# scanpy records a "log1p" entry in `.uns` once a matrix has been log-transformed.
# Skip the transform when that marker is present, so running this cell on
# already-logged data does not log-transform it a second time.
if "log1p" not in gex.uns:
    sc.pp.log1p(gex)
sc.pp.pca(gex, svd_solver="arpack")
sc.pp.neighbors(gex)
sc.tl.umap(gex)

In [None]:
# Index the receptor chains, then run scirpy's chain QC on the result.
ir.pp.index_chains(mdata)
ir.tl.chain_qc(mdata)

In [None]:
# Show the per-gene annotation table of the "gex" modality as the cell output.
mdata.mod["gex"].var

In [None]:
# Show what is stored under the "umap" key of the "gex" modality's `.uns`.
mdata.mod["gex"].uns["umap"]

In [None]:
# Two side-by-side GEX UMAP panels: Xkr4 expression and receptor type.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4), gridspec_kw={"wspace": 0.5})
ax0, ax1 = axes
umap_basis = "gex:umap"
# The first panel is drawn with show=False so both panels end up in one figure.
mu.pl.embedding(mdata, basis=umap_basis, color=["Xkr4"], ax=ax0, show=False)
mu.pl.embedding(mdata, basis=umap_basis, color=["airr:receptor_type"], ax=ax1)

In [None]:
# Read the GEX matrix and the TCR contig annotations of every sample, using paths
# relative to the current working directory. `adata` and `adata_tcr` are rebound
# on each iteration, so only the last sample's objects remain after the loop.
for sample in samples:
    print(sample)
    # Fixed: the original line read `adata = = sc.read_10x_h5(...)`, a syntax error.
    adata = sc.read_10x_h5(f"{sample}/outs/per_sample_outs/{sample}/count/sample_filtered_feature_bc_matrix.h5")
    adata_tcr = ir.io.read_10x_vdj(f"{sample}/outs/per_sample_outs/{sample}/vdj_t/filtered_contig_annotations.csv")

In [None]:

# Samples stored under the 2019-10-29_sorted_cd8 directory, with their group label.
samples = {
    "10mix1": {"group": "10mix"},
    "10mix2": {"group": "10mix"},
    "11mix1": {"group": "11mix"},
    "11mix2": {"group": "11mix"},
    "GF1": {"group": "GF"},
    "GF2": {"group": "GF"},
}

# One AnnData per sample for each modality, keyed by sample name.
adatas_tcr = dict()
adatas_gex = dict()
for sample in samples:
    sample_meta = samples[sample]
    adata_gex = sc.read_10x_h5(f"/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2019-10-29_sorted_cd8/analyses_icbi/{sample}/outs/per_sample_outs/{sample}/count/sample_filtered_feature_bc_matrix.h5")
    adata_tcr = ir.io.read_10x_vdj(f"/data/projects/2021/MicrobialMetabolites/single-cell-sorted-cd8/2019-10-29_sorted_cd8/analyses_icbi/{sample}/outs/per_sample_outs/{sample}/vdj_t/filtered_contig_annotations.csv")
    # Duplicate gene names would break the later concatenation.
    adata_gex.var_names_make_unique()
    adatas_tcr[sample] = adata_tcr
    adatas_gex[sample] = adata_gex

In [None]:
# Stack the per-sample objects of each modality; `index_unique="_"` appends the
# sample key to every obs name after an underscore.
concat_opts = {"index_unique": "_"}
adata_gex = anndata.concat(adatas_gex, **concat_opts)
adata_tcr = anndata.concat(adatas_tcr, **concat_opts)
mdata = mu.MuData({"gex": adata_gex, "airr": adata_tcr})


def _group_of(sample_name):
    """Return the group recorded for `sample_name` in the `samples` table."""
    return samples[sample_name]["group"]


# Global metadata on `mdata.obs`: sample taken from the obs name, group from `samples`.
obs_name_fields = mdata.obs_names.to_series().str.split("_", expand=True)
mdata.obs["sample"] = obs_name_fields[1]
mdata.obs["group"] = mdata.obs["sample"].map(_group_of)

In [None]:
# Display the freshly merged MuData object's summary as the cell output.
mdata

In [None]:
# Gene-expression side: log-transform, PCA, neighbour graph, UMAP.
# NOTE(review): unlike the earlier preprocessing cell, no filtering or count
# normalisation runs before `log1p` here — confirm this is intended.
gex = mdata["gex"]
sc.pp.log1p(gex)
sc.pp.pca(gex, svd_solver="arpack")
sc.pp.neighbors(gex)
sc.tl.umap(gex)

# Receptor side: index the chains, then run the chain QC.
ir.pp.index_chains(mdata)
ir.tl.chain_qc(mdata)

In [None]:
# Three GEX UMAP panels in one row: Cd44 expression, sample, receptor type.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4), gridspec_kw={"wspace": 0.5})
ax0, ax1, ax2 = axes
umap_basis = "gex:umap"
# All but the last panel use show=False so the figure is rendered once, at the end.
mu.pl.embedding(mdata, basis=umap_basis, color=["Cd44"], ax=ax0, show=False)
mu.pl.embedding(mdata, basis=umap_basis, color="sample", ax=ax1, show=False)
mu.pl.embedding(mdata, basis=umap_basis, color="airr:receptor_type", ax=ax2)

In [None]:
# Three GEX UMAP panels in one row: Cd8a expression, group, receptor type.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 4), gridspec_kw={"wspace": 0.5})
ax0, ax1, ax2 = axes
umap_basis = "gex:umap"
# All but the last panel use show=False so the figure is rendered once, at the end.
mu.pl.embedding(mdata, basis=umap_basis, color=["Cd8a"], ax=ax0, show=False)
mu.pl.embedding(mdata, basis=umap_basis, color="group", ax=ax1, show=False)
mu.pl.embedding(mdata, basis=umap_basis, color="airr:receptor_type", ax=ax2)

In [None]:
# Display the MuData object's summary as the cell output.
mdata

In [None]:
# Gene lists used to colour the UMAP plots. Presumably marker sets for different
# T-cell states (the prefixes are not explained in this notebook) — confirm.
tisg_genes = [
    "Isg15",
    "Ifi44l",
    "Ifit3",
    "Ifit1",
]
tn_genes = [
    "Ltb",
    "Sell",
    "Lef1",
    "Nosip",
]
trm_genes = [
    "Itga1",
    "Cpne7",
    "Txnip",
    "Il7r",
]
tsen_genes = [
    "Plcg2",
    "Igkc",
    "Klrk1",
]
tsrt_genes = [
    "Hspa1b",
    "Hspa1a",
    "Dnajb1",
    "Hsph1",
]

In [None]:
# Two GEX UMAP figures with identical layout: first the metadata columns,
# then the Cd8a / Cd44 expression.
for panel_colors in (["group", "sample"], ["Cd8a","Cd44"]):
    mu.pl.embedding(
        mdata,
        basis="gex:umap",
        color=panel_colors,
        ncols=3,
        wspace=0.7,
    )

In [None]:
# GEX UMAP, one panel per gene in `tisg_genes`, three panels per row.
mu.pl.embedding(mdata, basis="gex:umap", color=tisg_genes, ncols=3, wspace=0.7)

In [None]:
# GEX UMAP, one panel per gene in `tn_genes`, three panels per row.
mu.pl.embedding(mdata, basis="gex:umap", color=tn_genes, ncols=3, wspace=0.7)

In [None]:
# Run scirpy's receptor chain QC on the data again.
ir.tl.chain_qc(mdata)

In [None]:
# Display the MuData object's summary as the cell output.
mdata

In [None]:
# `clonal_expansion` summarises the clonotype assignment, and the plot that follows
# groups by "airr:clone_id". No earlier cell visible here defines clonotypes, so
# compute them first when the column is missing: receptor distances with
# `ir_dist`, then `define_clonotypes` with scirpy's default settings.
if "clone_id" not in mdata["airr"].obs:
    ir.pp.ir_dist(mdata)
    ir.tl.define_clonotypes(mdata)
ir.tl.clonal_expansion(mdata)

In [None]:
# Abundance per clone id, split by group; `max_cols=10` limits how many are shown.
_ = ir.pl.group_abundance(mdata, groupby="airr:clone_id", target_col="group", max_cols=10)

In [None]:
# Abundance of receptor subtypes, split by group.
_ = ir.pl.group_abundance(mdata, groupby="airr:receptor_subtype", target_col="group")

In [None]:
# Abundance of receptor subtypes, split by sample.
_ = ir.pl.group_abundance(mdata, groupby="airr:receptor_subtype", target_col="sample")

In [None]:
# Abundance of chain-pairing categories, split by group. The return value is
# bound to `_` so it is not echoed as the cell output.
_ = ir.pl.group_abundance(mdata, groupby="airr:chain_pairing", target_col="group")

In [None]:
# Abundance of chain-pairing categories, split by sample. The return value is
# bound to `_` so it is not echoed as the cell output.
_ = ir.pl.group_abundance(mdata, groupby="airr:chain_pairing", target_col="sample")