In [None]:
import anndata
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc

matplotlib.rcParams.update({'font.size': 12})
%config InlineBackend.figure_format = 'retina'

#Load in the integrated spleen dataset
adata_pl_raw=anndata.read_h5ad('/home/nikvaku/snic2022-6-312/LabMemberScratchDir/Nikhilesh/Inter_data/spleen_merged_raw.h5ad')

In [1]:
#Subset for B cells
# Keep the cells whose 'leiden_spleen' label is one of the clusters selected as B cells.
# Slicing an AnnData returns a view of adata_pl_raw; .copy() turns the selection into an
# independent object so the in-place filtering that follows does not have to implicitly
# copy a view (which scanpy/anndata warn about) and never touches adata_pl_raw.
b_cell_clusters = ['0', '5', '7', '1', '8']
adata_b_cells = adata_pl_raw[adata_pl_raw.obs['leiden_spleen'].isin(b_cell_clusters)].copy()

NameError: name 'adata_pl_raw' is not defined

In [None]:
#Preprocess the data
# Gene names treated as mitochondrial when computing the QC metrics below.
mt_gene_patterns = ['COX1', 'COX2', 'ATP8', 'ATP6', 'COX3', 'NU1M', 'NU2M', 'NU3M', 'NU4M', 'NU4LM', 'NU5M', 'NU6M', 'CYB']
# str.match only anchors at the START of the name, so a plain 'A|B|...' alternation also
# flags every gene that merely begins with one of these names (names such as COX10,
# CYB5A or ATP6V1A would match) and inflates pct_counts_mt. Anchoring the end makes the
# match exact.
# NOTE(review): assumes var_names are the bare gene names listed above; relax the
# trailing '$' if the names in this dataset carry a suffix.
mt_gene_pattern = '(?:' + '|'.join(mt_gene_patterns) + ')$'

# Drop cells with fewer than 400 detected genes, then genes seen in fewer than 3 cells.
sc.pp.filter_cells(adata_b_cells, min_genes=400)
sc.pp.filter_genes(adata_b_cells, min_cells=3)

# Flag mitochondrial genes and compute per-cell QC metrics (adds obs['pct_counts_mt']).
adata_b_cells.var['mt'] = adata_b_cells.var_names.str.match(mt_gene_pattern)
sc.pp.calculate_qc_metrics(adata_b_cells, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# Keep cells with < 10 % mitochondrial counts. .copy() materialises the result so the
# in-place normalisation steps that follow operate on a real AnnData, not a view.
adata_b_cells = adata_b_cells[adata_b_cells.obs.pct_counts_mt < 10, :].copy()
adata_b_cells.var_names_make_unique()

In [None]:
#Normalize the data
# Scale every cell to 10,000 total counts, then log1p-transform.
sc.pp.normalize_total(adata_b_cells, target_sum=1e4)
sc.pp.log1p(adata_b_cells)

# Snapshot the log-normalised matrix in .raw BEFORE scaling. sc.pp.scale overwrites .X
# with clipped z-scores; scanpy's rank_genes_groups and gene-coloured plots use .raw
# when it is present, so differential expression and marker plots are then based on
# log-normalised expression instead of scaled values.
adata_b_cells.raw = adata_b_cells

# Scale each gene (values clipped at 10) and run a 50-component PCA on the scaled matrix.
sc.pp.scale(adata_b_cells, max_value=10)
sc.pp.pca(adata_b_cells, n_comps=50,svd_solver='arpack')
sc.pl.pca_variance_ratio(adata_b_cells, n_pcs=50)

In [None]:
#Clustering

#UMAP
# Build the neighbourhood graph (10 neighbours, 20 PCs), compute the UMAP embedding,
# and colour it by the existing 'leiden_spleen' labels.
sc.pp.neighbors(
    adata_b_cells,
    n_pcs=20,
    n_neighbors=10,
)
sc.tl.umap(adata_b_cells)
sc.pl.umap(
    adata_b_cells,
    color=['leiden_spleen'],
)

In [None]:
#DE analysis
# Wilcoxon rank-sum ranking of genes for each 'leiden_spleen' group; plot the top 25 per group.
sc.tl.rank_genes_groups(
    adata_b_cells,
    'leiden_spleen',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata_b_cells,
    sharey=False,
    n_genes=25,
)

# Leiden clustering of the B-cell subset (stored in obs['leiden']), shown next to 'assignment'.
sc.tl.leiden(adata_b_cells, resolution=0.5)
sc.pl.umap(
    adata_b_cells,
    color=['leiden', 'assignment'],
)

In [None]:
#Batch correction using Harmony
# Integrate across the 'assignment' groups; the corrected embedding is read back below
# from obsm['X_pca_harmony'].
sc.external.pp.harmony_integrate(
    adata_b_cells,
    key='assignment',
    max_iter_harmony=100,
)

#Rebuild the neighbour graph, UMAP and Leiden clusters on the Harmony embedding
sc.pp.neighbors(
    adata_b_cells,
    use_rep='X_pca_harmony',
    n_pcs=20,
    n_neighbors=10,
)
sc.tl.umap(adata_b_cells)
sc.tl.leiden(adata_b_cells, resolution=0.5)
sc.pl.umap(
    adata_b_cells,
    color=['leiden', 'leiden_spleen', 'assignment'],
)

In [None]:
#Remove cells in cluster 6,11
# The clusters to keep are listed explicitly. .copy() materialises the subset so the
# in-place steps below do not operate on a view of the previous object.
b_clusters_to_keep = ['0','1','2','3','5','7','4','8','9','10','12','13']
adata_b_cells = adata_b_cells[adata_b_cells.obs['leiden'].isin(b_clusters_to_keep)].copy()

#Batch correction using Harmony
# An uncorrected neighbors/UMAP/Leiden pass is intentionally NOT run before this step:
# its graph, embedding and labels would all be overwritten by the Harmony-based pass
# below before being plotted or used, so it would only cost compute time.
# NOTE: PCA is not recomputed for the subset; Harmony corrects the existing obsm['X_pca'].
sc.external.pp.harmony_integrate(adata_b_cells, key='assignment', max_iter_harmony=100)

#Recluster on the Harmony-corrected embedding
sc.pp.neighbors(adata_b_cells, n_neighbors=10, n_pcs=20,use_rep='X_pca_harmony')
sc.tl.umap(adata_b_cells)
sc.tl.leiden(adata_b_cells, resolution=0.5)
sc.pl.umap(adata_b_cells,color=['leiden','leiden_spleen','assignment'])

#Plot the markers
sc.pl.umap(adata_b_cells,color=['PTPRC','CD79A','CD79B','CD3E','leiden'],legend_loc='on data')

#DE analysis
# Wilcoxon ranking of genes per new Leiden cluster; plot the top 25 genes per cluster.
sc.tl.rank_genes_groups(adata_b_cells, 'leiden', method='wilcoxon')
sc.pl.rank_genes_groups(adata_b_cells, n_genes=25, sharey=False)

In [None]:
#Subset for T cells
# Keep the cells whose 'leiden_spleen' label is one of the clusters selected as T cells.
# .copy() makes the selection an independent AnnData rather than a view of adata_pl_raw,
# so the in-place filtering below neither warns nor relies on an implicit copy.
t_cell_clusters = ['2','14','12']
adata_t_cells = adata_pl_raw[adata_pl_raw.obs['leiden_spleen'].isin(t_cell_clusters)].copy()

#Preprocess the data
# Gene names treated as mitochondrial when computing the QC metrics below.
mt_gene_patterns = ['COX1', 'COX2', 'ATP8', 'ATP6', 'COX3', 'NU1M', 'NU2M', 'NU3M', 'NU4M', 'NU4LM', 'NU5M', 'NU6M', 'CYB']
# str.match only anchors at the START of the name, so a plain 'A|B|...' alternation also
# flags every gene that merely begins with one of these names (names such as COX10,
# CYB5A or ATP6V1A would match) and inflates pct_counts_mt. Anchoring the end makes the
# match exact.
# NOTE(review): assumes var_names are the bare gene names listed above; relax the
# trailing '$' if the names in this dataset carry a suffix.
mt_gene_pattern = '(?:' + '|'.join(mt_gene_patterns) + ')$'
# Drop cells with fewer than 400 detected genes, then genes seen in fewer than 3 cells.
sc.pp.filter_cells(adata_t_cells, min_genes=400)
sc.pp.filter_genes(adata_t_cells, min_cells=3)
# Flag mitochondrial genes and compute per-cell QC metrics (adds obs['pct_counts_mt']).
adata_t_cells.var['mt'] = adata_t_cells.var_names.str.match(mt_gene_pattern)
sc.pp.calculate_qc_metrics(adata_t_cells, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
# Keep cells with < 10 % mitochondrial counts; copy so later in-place steps see a real object.
adata_t_cells = adata_t_cells[adata_t_cells.obs.pct_counts_mt < 10, :].copy()
adata_t_cells.var_names_make_unique()

#Normalize the data
sc.pp.normalize_total(adata_t_cells, target_sum=1e4)
sc.pp.log1p(adata_t_cells)
# Snapshot the log-normalised matrix in .raw BEFORE scaling. sc.pp.scale overwrites .X
# with clipped z-scores; scanpy's rank_genes_groups and gene-coloured plots use .raw
# when it is present, so they are then based on log-normalised expression.
adata_t_cells.raw = adata_t_cells
sc.pp.scale(adata_t_cells, max_value=10)
sc.pp.pca(adata_t_cells, n_comps=50,svd_solver='arpack')
sc.pl.pca_variance_ratio(adata_t_cells, n_pcs=50)

#UMAP
sc.pp.neighbors(adata_t_cells, n_neighbors=10, n_pcs=20)
sc.tl.umap(adata_t_cells)
sc.pl.umap(adata_t_cells,color=['leiden_spleen'])

#DE analysis
# Wilcoxon ranking of genes per 'leiden_spleen' group; plot the top 25 genes per group.
sc.tl.rank_genes_groups(adata_t_cells, 'leiden_spleen', method='wilcoxon')
sc.pl.rank_genes_groups(adata_t_cells, n_genes=25, sharey=False)

#Clustering again
sc.tl.leiden(adata_t_cells, resolution=0.5)
sc.pl.umap(adata_t_cells,color=['leiden','leiden_spleen','assignment'])

In [None]:
#Batch correction using Harmony
# Integrate across the 'assignment' groups; the corrected embedding is read back below
# from obsm['X_pca_harmony'].
sc.external.pp.harmony_integrate(
    adata_t_cells,
    key='assignment',
    max_iter_harmony=100,
)

#Rebuild the neighbour graph, UMAP and Leiden clusters on the Harmony embedding
sc.pp.neighbors(
    adata_t_cells,
    use_rep='X_pca_harmony',
    n_pcs=20,
    n_neighbors=10,
)
sc.tl.umap(adata_t_cells)
sc.tl.leiden(adata_t_cells, resolution=0.5)
sc.pl.umap(
    adata_t_cells,
    color=['leiden', 'leiden_spleen', 'assignment'],
    legend_loc='on data',
)

In [None]:
#Further subset for T cells based on T cell marker expression
# Keep only the listed Leiden clusters. .copy() materialises the subset: harmony_integrate
# and the steps after it write into .obsm/.obs/.uns, which should not happen on a view.
t_clusters_to_keep = ['0','3','6','8','9','10','7']
adata_t_cells = adata_t_cells[adata_t_cells.obs['leiden'].isin(t_clusters_to_keep)].copy()

#Recluster after batch correction
# NOTE: PCA is not recomputed for the subset; Harmony corrects the existing obsm['X_pca'].
sc.external.pp.harmony_integrate(adata_t_cells, key='assignment', max_iter_harmony=100)
sc.pp.neighbors(adata_t_cells, n_neighbors=10, n_pcs=20,use_rep='X_pca_harmony')
sc.tl.umap(adata_t_cells)
sc.tl.leiden(adata_t_cells, resolution=0.5)

#DEG
# Wilcoxon ranking of genes per new Leiden cluster; plot the top 25 genes per cluster.
sc.tl.rank_genes_groups(adata_t_cells, 'leiden', method='wilcoxon')
sc.pl.rank_genes_groups(adata_t_cells, n_genes=25, sharey=False)

In [None]:
# Persist the processed B-cell and T-cell objects as .h5ad files (B cells first).
final_outputs = (
    (adata_b_cells, '/home/nikvaku/snic2022-6-312/LabMemberScratchDir/Nikhilesh/Final_Data/spleen_b_cells.h5ad'),
    (adata_t_cells, '/home/nikvaku/snic2022-6-312/LabMemberScratchDir/Nikhilesh/Final_Data/spleen_t_cells.h5ad'),
)
for processed_subset, destination in final_outputs:
    processed_subset.write_h5ad(destination)