In [2]:
#Import packages
import os
import pandas as pd
from json import dumps
from matplotlib.pyplot import rc_context 
import scanpy as sc
import leidenalg

In [None]:
# Load the AnnData object from an .h5ad file and display its summary.
# scanpy is already imported as `sc` in the imports cell at the top of the
# notebook, so it is not imported again here.
# NOTE(review): `file_name` is not defined in any visible cell; set it to the
# path of the input .h5ad file before running this cell.
adata = sc.read_h5ad(file_name)
adata

Plot a UMAP

In [None]:
# Colour the UMAP by the 'clusters' annotation; this expects a UMAP embedding
# to be stored in the loaded object already, since none is computed on `adata`.
sc.pl.umap(adata, color=['clusters'])

In [None]:
# List the distinct values of the 'sample' column in the cell annotations.
adata.obs['sample'].unique()

Dropping columns

In [None]:
# Show the cell-annotation (obs) column names before any columns are dropped.
adata.obs.columns

In [None]:
# Remove the curated-annotation columns and 'clusters' from the cell
# annotations in a single pass instead of six separate copy-and-reassign
# steps. errors='ignore' skips any column that is not present, so the cell
# can be re-run without raising.
adata.obs = adata.obs.drop(
    columns=[
        'kw_curated_cell_type',
        'kw_curated_marker_present',
        'kw_curated_marker_absent',
        'curated_cell_ontology_id',
        'clusters',
        'kw_curated_raw_cell_type',
    ],
    errors='ignore',
)
# Check that the columns are gone.
adata.obs.columns

In [35]:
# Subset cells whose value in one obs column is in a given list of values.
# NOTE(review): the column name ('') and the value list ([]) are both empty
# placeholders and must be filled in; as written, isin([]) matches no cells.
adata_all = adata[adata.obs[''].isin([])]

In [None]:
# Compare the maximum value in the current matrix (X) with the maximum in the
# matrix stored under .raw.
print(adata_all.X.max())
adata_all.raw.X.max()

In [None]:
# Rebuild adata_all from its .raw attribute, then re-check the maximum of X.
adata_all = adata_all.raw.to_adata()
adata_all.X.max()

In [None]:
# Store the current state of adata_all under .raw before any filtering or
# normalisation, then show the dimensions of X.
adata_all.raw = adata_all
adata_all.X.shape

Filter cells and genes on the basis of:
- A minimum number of genes detected in each cell
- Each gene being detected in at least 3 cells

In [39]:
# Drop cells with fewer than 200 detected genes (min_genes=200) ...
sc.pp.filter_cells(adata_all, min_genes=200)
# ... and genes detected in fewer than 3 cells (min_cells=3).
sc.pp.filter_genes(adata_all, min_cells=3)

In [None]:
# Dimensions of X after the cell/gene filtering above.
adata_all.X.shape

In [None]:
# Histogram of the per-gene 'n_cells' column
# (presumably written by sc.pp.filter_genes above; verify).
adata_all.var['n_cells'].hist()

In [None]:
# Histogram of the per-cell 'n_genes' column
# (presumably written by sc.pp.filter_cells above; verify).
adata_all.obs['n_genes'].hist()

Visually explore the highest expressing genes in the dataset

In [None]:
# Plot the 20 highest-expressed genes (n_top=20).
sc.pl.highest_expr_genes(adata_all, n_top=20)

Checking for mitochondrial genes and accounting for dead cells

In [44]:
# Flag mitochondrial genes as var['mt']: any gene whose name starts with 'MT-'.
# NOTE(review): the 'MT-' prefix is case-sensitive; confirm it matches the
# gene naming used in this dataset.
adata_all.var['mt'] = adata_all.var_names.str.startswith('MT-')

# Compute QC metrics in place, including the metrics for the 'mt' gene group.
sc.pp.calculate_qc_metrics(
    adata_all,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

In [None]:
# Violin plots of the three QC metrics, one panel per metric.
sc.pl.violin(
    adata_all,
    ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    jitter=0.01,
    multi_panel=True,
)

In [None]:
# Scatter total counts against each QC metric before thresholding.
for y_metric in ('pct_counts_mt', 'n_genes_by_counts'):
    sc.pl.scatter(adata_all, x='total_counts', y=y_metric)

In [48]:
# Keep cells with fewer than 5000 detected genes and less than 15 %
# mitochondrial counts. Both thresholds are applied as one mask.
# An optional lower bound (n_genes_by_counts > 750) is left disabled.
# .copy() turns the subset into a standalone AnnData rather than a view, so
# the in-place normalisation steps that follow do not have to copy it
# implicitly.
adata_all = adata_all[
    (adata_all.obs.n_genes_by_counts < 5000) & (adata_all.obs.pct_counts_mt < 15), :
].copy()


In [None]:
# Repeat the QC scatter plots on the thresholded cells.
for y_metric in ('pct_counts_mt', 'n_genes_by_counts'):
    sc.pl.scatter(adata_all, x='total_counts', y=y_metric)

In [None]:
# Dimensions of adata_all after QC thresholding.
adata_all.shape

Normalization

In [None]:
# Normalise every cell to a total of 1e4 counts (target_sum), in place.
sc.pp.normalize_total(adata_all, target_sum=1e4)

In [None]:
# Log-transform the normalised values in place.
sc.pp.log1p(adata_all)
# Flag highly variable genes using the mean/dispersion cut-offs below, then
# plot the selection.
sc.pp.highly_variable_genes(adata_all, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(adata_all)

In [53]:
# Restrict the object to the genes flagged as highly variable.
# .copy() makes the subset a standalone AnnData rather than a view, because
# the regress_out and scale steps that follow modify the matrix in place.
adata_all = adata_all[:, adata_all.var.highly_variable].copy()

In [54]:
#sc.pp.regress_out(adata_all, ['total_counts',])

In [55]:
# Regress out the effect of the 'sample' annotation from the expression matrix.
# NOTE(review): presumably 'sample' is a categorical obs column; confirm its
# dtype, since regress_out treats categorical and numeric keys differently.
sc.pp.regress_out(adata_all, ['sample',])

In [56]:
# Scale the matrix in place; max_value=10 caps the scaled values at 10.
sc.pp.scale(adata_all, max_value=10)

PCA and Clustering

In [None]:
# Run PCA with the ARPACK solver, then plot the components coloured by sample.
sc.tl.pca(adata_all, svd_solver='arpack')
sc.pl.pca(adata_all, color='sample')

In [None]:
# Plot the variance ratio of each principal component.
sc.pl.pca_variance_ratio(adata_all)

In [62]:
# Build the neighbourhood graph from the first 13 PCs with 10 neighbours per cell.
# NOTE(review): n_pcs=13 is presumably read off the variance-ratio plot above; confirm.
sc.pp.neighbors(adata_all, n_neighbors=10, n_pcs=13)

In [8]:
# Compute the UMAP embedding with default settings.
sc.tl.umap(adata_all)

In [None]:
# Compute the t-SNE embedding with default settings.
sc.tl.tsne(adata_all)

In [9]:
# Cluster the cells with the Leiden algorithm at default settings; the later
# plots group by the resulting 'leiden' annotation.
sc.tl.leiden(adata_all)

In [67]:
# Save the clustered object to disk.
# NOTE(review): the next cell loads 'change.h5ad', not this file; confirm
# which file the marker analysis is meant to start from.
adata_all.write_h5ad("id_lieden.h5ad")

In [7]:
# Reload a saved object for the marker analysis.
# NOTE(review): 'change.h5ad' looks like a placeholder path and differs from
# the file written in the previous cell ("id_lieden.h5ad"); verify.
adata_all = sc.read_h5ad('change.h5ad')

Defining markers

In [13]:
# Flat list of marker gene names used by the plots below (empty placeholder; fill in).
markers = []

In [14]:
# Mapping of a group label to its list of marker genes, used by the grouped
# dot plot below (empty placeholder; fill in).
marker_genes_dict = {"":[]}

In [15]:
# Print every marker gene that is not among the highly variable genes.
# `highly_variable.keys()` returns the index of every gene in `var`, whatever
# its True/False flag, so select only the flagged genes here. The check then
# matches its stated purpose even when adata_all has not been subset to
# highly variable genes.
hvg = adata_all.var.index[adata_all.var.highly_variable.to_numpy()]
# To check against every gene in the object instead, use adata_all.var.index.

for gene in markers:
    if gene not in hvg:
        print(gene)

In [None]:

# Draw one UMAP per marker gene at a 10x10 figure size, with outlined points.
with rc_context({'figure.figsize': (10, 10)}):
    sc.pl.umap(
        adata_all,
        color=markers,
        add_outline=True,
        legend_fontoutline=4,
        legend_loc="on data",
    )

In [None]:
# Stacked violin plot of the marker genes per Leiden cluster, labels rotated 90 degrees.
sc.pl.stacked_violin(adata_all, markers, groupby='leiden', rotation=90)

In [None]:
# Dot plot of the marker genes per Leiden cluster.
sc.pl.dotplot(adata_all, markers, groupby='leiden')

In [None]:
# Violin plots of each marker gene, grouped by Leiden cluster.
sc.pl.violin(adata_all, markers, groupby='leiden')

In [None]:
# Compute the dendrogram over the Leiden clusters first, so the grouped dot
# plot can draw it alongside the clusters.
sc.tl.dendrogram(adata_all, groupby='leiden')
sc.pl.dotplot(
    adata_all,
    marker_genes_dict,
    groupby='leiden',
    dendrogram=True,
)

In [None]:
# UMAP coloured by Leiden cluster, with cluster labels drawn on the plot.
# rc_context is already imported in the imports cell at the top of the
# notebook, so it is not imported again here.
with rc_context({'figure.figsize': (10, 10)}):
    sc.pl.umap(
        adata_all,
        color=['leiden'],
        legend_loc="on data",
        add_outline=True,
        legend_fontoutline=4,
    )

In [None]:
# t-SNE coloured by Leiden cluster, with cluster labels drawn on the plot.
# rc_context is already imported in the imports cell at the top of the
# notebook, so it is not imported again here.
with rc_context({'figure.figsize': (10, 10)}):
    sc.pl.tsne(
        adata_all,
        color=['leiden'],
        legend_loc="on data",
        add_outline=True,
        legend_fontoutline=4,
    )