In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt

In [None]:
# Logging verbosity scale: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Emit scanpy's logging header.
sc.logging.print_header()
# Global figure defaults used by the plots that follow.
sc.settings.set_figure_params(
    facecolor='white',
    dpi=80,
)

In [None]:
# Load the filtered count matrix from a pickle, transpose it, and wrap it in an AnnData object.
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
df_data = pd.read_pickle('count_matrix_filtered.pkl').T 
adata = sc.AnnData(df_data)
# NOTE(review): `.var` is later searched for 'mt-' gene-name prefixes, so it appears to hold
# genes, yet its index is labelled "Cell indices" here — confirm the intended label/orientation.
adata.var.index.name = "Cell indices"
# Ensure variable names are unique.
adata.var_names_make_unique()

In [None]:
# Show the 20 most highly expressed genes.
sc.pl.highest_expr_genes(
    adata,
    n_top=20,
)

# Boolean 'mt' column: True for every variable whose name starts with 'mt-'.
adata.var['mt'] = adata.var_names.str.startswith('mt-')

# Compute QC metrics in place, with 'mt' as an extra QC variable group.
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)

# Violin plots of three of the QC columns produced above, one panel each.
sc.pl.violin(
    adata,
    ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
    multi_panel=True,
    jitter=0.4,
)

In [None]:
# Keep a snapshot of the current (pre-normalisation) data in `.raw`, then normalise each
# cell to a total of 1e4 and apply log1p.
# NOTE(review): `.raw` is captured BEFORE normalisation here. If downstream plotting or
# ranking calls read from `.raw` by default, they would use un-normalised values — confirm
# this is intended (otherwise assign `.raw` after `log1p`).
adata.raw = adata
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
# Principal component analysis using the ARPACK solver.
sc.tl.pca(
    adata,
    svd_solver='arpack',
)

# Diagnostics: PCA scatter coloured by Sst, then the variance ratio per component (log scale).
sc.pl.pca(adata, color='Sst')
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Neighbourhood graph: 10 neighbours, 40 principal components.
sc.pp.neighbors(
    adata,
    n_pcs=40,
    n_neighbors=10,
)

# UMAP embedding (min_dist=0.6) followed by Leiden clustering (resolution=0.1).
sc.tl.umap(
    adata,
    min_dist=0.6,
)
sc.tl.leiden(
    adata,
    resolution=0.1,
)

In [None]:
# High-resolution, frameless figures with a small font for the multi-panel UMAP.
sc.set_figure_params(
    dpi=200,
    frameon=False,
    figsize=(6, 6),
    facecolor='white',
    fontsize=6,
)

# One UMAP panel per key, two per row. The four gene panels have their colour scale
# capped at 'p99'; the 'leiden' panel gets no cap (None).
sc.pl.umap(
    adata,
    color=['Slc17a6', 'Gad1', 'Mbp', 'Aldh1a1', 'leiden'],
    vmax=['p99', 'p99', 'p99', 'p99', None],
    ncols=2,
)

In [None]:
# Genes shown in the marker plots that take this list (dot plot, stacked violin).
# Every gene is listed exactly once: the original list repeated 'Pitx2' and 'Ntsr1',
# so each of those genes was passed twice to every plot. First-occurrence order is kept.
marker_genes = [
    # Drd* family
    'Drd1',
    'Drd2',
    'Drd3',
    'Drd4',
    'Drd5',
    # Slc* family
    'Slc6a3',
    'Slc6a4',
    'Slc32a1',
    'Slc17a8',
    'Slc17a6',
    # Htr* family
    'Htr7',
    'Htr6',
    'Htr5b',
    'Htr5a',
    'Htr4',
    'Htr3b',
    'Htr3a',
    'Htr2c',
    'Htr2b',
    'Htr2a',
    'Htr1d',
    'Htr1b',
    'Htr1a',
    # Remaining markers
    'Pitx2',
    'Ntsr1',
    'Grp',
    'Tac1',
    'Cbln2',
    'Camk2a',
    'Cntnap5a',
    'Cacna2d1',
    'Lypd1',
    'Ntng2',
    'Pvalb',
    'Sst',
    'Vip',
    'Tshz3',
    'Tmem163',
    'Tcf7l2',
]

In [None]:
# Same figure defaults as the UMAP panels, but with a larger font for the dot plot labels.
sc.set_figure_params(
    dpi=200,
    frameon=False,
    figsize=(6, 6),
    facecolor='white',
    fontsize=12,
)

# Dot plot of the marker genes, grouped by Leiden cluster.
sc.pl.dotplot(
    adata,
    marker_genes,
    groupby='leiden',
)

In [None]:
# Older code, kept for reference (this cell and the ones below it).

# Rank genes per Leiden cluster with the 'logreg' method, then plot the top 25 per group
# with independent y-axes.
sc.tl.rank_genes_groups(
    adata,
    'leiden',
    method='logreg',
)
sc.pl.rank_genes_groups(
    adata,
    sharey=False,
    n_genes=25,
)

# Stacked violin of the marker genes per Leiden cluster, with labels rotated 90 degrees.
sc.pl.stacked_violin(
    adata,
    marker_genes,
    groupby='leiden',
    rotation=90,
)

In [None]:
# Equivalent to Seurat NormalizeData: scale each cell to 10,000 total counts (excluding
# highly expressed genes from the size factor), then log1p-transform.
# NOTE(review): an earlier cell already runs normalize_total + log1p on `adata`; running
# this cell afterwards transforms the data a second time — confirm that is intended.
sc.pp.normalize_total(adata, target_sum=10_000, exclude_highly_expressed=True)
# `sc.pp.log1p` called on an AnnData without `copy=True` works in place and returns None,
# so the original `lognormadata = sc.pp.log1p(adata)` bound None. Keep the in-place
# transform and bind the name to the transformed object instead (same object as `adata`).
sc.pp.log1p(adata)
lognormadata = adata

In [None]:
# Equivalent to Seurat FindVariableGenes: flag the top 2000 highly variable genes.
sc.pp.highly_variable_genes(
    adata,
    n_top_genes=2000,
)

In [None]:
# Equivalent to Seurat RunPCA: PCA restricted to the genes flagged as highly variable.
# This relies on the highly-variable-gene annotation produced by the preceding cell.
# NOTE(review): newer scanpy releases may deprecate `use_highly_variable` in favour of a
# mask argument — confirm against the installed version.
sc.pp.pca(adata, use_highly_variable=True)

In [None]:
# Equivalent to using fastMNN (mutual-nearest-neighbours batch correction).
# NOTE(review): only a single AnnData is passed and the return value is discarded, so this
# call presumably has no effect on `adata` — confirm how batches should be supplied
# (e.g. one AnnData per batch) and capture the corrected result.
sc.external.pp.mnn_correct(adata)

In [None]:
# t-SNE overview: one panel per marker gene (four per row), then the Leiden clusters.
# No earlier cell in the visible notebook computes a t-SNE embedding, so plotting fails on
# a fresh kernel. Compute it when it is missing, using the perplexity of 5 from the
# original comment; an embedding that already exists is left untouched.
if 'X_tsne' not in adata.obsm:
    sc.tl.tsne(adata, perplexity=5)

sc.pl.tsne(
    adata,
    color=['Slc17a6', 'Gad1', 'Mbp', 'Pdgfra', 'Aldh1a1', 'Cx3cr1', 'Cldn5', 'Foxc1', 'Pitx2', 'Cbln2', 'Drd1', 'Tac1'],
    ncols=4,
)
sc.pl.tsne(adata, color=['leiden'])