In [None]:
import os
import sys
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import session_info

In [None]:
# Data locations: outputs are written next to the input .h5ad file.
input_dir = '/path/to/data/h5ad/'
output_dir = input_dir
input_adata = os.path.join(input_dir, '01_adata_cd45.h5ad')

In [None]:
# Load the AnnData object from the .h5ad file configured above.
adata = sc.read_h5ad(input_adata)

In [None]:
# Keep a copy of the matrix as loaded in the 'counts' layer, since adata.X is
# normalized, log-transformed and scaled in place later in this notebook.
# NOTE(review): presumably adata.X holds raw counts at this point — confirm.
adata.layers['counts'] = adata.X.copy()

In [None]:
# Show which per-cell annotation columns are available in adata.obs.
adata.obs.columns

In [None]:
# Number of cells per value of the 'solo_prediction' column.
adata.obs['solo_prediction'].value_counts()

In [None]:
# Number of cells per value of the 'side' column.
adata.obs['side'].value_counts()

In [None]:
# Keep only cells labelled 'singlet' that come from the 'Ipsilateral' side.
is_singlet = adata.obs['solo_prediction'] == 'singlet'
is_ipsilateral = adata.obs['side'] == 'Ipsilateral'
adata = adata[is_singlet & is_ipsilateral].copy()

# Sanity check: only 'Ipsilateral' cells should remain.
adata.obs['side'].value_counts()

In [None]:
# Number of remaining cells per sample.
adata.obs['sample_id'].value_counts()

In [None]:
# Boolean gene flags consumed by calculate_qc_metrics via qc_vars.
gene_names = adata.var_names
# Mitochondrial genes: lower-case 'mt-' prefix (mouse gene naming).
adata.var['mt'] = gene_names.str.startswith('mt-')
# Ribosomal genes: names beginning with Rpl/Rps immediately followed by a digit.
adata.var['ribosomal'] = gene_names.str.match(r'^(Rpl|Rps)\d+')

In [None]:
# Compute QC metrics and store them on adata (inplace=True). The gene flags in
# qc_vars produce the per-cell percentage columns (e.g. pct_counts_mt) that the
# filtering step relies on; top-gene percentages and log1p variants are skipped.
qc_flag_columns = ['mt', 'ribosomal']
sc.pp.calculate_qc_metrics(
    adata, qc_vars=qc_flag_columns, percent_top=None, log1p=False, inplace=True
)

In [None]:
# Cell-level QC: require more than 750 total counts and less than 20%
# mitochondrial counts.
has_enough_counts = adata.obs['total_counts'] > 750
has_low_mt_fraction = adata.obs['pct_counts_mt'] < 20
adata = adata[has_enough_counts & has_low_mt_fraction].copy()

In [None]:
# Drop genes detected in fewer than 5 cells, then depth-normalize and
# log-transform the expression matrix in place.
sc.pp.filter_genes(adata, min_cells=5)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Freeze the log-normalized matrix before scaling. sc.pp.scale overwrites
# adata.X with per-gene z-scores clipped at 10, which is appropriate for PCA
# but not for differential expression: sc.tl.rank_genes_groups expects
# logarithmized data and reads adata.raw when it is set. Without this, the
# marker-gene ranking later in the notebook would run on scaled values and
# report meaningless log fold changes. Gene-expression plots also default to
# adata.raw once it exists.
adata.raw = adata.copy()

# Scale for PCA, then build the neighbor graph on the first 10 PCs.
sc.pp.scale(adata, max_value=10)
# Seeds are spelled out (0 is scanpy's default, so results are unchanged)
# because the hard-coded cluster -> cell type annotation that follows depends
# on the exact Leiden cluster numbering.
sc.pp.pca(adata, n_comps=50, svd_solver='arpack', random_state=0)
sc.pl.pca_variance_ratio(adata, log=True)
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=10)
sc.tl.umap(adata, random_state=0)
sc.tl.leiden(adata, resolution=1, random_state=0)

In [None]:
# Number of cells per experimental group after QC filtering.
adata.obs['group'].value_counts()

In [None]:
# UMAP coloured by Leiden cluster and by a panel of genes used to interpret
# the clusters.
marker_genes = [
    'Rbfox3', 'Ptprc', 'Gfap', 'Mrc1', 'Tmem119', 'Hexb',
    'P2ry12', 'Apoe', 'H2-Ab1', 'Mbp', 'Itgax',
]
sc.pl.umap(adata, color=['leiden'] + marker_genes)

In [None]:
# Manual annotation: Leiden cluster label -> cell type.
# NOTE(review): these labels are tied to the cluster numbering of one specific
# clustering run — re-check them whenever the upstream steps change.
cell_type = {
    "0": "Microglia",
    "1": "Microglia",
    "2": "Microglia",
    "3": "Microglia",
    "4": "Microglia",
    "5": "Neuron",
    "6": "Neuron",
    "7": "Neuron",
    "8": "Microglia",
    "9": "Microglia",
    "10": "Neuron",
    "11": "PVM",
    "12": "Oligo",
    "13": "Astrocyte",
}

# Series.map yields NaN for any cluster missing from the dict, and NaN cells
# would then be silently excluded by the later `== 'Microglia'` subset. Fail
# loudly instead so the annotation is updated when the clustering changes.
unmapped_clusters = sorted(set(adata.obs['leiden'].astype(str)) - set(cell_type))
if unmapped_clusters:
    raise ValueError(
        f"Leiden clusters without a cell_type annotation: {unmapped_clusters}"
    )

# Store as categorical so plotting and .h5ad writing do not need to convert it.
adata.obs['cell_type'] = adata.obs['leiden'].map(cell_type).astype('category')

In [None]:
# UMAP coloured by the manual cell type annotation.
sc.pl.umap(adata, color=['cell_type'])

In [None]:
# Rank marker genes for every Leiden cluster with a Wilcoxon rank-sum test,
# then plot the top-ranked genes per cluster.
# scanpy reads adata.raw here when it is set and adata.X otherwise.
# NOTE(review): rank_genes_groups is documented to expect logarithmized data —
# confirm the matrix it reads is log-normalized rather than scaled.
sc.tl.rank_genes_groups(adata, method='wilcoxon', groupby='leiden')
sc.pl.rank_genes_groups(adata, fontsize=16)

In [None]:
# Extract the cells annotated as microglia into an independent AnnData object.
is_microglia = adata.obs['cell_type'] == 'Microglia'
microglia = adata[is_microglia].copy()

In [None]:
# Display the AnnData summary of the microglia subset.
microglia

In [None]:
# Number of microglia per experimental group.
microglia.obs['group'].value_counts()

In [None]:
# Optional (disabled): also save the full annotated object.
# adata.write(os.path.join(output_dir, '02_adata-cd45-annotated.h5ad'))

# Save the microglia subset for downstream notebooks.
microglia_path = os.path.join(output_dir, '02_mg-annotated.h5ad')
microglia.write(microglia_path)