# Mid-gestation fetal cortex dataset: Pseudotime ordering

__Upstream Steps__

* QC filter on cells
* Expression filter on genes
* Normalization and log10 transformation by Scanpy
* HVG by Triku
* Integration by Harmony
* Dimensionality reduction after integration
* Cluster identification
* Cluster characterization

__This notebook__

* Define a pseudotime ordering of the cells and identify the most appropriate way to compute it



----

# 1. Environment Set Up

## 1.1 Library import

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
import igraph as ig
from scipy.sparse import csr_matrix, isspmatrix
from datetime import datetime

from gprofiler import GProfiler

In [None]:
# Set scanpy's logging verbosity to level 3.
sc.settings.verbosity = 3
# Apply scanpy's figure defaults with a resolution of 80 dpi.
sc.settings.set_figure_params(dpi=80)

## 1.2 Computation start time

In [None]:
print(datetime.now())

## 1.3 Result file

In [None]:
# Output path for the AnnData object written in section 5.1. It must be
# defined here: with both assignments commented out, adata.write(results_file)
# fails with a NameError.
# Earlier location, kept for reference:
#results_file = '/home/..../brainomics/Dati/4_AdataClusters.h5ad'
results_file = '/group/brainomics/Intermediate/5_AdataDPT.h5ad'

----

# 2. Read input files  

In [None]:
adata = sc.read('/group/brainomics/Intermediate/4_AdataClusters.h5ad')

In [None]:
# Report the dimensions of the loaded object.
print('Loaded Normalizes AnnData object: number of cells', adata.n_obs)
print('Loaded Normalizes AnnData object: number of genes', adata.n_vars)

# List the per-cell annotation columns available in adata.obs.
print('Available metadata for each cell: ', adata.obs.columns)

----

# 3. Identify root cells

If you are considering the whole dataset, a good strategy is usually to rely on a marker specific to your starting population. In this case, we can use markers for proliferating cells such as MKI67 and CDC20. This choice is system-dependent, so it has to be defined for your own biological model. 

## 3.1 Check in diffusion and draw graph 

In [None]:
sc.pl.draw_graph(adata,color=["MKI67","CDC20"])

In [None]:
sc.get.obs_df(adata,"MKI67").idxmax()

In [None]:
# Flag the cell with the highest MKI67 value as the candidate root:
# reset the 'root' column to 0 everywhere, then set that single cell to 1.
adata.obs['root'] = 0
mki67_top_cell = sc.get.obs_df(adata, "MKI67").idxmax()
adata.obs.loc[mki67_top_cell, 'root'] = 1

In [None]:
sc.pl.draw_graph(adata,color="root",size=120)

In [None]:
putativeRoot = sc.get.obs_df(adata,"CDC20").idxmax()

In [None]:
adata.obs['root']=0
adata.obs.loc[putativeRoot,'root']=1

In [None]:
sc.pl.draw_graph(adata,color="root",size=120)

In [None]:
sc.tl.score_genes(adata,['MKI67',"CDC20"],score_name="scoreGeneRoot")

In [None]:
# Take the cell with the highest "scoreGeneRoot" value as the candidate root,
# flag it in a freshly reset 'root' column and show it on the graph layout.
putativeRoot = sc.get.obs_df(adata,"scoreGeneRoot").idxmax()
adata.obs['root']=0
adata.obs.loc[putativeRoot,'root']=1
sc.pl.draw_graph(adata,color="root",size=120)

In [None]:
sns.histplot(data=sc.get.obs_df(adata,"CDC20"),log=True)

In [None]:
adata.obsm.keys()

In [None]:
# Smallest value of the second X_draw_graph_fa coordinate among the cells
# whose CDC20 value exceeds 2.
adata[sc.get.obs_df(adata,"CDC20")>2].obsm['X_draw_graph_fa'][:,1].min()

In [None]:
# Among the cells with CDC20 > 2, pick the one with the largest second
# X_draw_graph_fa coordinate as the candidate root, then flag and plot it.
fa_coord = sc.get.obs_df(adata, obsm_keys=[("X_draw_graph_fa", 1)])
cdc20_high = sc.get.obs_df(adata, "CDC20") > 2
putativeRoot = fa_coord[cdc20_high].idxmax()

adata.obs['root'] = 0
adata.obs.loc[putativeRoot, 'root'] = 1
sc.pl.draw_graph(adata, color="root", size=120)

In [None]:
# putativeRoot is a one-element Series keyed by the coordinate column label,
# not by integers, so read its value by position explicitly: integer keys
# passed to [] on a label-indexed Series are deprecated in pandas.
putativeRoot.iloc[0]

In [None]:
# Store the root as the integer position of the chosen cell in obs_names.
# .iloc[0] reads the cell name by position; plain [0] on this label-indexed
# Series relies on a deprecated positional fallback in pandas.
adata.uns['iroot'] = np.flatnonzero(adata.obs_names == putativeRoot.iloc[0])[0]

In [None]:
adata.uns['iroot']

## 3.2 Diffusion pseudotime

In [None]:
sc.tl.dpt(adata,n_dcs=10)

In [None]:
sc.pl.draw_graph(adata,color="dpt_pseudotime")
sc.pl.umap(adata,color="dpt_pseudotime")

In [None]:
sc.tl.paga(adata, groups='FinalLeiden')

In [None]:
def gen_mpl_labels(adata, groupby, exclude=(), basis="X_umap"):
    """Compute the median embedding position of every group of cells.

    Parameters
    ----------
    adata
        Annotated data object; ``adata.obs[groupby]`` defines the groups and
        ``adata.obsm[basis]`` holds the coordinates.
    groupby
        Name of the ``adata.obs`` column used to group the cells.
    exclude
        Group labels to leave out of the result.
    basis
        Key in ``adata.obsm`` of the embedding to summarise. Defaults to
        ``"X_umap"``, which keeps existing calls unchanged.

    Returns
    -------
    tuple
        ``(medians, medians_list)``: a dict mapping each retained group label
        to its per-dimension median coordinate, and a list with the same
        arrays in the iteration order of the groups.
    """
    medians = {
        group: np.median(adata[cell_names].obsm[basis], axis=0)
        for group, cell_names in adata.obs.groupby(groupby).groups.items()
        if group not in exclude
    }
    # Group labels are unique, so the dict values are exactly the per-group
    # medians in the order they were computed.
    return medians, list(medians.values())

In [None]:
LeidenCentroid,LeidenCentroidList = gen_mpl_labels(adata,"FinalLeiden")

In [None]:
np.array(LeidenCentroidList)

In [None]:
sc.pl.umap(adata,color="FinalLeiden")
sc.pl.paga(adata, color=['FinalLeiden', 'MKI67', 'DLX5', 'NEUROD2',"HOPX","dpt_pseudotime"],layout="fa",pos=np.array(LeidenCentroidList))


In [None]:
sc.pl.paga_compare(
    adata, threshold=0.03, title='', right_margin=0.2, size=10, edge_width_scale=0.5,
    legend_fontsize=12, fontsize=12, frameon=False, edges=False, save=True)

----

# 4. Key neurodevelopmental markers

In [None]:
# Node sequences handed to sc.pl.paga_path, one (label, nodes) pair per
# lineage. The labels are reused in panel titles and CSV file names.
paths = [
    ('excitatory', [3, 9, 1, 6, 2, 0]),
    ('inibitory', [3, 9, 1, 6, 2, 0, 8, 4]),
    ('astrocite', [3, 7]),
]

In [None]:
adata.obs['distance'] = adata.obs['dpt_pseudotime']

In [None]:
adata.obs['clusters'] = adata.obs['FinalLeiden']  # just a cosmetic change
adata.uns['clusters_colors'] = adata.uns['FinalLeiden_colors']
!mkdir write

In [None]:
'DCX', 'NEUROD2',"HOPX"

In [None]:
# Marker genes shown along the PAGA paths, grouped by population.
gene_names = [
    # excitatory neurons
    'NEUROD2', 'NEUROD6', 'STMN2',
    # intermediate and proliferating cells
    'EOMES', 'PAX6', 'MKI67', "GAP43",
    # inhibitory neurons
    'DLX6-AS1', 'GAD2', 'CALB2',
    # astrocytes
    'HOPX', 'GFAP',
]

In [None]:
import os

import matplotlib.pyplot as pl

# Both output folders must exist before the CSV files and the PDF are written.
for out_dir in ('./write', './figures'):
    os.makedirs(out_dir, exist_ok=True)

# One panel per lineage path, side by side. squeeze=False always returns a 2-D
# array of axes, so indexing also works when `paths` holds a single entry.
_, axs = pl.subplots(
    ncols=len(paths), squeeze=False, figsize=(6, 2.5),
    gridspec_kw={'wspace': 0.05, 'left': 0.12})
axs = axs.ravel()
pl.subplots_adjust(left=0.05, right=0.98, top=0.82, bottom=0.2)
for ipath, (descr, path) in enumerate(paths):
    _, data = sc.pl.paga_path(
        adata, path, gene_names,
        show_node_names=False,
        ax=axs[ipath],
        ytick_fontsize=12,
        left_margin=0.15,
        n_avg=50,
        annotations=['distance'],
        show_yticks=ipath == 0,  # gene labels only on the first panel
        show_colorbar=False,
        color_map='Greys',
        groups_key='clusters',
        color_maps_annotations={'distance': 'viridis'},
        title=f'{descr} path',
        return_data=True,
        show=False)
    # Keep the values plotted for this path next to the figure.
    data.to_csv(f'./write/paga_path_{descr}.csv')
# NOTE(review): the 'paul15' file name looks inherited from another dataset's
# example; it is kept unchanged so existing outputs keep the same path.
pl.savefig('./figures/paga_path_paul15.pdf')
pl.show()

# 5. Save

## 5.1 Save AData

In [None]:
adata.write(results_file)

## 5.2 Timestamp finished computations 

In [None]:
print(datetime.now())

## 5.3 Save python and html versions

In [None]:
nb_fname = '5_Pseudotime_easy'
nb_fname

In [None]:
%%bash -s "$nb_fname"
jupyter nbconvert "$1".ipynb --to="python"
jupyter nbconvert "$1".ipynb --to="html"