# Env setupu

In [1]:
import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
from matplotlib import pylab
import warnings
import networkx as nx
import socket
import scvelo as scv
import yaml
import itertools
import seaborn as sns
import os
import sys

warnings.filterwarnings('ignore')

In [2]:
sc.settings.verbosity = 4   
# verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()


scanpy==1.8.0 anndata==0.8.0 umap==0.4.6 numpy==1.22.2 scipy==1.6.2 pandas==1.2.3 scikit-learn==0.24.1 statsmodels==0.13.5 python-igraph==0.9.1 louvain==0.7.0 leidenalg==0.8.3


# Configure paths

In [3]:
outdir = "../data/output"

# DPT adatas

In [4]:

# We load refined lineages anndata to store final barcodelists and branch specific dpts 
# Note: we are doing this since from adataPaga further branch-specific refined was performed (Notebooks 03/Trajectories/*lineage*.ipynb) and after that branch-specific dpt was computed

adataTags = ["AdataLineage.Migrating.h5ad",
"AdataLineage.Astro.h5ad",
"AdataLineage.Cajal.h5ad",
"AdataLineage.Exc_early.h5ad",
"AdataLineage.Exc_late.h5ad",
"AdataLineage.Interneurons.h5ad"]

adataLineagesDict = dict(zip(
    ["Migrating","Astro","Cajal","Exc_early","Exc_late","Interneurons"],
    ["/adatas/".join(l) for l in list(itertools.product([outdir],adataTags))]
))

for k in adataLineagesDict.keys():
    adataLineagesDict[k] = sc.read_h5ad(adataLineagesDict[k])

    
tmpadata = ad.concat(list(adataLineagesDict.values()), join = "outer")
allbarcodes = list(set(tmpadata.obs_names))
del tmpadata

# Paga adata

In [5]:
adataPaga = sc.read_h5ad(outdir+"/adatas/adataPaga.h5ad")[allbarcodes]
adataPaga

View of AnnData object with n_obs × n_vars = 13850 × 3499
    obs: 'dataset', 'cellID', 'cellID_newName', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'stage', 'type', 'id_stage', 'cellID_newName_type', 'S_score', 'G2M_score', 'phase', 'leidenAnnotated', 'leiden_1.2', 'endpoint_GlutamatergicNeurons_late', 'endpoint_GlutamatergicNeurons_early', 'endpoint_MigratingNeurons', 'endpoint_OuterRadialGliaAstrocytes', 'endpoint_Interneurons', 'endpoint_Interneurons_GAD2', 'endpoint_CajalR_like', 'Exc_Lineage', 'endpoint_GlutamatergicNeurons_both'
    var: 'highly_variable', 'mean', 'std'
    uns: 'Exc_Lineage_colors', 'cellID_colors', 'cellID_newName_colors', 'cellID_newName_type_colors', 'cluster_colors', 'dataset_colors', 'diffmap_evals', 'draw_graph', 'leiden', 'leidenAnnotated_colors', 'leiden_1.2_colors', 'leiden_1.2_sizes'

# varAdatas

In [6]:

VarAdataDict = {
"UpD50" : "../data/Sample_S20272_157/varAdata.h5ad",
"DownD50" : "../data/Sample_S20273_158/varAdata.h5ad",
"UpD100_1" : "../data/Sample_S20812_258/varAdata.h5ad",
"UpD100_2" : "../data/Sample_S20813_259/varAdata.h5ad",
"DownD250" : "../data/Sample_S20814_260/varAdata.h5ad",
"DownD100" : "../data/Sample_S31807_MET6/varAdata.h5ad",
"UpD300" : "../data/Sample_S33846_C_GEX/varAdata.h5ad"
}

for k in list(VarAdataDict.keys()):
    VarAdataDict[k] = sc.read_h5ad(VarAdataDict[k])
    VarAdataDict[k].obs_names = [bc +"_"+str(k) for bc in VarAdataDict[k].obs_names.tolist() ]
    
    
    
varAdata = ad.concat(list(VarAdataDict.values()), join = "outer")[allbarcodes]
varAdata

View of AnnData object with n_obs × n_vars = 13850 × 83430
    layers: 'AltReads', 'RefReads'

# Branch membership ad lineage specific dpt transfer

In [7]:
adataLineagesDict.keys()

dict_keys(['Migrating', 'Astro', 'Cajal', 'Exc_early', 'Exc_late', 'Interneurons'])

In [8]:
for k in list(adataLineagesDict.keys()):
    varAdata.obs[k+"_pseudotime"] = np.nan
    varAdata.obs.loc[adataLineagesDict[k].obs_names,k+"_pseudotime"] = adataLineagesDict[k].obs["dpt_pseudotime"].copy()

        

In [9]:
varAdata.obs

Unnamed: 0,Migrating_pseudotime,Astro_pseudotime,Cajal_pseudotime,Exc_early_pseudotime,Exc_late_pseudotime,Interneurons_pseudotime
ATCATGGCACCCAGTG-1_DownD50,,,0.544692,,,
TTCCCAGTCGATCCCT-1_UpD50,0.368105,,,0.697228,,
TAGGCATGTAAGGATT-1_DownD50,0.384284,,,0.729012,,
TGAGCATAGGCATTGG-1_DownD100,,,,0.904658,,
ACAGCTATCCAGGGCT-1_DownD100,0.386297,,,0.724688,,
...,...,...,...,...,...,...
TCAGGTATCCCACTTG-1_UpD50,,,,0.890487,,
GCTGCGATCTTAGCCC-1_DownD250,,,,,,0.934203
CCGTTCAGTACTCTCC-1_DownD50,0.459232,,,0.778597,,
AGTTGGTCACGGTAGA-1_DownD250,,,,,0.785716,


# Transfering other metadata

In [10]:
varAdata.obs = pd.concat([varAdata.obs, adataPaga.obs[["dataset","cellID_newName","leidenAnnotated","stage","type"]]], axis = 1)
varAdata.obs.type.replace({"downstream":"non_chimeric","upstream":"chimeric"}, inplace=True)

In [11]:
varAdata.write_h5ad(outdir+"/adatas/VarAnndata_complete.h5ad")