In [16]:
import warnings
import scanpy as sc
import anndata as an
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns


import os
from tqdm.notebook import tqdm

# Notebook-wide plotting/logging configuration.
sc.settings.set_figure_params(dpi=80)
# Alternative figure setup, currently disabled:
#sc.set_figure_params(facecolor="white", figsize=(8, 8))
# Silence FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Scanpy logging level (NOTE(review): 3 presumably enables hints — confirm against scanpy docs).
sc.settings.verbosity = 3

# Global variables

In [17]:
# Directory containing the pseudobulk .h5ad files (relative path).
pb_dir_path = 'data/pseudobulks/'

In [18]:
# List the available pseudobulk files. Reuse the configured directory rather
# than repeating the literal, and sort the result because os.listdir returns
# entries in arbitrary order.
sorted(os.listdir(pb_dir_path))

['pb_age.h5ad',
 'pb_chimp.h5ad',
 'pb_human.h5ad',
 'pb_macaque.h5ad',
 'pb_mammals.h5ad',
 'pb_spatial_libd_human.h5ad',
 'pseudobulk_all_four.h5ad',
 'pseudobulk_all_mammals.h5ad',
 'pseudobulk_humans.h5ad']

# Convert 4-species pseudobulk

In [19]:
# Output directory for the CSV exports of this section; created if missing.
results_dir = "data/validate_layers"
os.makedirs(results_dir, exist_ok=True)

## Load pseudobulk files

In [20]:
# Load the combined pseudobulk object. os.path.join keeps the path valid
# whether or not pb_dir_path ends with a separator.
adata_pb = sc.read_h5ad(os.path.join(pb_dir_path, 'pseudobulk_all_four.h5ad'))
# Drop every pseudobulk belonging to sample 'chimp_13302'.
# NOTE(review): the reason for excluding this sample is not recorded in the notebook.
adata_pb = adata_pb[adata_pb.obs['sample_id'] != 'chimp_13302'].copy()
adata_pb

AnnData object with n_obs × n_vars = 165 × 15367
    obs: 'layer', 'sample_id', 'psbulk_cells', 'psbulk_counts', 'condition', 'lib_size', 'log_lib_size'
    var: 'feature_types', 'mt', 'hb', 'ribo'
    uns: 'X_pca_mean_norm', 'condition_colors', 'layer_colors', 'log1p', 'pca'
    obsm: 'X_pca', 'X_pca_mean_norm'
    varm: 'PCs', 'X_pca_mean_norm'
    layers: 'counts', 'mean_norm', 'psbulk_props'

## Create pseudobulk .csv files

Let's create a `.csv` file with the expression values

In [21]:
# Export the "counts" layer as a genes x samples table.
# AnnData.to_df labels the axes from obs_names/var_names and densifies the
# layer if it is stored sparse, which a bare pd.DataFrame(...) call would not.
expression_df = adata_pb.to_df(layer="counts").T
expression_df.head()

Unnamed: 0,human_759_L1,human_j12_L1,human_j3_L1,human_j4_L1,human_j6_L1,human_759_L2,human_j12_L2,human_j3_L2,human_j4_L2,human_j6_L2,...,151509_WM,151510_WM,151669_WM,151670_WM,151671_WM,151672_WM,151673_WM,151674_WM,151675_WM,151676_WM
DUSP18,0.008,0.039275,0.063636,0.021008,0.014925,0.017241,0.125714,0.183673,0.025641,0.091743,...,0.0,0.005587,0.014151,0.0,0.004577,0.002571,0.012,0.011494,0.007911,0.007707
ANGPTL4,0.008,0.175227,0.018182,0.042017,0.052239,0.004926,0.348571,0.05102,0.029915,0.082569,...,0.0625,0.055866,0.037736,0.040936,0.01373,0.015424,0.126,0.16092,0.113924,0.100193
NUP133,0.088,0.099698,0.327273,0.184874,0.059701,0.169951,0.342857,0.826531,0.452991,0.247706,...,0.08125,0.055866,0.084906,0.105263,0.02746,0.028278,0.116,0.178982,0.112342,0.140655
SLC6A20,0.006,0.015106,0.0,0.0,0.089552,0.002463,0.011429,0.0,0.012821,0.073394,...,0.0,0.0,0.0,0.005848,0.0,0.0,0.002,0.001642,0.0,0.0
FRY,0.22,0.329305,0.545455,0.60084,0.216418,0.662562,1.702857,2.683673,1.961538,0.816514,...,0.0875,0.050279,0.169811,0.111111,0.052632,0.028278,0.096,0.182266,0.080696,0.098266


In [22]:
# Write the expression table (index column = row labels of expression_df) to the results directory.
expression_df.to_csv(f'{results_dir}/expression.csv')

In the same way, let's create a `.csv` file with the annotation

In [23]:
# add continious layer annotation
adata_pb.obs['layer_c'] = adata_pb.obs.layer.map({'L1': 1, 'L2': 2, 'L3': 3, 'L4': 4, 'L5': 5, "L6": 6, 'WM': 7})
adata_pb.obs.head()

Unnamed: 0,layer,sample_id,psbulk_cells,psbulk_counts,condition,lib_size,log_lib_size,layer_c
human_759_L1,L1,human_759,500.0,1400502.0,human,2801.004,7.937733,1
human_j12_L1,L1,human_j12,331.0,1168612.0,human,3530.549849,8.169209,1
human_j3_L1,L1,human_j3,110.0,536103.0,human,4873.663636,8.491601,1
human_j4_L1,L1,human_j4,238.0,1038124.0,human,4361.865546,8.380655,1
human_j6_L1,L1,human_j6,134.0,339404.0,human,2532.865672,7.837107,1


In [24]:
# Save the per-observation annotation table (adata_pb.obs, including layer_c) to the results directory.
adata_pb.obs.to_csv(f'{results_dir}/annotation.csv')

# Convert pseudobulks for newborns

In [25]:
# Output directory for the CSV exports of this section; created if missing.
# Note: this rebinds results_dir, so the cells below write to the new location.
results_dir = "data/human_specific_genes"
os.makedirs(results_dir, exist_ok=True)

## Load pseudobulk files

In [26]:
# Load the mammals pseudobulk object. os.path.join keeps the path valid
# whether or not pb_dir_path ends with a separator.
adata_pb = sc.read_h5ad(os.path.join(pb_dir_path, 'pseudobulk_all_mammals.h5ad'))
# Drop every pseudobulk belonging to sample 'chimp_13302'.
# NOTE(review): the reason for excluding this sample is not recorded in the notebook.
adata_pb = adata_pb[adata_pb.obs['sample_id'] != 'chimp_13302'].copy()
adata_pb

AnnData object with n_obs × n_vars = 89 × 9209
    obs: 'layer', 'sample_id', 'psbulk_cells', 'psbulk_counts', 'condition', 'lib_size', 'log_lib_size'
    var: 'feature_types', 'mt', 'hb', 'ribo'
    uns: 'X_pca_mean_norm', 'condition_colors', 'layer_colors', 'log1p', 'pca'
    obsm: 'X_pca', 'X_pca_mean_norm'
    varm: 'PCs', 'X_pca_mean_norm'
    layers: 'counts', 'mean_norm', 'psbulk_props'

## Create pseudobulk .csv files

Let's create a `.csv` file with the expression values

In [27]:
# Export the "counts" layer as a genes x samples table.
# AnnData.to_df labels the axes from obs_names/var_names and densifies the
# layer if it is stored sparse, which a bare pd.DataFrame(...) call would not.
expression_df = adata_pb.to_df(layer="counts").T
expression_df.head()

Unnamed: 0,human_759_L1,human_j12_L1,human_j3_L1,human_j4_L1,human_j6_L1,human_759_L2,human_j12_L2,human_j3_L2,human_j4_L2,human_j6_L2,...,macaque_0704_L5,macaque_nb0904_L5,macaque_0701_L6,macaque_0703_L6,macaque_0704_L6,macaque_nb0904_L6,macaque_0701_WM,macaque_0703_WM,macaque_0704_WM,macaque_nb0904_WM
DUSP18,0.008,0.039275,0.063636,0.021008,0.014925,0.017241,0.125714,0.183673,0.025641,0.091743,...,0.259259,0.044571,0.167883,0.200382,0.214984,0.032951,0.074074,0.085714,0.103896,0.011364
ANGPTL4,0.008,0.175227,0.018182,0.042017,0.052239,0.004926,0.348571,0.05102,0.029915,0.082569,...,0.015251,0.012571,0.10219,0.091603,0.052117,0.02149,0.160494,0.057143,0.038961,0.022727
NUP133,0.088,0.099698,0.327273,0.184874,0.059701,0.169951,0.342857,0.826531,0.452991,0.247706,...,0.394336,0.118857,0.20073,0.28626,0.257329,0.101719,0.123457,0.190476,0.155844,0.034091
FRY,0.22,0.329305,0.545455,0.60084,0.216418,0.662562,1.702857,2.683673,1.961538,0.816514,...,0.002179,0.004571,0.007299,0.005725,0.003257,0.005731,0.0,0.004762,0.012987,0.0
FKBP7,0.01,0.018127,0.045455,0.021008,0.029851,0.034483,0.028571,0.234694,0.059829,0.110092,...,0.084967,0.052571,0.047445,0.055344,0.071661,0.041547,0.049383,0.071429,0.090909,0.045455


In [28]:
# Write the expression table (index column = row labels of expression_df) to the results directory.
expression_df.to_csv(f'{results_dir}/expression.csv')

In the same way, let's create a `.csv` file with the annotation

In [29]:
# add continious layer annotation
adata_pb.obs['layer_c'] = adata_pb.obs.layer.map({'L1': 1, 'L2': 2, 'L3': 3, 'L4': 4, 'L5': 5, "L6": 6, 'WM': 7})
adata_pb.obs.head()

Unnamed: 0,layer,sample_id,psbulk_cells,psbulk_counts,condition,lib_size,log_lib_size,layer_c
human_759_L1,L1,human_759,500.0,1400502.0,human,2801.004,7.937733,1
human_j12_L1,L1,human_j12,331.0,1168612.0,human,3530.549849,8.169209,1
human_j3_L1,L1,human_j3,110.0,536103.0,human,4873.663636,8.491601,1
human_j4_L1,L1,human_j4,238.0,1038124.0,human,4361.865546,8.380655,1
human_j6_L1,L1,human_j6,134.0,339404.0,human,2532.865672,7.837107,1


In [30]:
# Save the per-observation annotation table (adata_pb.obs, including layer_c) to the results directory.
adata_pb.obs.to_csv(f'{results_dir}/annotation.csv')