In [1]:
import warnings
import scanpy as sc
import anndata as an
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns


import os
from tqdm.notebook import tqdm

# Notebook-wide plotting / logging configuration.
sc.settings.set_figure_params(dpi=80)  # figure resolution used for scanpy plots
# Alternative figure settings, currently disabled:
#sc.set_figure_params(facecolor="white", figsize=(8, 8))
warnings.simplefilter(action='ignore', category=FutureWarning)  # hide FutureWarning messages
sc.settings.verbosity = 3  # NOTE(review): presumably scanpy's "hint" logging level — confirm against scanpy docs

# Global variables

In [2]:
# Output folder for every .csv written by this notebook; a pre-existing folder is not an error.
os.makedirs(name='results', exist_ok=True)

## Load pseudobulk files

In [3]:
# Directory holding the pseudobulk .h5ad files (relative to the notebook's working directory).
# Keep the trailing slash: later cells build file paths from this prefix.
pb_dir_path = '../data/pseudobulks/'

In [4]:
# List the available pseudobulk files. Reuse `pb_dir_path` instead of repeating the literal
# path, and sort because `os.listdir` returns entries in arbitrary order.
sorted(os.listdir(pb_dir_path))

['4spe_pb_mean_filt.h5ad',
 '4spe_pb_mean_filt_nor_mean.h5ad',
 'pb_age.h5ad',
 'pb_age_sample_norm.h5ad',
 'pb_mammals.h5ad',
 'pb_mammals_filtered.h5ad',
 'pb_mammals_samplenorm_mean.h5ad',
 'pb_mammals_samplenorm_mean_filtered.h5ad',
 'pb_mammals_samplenorm_zscore.h5ad']

In [5]:
# Load the mammals pseudobulk AnnData. `os.path.join` keeps the path valid whether or not
# `pb_dir_path` ends with a separator.
adata_pb = sc.read_h5ad(os.path.join(pb_dir_path, 'pb_mammals.h5ad'))

# Drop every pseudobulk belonging to sample 'chimp_13302'.
# NOTE(review): the reason for excluding this sample is not recorded here — please document it.
keep_mask = adata_pb.obs['sample_id'] != 'chimp_13302'
adata_pb = adata_pb[keep_mask].copy()  # .copy() turns the view into an independent object
adata_pb

AnnData object with n_obs × n_vars = 89 × 14963
    obs: 'layer', 'sample_id', 'psbulk_n_cells', 'psbulk_counts', 'condition'
    var: 'feature_types', 'mt', 'hb', 'ribo'
    layers: 'psbulk_props'

In [6]:
# Load the 'pb_age' pseudobulk AnnData. `os.path.join` keeps the path valid whether or not
# `pb_dir_path` ends with a separator.
adata_pb_adult = sc.read_h5ad(os.path.join(pb_dir_path, 'pb_age.h5ad'))
adata_pb_adult

AnnData object with n_obs × n_vars = 111 × 17919
    obs: 'layer', 'sample_id', 'psbulk_n_cells', 'psbulk_counts', 'condition'
    var: 'feature_types', 'mt', 'hb', 'ribo'
    layers: 'psbulk_props'

## Create pseudobulk .csv files

Let's create a `.csv` file with the expression values

In [7]:
# Expression table with genes (var_names) as rows and pseudobulk samples (obs_names) as columns.
# `AnnData.to_df()` labels the matrix with obs/var names and densifies `X` if it is sparse,
# which the plain `pd.DataFrame(adata.X.T, ...)` constructor does not handle.
expression_df = adata_pb.to_df().T
expression_df.head()

Unnamed: 0,chimp_11454_L1,chimp_13309_L1,chimp_j8_L1,human_759_L1,human_j12_L1,human_j3_L1,human_j4_L1,human_j6_L1,macaque_0701_L1,macaque_0703_L1,...,chimp_j8_WM,human_759_WM,human_j12_WM,human_j3_WM,human_j4_WM,human_j6_WM,macaque_0701_WM,macaque_0703_WM,macaque_0704_WM,macaque_nb0904_WM
A1BG,0.024752,0.004975,0.032663,0.144,0.145015,0.190909,0.252101,0.067164,0.003497,0.0,...,0.012605,0.186047,0.061433,0.12239,0.09078,0.048913,0.0,0.0,0.0,0.011364
A1CF,0.0,0.0,0.0,0.002,0.0,0.0,0.008403,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.001418,0.0,0.0,0.0,0.0,0.0
A2ML1,0.0,0.0,0.007538,0.01,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.003876,0.000683,0.00144,0.0,0.004076,0.0,0.0,0.0,0.0
A4GALT,0.0,0.0,0.0,0.004,0.009063,0.0,0.0,0.014925,0.034965,0.010204,...,0.0,0.0,0.00273,0.006479,0.002837,0.004076,0.037037,0.019048,0.025974,0.011364
AAAS,0.059406,0.0199,0.050251,0.056,0.048338,0.036364,0.063025,0.074627,0.066434,0.035714,...,0.028011,0.034884,0.025256,0.030238,0.034043,0.033967,0.018519,0.042857,0.025974,0.0


In [8]:
# Persist the expression table (rows = var_names, columns = obs_names) for downstream use.
expression_df.to_csv(path_or_buf='results/expression.csv')

In the same way, let's create a `.csv` file with the annotation

In [9]:
# Add a continuous (numeric) layer annotation: L1..L6 -> 1..6, WM -> 7.
layer_to_number = {'L1': 1, 'L2': 2, 'L3': 3, 'L4': 4, 'L5': 5, 'L6': 6, 'WM': 7}
# If `layer` is categorical (common after reading .h5ad), mapping it directly yields another
# categorical column rather than a numeric one, so map over plain strings instead.
# Labels missing from the mapping become NaN.
adata_pb.obs['layer_c'] = adata_pb.obs['layer'].astype(str).map(layer_to_number)
adata_pb.obs.head()

Unnamed: 0,layer,sample_id,psbulk_n_cells,psbulk_counts,condition,layer_c
chimp_11454_L1,L1,chimp_11454,202.0,360976.0,chimp,1
chimp_13309_L1,L1,chimp_13309,201.0,313326.0,chimp,1
chimp_j8_L1,L1,chimp_j8,398.0,862771.0,chimp,1
human_759_L1,L1,human_759,500.0,961453.0,human,1
human_j12_L1,L1,human_j12,331.0,827957.0,human,1


In [10]:
# Persist the per-pseudobulk annotation table (`.obs`, including `layer_c`).
adata_pb.obs.to_csv(path_or_buf='results/annotation.csv')

Let's do the same for adult human data

In [11]:
# Expression table for the adult dataset: genes (var_names) as rows, pseudobulk samples
# (obs_names) as columns. `AnnData.to_df()` labels the matrix with obs/var names and
# densifies `X` if it is sparse, which the plain `pd.DataFrame(adata.X.T, ...)` does not handle.
# NOTE: this rebinds `expression_df`, which previously held the mammals table.
expression_df = adata_pb_adult.to_df().T
expression_df.head()

Unnamed: 0,151507_L1,151508_L1,151509_L1,151510_L1,151673_L1,151674_L1,151675_L1,151676_L1,human_759_L1,human_j12_L1,...,151672_WM,151673_WM,151674_WM,151675_WM,151676_WM,human_759_WM,human_j12_WM,human_j3_WM,human_j4_WM,human_j6_WM
A1BG,0.087879,0.049858,0.064204,0.067736,0.058824,0.132597,0.036036,0.071429,0.144,0.145015,...,0.046272,0.358,0.344828,0.221519,0.211946,0.186047,0.061433,0.12239,0.09078,0.048913
A1CF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.002,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001418,0.0
A2M,0.272727,0.2849,0.260334,0.254902,0.352941,0.569061,0.27027,0.5,0.564,0.217523,...,0.092545,0.4,0.574713,0.311709,0.404624,0.414729,0.154266,0.277178,0.238298,0.192935
A2ML1,0.004545,0.012821,0.007036,0.002674,0.004525,0.005525,0.0,0.0,0.01,0.0,...,0.0,0.008,0.003284,0.003165,0.007707,0.003876,0.000683,0.00144,0.0,0.004076
A4GALT,0.006061,0.009972,0.009675,0.004456,0.004525,0.019337,0.013514,0.005102,0.004,0.009063,...,0.002571,0.008,0.024631,0.004747,0.00578,0.0,0.00273,0.006479,0.002837,0.004076


In [12]:
# Persist the adult expression table.
# NOTE(review): this file is named 'expression_adults.csv' while the matching annotation
# file is 'annotation_adult.csv' — consider unifying the names together with any consumers.
expression_df.to_csv(path_or_buf='results/expression_adults.csv')

In the same way, let's create a `.csv` file with the annotation for the adult data

In [13]:
# Add a continuous (numeric) layer annotation: L1..L6 -> 1..6, WM -> 7.
layer_to_number = {'L1': 1, 'L2': 2, 'L3': 3, 'L4': 4, 'L5': 5, 'L6': 6, 'WM': 7}
# If `layer` is categorical (common after reading .h5ad), mapping it directly yields another
# categorical column rather than a numeric one, so map over plain strings instead.
# Labels missing from the mapping become NaN.
adata_pb_adult.obs['layer_c'] = adata_pb_adult.obs['layer'].astype(str).map(layer_to_number)
adata_pb_adult.obs.head()

Unnamed: 0,layer,sample_id,psbulk_n_cells,psbulk_counts,condition,layer_c
151507_L1,L1,151507,660.0,952011.0,spatial_libd_human,1
151508_L1,L1,151508,702.0,831032.0,spatial_libd_human,1
151509_L1,L1,151509,1137.0,1593570.0,spatial_libd_human,1
151510_L1,L1,151510,1122.0,1484205.0,spatial_libd_human,1
151673_L1,L1,151673,221.0,399606.0,spatial_libd_human,1


In [14]:
# Persist the adult per-pseudobulk annotation table (`.obs`, including `layer_c`).
adata_pb_adult.obs.to_csv(path_or_buf='results/annotation_adult.csv')