In [1]:
import warnings
import scanpy as sc
import anndata as an
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns


import os
from tqdm.notebook import tqdm

# Plotting defaults for scanpy figures: render at 80 dpi.
sc.settings.set_figure_params(dpi=80)
# Disabled alternative: sc.set_figure_params(facecolor="white", figsize=(8, 8))
# Ignore FutureWarning messages so they do not clutter the cell outputs.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Scanpy logging verbosity; 3 is presumably the "hints" level — confirm against the scanpy settings docs.
sc.settings.verbosity = 3

# Global variables

In [8]:
# Output folder for the CSV exports written further down; a no-op when it already exists.
os.makedirs(name='results', exist_ok=True)

## Load pseudobulk files

In [5]:
# Directory holding the pseudobulk .h5ad files, given relative to the notebook's working directory.
pb_dir_path = '../data/pseudobulks/'

In [6]:
# List the available pseudobulk files.
# Reuses `pb_dir_path` instead of repeating the literal, so the listing always
# points at the same directory the data is loaded from. `sorted` makes the
# displayed order reproducible, since `os.listdir` returns entries in arbitrary order.
sorted(os.listdir(pb_dir_path))

['4spe_pb_mean_filt.h5ad',
 '4spe_pb_mean_filt_nor_mean.h5ad',
 'pb_age.h5ad',
 'pb_age_sample_norm.h5ad',
 'pb_mammals.h5ad',
 'pb_mammals_filtered.h5ad',
 'pb_mammals_samplenorm_mean.h5ad',
 'pb_mammals_samplenorm_mean_filtered.h5ad',
 'pb_mammals_samplenorm_zscore.h5ad']

In [9]:
# Load the `pb_mammals.h5ad` pseudobulk AnnData object.
# `os.path.join` builds the path so the load does not silently depend on
# `pb_dir_path` ending with a path separator.
adata_pb = sc.read_h5ad(os.path.join(pb_dir_path, 'pb_mammals.h5ad'))
adata_pb

AnnData object with n_obs × n_vars = 94 × 14963
    obs: 'layer', 'sample_id', 'psbulk_n_cells', 'psbulk_counts', 'condition'
    var: 'feature_types', 'mt', 'hb', 'ribo'
    layers: 'psbulk_props'

## Create pseudobulk .csv files

Let's create a `.csv` file with the expression matrix (genes in rows, pseudobulk samples in columns).

In [16]:
# Expression table with genes (var_names) as rows and pseudobulk samples
# (obs_names) as columns.
# `AnnData.to_df()` returns X as an obs × var DataFrame labelled with
# obs_names/var_names and densifies X when it is stored as a sparse matrix,
# so this cell also works for sparse inputs; `.T` flips it to genes × samples.
expression_df = adata_pb.to_df().T
expression_df.head()

Unnamed: 0,chimp_11454_L1,chimp_13309_L1,chimp_j8_L1,human_759_L1,human_j12_L1,human_j3_L1,human_j4_L1,human_j6_L1,macaque_0701_L1,macaque_0703_L1,...,chimp_j8_WM,human_759_WM,human_j12_WM,human_j3_WM,human_j4_WM,human_j6_WM,macaque_0701_WM,macaque_0703_WM,macaque_0704_WM,macaque_nb0904_WM
A1BG,0.024752,0.004975,0.032663,0.144,0.145015,0.190909,0.252101,0.067164,0.003497,0.0,...,0.012605,0.186047,0.061433,0.12239,0.09078,0.048913,0.0,0.0,0.0,0.011364
A1CF,0.0,0.0,0.0,0.002,0.0,0.0,0.008403,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.001418,0.0,0.0,0.0,0.0,0.0
A2ML1,0.0,0.0,0.007538,0.01,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.003876,0.000683,0.00144,0.0,0.004076,0.0,0.0,0.0,0.0
A4GALT,0.0,0.0,0.0,0.004,0.009063,0.0,0.0,0.014925,0.034965,0.010204,...,0.0,0.0,0.00273,0.006479,0.002837,0.004076,0.037037,0.019048,0.025974,0.011364
AAAS,0.059406,0.0199,0.050251,0.056,0.048338,0.036364,0.063025,0.074627,0.066434,0.035714,...,0.028011,0.034884,0.025256,0.030238,0.034043,0.033967,0.018519,0.042857,0.025974,0.0


In [17]:
# Save the genes × samples expression table to the results folder as CSV.
expression_df.to_csv(path_or_buf='results/expression.csv')

In the same way, let's create a `.csv` file with the sample annotation.

In [21]:
# Preview the first five rows of the per-pseudobulk annotation table before exporting it.
adata_pb.obs.head(n=5)

Unnamed: 0,layer,sample_id,psbulk_n_cells,psbulk_counts,condition
chimp_11454_L1,L1,chimp_11454,202.0,360976.0,chimp
chimp_13309_L1,L1,chimp_13309,201.0,313326.0,chimp
chimp_j8_L1,L1,chimp_j8,398.0,862771.0,chimp
human_759_L1,L1,human_759,500.0,961453.0,human
human_j12_L1,L1,human_j12,331.0,827957.0,human


In [20]:
# Save the pseudobulk annotation table (`adata_pb.obs`) to the results folder as CSV.
adata_pb.obs.to_csv(path_or_buf='results/annotation.csv')