# Pseudobulk and Differential Expression

- This notebook explains how the pseudobulk data were generated and how the differential expression analysis was conducted to produce the volcano plot in Figure 4E.
- Figures 4F-G used the differential expression results generated below to identify the top biological pathways in the lung and liver with the Metascape tool. See the text for more information.

In [None]:
import numpy as np #v1.26.4
import pandas as pd #v2.2.0
import anndata #v0.10.5.post1
import scanpy as sc #v1.9.8
import pydeseq2 #v0.4.10
import decoupler as dc #v1.6.0
import random  

# Seed both the Python standard-library RNG and NumPy's global RNG with the
# same fixed value.
c_iSeed = 6161904
random.seed(c_iSeed)     # standard-library `random` module
np.random.seed(c_iSeed)  # NumPy global random state

In [None]:
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
from pydeseq2.utils import load_example_data

In [None]:
# Load the filtered AnnData object from its HDF5 file.
# NOTE: `___` below is a placeholder, not a usable value -- replace it with the
# path to the file "02_final_filtered_object.hdf5" before running this cell.
final_filtered_object = anndata.read_h5ad(filename=___) # <- replace ___ with the file path

In [None]:
# Build pseudobulk profiles from the single-cell object with decoupler.
# Values are taken from the 'raw_counts' layer and combined with mode='sum',
# keyed on the 'sample' column (no additional grouping column is used).
# Both minimum thresholds are set to 0 -- presumably so that no profile is
# discarded for having too few cells or counts; confirm against decoupler docs.
pseudo_data = dc.get_pseudobulk(
    final_filtered_object,
    layer='raw_counts', mode='sum',
    sample_col='sample', groups_col=None,
    min_cells=0, min_counts=0,
)

In [None]:
# Wrap the pseudobulk profiles in a PyDESeq2 dataset.
# 'organ' is the only design factor, and refit_cooks=True is requested
# (Cook's-distance outlier refitting, per the PyDESeq2 option name).
dds = DeseqDataSet(
    adata=pseudo_data,
    refit_cooks=True,
    design_factors='organ',
)

In [None]:
# Run the PyDESeq2 fitting pipeline on the pseudobulk dataset.
# `deseq2()` already fits the log-fold changes (LFCs) as one of its steps and,
# because the dataset was created with refit_cooks=True, then performs the
# Cook's-distance outlier refit. A separate `dds.fit_LFC()` call used to follow
# in its own cell; it was removed because it only repeated work already done
# and would re-estimate the LFCs after the outlier refit had run.
dds.deseq2()

In [None]:
# Set up the statistical testing object for the fitted dataset.
# NOTE(review): no `contrast` is passed, so PyDESeq2 chooses its default
# comparison for the 'organ' design factor -- confirm which organ is the
# reference level (i.e. the sign convention of the log2 fold changes).
stat_res = DeseqStats(dds)

In [None]:
# Produce the differential expression summary for the comparison set up above.
# Presumably this is the result table behind the volcano plot (Figure 4E) --
# TODO confirm where the results are read from downstream.
stat_res.summary()