In [2]:
import scanpy as sc
import pandas as pd
import numpy as np
import anndata
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

In [3]:
import importlib.util
import sys

# Load the helper script from an explicit file path and expose it as `utils`.
# The module object is registered in sys.modules under the spec's name
# ("module.name") before its code is executed.
spec = importlib.util.spec_from_file_location(
    name="module.name",
    location="/nfs/team205/kk18/function/python/utils.py",
)
utils = importlib.util.module_from_spec(spec)
sys.modules[spec.name] = utils
spec.loader.exec_module(utils)

In [4]:
# Display the kernel's current working directory (recorded for provenance).
os.getcwd()

'/nfs/team205/kk18/notebooks/Foetal/Trisomy21/RNA/milo_Mar2025_refine-nhoods'

# Read in DEG results

In [5]:
# Load the differential-expression results table (one row per gene per cell
# type; the first CSV column is used as the index), then report its
# dimensions and preview the first rows.
deg_res = pd.read_csv(
    '/nfs/team205/kk18/notebooks/Foetal/Trisomy21/RNA/milo_Mar2025_refine-nhoods/milo_nhood-reassigned_pseudobulk_edgeR_res_tri21-nhoods_vs_euploid-nhoods_finegrain.csv',
    index_col=0,
)
print(deg_res.shape)
deg_res.head()

(42412, 7)


Unnamed: 0,gene_name,logFC,logCPM,F,PValue,FDR,celltype
1,ARL17B,-1.745741,6.847549,94.587301,5.74302e-15,2.15995e-11,AtrialCardiomyocytesRight
2,NEXN,-1.570495,10.283666,73.534824,8.173152e-13,1.222062e-09,AtrialCardiomyocytesRight
3,FHL2,1.364728,9.409786,72.871337,9.747901e-13,1.222062e-09,AtrialCardiomyocytesRight
4,CALD1,-1.808502,11.626061,67.648059,3.860583e-12,3.1994e-09,AtrialCardiomyocytesRight
5,MYH9,-1.932809,8.469364,67.324364,4.25339e-12,3.1994e-09,AtrialCardiomyocytesRight


# GSEA analysis

In [6]:
import blitzgsea as blitz

## Trisomy 21 nhoods vs euploid nhoods

In [7]:
%%time
# ---- Parameters --------------------------------------------------------
# Enrichr gene set libraries to test against.
geneset_list = ['MSigDB_Hallmark_2020','KEGG_2021_Human','GO_Biological_Process_2023','Reactome_2022']
# Gene filter applied to each cell type's DE table before GSEA:
#   * deg_fdr_thresh is used when it is not None;
#   * otherwise deg_p_thresh is used when it is not None;
#   * if both are None, every gene of the cell type is kept.
deg_fdr_thresh = 0.1
deg_p_thresh = None
# Cell types with fewer genes than this after filtering are skipped.
min_sig_genes = 5
# Destination of the combined GSEA table.
gsea_out_csv = '/nfs/team205/kk18/notebooks/Foetal/Trisomy21/RNA/milo_Mar2025_refine-nhoods/GSEA_trisomy21-vs-euploid-nhoods_finegrain.csv'

################################################
# Retrieve every gene set library once up front: the library does not depend
# on the cell type, so fetching it inside the cell type loop is redundant.
libraries = {geneset: blitz.enrichr.get_library(geneset) for geneset in geneset_list}

celltype_tables = []  # one combined GSEA table per cell type that produced results
for celltype in deg_res['celltype'].unique():
    print(celltype)
    # subset to this cell type
    res_sub = deg_res[deg_res['celltype']==celltype]
    # optionally filter genes on the DE test statistics
    if deg_fdr_thresh is not None:
        res_sub = res_sub[res_sub['FDR']<deg_fdr_thresh]
    elif deg_p_thresh is not None:
        res_sub = res_sub[res_sub['PValue']<deg_p_thresh]

    if res_sub.shape[0]<min_sig_genes:
        print(f'significant genes({list(res_sub["gene_name"])}) is less than {min_sig_genes}')
        continue

    # Two-column signature (gene, logFC) with columns renamed to 0 and 1,
    # which is the form passed to blitz.gsea.
    signature = res_sub[['gene_name','logFC']].copy()
    signature.columns = [0,1]

    geneset_tables = []
    for geneset in geneset_list:
        # NOTE: the original wrapped only this call in `if __name__ == "__main__":`
        # while using `result` unguarded afterwards, so outside __main__ it would
        # raise NameError or reuse a stale result. In a notebook __name__ is
        # always "__main__", so the guard is dropped here. If this is exported to
        # a script, the original note suggests a main guard is needed — wrap the
        # whole analysis in it rather than this single call.
        result = blitz.gsea(signature, libraries[geneset]).reset_index()
        if result.shape[0]==0:
            print(f'gsea, {geneset}: no results')
            continue
        result['geneset'] = geneset
        geneset_tables.append(result)

    if not geneset_tables:  # no results at all for this cell type
        continue

    # Row indices are deliberately not reset, so the saved CSV keeps the same
    # index column layout as before.
    result_concat = pd.concat(geneset_tables)
    result_concat['celltype'] = celltype
    celltype_tables.append(result_concat)

if not celltype_tables:
    # Previously this situation surfaced as a NameError on the save line.
    raise ValueError(
        'No GSEA results for any cell type: relax deg_fdr_thresh/deg_p_thresh '
        'or check the DE results table.'
    )

# combine all cell types and save
result_concat_concat = pd.concat(celltype_tables)
result_concat_concat.to_csv(gsea_out_csv)
result_concat_concat

AtrialCardiomyocytesRight
AtrialCardiomyocytesLeft
CoronaryCapillaryEndothelialCells
EndocardialCells
SubEpicardialFibroblasts
GreatVesselAdventitialFibroblasts
GreatVesselSmoothMuscleCells
MacrophagesATF3pos
MacrophagesLYVE1pos
MyocardialInterstitialFibroblasts
VentricularCardiomyocytesCycling
VentricularCardiomyocytesLeftCompact
VentricularCardiomyocytesLeftTrabeculated
VentricularCardiomyocytesRightCompact
CPU times: user 21.6 s, sys: 1.09 s, total: 22.6 s
Wall time: 37.6 s


Unnamed: 0,Term,es,nes,pval,sidak,fdr,geneset_size,leading_edge,geneset,celltype
0,Hypoxia,-0.675073,-5.711375,1.120671e-08,4.594750e-07,4.594751e-07,34,"GYS1,PGK1,PPARGC1A,HK2,GBE1,PPP1R3C,VEGFA,SLC2...",MSigDB_Hallmark_2020,AtrialCardiomyocytesRight
1,Myogenesis,-0.498307,-4.349660,1.363488e-05,5.588778e-04,2.795151e-04,37,"SVIL,MYOM1,IFRD1,CNN3,SORBS1,CSRP3,PPP1R3C,ACT...",MSigDB_Hallmark_2020,AtrialCardiomyocytesRight
2,Mitotic Spindle,-0.443736,-4.240042,2.234783e-05,9.158517e-04,3.054204e-04,46,"NEDD9,FLNB,PDLIM5,VCL,MYH9,PXN,STK38L,AKAP13,S...",MSigDB_Hallmark_2020,AtrialCardiomyocytesRight
3,Glycolysis,-0.668281,-3.888252,1.009689e-04,4.131377e-03,1.034931e-03,25,"GYS1,FAM162A,PYGB,ERO1A,EGLN3,HOMER1,LDHA,GOT1...",MSigDB_Hallmark_2020,AtrialCardiomyocytesRight
4,Oxidative Phosphorylation,-0.572412,-3.168643,1.531524e-03,6.090686e-02,1.255850e-02,25,"DLST,MGST3,ATP5ME,AFG3L2,ETFDH,ACO2,SDHB,OGDH,...",MSigDB_Hallmark_2020,AtrialCardiomyocytesRight
...,...,...,...,...,...,...,...,...,...,...
416,Disassembly Of Destruction Complex And Recruit...,-0.213652,-0.010757,9.914170e-01,1.000000e+00,9.976947e-01,6,"CTNNB1,CSNK1A1,GSK3B,CAV1,PPP2R5A,PPP2R5C",Reactome_2022,VentricularCardiomyocytesRightCompact
417,Stimuli-sensing Channels R-HSA-2672351,-0.215302,-0.007630,9.939125e-01,1.000000e+00,9.976947e-01,10,"ANO5,CALM1,WWP1,WNK2,TRDN,TTYH3,UBC,TRPM7,TRPC...",Reactome_2022,VentricularCardiomyocytesRightCompact
418,Cellular Response To Heat Stress R-HSA-3371556,-0.144760,-0.006831,9.945497e-01,1.000000e+00,9.976947e-01,8,"VCP,HSPH1,GSK3B,DNAJB6,HSPA8,MAPKAPK2,BAG3,PTGES3",Reactome_2022,VentricularCardiomyocytesRightCompact
419,Signaling By NOTCH R-HSA-157118,-0.212737,-0.004456,9.964443e-01,1.000000e+00,9.976947e-01,14,"ATP2A2,HDAC5,NCOR2,JUN,ST3GAL4,TBL1X,UBC,B4GALT1",Reactome_2022,VentricularCardiomyocytesRightCompact
