# Emma Pan Neuro (Control + ND75KD) - AUCell Oxphos scoring
*Run this notebook before the Rmd script: it computes the per-cell AUCell AUC values with pySCENIC.*

**Author:** Vincent Gardeux

**Date Created:** 03/04/2024
**Date Last Modified:** 12/02/2025

In [1]:
# import dependencies
import pandas as pd
import h5py
from ctxcore.genesig import GeneSignature
from pyscenic.aucell import create_rankings, enrichment

In [5]:
# Excel workbook holding the OXPHOS gene table, and the worksheet to read from it
GENE_SIGNATURE_FNAME = './data/Oxphos_genes.xlsx'
GENE_SIGNATURE_SNAME = '68 OXPHOS genes'
EXPRESSION_MTX_FNAME = './data/Pan_neuro_integrated.loom' # Loom file; its /matrix is read as (gene, cell) and is transposed to (cell, gene) before ranking.

In [6]:
# Load the OXPHOS gene table (one row per gene) from the selected worksheet
data_excel = pd.read_excel(GENE_SIGNATURE_FNAME, sheet_name=GENE_SIGNATURE_SNAME)
data_excel

Unnamed: 0,Complex,Complex_Name,Complex_Subunit,Gene_Name,Gene_CG,Gene_Ensembl_ID,Gene_Symbol,Gene_Symbol_clean
0,I,NADH ubiquinone oxidoreductase,34,13 kDa A,CG8680,FBgn0031684,ND-13A,ND-13A
1,I,NADH ubiquinone oxidoreductase,34,13 kDa B,CG6463,FBgn0047038,ND-13B,ND-13B
2,I,NADH ubiquinone oxidoreductase,34,15 kDa,CG11455,FBgn0031228,ND-15,ND-15
3,I,NADH ubiquinone oxidoreductase,34,18 kDa,CG12203,FBgn0031021,ND-18,ND-18
4,I,NADH ubiquinone oxidoreductase,34,19 kDa,CG3683,FBgn0035046,ND-19,ND-19
...,...,...,...,...,...,...,...,...
63,V,F0/F1 ATP synthase,13,F,CG4692,FBgn0035032,ATPsynF,ATPsynF
64,V,F0/F1 ATP synthase,13,G,CG6105,FBgn0010612,ATPsynG,ATPsynG
65,V,F0/F1 ATP synthase,13,Coupling factor 6,CG4412,FBgn0016119,ATPsynCF6,ATPsynCF6
66,V,F0/F1 ATP synthase,13,Lipid-binding protein,CG1746,FBgn0039830,ATPsynC,ATPsynC


In [7]:
# Build the signature from the FlyBase gene IDs (FBgn...), passed as a plain
# list with no explicit per-gene weights.
gs = GeneSignature(
    name='Flybase - GO - Oxidative phosphorylation',
    gene2weight=data_excel["Gene_Ensembl_ID"].tolist(),
)
gs

GeneSignature(name='Flybase - GO - Oxidative phosphorylation', gene2weight=frozendict.frozendict({'FBgn0031684': 1.0, 'FBgn0047038': 1.0, 'FBgn0031228': 1.0, 'FBgn0031021': 1.0, 'FBgn0035046': 1.0, 'FBgn0030718': 1.0, 'FBgn0017567': 1.0, 'FBgn0030853': 1.0, 'FBgn0266582': 1.0, 'FBgn0037001': 1.0, 'FBgn0019957': 1.0, 'FBgn0039909': 1.0, 'FBgn0031771': 1.0, 'FBgn0017566': 1.0, 'FBgn0040705': 1.0, 'FBgn0034645': 1.0, 'FBgn0033570': 1.0, 'FBgn0025839': 1.0, 'FBgn0031505': 1.0, 'FBgn0034576': 1.0, 'FBgn0033961': 1.0, 'FBgn0029868': 1.0, 'FBgn0001989': 1.0, 'FBgn0031436': 1.0, 'FBgn0030605': 1.0, 'FBgn0032511': 1.0, 'FBgn0011361': 1.0, 'FBgn0029888': 1.0, 'FBgn0052230': 1.0, 'FBgn0029971': 1.0, 'FBgn0021967': 1.0, 'FBgn0011455': 1.0, 'FBgn0058002': 1.0, 'FBgn0085468': 1.0, 'FBgn0261439': 1.0, 'FBgn0014028': 1.0, 'FBgn0037873': 1.0, 'FBgn0039112': 1.0, 'FBgn0034245': 1.0, 'FBgn0011227': 1.0, 'FBgn0260008': 1.0, 'FBgn0030733': 1.0, 'FBgn0021906': 1.0, 'FBgn0035600': 1.0, 'FBgn0038271': 1.0, 'F

In [8]:
# Open the loom file (HDF5) read-only. The context manager guarantees the
# handle is released even if one of the reads below raises.
with h5py.File(EXPRESSION_MTX_FNAME, 'r') as f:
    m = f["/matrix"][:, :]                              # expression values as an in-memory array
    gene_names = f["/row_attrs/Accession"].asstr()[:]   # row labels (gene accessions)
    cell_names = f["/col_attrs/CellID"].asstr()[:]      # column labels (cell IDs)

# Every matrix row needs a gene label and every column a cell label;
# fail here with a clear message rather than later when building the DataFrame.
assert m.shape == (len(gene_names), len(cell_names)), \
    "loom /matrix shape does not match its row/column attributes"

m

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [9]:
# Matrix dimensions; rows are labelled with gene_names and columns with cell_names below
m.shape

(23932, 23179)

In [10]:
# Cell identifiers read from /col_attrs/CellID (used as the DataFrame columns)
cell_names

array(['AAACCCAAGGTGATAT-1_ctrl', 'AAACCCACAAATAGCA-1_ctrl',
       'AAACCCACAACAAAGT-1_ctrl', ..., 'TTTGTTGTCCTTCAGC-1_ndkd',
       'TTTGTTGTCGAACGCC-1_ndkd', 'TTTGTTGTCGTGTTCC-1_ndkd'], dtype=object)

In [11]:
# Number of cells; should match the second dimension of m
len(cell_names)

23179

In [12]:
# Gene accessions read from /row_attrs/Accession (used as the DataFrame index)
gene_names

array(['FBgn0250732', 'FBti0060344', 'FBgn0286036', ..., 'FBgn0085506',
       'FBgn0259870', 'FBgn0085511'], dtype=object)

In [13]:
# Number of genes; should match the first dimension of m
len(gene_names)

23932

In [14]:
# Attach labels to the raw matrix: genes on the rows, cells on the columns
ex_matrix = pd.DataFrame(data=m, index=gene_names, columns=cell_names)
ex_matrix

Unnamed: 0,AAACCCAAGGTGATAT-1_ctrl,AAACCCACAAATAGCA-1_ctrl,AAACCCACAACAAAGT-1_ctrl,AAACCCACACTCATAG-1_ctrl,AAACCCACAGAGAGGG-1_ctrl,AAACCCACAGCCTATA-1_ctrl,AAACCCAGTACCTTCC-1_ctrl,AAACCCAGTACTGCCG-1_ctrl,AAACCCAGTGTTCGTA-1_ctrl,AAACCCAGTTCAGGTT-1_ctrl,...,TTTGTTGCAAGCACCC-1_ndkd,TTTGTTGCAGTTACCA-1_ndkd,TTTGTTGGTGAGATAT-1_ndkd,TTTGTTGGTGTCACAT-1_ndkd,TTTGTTGGTTAAGAAC-1_ndkd,TTTGTTGGTTTGGAGG-1_ndkd,TTTGTTGTCCGTTTCG-1_ndkd,TTTGTTGTCCTTCAGC-1_ndkd,TTTGTTGTCGAACGCC-1_ndkd,TTTGTTGTCGTGTTCC-1_ndkd
FBgn0250732,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBti0060344,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0286036,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0037409,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0027948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FBgn0267595,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0259864,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0085506,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0259870,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Signature genes that are actually present in the expression matrix.
# Sorted so that the list (and its printed form) is identical on every run;
# the iteration order of a set of strings can change between kernel sessions.
intersection = sorted(set(gene_names).intersection(data_excel["Gene_Ensembl_ID"]))
print(intersection)

['FBgn0035046', 'FBgn0020235', 'FBgn0016691', 'FBgn0029888', 'FBgn0052230', 'FBgn0040773', 'FBgn0010217', 'FBgn0034877', 'FBgn0040529', 'FBgn0031505', 'FBgn0032511', 'FBgn0017567', 'FBgn0250814', 'FBgn0030853', 'FBgn0037001', 'FBgn0036728', 'FBgn0030718', 'FBgn0033961', 'FBgn0058002', 'FBgn0037873', 'FBgn0021906', 'FBgn0011211', 'FBgn0035032', 'FBgn0031021', 'FBgn0031066', 'FBgn0034576', 'FBgn0038224', 'FBgn0034645', 'FBgn0034245', 'FBgn0039830', 'FBgn0021967', 'FBgn0017566', 'FBgn0040705', 'FBgn0015031', 'FBgn0047038', 'FBgn0025839', 'FBgn0028342', 'FBgn0260008', 'FBgn0031830', 'FBgn0014391', 'FBgn0039909', 'FBgn0029868', 'FBgn0038271', 'FBgn0010612', 'FBgn0035600', 'FBgn0031771', 'FBgn0019957', 'FBgn0039112', 'FBgn0032833', 'FBgn0029971', 'FBgn0085468', 'FBgn0016120', 'FBgn0031436', 'FBgn0033570', 'FBgn0031228', 'FBgn0016119', 'FBgn0019624', 'FBgn0031684', 'FBgn0014028', 'FBgn0011361', 'FBgn0266582', 'FBgn0030605', 'FBgn0011455', 'FBgn0011227', 'FBgn0030733', 'FBgn0261439', 'FBgn0019

In [16]:
# How many signature genes were found in the matrix (the worksheet is named '68 OXPHOS genes')
len(intersection)

68

In [17]:
# AUCell, step 1 of 2: build the ranking matrix (slow, ~2 min).
# ex_matrix is genes x cells, so it is flipped to cells x genes before ranking.
# The seed is fixed, presumably so the ranking is reproducible between runs.
rnk_mtx = create_rankings(ex_matrix.T, seed=42)

In [18]:
# 2. Enrichment (~5s)
# Scores the single signature `gs` against the ranking matrix. No AUC threshold
# is passed, so the library default applies.
# The result is indexed by (Cell, Regulon) and has one "AUC" column.
aucs = enrichment(rnk_mtx, gs)
aucs

Unnamed: 0_level_0,Unnamed: 1_level_0,AUC
Cell,Regulon,Unnamed: 2_level_1
AAACCCAAGGTGATAT-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.440722
AAACCCACAAATAGCA-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.138840
AAACCCACAACAAAGT-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.152084
AAACCCACACTCATAG-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.223340
AAACCCACAGAGAGGG-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.360865
...,...,...
TTTGTTGGTTTGGAGG-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.261794
TTTGTTGTCCGTTTCG-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.130940
TTTGTTGTCCTTCAGC-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.112057
TTTGTTGTCGAACGCC-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.108359


In [19]:
# Smallest AUC over all cells. Series.min() is vectorised and skips NaN, unlike
# the builtin min(); float() keeps the plain-number display.
float(aucs["AUC"].min())

0.011020197552705292

In [20]:
# Largest AUC over all cells. Series.max() is vectorised and skips NaN, unlike
# the builtin max(); float() keeps the plain-number display.
float(aucs["AUC"].max())

0.7085974740773502

In [21]:
# Write the AUC table to a tab-separated file, keeping the index columns
aucs.to_csv(
    "./data/Pan_neuro_integrated_68_Oxphos_AUCell_auc.tsv",
    sep="\t",
    index=True,
)