# Emma Pan Neuro (Control + ND75KD) - AUCell Oxphos scoring
*This script is to be run before the Rmd script, for computing the AUC values with pySCENIC* 

**Author:** Vincent Gardeux

**Date Created:** 03/04/2024
**Date Last Modified:** 24/10/2024

In [1]:
# import dependencies
import pandas as pd
import h5py
from ctxcore.genesig import GeneSignature
from pyscenic.aucell import create_rankings, enrichment

In [2]:
GENE_SIGNATURE_FNAME = './data/Oxphos_genes.xlsx'
GENE_SIGNATURE_SNAME = 'Flybase - GO Oxidative Phosphor'
EXPRESSION_MTX_FNAME = './data/Pan_neuro_integrated.loom' # Gene expression as (cell, gene) - matrix.

In [3]:
data_excel = pd.read_excel(io = GENE_SIGNATURE_FNAME, sheet_name = GENE_SIGNATURE_SNAME)
data_excel

Unnamed: 0,Complex,Complex_Name,Complex_Subunit,Gene_Symbol_clean,Gene_Symbol,Gene_Name,Gene_CG,Scaffold,Gene_Ensembl_ID
0,GO-BP,Oxidative phosphorylation,117,ND-13B,ND-13B,NADH dehydrogenase (ubiquinone) 13 kDa B subunit,CG6463,3L,FBgn0047038
1,GO-BP,Oxidative phosphorylation,117,ATPsynF,ATPsynF,"ATP synthase, subunit F",CG4692,2R,FBgn0035032
2,GO-BP,Oxidative phosphorylation,117,ND-51L1,ND-51L1,NADH dehydrogenase (ubiquinone) 51 kDa subunit...,CG11423,2R,FBgn0034251
3,GO-BP,Oxidative phosphorylation,117,mt:ND5,mt:ND5,mitochondrial NADH-ubiquinone oxidoreductase c...,CG34083,mitochondrion_genome,FBgn0013684
4,GO-BP,Oxidative phosphorylation,117,Pink1,Pink1,PTEN-induced putative kinase 1,CG4523,X,FBgn0029891
...,...,...,...,...,...,...,...,...,...
112,GO-BP,Oxidative phosphorylation,117,UQCR-14L,UQCR-14L,Ubiquinol-cytochrome c reductase 14 kDa subuni...,CG17856,3R,FBgn0039576
113,GO-BP,Oxidative phosphorylation,117,COX5A,COX5A,Cytochrome c oxidase subunit 5A,CG14724,3R,FBgn0019624
114,GO-BP,Oxidative phosphorylation,117,ND-75,ND-75,NADH dehydrogenase (ubiquinone) 75 kDa subunit,CG2286,X,FBgn0017566
115,GO-BP,Oxidative phosphorylation,117,Nipsnap,Nipsnap,Nipsnap,CG9212,X,FBgn0030724


In [4]:
#gs = GeneSignature('Flybase - GO - Oxidative phosphorylation', data_excel["Gene_Ensembl_ID"].tolist())
gs = GeneSignature('Flybase - GO - Oxidative phosphorylation', data_excel["Gene_Ensembl_ID"].tolist())
gs

GeneSignature(name='Flybase - GO - Oxidative phosphorylation', gene2weight=frozendict.frozendict({'FBgn0047038': 1.0, 'FBgn0035032': 1.0, 'FBgn0034251': 1.0, 'FBgn0013684': 1.0, 'FBgn0029891': 1.0, 'FBgn0260747': 1.0, 'FBgn0027794': 1.0, 'FBgn0033961': 1.0, 'FBgn0034877': 1.0, 'FBgn0031831': 1.0, 'FBgn0284256': 1.0, 'FBgn0266582': 1.0, 'FBgn0036728': 1.0, 'FBgn0034645': 1.0, 'FBgn0030092': 1.0, 'FBgn0085468': 1.0, 'FBgn0085736': 1.0, 'FBgn0030292': 1.0, 'FBgn0044419': 1.0, 'FBgn0019957': 1.0, 'FBgn0051477': 1.0, 'FBgn0037001': 1.0, 'FBgn0031392': 1.0, 'FBgn0013678': 1.0, 'FBgn0262952': 1.0, 'FBgn0260008': 1.0, 'FBgn0040773': 1.0, 'FBgn0039651': 1.0, 'FBgn0037860': 1.0, 'FBgn0037248': 1.0, 'FBgn0035046': 1.0, 'FBgn0040931': 1.0, 'FBgn0039331': 1.0, 'FBgn0040793': 1.0, 'FBgn0037579': 1.0, 'FBgn0010217': 1.0, 'FBgn0038871': 1.0, 'FBgn0034245': 1.0, 'FBgn0001989': 1.0, 'FBgn0031021': 1.0, 'FBgn0033523': 1.0, 'FBgn0029868': 1.0, 'FBgn0031830': 1.0, 'FBgn0036706': 1.0, 'FBgn0034007': 1.0, 'F

In [5]:
# Open Loom file in reading mode
f = h5py.File(EXPRESSION_MTX_FNAME, 'r')
m = f["/matrix"][:,:]
gene_names = f["/row_attrs/Accession"].asstr()[:]
cell_names = f["/col_attrs/CellID"].asstr()[:]
f.close()

m

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [6]:
m.shape

(23932, 23179)

In [7]:
cell_names

array(['AAACCCAAGGTGATAT-1_ctrl', 'AAACCCACAAATAGCA-1_ctrl',
       'AAACCCACAACAAAGT-1_ctrl', ..., 'TTTGTTGTCCTTCAGC-1_ndkd',
       'TTTGTTGTCGAACGCC-1_ndkd', 'TTTGTTGTCGTGTTCC-1_ndkd'], dtype=object)

In [8]:
len(cell_names)

23179

In [9]:
gene_names

array(['FBgn0250732', 'FBti0060344', 'FBgn0286036', ..., 'FBgn0085506',
       'FBgn0259870', 'FBgn0085511'], dtype=object)

In [10]:
len(gene_names)

23932

In [11]:
ex_matrix = pd.DataFrame(m, columns = cell_names, index = gene_names)
ex_matrix

Unnamed: 0,AAACCCAAGGTGATAT-1_ctrl,AAACCCACAAATAGCA-1_ctrl,AAACCCACAACAAAGT-1_ctrl,AAACCCACACTCATAG-1_ctrl,AAACCCACAGAGAGGG-1_ctrl,AAACCCACAGCCTATA-1_ctrl,AAACCCAGTACCTTCC-1_ctrl,AAACCCAGTACTGCCG-1_ctrl,AAACCCAGTGTTCGTA-1_ctrl,AAACCCAGTTCAGGTT-1_ctrl,...,TTTGTTGCAAGCACCC-1_ndkd,TTTGTTGCAGTTACCA-1_ndkd,TTTGTTGGTGAGATAT-1_ndkd,TTTGTTGGTGTCACAT-1_ndkd,TTTGTTGGTTAAGAAC-1_ndkd,TTTGTTGGTTTGGAGG-1_ndkd,TTTGTTGTCCGTTTCG-1_ndkd,TTTGTTGTCCTTCAGC-1_ndkd,TTTGTTGTCGAACGCC-1_ndkd,TTTGTTGTCGTGTTCC-1_ndkd
FBgn0250732,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBti0060344,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0286036,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0037409,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0027948,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FBgn0267595,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0259864,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0085506,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FBgn0259870,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Compute the intersection
intersection = list(set(gene_names).intersection(set(data_excel["Gene_Ensembl_ID"].tolist())))
print(intersection)

['FBgn0037873', 'FBgn0262842', 'FBgn0037544', 'FBgn0013684', 'FBgn0037001', 'FBgn0035046', 'FBgn0051477', 'FBgn0031228', 'FBgn0016691', 'FBgn0260747', 'FBgn0036568', 'FBgn0284248', 'FBgn0033274', 'FBgn0010217', 'FBgn0259722', 'FBgn0050093', 'FBgn0013681', 'FBgn0031771', 'FBgn0013685', 'FBgn0017566', 'FBgn0014028', 'FBgn0260008', 'FBgn0025839', 'FBgn0019624', 'FBgn0035032', 'FBgn0037506', 'FBgn0263911', 'FBgn0033570', 'FBgn0021906', 'FBgn0039651', 'FBgn0030975', 'FBgn0013678', 'FBgn0029971', 'FBgn0039576', 'FBgn0039112', 'FBgn0036706', 'FBgn0047038', 'FBgn0250814', 'FBgn0039802', 'FBgn0039909', 'FBgn0044419', 'FBgn0040529', 'FBgn0031021', 'FBgn0013683', 'FBgn0011227', 'FBgn0013676', 'FBgn0036830', 'FBgn0265342', 'FBgn0031684', 'FBgn0030853', 'FBgn0037248', 'FBgn0031505', 'FBgn0037860', 'FBgn0058002', 'FBgn0030605', 'FBgn0032511', 'FBgn0037172', 'FBgn0085201', 'FBgn0031263', 'FBgn0013675', 'FBgn0033961', 'FBgn0027794', 'FBgn0034251', 'FBgn0036728', 'FBgn0029891', 'FBgn0037579', 'FBgn0013

In [13]:
len(intersection)

117

In [14]:
# Run AUCell
# 1. Rankings (~2mn)
rnk_mtx = create_rankings(ex_matrix.transpose(), seed=42)

In [15]:
# 2. Enrichment (~5s)
aucs = enrichment(rnk_mtx, gs)
aucs

Unnamed: 0_level_0,Unnamed: 1_level_0,AUC
Cell,Regulon,Unnamed: 2_level_1
AAACCCAAGGTGATAT-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.263051
AAACCCACAAATAGCA-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.090718
AAACCCACAACAAAGT-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.087534
AAACCCACACTCATAG-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.137309
AAACCCACAGAGAGGG-1_ctrl,Flybase - GO - Oxidative phosphorylation,0.227185
...,...,...
TTTGTTGGTTTGGAGG-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.165128
TTTGTTGTCCGTTTCG-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.085534
TTTGTTGTCCTTCAGC-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.070190
TTTGTTGTCGAACGCC-1_ndkd,Flybase - GO - Oxidative phosphorylation,0.080072


In [16]:
min(aucs["AUC"])

0.016801262415297502

In [17]:
max(aucs["AUC"])

0.450685117351784

In [18]:
# Save DataFrame to TSV file
aucs.to_csv("./data/Pan_neuro_integrated_117_Oxphos_AUCell_auc.tsv", sep='\t', index=True)