# Trm Signature Enrichment with AUCell in Decoupler
- This notebook contains the source code for calculating AUCell scores for previously published tissue-resident memory T cell (Trm) signatures using decoupler, as shown in Figure S4. Please refer to the figure legend and manuscript for signature references.
- The genes in each signature of interest are provided in an Excel file named **"Signature_Gene_Lists"**. Load this file into the object **dfSignatureTable**.

In [None]:
import numpy as np #v1.26.4
import pandas as pd #v2.2.0
import anndata #v0.10.5.post1
import scanpy as sc #v1.9.8
import decoupler as dc #v1.6.0
import pybiomart #v0.2.0
import random  

# One fixed seed shared by Python's and NumPy's global random generators,
# so that any step drawing from them gives the same result on every run.
c_iSeed = 6161904
random.seed(c_iSeed)
np.random.seed(c_iSeed)

In [None]:
# Load the filtered AnnData object from its HDF5 file.
# Replace ___ with path to file "02_final_filtered_object.hdf5"
final_filtered_object = anndata.read_h5ad(___)

In [None]:
# Load the signature gene lists into a DataFrame.
# Later cells read its 'geneset' and 'genesymbol' columns.
# Replace ___ with path to file "Signature_Gene_Lists.xlsx"
dfSignatureTable = pd.read_excel(io=___)

In [None]:
# Fetch rhesus-to-human homolog mappings from Ensembl BioMart
# NOTE(review): this queries the live Ensembl host, so the mapping may change
# between Ensembl releases -- consider pinning an archive host; TODO confirm.
bmRhesus = pybiomart.Dataset(host='http://www.ensembl.org', name='mmulatta_gene_ensembl')
dfAttributes = bmRhesus.list_attributes()

# Exploratory lookup of the attributes whose name mentions both "olog" and
# "sapiens". The result is not stored; it is only visible when this
# expression is evaluated on its own.
is_homolog_attribute = dfAttributes['name'].str.contains("olog")
is_human_attribute = dfAttributes['name'].str.contains("sapiens")
dfAttributes[is_homolog_attribute & is_human_attribute]

# One row per (rhesus gene, human homolog) pair.
homolog_attributes = [
    'ensembl_gene_id',
    'hsapiens_homolog_ensembl_gene',
    'hsapiens_homolog_associated_gene_name',
]
# Rename to the column names used by the rest of the notebook.
# Presumably "Gene stable ID" / "Human gene name" are the display names BioMart
# returns for the first and third attributes above -- verify against the query output.
column_renames = {
    "Gene stable ID": "gene_ids",
    "Human gene name": "genesymbol",
}
dfEnsembl_To_Human = bmRhesus.query(attributes=homolog_attributes).rename(columns=column_renames)

In [None]:
# Find Signature Gene Symbols that are Absent from the annData Object
gene_list = final_filtered_object.var.index.tolist()

# True where the signature's gene symbol is already a variable name in the object.
match = dfSignatureTable['genesymbol'].isin(gene_list)

# Attach the Ensembl homolog rows to every signature gene. The inner join
# drops signature genes whose symbol has no row in the Ensembl table.
missing_genes = (
    dfSignatureTable
    .assign(match=match)
    .merge(dfEnsembl_To_Human, on='genesymbol', how='inner')
    .sort_values(by=['genesymbol'])
)

# Gene IDs associated with the signature symbols that were NOT found in the object.
missing_gene_ids = missing_genes.loc[~missing_genes['match'], 'gene_ids'].tolist()

In [None]:
# Find which missing signature genes are stored in the object under their gene ID,
# and gather the Ensembl rows (gene ID + human symbol) for them
object_var_index = final_filtered_object.var.index
unmatched_gene_list = object_var_index[object_var_index.isin(missing_gene_ids)].tolist()

unmatched_gene_table = (
    dfEnsembl_To_Human
    .loc[dfEnsembl_To_Human['gene_ids'].isin(unmatched_gene_list)]
    .sort_values(by=['genesymbol'])
)

In [None]:
# Keep one row per gene ID, then build parallel lists of IDs and symbols
unmatched_gene_table = (
    unmatched_gene_table
    .drop_duplicates(subset=['gene_ids'])
    # XCL1 is a duplicate, so its row is relabelled as XCL2
    .assign(genesymbol=lambda table: table['genesymbol'].replace('XCL1', 'XCL2'))
)
gene_id_list = unmatched_gene_table['gene_ids'].to_list()
gene_symbol_list = unmatched_gene_table['genesymbol'].to_list()

In [None]:
# Replace Gene IDs with Gene Symbols in the Variable Index
# Work on a copy so final_filtered_object keeps its original variable names.
enrichment_annData = final_filtered_object.copy()

# Lookup from gene ID to the symbol it should be renamed to.
symbol_by_gene_id = dict(zip(gene_id_list, gene_symbol_list))

# Variables with an entry in the lookup take the mapped symbol; all others keep their name.
enrichment_annData.var['gene_name'] = [
    symbol_by_gene_id.get(var_name, var_name)
    for var_name in enrichment_annData.var.index
]

enrichment_annData.var = enrichment_annData.var.set_index('gene_name')

In [None]:
# Sanity check: list any missing gene IDs that are still used as variable names
# after renaming (every ID listed here was not renamed)
renamed_var_index = enrichment_annData.var.index
unmatched_gene_list = renamed_var_index[renamed_var_index.isin(missing_gene_ids)].tolist()
unmatched_gene_list

In [None]:
# Collect the distinct signature names, in order of first appearance
signatures = dfSignatureTable['geneset'].drop_duplicates().tolist()
signatures

In [None]:
# Run DecoupleR AUCell Analysis
#
# Scores every signature in `signatures` with AUCell, one signature per call,
# and stores each result as an .obs column named "<signature>_AUCell_Estimate".

all_annData_enrichment = enrichment_annData.copy()

# Drop repeated (geneset, genesymbol) pairs once, up front, rather than on
# every loop iteration; the per-signature tables are identical either way.
signature_net = dfSignatureTable.drop_duplicates(subset=['geneset', 'genesymbol'])

for x in signatures:

    geneset_table = signature_net[signature_net['geneset'] == x]

    # min_n=0 disables the minimum-targets-per-source filter and use_raw=False
    # scores the main matrix rather than .raw (see the decoupler run_aucell docs).
    dc.run_aucell(mat=all_annData_enrichment, net=geneset_table, source='geneset', target='genesymbol',
                  verbose=False, min_n=0, use_raw=False)

    # Each call leaves its result in .obsm['aucell_estimate'], so copy this
    # signature's scores out before the next iteration runs.
    aucell_score = pd.DataFrame(all_annData_enrichment.obsm['aucell_estimate'], index=all_annData_enrichment.obs.index)

    # Write the score column directly under its final name. The previous
    # concat + rename(columns={x: ...}) added a duplicate column every time the
    # cell was re-run and would also rename any pre-existing .obs column called
    # `x`; direct assignment overwrites in place and touches nothing else.
    all_annData_enrichment.obs[x + "_AUCell_Estimate"] = aucell_score[x]

In [None]:
# Display the per-cell metadata table, which now holds one
# "<signature>_AUCell_Estimate" column for each scored signature.
all_annData_enrichment.obs