In [1]:
import pandas as pd

from datasets import load_all_orfs

In [2]:
# Load the filtered functional predictions, the ORF table, and the table of
# current gene names (the filename is dated 2025-10-07).
df = pd.read_csv('../supplementary_tables/functional_predictions_filtered.tsv', sep='\t')
all_orfs = load_all_orfs()
updated = pd.read_csv('../data/external/all_yeast_gene_names_2025-10-07.tsv',
                      sep='\t')

# Index the current annotations by systematic name once, then copy each
# annotation column onto all_orfs by matching on orf_name.
updated_by_orf = updated.set_index('Gene > Systematic Name')
for new_column, source_column in [
    ('gene_name_current', 'Gene > Standard Name'),
    ('gene_name_current_full', 'Gene > Name'),
    ('description_current', 'Gene > Mod Description'),
]:
    all_orfs[new_column] = all_orfs['orf_name'].map(updated_by_orf[source_column])


def _orf_lookup(column):
    """Return a dict from orf_name to `column` in all_orfs, skipping rows where `column` is missing."""
    annotated = all_orfs.dropna(subset=[column])
    return annotated.set_index('orf_name')[column].to_dict()


gene_names = _orf_lookup('gene_name')
gene_names_current = _orf_lookup('gene_name_current')
gene_names_current_full = _orf_lookup('gene_name_current_full')
description_current = _orf_lookup('description_current')

# Attach each annotation to the predictions table, keyed on orf_name.
for annotation_column, lookup in [
    ('gene_name', gene_names),
    ('gene_name_current', gene_names_current),
    ('gene_name_current_full', gene_names_current_full),
    ('description_current', description_current),
]:
    df[annotation_column] = df['orf_name'].map(lookup)

# A row is flagged when a current name exists and it differs from the name in
# all_orfs. A missing original name also compares as different (NaN != str),
# so previously unnamed ORFs are flagged too.
has_current_name = df['gene_name_current'].notnull()
name_differs = df['gene_name'] != df['gene_name_current']
df['updated_name'] = has_current_name & name_differs

In [3]:
# List, one per line and in sorted order, each distinct current gene name
# among the rows flagged as updated.
print('\n'.join(
    df[df['updated_name']]['gene_name_current'].drop_duplicates().sort_values()
))

AAN1
CAL4
CSA1
PEX35
VPR1


In [4]:
# Show the predictions for a single gene, keeping only rows with z-score > 5
# and at least two interactors carrying the GO term, strongest first.
gene = 'CSA1'
summary_columns = [
    'GO_ID',
    'GO_term_name',
    'n_interactors_with_GO_term',
    'z-score',
    'effect_size',
    'dataset',
    'n_proteins_with_GO_term_in_network',
]
is_selected = (
    (df['gene_name_current'] == gene)
    & (df['z-score'] > 5)
    & (df['n_interactors_with_GO_term'] >= 2)
)
df.loc[is_selected, summary_columns].sort_values(by='z-score', ascending=False)

Unnamed: 0,GO_ID,GO_term_name,n_interactors_with_GO_term,z-score,effect_size,dataset,n_proteins_with_GO_term_in_network
114,GO:0016021,membrane,3,6.09698,1.86202,Y2H-union-25,19


In [5]:
# For every gene flagged as updated, print its full name, its description and
# its predictions (z-score > 5, at least two interactors with the GO term).
# Genes with no prediction passing those thresholds are skipped.
updated_genes = df.loc[df['updated_name'], 'gene_name_current'].sort_values().unique()
for gene in updated_genes:
    gene_rows = df.loc[df['gene_name_current'] == gene]
    passes_thresholds = (
        (gene_rows['z-score'] > 5)
        & (gene_rows['n_interactors_with_GO_term'] >= 2)
    )
    predictions = gene_rows.loc[passes_thresholds].sort_values(by='z-score', ascending=False)
    if predictions.empty:
        continue
    # Two blank lines separate consecutive gene reports.
    print('\n')
    print(gene, '–', gene_rows['gene_name_current_full'].iloc[0])
    print(gene_rows['description_current'].iloc[0])
    print()
    print('Predictions:')
    print(predictions)



AAN1 – Actin, Aging and Nutrient modulator
Protein of unknown function; regulates actin cable stability and branched-chain amino acid (BCAA) metabolism; involved in mitochondrial quality control and longevity; proposed to be involved in resistance to streptozotocin and camptothecin; localizes to punctate cytosolic structures 

Predictions:
          GO_ID orf_name  n_proteins_with_GO_term_in_network  \
132  GO:0034553  YKL075C                                   2   
36   GO:0034553  YKL075C                                   2   
60   GO:0000480  YKL075C                                  12   
75   GO:0003724  YKL075C                                   7   
58   GO:0000447  YKL075C                                  19   
83   GO:0005759  YKL075C                                  31   
18   GO:0005759  YKL075C                                  23   

     n_interactors_with_GO_term  mean_in_random_networks  \
132                           2                    0.002   
36                     

In [6]:
# Count how many flagged prediction rows carry each current description.
df[df['updated_name']]['description_current'].value_counts()

Regulator of peroxisome abundance; peroxisomal membrane protein, remote homolog to several curvature-generating human proteins; functionally interacts with vesicle-budding-inducing ADP-ribosylation factor Arf1p                                                                                                                                                                                      13
Protein of unknown function                                                                                                                                                                                                                                                                                                                                                                              7
Protein of unknown function; regulates actin cable stability and branched-chain amino acid (BCAA) metabolism; involved in mitochondrial quality control and longevity; proposed to be involved in resistance to st

In [7]:
# Display the flagged rows grouped by current gene name (A-Z), with the
# highest z-score first within each gene.
(
    df[df['updated_name']]
    .sort_values(by=['gene_name_current', 'z-score'], ascending=[True, False])
)

Unnamed: 0,GO_ID,orf_name,n_proteins_with_GO_term_in_network,n_interactors_with_GO_term,mean_in_random_networks,std_in_random_networks,p-value,z-score,effect_size,dataset,GO_term_name,gene_name,gene_name_current,gene_name_current_full,description_current,updated_name
132,GO:0034553,YKL075C,2,2,0.002,0.044677,0.0,44.7214,1.90865,Y2H-union-25,mitochondrial respiratory chain complex II ass...,,AAN1,"Actin, Aging and Nutrient modulator",Protein of unknown function; regulates actin c...,True
36,GO:0034553,YKL075C,2,2,0.005,0.070534,0.0,28.2844,1.85393,YeRI,mitochondrial respiratory chain complex II ass...,,AAN1,"Actin, Aging and Nutrient modulator",Protein of unknown function; regulates actin c...,True
60,GO:0000480,YKL075C,12,2,0.031,0.173318,0.0,11.3606,1.62236,Y2H-union-25,endonucleolytic cleavage in 5'-ETS of tricistr...,,AAN1,"Actin, Aging and Nutrient modulator",Protein of unknown function; regulates actin c...,True
75,GO:0003724,YKL075C,7,2,0.035,0.189143,0.0005,10.389,1.58671,Y2H-union-25,RNA helicase activity,,AAN1,"Actin, Aging and Nutrient modulator",Protein of unknown function; regulates actin c...,True
58,GO:0000447,YKL075C,19,2,0.065,0.254509,0.001,7.60286,1.42598,Y2H-union-25,endonucleolytic cleavage in ITS1 to separate S...,,AAN1,"Actin, Aging and Nutrient modulator",Protein of unknown function; regulates actin c...,True
83,GO:0005759,YKL075C,31,2,0.103,0.313673,0.0015,6.04769,1.26965,Y2H-union-25,mitochondrial matrix,,AAN1,"Actin, Aging and Nutrient modulator",Protein of unknown function; regulates actin c...,True
18,GO:0005759,YKL075C,23,2,0.125,0.365205,0.006,5.1341,1.14459,YeRI,mitochondrial matrix,,AAN1,"Actin, Aging and Nutrient modulator",Protein of unknown function; regulates actin c...,True
121,GO:0030015,YJR011C,9,2,0.073,0.260137,0.0,7.40765,1.40673,Y2H-union-25,CCR4-NOT core complex,,CAL4,Caf130-Associated regulator of RpL4,Accessory component of the Ccr4-NOT complex; G...,True
69,GO:0002098,YJR011C,15,2,0.123,0.346224,0.003,5.42135,1.18455,Y2H-union-25,tRNA wobble uridine modification,,CAL4,Caf130-Associated regulator of RpL4,Accessory component of the Ccr4-NOT complex; G...,True
11,GO:0002098,YJR011C,14,2,0.131,0.354738,0.003,5.26868,1.15952,YeRI,tRNA wobble uridine modification,,CAL4,Caf130-Associated regulator of RpL4,Accessory component of the Ccr4-NOT complex; G...,True


In [8]:
# Write the flagged rows to a tab-separated file without the index, sorted by
# current gene name (A-Z) and then by descending z-score.
(
    df[df['updated_name']]
    .sort_values(by=['gene_name_current', 'z-score'], ascending=[True, False])
    .to_csv('../output/functional_predictions_with_names.tsv', sep='\t', index=False)
)