In [1]:
from goatools import obo_parser
from zipfile import ZipFile

import os
import gzip
import graco
import shutil
import requests
import numpy as np
import pandas as pd
import networkx as nx
import Bio.UniProt.GOA as GOA

In [2]:
# Cap how much of a DataFrame is rendered so cell outputs stay readable.
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 50)

# Root of the data tree; every other location below is derived from it.
# NOTE: the root ends with "/" and each sub-path starts with "/", so the
# derived paths contain a double slash.
DATA_DIRECTORY = "/media/clusterduck123/joe/data/"
RAW_DATA_DIRECTORY = DATA_DIRECTORY + "/raw_data"
YEAST_DIRECTORY = DATA_DIRECTORY + "/processed_data/yeast"
NETWORK_DIRECTORY = YEAST_DIRECTORY + "/networks"
ANNOTATION_DIRECTORY = YEAST_DIRECTORY + "/annotations"

# Downloads

## COEXPRESdb

In [3]:
# Make sure the raw-data folder exists before anything is downloaded into it.
# The folder to create is RAW_DATA_DIRECTORY; exist_ok=True makes the call a
# no-op when it is already there.
os.makedirs(RAW_DATA_DIRECTORY, exist_ok=True)

# Archive name, download location and local target of the co-expression data.
COEXPRESdb_FILENAME = "Sce-u.v18-11.G5879-S4798.combat_pca_subagging.mrgeo.d.zip"
COEXPRESdb_URL = "https://coxpresdb.jp/download/Sce-u.c2-0/coex/"
COEXPRESdb_FILEPATH = f"{RAW_DATA_DIRECTORY}/{COEXPRESdb_FILENAME}"

In [4]:
# Download the COEXPRESdb archive into the raw-data folder.
# NOTE(review): the request URL does not include COEXPRESdb_FILENAME --
# confirm that this endpoint serves the archive directly.
r = requests.get(f"{COEXPRESdb_URL}", allow_redirects=True)
# Fail loudly on an HTTP error instead of saving an error page as the archive.
r.raise_for_status()
with open(COEXPRESdb_FILEPATH, 'wb') as f:
    f.write(r.content)

In [8]:
# Unpack the whole downloaded archive into the raw-data folder.

# Name of the archive without its ".zip" suffix (not used within this cell).
COEXPRESdb_FOLDER = "Sce-u.v18-11.G5879-S4798.combat_pca_subagging.mrgeo.d"
with ZipFile(COEXPRESdb_FILEPATH, mode='r') as archive:
    archive.extractall(path=RAW_DATA_DIRECTORY)

## BioGRID

In [3]:
# Make sure the raw-data folder exists before anything is downloaded into it.
# The folder to create is RAW_DATA_DIRECTORY; exist_ok=True makes the call a
# no-op when it is already there.
os.makedirs(RAW_DATA_DIRECTORY, exist_ok=True)

# BioGRID release to fetch; the archive name and URL are derived from it.
VERSION = "3.5.181"
BioGRID_FILENAME = f"BIOGRID-ORGANISM-{VERSION}.tab2.zip"
BioGRID_URL = f"https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-{VERSION}"
BioGRID_FILEPATH = f"{RAW_DATA_DIRECTORY}/{BioGRID_FILENAME}"

In [4]:
# Download the BioGRID organism archive for the configured release.
r = requests.get(f"{BioGRID_URL}/{BioGRID_FILENAME}", allow_redirects=True)
# Fail loudly on an HTTP error instead of saving an error page as the archive.
r.raise_for_status()
with open(BioGRID_FILEPATH, 'wb') as f:
    f.write(r.content)

### select yeast

In [5]:
# Extract only the Saccharomyces cerevisiae member of the BioGRID archive.
with ZipFile(BioGRID_FILEPATH, 'r') as archive:
    yeast_members = [member for member in archive.namelist()
                     if 'cerevisiae' in member.lower()]
    # Exactly one matching member is expected; the unpacking below raises
    # ValueError if there are none or several.
    [BioGRID_sc_FILENAME] = yeast_members
    archive.extract(BioGRID_sc_FILENAME, RAW_DATA_DIRECTORY)

## SGD annotations

In [6]:
# Remote location, file name and local target of the gzipped SGD GAF file.
SGD_URL = "http://current.geneontology.org/annotations"
SGD_FILENAME = "sgd.gaf.gz"
SGD_FILEPATH = RAW_DATA_DIRECTORY + "/" + SGD_FILENAME

In [7]:
# Download the gzipped SGD annotation file.
r = requests.get(f"{SGD_URL}/{SGD_FILENAME}", allow_redirects=True)
# Fail loudly on an HTTP error instead of saving an error page as the GAF file.
r.raise_for_status()
with open(SGD_FILEPATH, 'wb') as f:
    f.write(r.content)

## GO basic

In [10]:
# Remote location, file name and local target of the GO "basic" ontology.
GO_URL = "http://purl.obolibrary.org/obo/go"
GO_FILENAME = "go-basic.obo"
GO_FILEPATH = RAW_DATA_DIRECTORY + "/" + GO_FILENAME

In [11]:
# Download the GO ontology file.
r = requests.get(f"{GO_URL}/{GO_FILENAME}", allow_redirects=True)
# Fail loudly on an HTTP error instead of saving an error page as the ontology.
r.raise_for_status()
with open(GO_FILEPATH, 'wb') as f:
    f.write(r.content)

## DEG

In [3]:
# DEG archive name, its full download URL and the local target path.
DEG_FILENAME = "deg-e-15.2.zip"
DEG_URL = "http://origin.tubic.org/deg/public/static/download/" + DEG_FILENAME
DEG_FILEPATH = RAW_DATA_DIRECTORY + "/" + DEG_FILENAME

In [4]:
# Download the DEG archive (DEG_URL already contains the file name).
r = requests.get(f"{DEG_URL}", allow_redirects=True)
# Fail loudly on an HTTP error instead of saving an error page as the archive.
r.raise_for_status()
with open(DEG_FILEPATH, 'wb') as f:
    f.write(r.content)

### select yeast

In [5]:
# Extract the DEG annotation table from the downloaded archive.
DEG_ANN_FILENAME = 'degannotation-e.dat'
with ZipFile(DEG_FILEPATH, 'r') as archive:
    archive.extract(DEG_ANN_FILENAME, RAW_DATA_DIRECTORY)

# Full PPI (BioGRID)

### Load BioGRID as dataframe

In [3]:
VERSION = "3.5.181"

# Location of the extracted yeast interaction table (tab-separated).
organism_FILENAME = f"BIOGRID-ORGANISM-Saccharomyces_cerevisiae_S288c-{VERSION}.tab2.txt"
organism_FILEPATH = f"{RAW_DATA_DIRECTORY}/{organism_FILENAME}"

# Read the first 24 columns except positions 1, 2 and 18: the Entrez-ID and
# score columns are left out because they hold mixed datatypes.
skipped_columns = {1, 2, 18}
kept_columns = [position for position in range(24) if position not in skipped_columns]
BioGRID_df = pd.read_csv(organism_FILEPATH, sep='\t', usecols=kept_columns)

### Filter for evidence code

In [4]:
# Experimental systems accepted as (reliable) physical interaction evidence.
EXPERIMENTAL_SYSTEM = {
    'Two-hybrid',
    'Affinity Capture-Luminescence',
    'Affinity Capture-MS',
    'Affinity Capture-RNA',
    'Affinity Capture-Western',
}

# Keep only the rows whose experimental system is in the accepted set.
is_accepted_system = BioGRID_df['Experimental System'].isin(EXPERIMENTAL_SYSTEM)
physical_interaction_df = BioGRID_df[is_accepted_system]

# Sanity check: every remaining row must be typed as a physical interaction.
EXPERIMENTAL_SYSTEM_TYPE = {'physical'}
remaining_system_types = physical_interaction_df['Experimental System Type']
assert remaining_system_types.isin(EXPERIMENTAL_SYSTEM_TYPE).all()

### Define PPI

In [5]:
# Build an undirected simple graph on BioGRID IDs: repeated interactions
# collapse into one edge, and self-interactions are dropped afterwards.
PPI_nx = nx.from_pandas_edgelist(
    physical_interaction_df,
    source='BioGRID ID Interactor A',
    target='BioGRID ID Interactor B',
)
self_loops = list(nx.selfloop_edges(PPI_nx))
PPI_nx.remove_edges_from(self_loops)

### Summary

In [6]:
# Report the size and density of the full PPI network, one line per statistic.
ppi_summary = (
    f"PPI nodes  : {PPI_nx.number_of_nodes():>6d}",
    f"PPI edges  : {PPI_nx.number_of_edges():>6d}",
    f"PPI density: {nx.density(PPI_nx):.4f}",
)
print(*ppi_summary, sep="\n")

PPI nodes  :   6715
PPI edges  :  96618
PPI density: 0.0043


### Save

In [7]:
# Create the output folder if needed; exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(NETWORK_DIRECTORY, exist_ok=True)

# Write one "node node" line per edge (data=False omits edge attributes).
nx.write_edgelist(PPI_nx, f"{NETWORK_DIRECTORY}/full_PPI_BioGRID.txt", data=False)

# Systematic PPI (BioGRID)

### Define systematic dictionary

In [20]:
# Load the BioGRID identifier table and keep only the systematic-name rows.
identifier_path = f"{RAW_DATA_DIRECTORY}/BIOGRID_SC_IDENTIFIERS-3.5.181.csv"
identifier_df = pd.read_csv(identifier_path, index_col=0)
identifier_df = identifier_df.loc[identifier_df['IDENTIFIER_TYPE'] == 'SYSTEMATIC NAME']

# Lookup series: BioGRID ID (index) -> systematic name (value).
identifier_s = pd.Series(data=identifier_df['IDENTIFIER_VALUE'].values,
                         index=identifier_df['BIOGRID_ID'])

### Define systematic PPI

In [21]:
# Reload the full PPI with integer BioGRID IDs as node labels.
PPI_nx = nx.read_edgelist(f"{NETWORK_DIRECTORY}/full_PPI_BioGRID.txt", nodetype=int)

# Translate BioGRID IDs to systematic names via the lookup series.
relabelled_PPI_nx = nx.relabel_nodes(PPI_nx, identifier_s)

# Keep only the nodes whose label starts with 'Y' ...
nodes = (node for node in relabelled_PPI_nx.nodes() if str(node).startswith('Y'))
named_PPI_nx = nx.induced_subgraph(relabelled_PPI_nx, nodes)

# ... and of those only the largest connected component.
largest_component = max(nx.connected_components(named_PPI_nx), key=len)
sys_PPI_nx = nx.induced_subgraph(named_PPI_nx, largest_component)

### Summary

In [22]:
# Print the same three statistics for the full and the systematic network,
# separated by a rule.
labelled_graphs = [("      full PPI", PPI_nx), ("systematic PPI", sys_PPI_nx)]
for position, (label, graph) in enumerate(labelled_graphs):
    if position:
        print('------------------------------')
    print(f"{label} nodes  : {graph.number_of_nodes():>6d}")
    print(f"{label} edges  : {graph.number_of_edges():>6d}")
    print(f"{label} density: {nx.density(graph):.4f}")

      full PPI nodes  :   6715
      full PPI edges  :  96618
      full PPI density: 0.0043
------------------------------
systematic PPI nodes  :   5726
systematic PPI edges  :  92974
systematic PPI density: 0.0057


### Save

In [23]:
# Create the output folder if needed; exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(NETWORK_DIRECTORY, exist_ok=True)

# Write one "node node" line per edge (data=False omits edge attributes).
nx.write_edgelist(sys_PPI_nx, f"{NETWORK_DIRECTORY}/systematic_PPI_BioGRID.txt", data=False)

# Official PPI (BioGRID)

### Define official dictionary

In [14]:
# Load the BioGRID identifier table and keep only the official-symbol rows.
identifier_path = f"{RAW_DATA_DIRECTORY}/BIOGRID_SC_IDENTIFIERS-3.5.181.csv"
identifier_df = pd.read_csv(identifier_path, index_col=0)
is_official_symbol = identifier_df['IDENTIFIER_TYPE'] == 'OFFICIAL SYMBOL'
identifier_df = identifier_df[is_official_symbol]

# Lookup series: BioGRID ID (index) -> official symbol (value).
identifier_s = pd.Series(data=identifier_df['IDENTIFIER_VALUE'].values,
                         index=identifier_df['BIOGRID_ID'])

### Define official PPI

In [15]:
# Reload the full PPI with integer BioGRID IDs as node labels.
PPI_nx = nx.read_edgelist(f"{NETWORK_DIRECTORY}/full_PPI_BioGRID.txt", nodetype=int)

# Translate BioGRID IDs to official symbols via the lookup series.
relabelled_PPI_nx = nx.relabel_nodes(PPI_nx, identifier_s)

# Keep only the nodes that now carry a string label ...
nodes = (node for node in relabelled_PPI_nx.nodes() if isinstance(node, str))
named_PPI_nx = nx.induced_subgraph(relabelled_PPI_nx, nodes)

# ... and of those only the largest connected component.
largest_component = max(nx.connected_components(named_PPI_nx), key=len)
off_PPI_nx = nx.induced_subgraph(named_PPI_nx, largest_component)

### Summary

In [17]:
# Print the same three statistics for the full and the official-symbol
# network, separated by a rule.
labelled_graphs = [("    full PPI", PPI_nx), ("official PPI", off_PPI_nx)]
for position, (label, graph) in enumerate(labelled_graphs):
    if position:
        print('------------------------------')
    print(f"{label} nodes  : {graph.number_of_nodes():>6d}")
    print(f"{label} edges  : {graph.number_of_edges():>6d}")
    print(f"{label} density: {nx.density(graph):.4f}")

    full PPI nodes  :   6715
    full PPI edges  :  96618
    full PPI density: 0.0043
------------------------------
official PPI nodes  :   5881
official PPI edges  :  93805
official PPI density: 0.0054


### Save

In [18]:
# Create the output folder if needed; exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(NETWORK_DIRECTORY, exist_ok=True)

# Write one "node node" line per edge (data=False omits edge attributes).
nx.write_edgelist(off_PPI_nx, f"{NETWORK_DIRECTORY}/official_PPI_BioGRID.txt", data=False)

# Full CoEx (COEXPRESdb)

### Define CoEx

In [3]:
# Folder with one file per source gene, and the fraction of each file's lines
# that is turned into edges.
COEXPRESdb_DIRPATH = f"{RAW_DATA_DIRECTORY}/Sce-u.v18-11.G5879-S4798.combat_pca_subagging.mrgeo.d/"
TOP_PERCENTAGE = 0.01

# Map every source (file name) to the list of its selected targets.
edge_dict = dict()
# os.listdir returns entries in arbitrary order; sorting makes the node and
# edge order of the resulting graph (and of the saved edge list) reproducible.
for source in sorted(os.listdir(COEXPRESdb_DIRPATH)):
    with open(os.path.join(COEXPRESdb_DIRPATH, source), 'r') as f:
        content = f.readlines()
    n_selected = int(len(content) * TOP_PERCENTAGE)
    # The first line is skipped -- presumably a header or the gene's own
    # entry; TODO confirm against the file format. The target is the first
    # whitespace-separated field of each selected line.
    # Stored as a real list (not a one-shot iterator) so edge_dict remains
    # usable after the graph has been built.
    edge_dict[source] = [line.split()[0] for line in content[1:n_selected + 1]]

# Undirected graph: an edge exists if either gene lists the other as a target.
CoEx_nx = nx.Graph()
CoEx_nx.add_edges_from((source, target)
                       for source, target_list in edge_dict.items()
                       for target in target_list)

### Summary

In [4]:
# Report the size and density of the full co-expression network.
coex_summary = (
    f"CoEx nodes  : {CoEx_nx.number_of_nodes():>6d}",
    f"CoEx edges  : {CoEx_nx.number_of_edges()}",
    f"CoEx density: {nx.density(CoEx_nx):.4f}",
)
print(*coex_summary, sep="\n")

CoEx nodes  :   5879
CoEx edges  : 231974
CoEx density: 0.0134


### Save

In [5]:
# Create the output folder if needed; exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(NETWORK_DIRECTORY, exist_ok=True)

# Write one "node node" line per edge (data=False omits edge attributes).
nx.write_edgelist(CoEx_nx, f"{NETWORK_DIRECTORY}/full_CoEx_COEXPRESdb.txt", data=False)

# Systematic CoEx (COEXPRESdb)

### Define systematic dictionary

In [3]:
# Identifier types needed to translate Entrez gene IDs into systematic names.
identifiers = {'SYSTEMATIC NAME', 'ENTREZ_GENE'}
identifier_path = f"{RAW_DATA_DIRECTORY}/BIOGRID_SC_IDENTIFIERS-3.5.181.csv"
identifier_df = pd.read_csv(identifier_path, index_col=0)
identifier_df = identifier_df[identifier_df['IDENTIFIER_TYPE'].isin(identifiers)]

# Reshape from long to wide: one row per BioGRID ID, one column per
# identifier type.
pivoted_df = identifier_df.pivot(index='BIOGRID_ID', columns='IDENTIFIER_TYPE')
unmelted_identifier_df = pivoted_df['IDENTIFIER_VALUE']

# Lookup series: Entrez gene ID (index) -> systematic name (value).
systematic_names = unmelted_identifier_df['SYSTEMATIC NAME'].values
entrez_ids = unmelted_identifier_df['ENTREZ_GENE']
identifier_s = pd.Series(data=systematic_names, index=entrez_ids)

### Define systematic CoEx

In [9]:
# Reload the full co-expression network (node labels are read as strings).
CoEx_nx = nx.read_edgelist(f"{NETWORK_DIRECTORY}/full_CoEx_COEXPRESdb.txt")

# Translate node labels to systematic names via the lookup series.
relabelled_CoEx_nx = nx.relabel_nodes(CoEx_nx, identifier_s)

# Keep only the nodes whose label starts with 'Y' ...
nodes = (node for node in relabelled_CoEx_nx.nodes() if str(node).startswith('Y'))
named_CoEx_nx = nx.induced_subgraph(relabelled_CoEx_nx, nodes)

# ... and of those only the largest connected component.
largest_component = max(nx.connected_components(named_CoEx_nx), key=len)
sys_CoEx_nx = nx.induced_subgraph(named_CoEx_nx, largest_component)

### Summary

In [12]:
# Print the same three statistics for the full and the systematic
# co-expression network, separated by a rule.
labelled_graphs = [("      full CoEx", CoEx_nx), ("systematic CoEx", sys_CoEx_nx)]
for position, (label, graph) in enumerate(labelled_graphs):
    if position:
        print('------------------------------')
    print(f"{label} nodes  : {graph.number_of_nodes():>6d}")
    print(f"{label} edges  : {graph.number_of_edges():>6d}")
    print(f"{label} density: {nx.density(graph):.4f}")

      full CoEx nodes  :   5879
      full CoEx edges  : 231974
      full CoEx density: 0.0134
------------------------------
systematic CoEx nodes  :   5696
systematic CoEx edges  : 222695
systematic CoEx density: 0.0137


### Save

In [16]:
# Create the output folder if needed; exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(NETWORK_DIRECTORY, exist_ok=True)

# Write one "node node" line per edge (data=False omits edge attributes).
nx.write_edgelist(sys_CoEx_nx, f"{NETWORK_DIRECTORY}/systematic_CoEx_COEXPRESdb.txt", data=False)

# Systematic GI (Costanzo, 2016)

### Load raw data

In [3]:
# Load the space-separated correlation table, index it by its second column
# and discard the leftover 'Unnamed: 0' column.
GI_raw_df = pd.read_csv(f"{RAW_DATA_DIRECTORY}/Work_Correlation.csv",
                        sep=' ',
                        index_col=1)
GI_df = GI_raw_df.drop(columns='Unnamed: 0')

### Define GI network

In [12]:
# Two genes are connected when their correlation is at least 0.2.
GI_nx = nx.from_pandas_adjacency(GI_df >= 0.2)

# Remove isolated edges, i.e. connected components made of exactly two nodes.
isolated_pairs = [tuple(component)
                  for component in nx.connected_components(GI_nx)
                  if len(component) == 2]
GI_nx.remove_edges_from(isolated_pairs)

# Remove the nodes left without any edge.
isolated_nodes = list(nx.isolates(GI_nx))
GI_nx.remove_nodes_from(isolated_nodes)

component_sizes = tuple(len(component) for component in nx.connected_components(GI_nx))
print("Size of connected components: ", component_sizes)

Size of connected components:  (4628, 3, 3, 4, 3, 3, 5, 4, 3)


### Summary

In [16]:
# Report the size and density of the genetic-interaction network.
gi_summary = (
    f"GI nodes  : {GI_nx.number_of_nodes():>6d}",
    f"GI edges  : {GI_nx.number_of_edges():>6d}",
    f"GI density: {nx.density(GI_nx):.4f}",
)
print(*gi_summary, sep="\n")

GI nodes  :   4656
GI edges  :  30099
GI density: 0.0028


### Save

In [17]:
# Create the output folder if needed; exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(NETWORK_DIRECTORY, exist_ok=True)

# Write one "node node" line per edge (data=False omits edge attributes).
# NOTE(review): the file name spells "Constanzo" although the section title
# says "Costanzo"; kept unchanged because other code may read this exact name.
nx.write_edgelist(GI_nx, f"{NETWORK_DIRECTORY}/GI_Constanzo2016.txt", data=False)

# Annotations

## SGD (S. cerevisiae)

In [3]:
SGD_FILENAME = "sgd.gaf.gz"
SGD_FILEPATH = f"{RAW_DATA_DIRECTORY}/{SGD_FILENAME}"

# Parse the gzipped GAF file (opened in text mode) into one row per annotation.
with gzip.open(SGD_FILEPATH, 'rt') as gaf_handle:
    gaf_records = list(GOA.gafiterator(gaf_handle))
SGD_df = pd.DataFrame(gaf_records)

# Systematic gene name column. In SGD this is always the first synonym.
SGD_df['Systematic_ID'] = SGD_df['Synonym'].map(lambda synonyms: synonyms[0])

In [5]:
# Keep the annotations whose systematic ID starts with 'Y' (protein filter).
is_protein = SGD_df['Systematic_ID'].str.startswith('Y')
lc_protein_gaf_df = SGD_df[is_protein]

# Keep only the annotations backed by one of these evidence codes.
accepted_evidence = ['EXP', 'IDA', 'IPI', 'IMP', 'IGI', 'IEP']
has_accepted_evidence = lc_protein_gaf_df['Evidence'].isin(accepted_evidence)
protein_gaf_df = lc_protein_gaf_df[has_accepted_evidence]

# One frame per GO aspect: P = biological process, F = molecular function,
# C = cellular component.
aspect = protein_gaf_df['Aspect']
protein_BP_gaf_df = protein_gaf_df[aspect == 'P']
protein_MF_gaf_df = protein_gaf_df[aspect == 'F']
protein_CC_gaf_df = protein_gaf_df[aspect == 'C']

In [6]:
def _unique_gene_go_pairs(gaf_df):
    """Return the distinct, non-null (Systematic_ID, GO_ID) rows of a GAF frame."""
    return gaf_df[['Systematic_ID', 'GO_ID']].dropna().drop_duplicates()


# Drop all unnecessary GAF columns, for the whole set ...
high_IC_annotations_df = _unique_gene_go_pairs(protein_gaf_df)

# ... and for each of the three GO aspects.
high_IC_BP_annotations_df = _unique_gene_go_pairs(protein_BP_gaf_df)
high_IC_MF_annotations_df = _unique_gene_go_pairs(protein_MF_gaf_df)
high_IC_CC_annotations_df = _unique_gene_go_pairs(protein_CC_gaf_df)

### GO DAG extension (Yeast2GO)

In [7]:
# Location of the GO ontology (obo) file.
GO_FILENAME = "go-basic.obo"
GO_FILEPATH = f"{RAW_DATA_DIRECTORY}/{GO_FILENAME}"

# Parse the ontology, then map every GO ID to itself plus all of its
# ancestors (everything returned by get_all_parents()).
go_dag = obo_parser.GODag(GO_FILEPATH)
go2parents = {}
for go_id in go_dag.keys():
    upstream_terms = set(go_dag[go_id].get_all_parents())
    upstream_terms.add(go_id)
    go2parents[go_id] = upstream_terms

/media/clusterduck123/joe/data//raw_data/go-basic.obo: fmt(1.2) rel(2019-10-07) 47,285 GO Terms


In [8]:
annotation_columns = ['Systematic_ID', 'GO_ID', 'Level']

# Expand every (gene, GO ID) pair to the GO ID itself plus all upstream
# terms, recording the level of each term in the DAG.
all_annotations_list = []
for gene_id, go_id in zip(high_IC_annotations_df['Systematic_ID'],
                          high_IC_annotations_df['GO_ID']):
    for go_term in go2parents[go_id]:
        all_annotations_list.append((gene_id, go_term, go_dag[go_term].level))
all_annotations_df = pd.DataFrame(all_annotations_list,
                                  columns=annotation_columns).drop_duplicates()


def _entries_in_namespace(namespace):
    """Return the expanded annotation tuples whose GO term lies in `namespace`."""
    return [entry for entry in all_annotations_list
            if go_dag[entry[1]].namespace == namespace]


# Split into the three ontologies.
all_BP_annotations_list = _entries_in_namespace("biological_process")
all_MF_annotations_list = _entries_in_namespace("molecular_function")
all_CC_annotations_list = _entries_in_namespace("cellular_component")

all_BP_annotations_df = pd.DataFrame(all_BP_annotations_list,
                                     columns=annotation_columns).drop_duplicates()
all_MF_annotations_df = pd.DataFrame(all_MF_annotations_list,
                                     columns=annotation_columns).drop_duplicates()
all_CC_annotations_df = pd.DataFrame(all_CC_annotations_list,
                                     columns=annotation_columns).drop_duplicates()

### Summary

In [9]:
def _universe_size(entries):
    """Return (#distinct genes, #distinct GO IDs) of (gene, GO ID, level) tuples."""
    genes = {entry[0] for entry in entries}
    go_ids = {entry[1] for entry in entries}
    return len(genes), len(go_ids)


bp_genes, bp_terms = _universe_size(all_BP_annotations_list)
mf_genes, mf_terms = _universe_size(all_MF_annotations_list)
cc_genes, cc_terms = _universe_size(all_CC_annotations_list)
total_genes, total_terms = _universe_size(all_annotations_list)

print("SGD universe sizes:")
print("============================================")
print(f"Biological Process : {bp_genes} genes, {bp_terms} GO-IDs ")
print(f"Molecular Functions: {mf_genes} genes, {mf_terms} GO-IDs ")
print(f"Cellular Components: {cc_genes} genes, {cc_terms}  GO-IDs ")
print('--------------------------------------------')
print(f"All annotations    : {total_genes} genes, {total_terms} GO-IDs ")

SGD universe sizes:
Biological Process : 4534 genes, 4701 GO-IDs 
Molecular Functions: 3569 genes, 2117 GO-IDs 
Cellular Components: 4014 genes, 910  GO-IDs 
--------------------------------------------
All annotations    : 4969 genes, 7728 GO-IDs 


### Save

In [10]:
# Create the output folder if needed; exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(ANNOTATION_DIRECTORY, exist_ok=True)

# Complete annotation table (all three ontologies together).
all_annotations_df.to_csv(f"{ANNOTATION_DIRECTORY}/GO_all_systematic_SGD.csv", index=False)

# One table per ontology.
all_BP_annotations_df.to_csv(f"{ANNOTATION_DIRECTORY}/GO_BP_systematic_SGD.csv", index=False)
all_MF_annotations_df.to_csv(f"{ANNOTATION_DIRECTORY}/GO_MF_systematic_SGD.csv", index=False)
all_CC_annotations_df.to_csv(f"{ANNOTATION_DIRECTORY}/GO_CC_systematic_SGD.csv", index=False)

## systematic BioGRID $\cap$ SGD

In [10]:
# Systematic-name PPI network (node labels are read as strings).
PPI_nx = nx.read_edgelist(f"{NETWORK_DIRECTORY}/systematic_PPI_BioGRID.txt")

# Read back the tables under the names the SGD "Save" step writes them:
# GO_<all|BP|MF|CC>_systematic_SGD.csv.
all_annotations_df = pd.read_csv(f"{ANNOTATION_DIRECTORY}/GO_all_systematic_SGD.csv")

all_BP_annotations_df = pd.read_csv(f"{ANNOTATION_DIRECTORY}/GO_BP_systematic_SGD.csv")
all_MF_annotations_df = pd.read_csv(f"{ANNOTATION_DIRECTORY}/GO_MF_systematic_SGD.csv")
all_CC_annotations_df = pd.read_csv(f"{ANNOTATION_DIRECTORY}/GO_CC_systematic_SGD.csv")

In [11]:
# Restrict every annotation table to the genes that are nodes of the PPI.
network_genes = PPI_nx.nodes

PPI_annotations_df = all_annotations_df.loc[
    lambda frame: frame['Systematic_ID'].isin(network_genes)]

PPI_BP_annotations_df = all_BP_annotations_df.loc[
    lambda frame: frame['Systematic_ID'].isin(network_genes)]
PPI_MF_annotations_df = all_MF_annotations_df.loc[
    lambda frame: frame['Systematic_ID'].isin(network_genes)]
PPI_CC_annotations_df = all_CC_annotations_df.loc[
    lambda frame: frame['Systematic_ID'].isin(network_genes)]

### Summary

In [12]:
# Count the distinct genes and GO IDs per table before printing.
bp_genes = len(set(PPI_BP_annotations_df['Systematic_ID']))
bp_terms = len(set(PPI_BP_annotations_df['GO_ID']))
mf_genes = len(set(PPI_MF_annotations_df['Systematic_ID']))
mf_terms = len(set(PPI_MF_annotations_df['GO_ID']))
cc_genes = len(set(PPI_CC_annotations_df['Systematic_ID']))
cc_terms = len(set(PPI_CC_annotations_df['GO_ID']))
total_genes = len(set(PPI_annotations_df['Systematic_ID']))
total_terms = len(set(PPI_annotations_df['GO_ID']))

print(r"SGD ∩ BioGRID universe sizes:")
print("============================================")
print(f"Biological Process : {bp_genes} genes, {bp_terms} GO-IDs ")
print(f"Molecular Functions: {mf_genes} genes, {mf_terms} GO-IDs ")
print(f"Cellular Components: {cc_genes} genes, {cc_terms}  GO-IDs ")
print('--------------------------------------------')
print(f"All annotations    : {total_genes} genes, {total_terms} GO-IDs ")

SGD ∩ BioGRID universe sizes:
Biological Process : 4498 genes, 4697 GO-IDs 
Molecular Functions: 3542 genes, 2109 GO-IDs 
Cellular Components: 3994 genes, 910  GO-IDs 
--------------------------------------------
All annotations    : 4925 genes, 7716 GO-IDs 


### Save

In [13]:
# Write the complete table first, then one table per ontology; the tag is
# the only part of the file name that varies.
tagged_frames = (
    ("all", PPI_annotations_df),
    ("BP", PPI_BP_annotations_df),
    ("MF", PPI_MF_annotations_df),
    ("CC", PPI_CC_annotations_df),
)
for tag, frame in tagged_frames:
    frame.to_csv(f"{ANNOTATION_DIRECTORY}/GO_{tag}_systematic_BioGRID-SGD.csv", index=False)