In [None]:
import pandas as pd
import networkx as nx

In [None]:
# Load the preprocessed GO annotation table. The `Genes` column holds one
# comma-separated string of gene symbols per GO term.
go = pd.read_csv('../preprocessed_data/go_20250312.csv')


def _split_genes(raw_genes):
    """Turn one `Genes` cell into a list of stripped, non-empty gene symbols.

    Non-string cells (e.g. NaN produced by an empty CSV field) yield an empty
    list instead of raising, so a term without genes simply matches nothing.
    """
    if not isinstance(raw_genes, str):
        return []
    # Plain split + strip is equivalent to splitting on a comma followed by
    # optional whitespace, and avoids an invalid '\s' escape in a non-raw string.
    stripped = (token.strip() for token in raw_genes.split(','))
    return [token for token in stripped if token]


go['Gene_list'] = go['Genes'].apply(_split_genes)

In [None]:
# Selected target proteins, kept both as a list (file order) and as a set
# for fast membership tests.
Target_list = pd.read_csv('../preprocessed_data/Target_list_selected.csv', index_col=None)['Proteins'].tolist()
Target_set = set(Target_list)

# Intersect each term's genes with the target set once, and sort the result:
# iterating a set of strings has a hash-seed-dependent order, which would make
# the order of anything derived from `Target_list` vary between kernel runs.
matched_targets = go['Gene_list'].apply(
    lambda genes: sorted(Target_set.intersection(genes))
)

go['Target_count'] = matched_targets.apply(len)
go['Target_list'] = matched_targets

In [None]:
# Keep GO terms that hit more than one selected target, and only the columns
# used further down.
go = go.loc[
    go['Target_count'] > 1,
    ['GOID', 'TERM', 'Ontology', 'Target_count', 'Target_list'],
]

# Lookup: GO id -> number of selected targets annotated to that term.
go_id_to_num = go.set_index('GOID')['Target_count'].to_dict()

In [None]:
# Display the column labels of `go` as a quick visual check.
go.columns

In [None]:
# Load the GO relationship table and discard every row that has a missing
# value in any column.
go_all_relationships = (
    pd.read_csv('../preprocessed_data/go_all_relationships.csv')
    .dropna()
)

In [None]:
def build_ontology_graph(df):
    """Build a directed graph of GO terms from a relationship table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``parent``, ``go_id``, ``parent_ontology``
        and ``child_ontology``. Each row describes one parent -> child link.

    Returns
    -------
    networkx.DiGraph
        Every term seen in ``parent`` or ``go_id`` becomes a node carrying an
        ``ontology_type`` attribute; every row becomes an edge pointing from
        the parent term to the child term.
    """
    G = nx.DiGraph()

    # Walk the four columns in lockstep rather than using DataFrame.iterrows(),
    # which materialises a Series for every row and is slow on large tables.
    links = zip(df['parent'], df['go_id'], df['parent_ontology'], df['child_ontology'])

    for parent, child, parent_ontology, child_ontology in links:
        # Nodes are (re-)added on every row, so the ontology recorded for a
        # term is the one from the last row in which it appears.
        G.add_node(parent, ontology_type=parent_ontology)
        G.add_node(child, ontology_type=child_ontology)

        G.add_edge(parent, child)

    return G

# Build the directed parent -> child GO graph from the relationship table.
graph = build_ontology_graph(go_all_relationships)

In [None]:
# Per-term edge counts in the ontology graph: in-degree counts incoming
# (parent -> this term) edges, out-degree counts outgoing (this term -> child) edges.
in_degree_dict = dict(graph.in_degree())
out_degree_dict = dict(graph.out_degree())

# Use assign() to build a new frame instead of writing columns into `go` in
# place: `go` is the result of earlier filtering, and item assignment on such
# a slice can trigger pandas' SettingWithCopyWarning.
# GO ids that are absent from the graph get a degree of 0.
go = go.assign(
    in_degree=go["GOID"].map(in_degree_dict).fillna(0).astype(int),
    out_degree=go["GOID"].map(out_degree_dict).fillna(0).astype(int),
)

In [None]:
# NOTE(review): unpickling executes arbitrary code embedded in the file; only
# load this pickle if it comes from a trusted source.
Target_dict = pd.read_pickle('../preprocessed_data/Target_dict.pkl')

# Keep only terms with no outgoing edge in the ontology graph. Terms that are
# missing from the graph were given out_degree 0 and are therefore kept too.
go = go[go['out_degree'].eq(0)]

In [None]:
# Distinct GO ids in sorted order, so the id assignment below is reproducible.
go_list = sorted(set(go['GOID']))

# Assign consecutive integer node ids starting at 7854 + 311.
# NOTE(review): the offset presumably skips ids already used by other node
# types in the existing graph -- TODO confirm what 7854 and 311 count.
go_dict = {goid: node_id for node_id, goid in enumerate(go_list, start=7854 + 311)}

In [None]:
# Report how many distinct GO ids received a node id.
print(len(go_list))

In [None]:
# Build the GO-term <-> target edge list. Every (term, target) pair yields two
# rows, one per direction, each with its own interaction code.

# Ontology -> (code for the GO -> target edge, code for the target -> GO edge).
# NOTE(review): the meaning of the numeric codes is defined outside this
# notebook -- TODO confirm against the interaction vocabulary of the graph.
ontology_to_interaction = {
    'BP': (17, 14),
    'MF': (18, 15),
    'CC': (19, 16)
}

rows = []

# Iterate over the needed columns directly; this avoids the per-row Series
# construction of DataFrame.iterrows().
for goid, ontology, targets in zip(go['GOID'], go['Ontology'], go['Target_list']):
    mapped_go = go_dict.get(goid)
    # Test for a missing mapping with `is None`, not truthiness: an integer
    # id of 0 is a valid node id but evaluates as falsy.
    if mapped_go is None or ontology not in ontology_to_interaction:
        continue

    interaction_forward, interaction_backward = ontology_to_interaction[ontology]

    for target in targets:
        mapped_target = Target_dict.get(target)
        # Same reasoning as above: skip only targets that have no mapping at
        # all, never a target whose node id happens to be 0.
        if mapped_target is None:
            continue

        # GO term -> target
        rows.append({
            'node1': mapped_go,
            'node2': mapped_target,
            'interaction': interaction_forward
        })

        # target -> GO term
        rows.append({
            'node1': mapped_target,
            'node2': mapped_go,
            'interaction': interaction_backward
        })

# Passing the column list keeps the expected schema even when `rows` is empty.
df = pd.DataFrame(rows, columns=['node1', 'node2', 'interaction'])

In [None]:
# Load the existing edge table and stack the new GO <-> target edges beneath
# it (row-wise concatenation; the original row labels of both frames are kept).
kg2 = pd.read_csv('../preprocessed_data/kg_v2.csv')
kg3 = pd.concat((kg2, df))

In [None]:
# Show how many edges the merged table holds for each interaction code.
kg3['interaction'].value_counts()

In [None]:
# Write the merged edge table without the row index. `index` is documented as
# a bool, so pass False explicitly rather than relying on None being falsy.
kg3.to_csv('../preprocessed_data/kg_v3.csv', index=False)