In [2]:
import os

import numpy as np
import pandas as pd

## Load the dataframes

In [3]:
# Read the four pre-computed tables in one pass; the order of the paths
# matches the order of the names on the left-hand side.
interaction_df, filtered_df, result_df, protein_pairs_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "dataframes/interaction_data.csv",
        "dataframes/filtered_data.csv",
        "dataframes/result_data.csv",
        "dataframes/protein_pairs_data.csv",
    )
)

In [4]:
# Preview the first rows of the interaction table to sanity-check the load.
interaction_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,559,560,561,562,563,564,565,566,567,568
0,CNBP_HUMAN,EBP2_HUMAN,4.376997,0.77117,0.835097,0.866124,0.538495,0.699603,0.73422,0.451208,...,17.306808,12.228814,21.40473,30.915457,6.094113,22.627251,-0.284131,9.42179,3.108872,0.86
1,DTBP1_HUMAN,BL1S1_HUMAN,4.355378,0.771795,0.977514,1.020024,0.61578,0.720567,0.676516,0.444765,...,17.071913,12.099715,20.790263,31.274987,6.148244,23.480201,-0.339447,9.58187,3.242057,0.89
2,PLK1_HUMAN,PSA3_HUMAN,4.350983,0.921458,0.979215,1.043114,0.606984,0.733716,0.684657,0.4417,...,16.864708,12.034104,20.710379,31.093584,6.085997,23.083606,-0.336897,9.525408,3.279343,0.88
3,ATG3_HUMAN,ATG12_HUMAN,4.336443,0.912624,0.937119,0.993354,0.590773,0.731851,0.681708,0.44368,...,16.242498,11.658963,19.775409,30.778781,6.057785,23.317048,-0.548705,9.540261,3.43214,0.9
4,CSN8_HUMAN,CSN2_HUMAN,4.363747,0.942411,1.021622,1.081436,0.61185,0.73512,0.679697,0.439954,...,17.005705,12.355417,20.585439,31.799906,6.121988,23.693373,-0.373932,9.704056,3.273589,0.96


In [5]:
import networkx as nx

In [13]:

def create_graph(protein_pairs_df, weighted=True):
    """Build an undirected protein-protein interaction graph.

    Each row of ``protein_pairs_df`` becomes one edge between the proteins
    named in its ``Protein1`` and ``Protein2`` columns. Every protein that
    appears in either column ends up as a node.

    Parameters
    ----------
    protein_pairs_df : pandas.DataFrame
        Must contain the columns ``Protein1`` and ``Protein2``. The column
        ``InteractionProbability`` is required only when ``weighted`` is True.
    weighted : bool, default True
        If True, each edge's ``weight`` attribute is the row's
        ``InteractionProbability``. If False, every edge gets ``weight=1``.

    Returns
    -------
    networkx.Graph
        Graph whose nodes are inserted in order of first appearance in the
        frame, so the result (and anything serialised from it) is the same
        on every run. If a pair occurs in several rows, the weight of the
        last such row is kept.

    Raises
    ------
    KeyError
        If a required column is missing from ``protein_pairs_df``.
    """
    G = nx.Graph()

    # .tolist() yields plain Python scalars rather than NumPy ones.
    sources = protein_pairs_df['Protein1'].tolist()
    targets = protein_pairs_df['Protein2'].tolist()

    if weighted:
        weights = protein_pairs_df['InteractionProbability'].tolist()
    else:
        # The probability column is not consulted for the unweighted graph.
        weights = [1] * len(sources)

    # Adding an edge implicitly adds its endpoints, so a separate node pass
    # is unnecessary; doing everything in one bulk call also avoids the
    # per-row overhead of DataFrame.iterrows().
    G.add_weighted_edges_from(zip(sources, targets, weights), weight='weight')

    return G


In [14]:
# Preview the first rows of the protein-pair table that is passed to create_graph below.
protein_pairs_df.head()

Unnamed: 0,Protein1,Protein2,InteractionProbability
0,CNBP_HUMAN,EBP2_HUMAN,0.86
1,DTBP1_HUMAN,BL1S1_HUMAN,0.89
2,PLK1_HUMAN,PSA3_HUMAN,0.88
3,ATG3_HUMAN,ATG12_HUMAN,0.9
4,CSN8_HUMAN,CSN2_HUMAN,0.96


In [15]:

# Build the unweighted variant: with weighted=False every edge gets weight 1.
graph = create_graph(protein_pairs_df, weighted=False)


In [16]:
# Create the output directory if needed so the export also works on a fresh
# checkout (write_graphml does not create missing parent directories).
graphml_path = "graph/graph_file.graphml"
os.makedirs(os.path.dirname(graphml_path), exist_ok=True)
nx.write_graphml(graph, graphml_path)

In [17]:
# Report the size of the graph that was just built and exported.
print(f"Number of nodes = {graph.number_of_nodes()} and Number of edges = {graph.number_of_edges()}")

Number or nodes = 4359 and Number of edges = 6497
