In [1]:
import networkx as nx
import pandas as pd

In [2]:
# Load the two normalized input tables in one pass; the readers run in the
# order the paths are listed.
# NOTE(review): df_drug_protein is not referenced again in the visible cells —
# confirm whether it is still needed.
df_drug_protein, df_pairs_protein = map(
    pd.read_csv,
    (
        "filtered_data/df_drug_protein_2(normalized).csv",
        "filtered_data/df_pairs_protein(normalized).csv",
    ),
)

In [3]:
# Every identifier that appears on either side of a pair becomes a node.
drug_nodes = set(df_pairs_protein["chemical_1"].tolist()) | set(
    df_pairs_protein["chemical_2"].tolist()
)

# Build (u, v, attributes) triples column-wise rather than with iterrows():
# iterrows() creates one Series per row, which is slow for tens of thousands
# of rows and can upcast values to a common dtype. Zipping the columns keeps
# each value's own type and yields the same edges in the same order.
edges = [
    (chem_1, chem_2, {"z_weight": z_sum, "min_max_weight": min_max_sum})
    for chem_1, chem_2, z_sum, min_max_sum in zip(
        df_pairs_protein["chemical_1"].tolist(),
        df_pairs_protein["chemical_2"].tolist(),
        df_pairs_protein["z_score_sum"].tolist(),
        df_pairs_protein["min_max_score_sum"].tolist(),
    )
]

# Undirected graph: one node per drug, one edge per row of df_pairs_protein,
# carrying both score sums as edge attributes.
DP = nx.Graph()
DP.add_nodes_from(drug_nodes)
DP.add_edges_from(edges)
print(DP)

Graph with 1315 nodes and 66487 edges


In [4]:
# Compute five per-node statistics on DP, in the order listed. Each result is
# later used as a node -> value lookup when annotating the pair table.
# NOTE(review): no weight argument is passed to any of these calls, so the
# "z_weight" / "min_max_weight" edge attributes are presumably not used here —
# confirm that unweighted metrics are intended.
dp_deg_centrality, dp_cc, dp_pagerank, dp_closeness, dp_betweenness = (
    metric(DP)
    for metric in (
        nx.degree_centrality,
        nx.clustering,
        nx.pagerank,
        nx.closeness_centrality,
        nx.betweenness_centrality,
    )
)

In [5]:
def add_node_features(
    df: pd.DataFrame, feature_dict: dict, feature_name: str
) -> pd.DataFrame:
    """Attach a per-node feature to both endpoints of every pair.

    The values in the ``chemical_1`` and ``chemical_2`` columns are looked up
    in ``feature_dict`` and stored in two columns named ``<feature_name>_1``
    and ``<feature_name>_2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``chemical_1`` and ``chemical_2`` columns.
    feature_dict : dict
        Lookup from node identifier to feature value. Identifiers missing
        from the lookup produce NaN (the behaviour of ``Series.map``).
    feature_name : str
        Prefix for the two new column names.

    Returns
    -------
    pandas.DataFrame
        A new frame with the two feature columns added (or replaced if they
        already exist). The input ``df`` is not modified, so re-running a
        cell or holding another reference to ``df`` cannot observe a
        half-annotated table.

    Raises
    ------
    KeyError
        If ``df`` lacks a ``chemical_1`` or ``chemical_2`` column.
    """
    # assign() returns a copy with the new columns, leaving the caller's
    # frame untouched; the dict form is needed because the column names are
    # built at runtime.
    return df.assign(
        **{
            feature_name + "_1": df["chemical_1"].map(feature_dict),
            feature_name + "_2": df["chemical_2"].map(feature_dict),
        }
    )


# Annotate every pair with each node statistic for both of its endpoints.
# The order of this table determines the order of the resulting columns.
for feat_label, node_scores in (
    ("degree_centrality", dp_deg_centrality),
    ("clustering", dp_cc),
    ("pagerank", dp_pagerank),
    ("closeness", dp_closeness),
    ("betweenness", dp_betweenness),
):
    df_pairs_protein = add_node_features(df_pairs_protein, node_scores, feat_label)

In [6]:
# Persist the pair table, now carrying the network statistics, without the
# row index.
df_pairs_protein.to_csv(
    path_or_buf="filtered_data/df_pairs_protein_3(network stats).csv",
    index=False,
)

In [7]:
# Filtered drug-combination table and the drug -> compound-id table.
df_combinations = pd.read_csv(
    filepath_or_buffer="filtered_data/df_combinations_4(filtered).csv"
)
df_drug_cid = pd.read_csv(filepath_or_buffer="filtered_data/df_drug_cid.csv")

# Work on an independent copy so the loaded combinations stay as read.
df_full = df_combinations.copy(deep=True)

In [8]:
# Lookup from the 'Drug' column to the 'cIds' column; if a drug is listed
# more than once, its last row wins.
cid_dict = dict(zip(df_drug_cid['Drug'], df_drug_cid['cIds']))

# Translate both drug columns into the identifier columns used as merge keys.
# Drugs missing from the lookup become NaN.
for name_col, chem_col in (('Drug1', 'chemical_1'), ('Drug2', 'chemical_2')):
    df_full[chem_col] = df_full[name_col].map(cid_dict)

In [9]:
# Left join: keep every combination row; pairs with no match in
# df_pairs_protein get NaN in the columns that come from it.
df_full_na = pd.merge(
    df_full,
    df_pairs_protein,
    how='left',
    on=['chemical_1', 'chemical_2'],
)

# Replace every NaN in the merged frame with 0.
# NOTE(review): this applies to all columns, not only the merged-in features
# (e.g. an unmapped chemical id would also become 0) — confirm this is intended.
df_full_large = df_full_na.fillna(value=0)

In [10]:
# Inner join: keep only combinations whose (chemical_1, chemical_2) key also
# occurs in df_pairs_protein.
# NOTE(review): the match is on the ordered key, so a pair stored as (b, a) in
# one table and (a, b) in the other would not match — confirm that pair
# ordering is normalized upstream.
df_small_na = pd.merge(
    df_full,
    df_pairs_protein,
    how='inner',
    on=['chemical_1', 'chemical_2'],
)

In [11]:
# Write both training tables without the row index: the zero-filled left-join
# result and the inner-join result.
df_full_large.to_csv(
    path_or_buf='training_data/df_large[drug-protein].csv',
    index=False,
)
df_small_na.to_csv(
    path_or_buf='training_data/df_small[drug-protein].csv',
    index=False,
)
