In [4]:
from itertools import combinations
from pathlib import Path

import joblib
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [5]:
# Drug-protein table; the cells below read its "protein", "chemical",
# "z_score" and "min_max_score" columns.
df_drug_protein = pd.read_csv(
    filepath_or_buffer="filtered_data/df_drug_protein_2(normalized).csv"
)
# Drug-combination table, loaded here for later use in the notebook.
df_combinations = pd.read_csv(
    filepath_or_buffer="filtered_data/df_combinations_4(filtered).csv"
)

### Drug-Protein Graph creation

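As before, the idea is to build a monopartite (drug–drug) graph that captures interactions between drugs: two drugs are connected when they share a protein, and the edge weight is the sum of their scores for that protein, accumulated over all the proteins they share.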


In [6]:
def calculate_pairs(group):
    """Build every unordered drug pair that shares one protein.

    Meant to be passed to ``groupby("protein").apply``: the protein
    identifier is read from ``group.name``, which ``apply`` sets on each
    group.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single protein, with the columns ``chemical``,
        ``z_score`` and ``min_max_score``.

    Returns
    -------
    pandas.DataFrame
        One row per pair with the columns ``chemical_1``, ``chemical_2``,
        ``protein``, ``z_score_sum`` and ``min_max_score_sum``.
        ``chemical_1`` always sorts before (or ties with) ``chemical_2``.
        A group with fewer than two rows yields an empty frame with no
        columns.
    """
    # Sort by chemical so that a given pair is always emitted in the same
    # orientation. Without this, the orientation follows the row order of the
    # group, and the same two drugs can show up as (A, B) for one protein and
    # (B, A) for another, which a later group-by on
    # (chemical_1, chemical_2) would treat as two different edges.
    ordered = group.sort_values("chemical", kind="stable")
    chemicals = ordered["chemical"].values
    z_scores = ordered["z_score"].values
    mm_scores = ordered["min_max_score"].values
    results = []
    for (chem1, zscore1, mmscore1), (chem2, zscore2, mmscore2) in combinations(
        zip(chemicals, z_scores, mm_scores), 2
    ):
        results.append(
            {
                "chemical_1": chem1,
                "chemical_2": chem2,
                "protein": group.name,
                # Pair score = sum of the two drugs' scores for this protein.
                "z_score_sum": zscore1 + zscore2,
                "min_max_score_sum": mmscore1 + mmscore2,
            }
        )
    return pd.DataFrame(results)


# Quantile of the scaled min-max pair score used as the cut-off below:
# pairs scoring at or above this quantile are kept (0.90 -> roughly the top 10 %).
PAIR_SCORE_QUANTILE = 0.90

# Step 1: for every protein, enumerate the drug pairs that share it.
df_common_pairs = (
    df_drug_protein.groupby("protein")[["chemical", "z_score", "min_max_score"]]
    .apply(calculate_pairs)
    .reset_index(drop=True)
)

# Step 2: collapse to one row per (chemical_1, chemical_2) by summing the
# per-protein pair scores.
df_common_pairs = (
    df_common_pairs.groupby(["chemical_1", "chemical_2"])
    .agg({"z_score_sum": "sum", "min_max_score_sum": "sum"})
    .reset_index()
)

# Step 3: re-scale the aggregated scores. The columns keep their "*_sum" names
# but hold the scaled values from here on. The fitted scalers are saved at the
# end of the cell.
minmax_scaler = MinMaxScaler()
standard_scaler = StandardScaler()

df_common_pairs[["z_score_sum"]] = standard_scaler.fit_transform(
    df_common_pairs[["z_score_sum"]]
)

df_common_pairs[["min_max_score_sum"]] = minmax_scaler.fit_transform(
    df_common_pairs[["min_max_score_sum"]]
)

# Step 4: keep only the strongest pairs by scaled min-max score and write them out.
threshold = df_common_pairs["min_max_score_sum"].quantile(PAIR_SCORE_QUANTILE)
df_common_pairs_filtered = df_common_pairs[
    df_common_pairs["min_max_score_sum"] >= threshold
]
df_common_pairs_filtered.to_csv(
    "filtered_data/df_pairs_protein(normalized).csv", index=False
)

# joblib.dump does not create missing directories; make sure the target
# folder exists so the cell also works on a fresh checkout.
Path("scalers").mkdir(parents=True, exist_ok=True)
joblib.dump(minmax_scaler, 'scalers/3.1_minmax_grouped.save')
joblib.dump(standard_scaler, 'scalers/3.1_zscore_grouped.save')



['scalers/3.1_zscore_grouped.save']