In [1]:
import pandas as pd
import numpy as np
import ast

In [None]:
# Read the five edge tables in one pass; the order of the file names matches
# the order of the names being bound on the left-hand side.
(
    drug_gene_existing,
    gene_gene_existing,
    gene_dis_existing,
    drug_gene_predicted,
    gene_gene_predicted,
) = map(
    pd.read_csv,
    (
        r'drug_gene_exist.csv',
        r'gene_gene_exist.csv',
        r'gene_dis_exist.csv',
        r'drug_gene_pred_001.csv',
        r'gene_gene_pred_001.csv',
    ),
)

# Stack the "exist" and "pred" tables of each edge type into a single frame
# with a fresh 0..n-1 index. Gene-disease edges have no "pred" file here, so
# gene_dis is simply another name for the loaded table.
drug_gene = pd.concat((drug_gene_existing, drug_gene_predicted), ignore_index=True)
gene_gene = pd.concat((gene_gene_existing, gene_gene_predicted), ignore_index=True)
gene_dis = gene_dis_existing

# Sorted, de-duplicated identifiers: 'Entity1' of the drug-gene table and
# 'Entity2' of the gene-disease table.
all_drugs = list(set(drug_gene['Entity1']))
all_drugs.sort()
all_diseases = list(set(gene_dis['Entity2']))
all_diseases.sort()

In [None]:
def parse_path(path_str):
    """Convert the text of a Python literal into the object it describes.

    Parameters
    ----------
    path_str : str
        Source text of a Python literal (e.g. a list of dicts).

    Returns
    -------
    object
        Whatever ``ast.literal_eval`` produces for ``path_str``.
    """
    parsed = ast.literal_eval(path_str)
    return parsed

def compute_path_score(path_str):
    """Return the arithmetic mean of the ``'Score'`` values along a path.

    Parameters
    ----------
    path_str : str or sequence of mappings
        Either the text of a Python literal (parsed with
        ``ast.literal_eval``) or an already-parsed sequence of steps.
        Each step must be indexable by the key ``'Score'``.

    Returns
    -------
    float
        Mean of the step scores, or NaN when the path has no steps.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a string cannot be parsed.
    KeyError
        When a step has no ``'Score'`` entry.
    """
    # Only strings need parsing; an already-parsed path is used as-is so
    # callers holding parsed data do not pay for a second literal_eval.
    path = ast.literal_eval(path_str) if isinstance(path_str, str) else path_str
    scores = [step['Score'] for step in path]
    if not scores:
        # A path with no steps has no defined mean; NaN is skipped by pandas
        # aggregations, whereas dividing by zero would abort the whole apply.
        return float('nan')
    return sum(scores) / len(scores)

In [None]:
# One table per output file; names are bound in the same order as the files.
len2, len3, len4 = map(
    pd.read_csv,
    (
        r'output_length2.csv',
        r'output_length3.csv',
        r'output_length4.csv',
    ),
)

In [37]:
# Add a 'ParsedPath' column to each table by parsing its 'Path' strings.
for _path_table in (len2, len3, len4):
    _path_table['ParsedPath'] = _path_table['Path'].apply(parse_path)

In [39]:
# Score every path table in turn: each one gains a 'PathScore' column
# computed from its raw 'Path' strings.
for df in (len2, len3, len4):
    raw_paths = df['Path']
    df['PathScore'] = raw_paths.apply(compute_path_score)

In [None]:
# In every table, replace scores that are exactly 1.0 with 0.9999.
for _scored_table in (len2, len3, len4):
    _is_exactly_one = _scored_table['PathScore'] == 1.0
    _scored_table.loc[_is_exactly_one, 'PathScore'] = 0.9999

In [None]:
# Pool the paths from all three tables into one frame.
all_paths = pd.concat((len2, len3, len4), ignore_index=True)

# Mean PathScore for each (StartDrug, EndDisease) pair.
pair_mean_scores = all_paths.groupby(['StartDrug', 'EndDisease'])['PathScore'].mean()
agg_df = pair_mean_scores.reset_index()

# Reshape to a drug x disease grid.
pivot_matrix = pd.pivot_table(
    agg_df,
    index='StartDrug',
    columns='EndDisease',
    values='PathScore',
)

# Start from an all-zero grid over every known drug and disease, then copy in
# the non-missing scores; labels absent from the grid are ignored by update().
final_matrix = pd.DataFrame(0.0, index=all_drugs, columns=all_diseases)
final_matrix.update(pivot_matrix)

In [None]:
# Load the DOID/MeSH cross-reference; dtype=str keeps every identifier as text.
doid_mesh_df = pd.read_csv(r'HQ_DIR\source\DOID_MeSH_UMLS.csv', dtype=str)
doid_mesh_df = (
    doid_mesh_df[['DOID', 'MeSH']]
    # Rows missing either identifier must not reach the lookup: a NaN MeSH
    # value would rename a disease column to NaN instead of leaving its
    # original label in place.
    .dropna(subset=['DOID', 'MeSH'])
    .drop_duplicates()
    # Add the 'DOID:' prefix so keys can be looked up by final_matrix's
    # column labels; assign() builds a new frame instead of writing into a
    # slice of the loaded one.
    .assign(DOID=lambda xref: 'DOID:' + xref['DOID'])
)

# NOTE(review): if one DOID is listed with several MeSH ids, the last row wins
# here; if several DOIDs share a MeSH id, the renamed columns will contain
# duplicates. Confirm whether either case occurs in the cross-reference file.
doid_to_mesh = doid_mesh_df.set_index('DOID')['MeSH'].to_dict()

# Columns without a mapping keep their current label.
new_columns = [doid_to_mesh.get(col, col) for col in final_matrix.columns]
final_matrix.columns = new_columns

                           D054990   D054429  C536911   D006223  D009202  \
PUBCHEM.COMPOUND:10013505      0.0  0.996306   0.0000  0.998004      0.0   
PUBCHEM.COMPOUND:100154        0.0  0.996097   0.0000  0.998319      0.0   
PUBCHEM.COMPOUND:10071196      0.0  0.000000   0.0000  0.000000      0.0   
PUBCHEM.COMPOUND:10074640      0.0  0.999900   0.9999  0.999900      0.0   
PUBCHEM.COMPOUND:10077130      0.0  0.000000   0.0000  0.000000      0.0   

                            D010673  D010235   D008527   D009190  D008223  \
PUBCHEM.COMPOUND:10013505  0.997768      0.0  0.997482  0.998134      0.0   
PUBCHEM.COMPOUND:100154    0.998576      0.0  0.997568  0.999251      0.0   
PUBCHEM.COMPOUND:10071196  0.000000      0.0  0.000000  0.000000      0.0   
PUBCHEM.COMPOUND:10074640  0.999900      0.0  0.000000  0.000000      0.0   
PUBCHEM.COMPOUND:10077130  0.000000      0.0  0.000000  0.000000      0.0   

                           ...   D002051  D000096142   D011087   D015470  \
PUBC

In [None]:
# Write the drug x disease score matrix to disk; the row labels are written too.
final_matrix.to_csv(path_or_buf=r'drug_disease_pathscore_matrix.csv', index=True)