# Implementation of Results section in manuscript

# Install/Import packages

#### Installation required only once

In [None]:
# %pip install -r requirements.txt   (uncomment and run once; %pip installs into the active kernel's environment)

In [1]:
import pybel
from pybel.io.jupyter import to_jupyter
from utils import *


  from tqdm.autonotebook import tqdm


# Load previously cached KG file

In [2]:
# Load the cached knowledge graph. The context manager closes the file even
# if unpickling fails.
# NOTE: pickle.load can execute arbitrary code from the file; only load
# caches that were produced by this project.
with open('data/export/Monkeypox_KG_15jan.pkl', 'rb') as infile:
    mpox_graph = pickle.load(infile)

# Get list of drugs that are used to treat "Virus Diseases" or "Smallpox"

In [3]:
# Disease terms handed to filter_graph in the next cell.
query = [
    'Virus Diseases',
    'Smallpox',
]

In [4]:
# Build the subgraph for the query terms and show it inline.
# NOTE(review): filter_graph is not defined in this notebook (presumably it
# comes from the `utils` star import); its matching semantics should be
# confirmed there.
viral_drugs_kg = filter_graph(mpox_graph,query)
to_jupyter(viral_drugs_kg)

<IPython.core.display.Javascript object>

In [5]:
def getChemfromKG(mainGraph):
    """Collect the names of ChEMBL nodes that appear on any edge of a graph.

    :param mainGraph: graph whose ``edges(data=True)`` yields ``(u, v, data)``
        triples; nodes are expected to expose a ``name`` attribute.
    :return: list of unique node names containing ``'CHEMBL'``, ordered by
        first appearance (source node before target node within an edge).
    """
    # A dict keeps insertion order and gives O(1) de-duplication, instead of
    # scanning a growing list for every node.
    chem_names = {}
    for u, v, _data in mainGraph.edges(data=True):
        for node in (u, v):
            # Some nodes may carry no name (or name=None); skip them rather
            # than crash on the substring test.
            name = getattr(node, 'name', None)
            if isinstance(name, str) and 'CHEMBL' in name:
                chem_names.setdefault(name, None)

    return list(chem_names)



In [6]:
# Extract the ChEMBL node names found on the edges of the filtered subgraph;
# the bare name on the last line displays the resulting list.
viral_drugs = getChemfromKG(viral_drugs_kg)
viral_drugs

['CHEMBL50300',
 'CHEMBL116',
 'CHEMBL3137312',
 'CHEMBL1643',
 'CHEMBL61',
 'CHEMBL115',
 'CHEMBL1259059',
 'CHEMBL853',
 'CHEMBL163',
 'CHEMBL1257073',
 'CHEMBL584',
 'CHEMBL152',
 'CHEMBL57',
 'CHEMBL1490434',
 'CHEMBL318153']

# Function to retrieve chemicals in Phase IV (a modification of the RetDrugInd function)

In [7]:
def RetDrugInd_phase4(chemblIDs) -> dict:
    """Retrieve phase-4 drug indications from ChEMBL for the given molecules.

    For every ChEMBL ID the ``drug_indication`` resource is queried and only
    the indications whose ``max_phase_for_ind`` is at least 4 are kept.

    :param chemblIDs: iterable of ChEMBL molecule IDs (any iterable works,
        including one-shot iterators).
    :return: dict mapping each ChEMBL ID that has at least one phase-4
        indication to the list of its indication records (dicts with
        ``mesh_heading`` and ``max_phase_for_ind``). IDs without a phase-4
        indication are omitted.
    """
    getDrugInd = new_client.drug_indication

    named_drugIndList = {}
    for chemblid in tqdm(chemblIDs, desc='Retrieving diseases from ChEMBL'):
        drugInd = getDrugInd.filter(
            molecule_chembl_id=chemblid
        ).only(['mesh_heading', 'max_phase_for_ind'])

        data = []
        for ind in drugInd:
            # The phase may arrive as a number, a numeric string, or be
            # missing; normalise it before comparing and skip anything that
            # cannot be interpreted as a number.
            try:
                phase = float(ind.get('max_phase_for_ind'))
            except (TypeError, ValueError):
                continue
            if phase >= 4:
                data.append(ind)

        # Fill the result while iterating so the IDs are consumed only once
        # and every list stays paired with the ID it was fetched for.
        if data:
            named_drugIndList[chemblid] = data

    return named_drugIndList

In [8]:
# Query ChEMBL for every candidate drug and keep only those with at least one
# phase-4 indication; the bare name on the last line displays the mapping.
phase4_drugs = RetDrugInd_phase4(viral_drugs)
phase4_drugs


Retrieving diseases from ChEMBL: 100%|██████████| 15/15 [00:00<00:00, 289.45it/s]


{'CHEMBL116': [{'max_phase_for_ind': 4, 'mesh_heading': 'Virus Diseases'}],
 'CHEMBL3137312': [{'max_phase_for_ind': 4, 'mesh_heading': 'Virus Diseases'}],
 'CHEMBL1643': [{'max_phase_for_ind': 4,
   'mesh_heading': 'Hepatitis C, Chronic'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Liver Cirrhosis'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Virus Diseases'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Fibrosis'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Fibrosis'}],
 'CHEMBL61': [{'max_phase_for_ind': 4, 'mesh_heading': 'Virus Diseases'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Condylomata Acuminata'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Carcinoma, Squamous Cell'}],
 'CHEMBL115': [{'max_phase_for_ind': 4, 'mesh_heading': 'Virus Diseases'}],
 'CHEMBL1259059': [{'max_phase_for_ind': 4, 'mesh_heading': 'Hepatitis C'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Hepatitis C, Chronic'},
  {'max_phase_for_ind': 4, 'mesh_heading': 'Virus Diseases'}],
 'CHEMBL853': [{'max_p

In [9]:
# Convert the {drug: [indication dicts]} mapping into a long-format dataframe
# with one row per (drug, indication) pair.

# Step 1: flatten the mapping in a single pass so every indication record
# stays paired with the drug it belongs to.
drug_indication_pairs = [
    (chem, dis)
    for chem, indications in phase4_drugs.items()
    for dis in indications
]
chem_list = [chem for chem, _ in drug_indication_pairs]

# Step 2: build the frame from the indication records. Constructing it from
# one list of records (rather than concatenating per-drug frames) also works
# when the mapping is empty.
phase4drugs = pd.DataFrame([dis for _, dis in drug_indication_pairs])

# Step 3: attach the drug ID column.
phase4drugs['Drug'] = chem_list

phase4drugs.to_csv('data/export/phase4drugs.csv')

# IDs of the drugs that have at least one phase-4 indication.
drugs_CT4 = list(phase4_drugs.keys())
drugs_CT4

['CHEMBL116',
 'CHEMBL3137312',
 'CHEMBL1643',
 'CHEMBL61',
 'CHEMBL115',
 'CHEMBL1259059',
 'CHEMBL853',
 'CHEMBL163',
 'CHEMBL1257073',
 'CHEMBL584',
 'CHEMBL152',
 'CHEMBL57']

In [10]:
# Apply filter_graph to the full KG with the phase-4 drug IDs, then collect
# the nodes whose namespace is SideEffect, Disease or ChEMBLAssay.
query_graph = filter_graph(mpox_graph, drugs_CT4)
filter_se = [
    node
    for node in query_graph
    if node.namespace in ('SideEffect', 'Disease', 'ChEMBLAssay')
]




In [11]:
# Work on a copy so query_graph itself keeps the nodes that are dropped here.
G = query_graph.copy()
# Build the lookup set once instead of once per visited node.
nodes_to_drop = set(filter_se)
G.remove_nodes_from([n for n in G if n in nodes_to_drop])

In [12]:
# Display the pruned graph inline using pybel's Jupyter helper.
to_jupyter(G)

<IPython.core.display.Javascript object>