## Install and import libraries

In [10]:
!pip install -q pubchempy
!pip install -q rdkit
!pip install -q chembl_webresource_client
!pip install -q rcsb-api
!pip install -q deepchem
!pip install -q dockstring
!pip install -q openbabel-wheel

In [1]:
!git clone https://github.com/MauricioCafiero/MoDrAg.git

Cloning into 'MoDrAg'...
remote: Enumerating objects: 474, done.[K
remote: Counting objects: 100% (247/247), done.[K
remote: Compressing objects: 100% (184/184), done.[K
remote: Total 474 (delta 156), reused 138 (delta 57), pack-reused 227 (from 1)[K
Receiving objects: 100% (474/474), 44.73 MiB | 27.36 MiB/s, done.
Resolving deltas: 100% (267/267), done.


In [2]:
# Third-party and standard-library imports used by this notebook.
import torch
import os, re, sys
import gradio as gr
import numpy as np

# Make the MoDrAg source modules importable. The path is relative, so the
# kernel's working directory must be the one the `git clone` cell ran in.
sys.path.append('MoDrAg/SemanticMoDrAg/code')
# NOTE(review): star imports hide where each name comes from, and a module
# imported later silently shadows any same-named export of an earlier one,
# so the order of these lines is significant.
from modrag_molecule_functions import *
from modrag_property_functions import *
from modrag_protein_functions import *
from modrag_task_graphs import *

# The tool_tests module lives in a separate directory of the same clone.
sys.path.append('MoDrAg/SemanticMoDrAg/testing')
from tool_tests import *

# Use the GPU when torch reports one is available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


## Tool tests

In [3]:
# Run the tool test suite. `run_tests` is brought into scope by one of the star
# imports above (presumably `tool_tests` — TODO confirm). It prints one ✓/✗ line
# per tool followed by a PASS/FAIL summary.
# NOTE(review): the recorded output shows `pdb_node` failing (matched 1/2 items).
run_tests()


Testing name_node with prompt: [['CCO', 'c1ccccc1']]
name tool
CCO ethanol
c1ccccc1 benzene
✓ name_node: All 2 items matched

Testing smiles_node with prompt: [['aspirin', 'caffeine']]
smiles tool
✓ smiles_node: All 2 items matched

Testing related_node with prompt: [['CCO']]
related tool
got related molecules with smiles
ethanol
✓ related_node: Matched 5/5 items

Testing structure_node with prompt: [['CCO', 'c1ccccc1']]
structure tool
✓ structure_node: Returned 2 items (expected >= 2)

Testing substitution_node with prompt: [['c1cc(O)ccc1']]
substitution tool
✓ substitution_node: Matched 3/3 items

Testing lipinski_node with prompt: [['CCO', 'c1ccccc1']]
lipinski tool
✓ lipinski_node: Matched 2/2 items

Testing pharmfeature_node with prompt: ['CCO', ['c1ccccc1', 'CC(=O)Oc1ccccc1C(=O)O']]
pharmfeature tool


[14:59:47] unsupported number of radical electrons 4


✓ pharmfeature_node: All 2 items matched

Testing uniprot_node with prompt: [['DNA gyrase'], False]
UNIPROT tool
✓ uniprot_node: Matched 5/5 items

Testing listbioactives_node with prompt: [['P27338']]
List bioactives tool
       organism                            pref_name target_chembl_id  \
0  Homo sapiens  Amine oxidase [flavin-containing] B       CHEMBL2039   
1  Homo sapiens  Amine oxidase [flavin-containing] B       CHEMBL2039   
2  Homo sapiens                    Monoamine oxidase    CHEMBL2095205   

      target_type  
0  SINGLE PROTEIN  
1  SINGLE PROTEIN  
2  PROTEIN FAMILY  
Found info for Uniprot ID: P27338
Found 2 unique ChEMBL IDs
✓ listbioactives_node: Matched 2/2 items

Testing getbioactives_node with prompt: [['CHEMBL2039']]
Get bioactives tool
Found CHEMBL2039_bioactives.csv
number of records: 5510
✓ getbioactives_node: Matched 50/50 items

Testing predict_node with prompt: [['[NH3+]CCc1ccc(O)cc1'], 'CHEMBL2039']
Predict Tool
Number of molecules: 2000
Row 634 has a



score for training set: 0.928
score for validation set: 0.545
in predict node, smiles: [NH3+]CCc1ccc(O)cc1
Predicted IC50 for [NH3+]CCc1ccc(O)cc1: 10995.537239000963
✓ predict_node: All 1 items matched

Testing pdb_node with prompt: [['2A3R']]
pdb toolS
A3P
LDP
Blank line
Chain A: MELIQDTSRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLINTYPKSGTTWVSQILDMIYQGGDLEKCNRAPIYVRVPFLEVNDPGEPSGLETLKDTPPPRLIKSHLPLALLPQTLLDQKVKVVYVARNPKDVAVSYYHFHRMEKAHPEPGTWDSFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETMDFMVQHTSFKEMKKNPMTNYTTVPQELMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAEKMAGCSLSFRSEL
Chain B: MELIQDTSRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLINTYPKSGTTWVSQILDMIYQGGDLEKCNRAPIYVRVPFLEVNDPGEPSGLETLKDTPPPRLIKSHLPLALLPQTLLDQKVKVVYVARNPKDVAVSYYHFHRMEKAHPEPGTWDSFLEKFMAGEVSYGSWYQHVQEWWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETMDFMVQHTSFKEMKKNPMTNYTTVPQELMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAEKMAGCSLSFRSEL
✗ pdb_node: Only matched 1/2 items

Testing find_node with prompt: [['DNA gyrase']]
PDB search tool
✓ find_node: Matc



score for training set: 0.928
score for validation set: 0.545
in predict node, smiles: [NH3+]CCc1ccc(O)cc1
Predicted IC50 for [NH3+]CCc1ccc(O)cc1: 10995.537239000963
✓ get_predictions_for_protein: All 1 items matched

Testing dock_from_names with prompt: [['aspirin', 'caffeine'], 'DRD2']
smiles tool
docking tool
Number of CPUs: 8
query_protein: DRD2
Docking molecule with 8 cpu cores.
Docking score: -6.4
Docking molecule with 8 cpu cores.
Docking score: -5.6
✓ dock_from_names: All 2 items matched

TEST SUMMARY
Passed: 17/18 (94.4%)

✓ PASS: name_node
✓ PASS: smiles_node
✓ PASS: related_node
✓ PASS: structure_node
✓ PASS: substitution_node
✓ PASS: lipinski_node
✓ PASS: pharmfeature_node
✓ PASS: uniprot_node
✓ PASS: listbioactives_node
✓ PASS: getbioactives_node
✓ PASS: predict_node
✗ FAIL: pdb_node
✓ PASS: find_node
✓ PASS: docking_node
✓ PASS: target_node
✓ PASS: get_actives_for_protein
✓ PASS: get_predictions_for_protein
✓ PASS: dock_from_names


### Reload MoDrAg modules

In [5]:
import importlib
import sys


def _reload_loaded(module_name):
    """Re-execute an already-imported module and return it.

    Looks the module up in ``sys.modules``, so a ``KeyError`` is raised if it
    has not been imported in this kernel yet.
    """
    return importlib.reload(sys.modules[module_name])


# Each reload is followed by a fresh star import so the notebook namespace is
# rebound to the newly executed definitions. The order below is kept exactly
# as before: later star imports shadow same-named exports of earlier ones.
_reload_loaded('modrag_molecule_functions')
from modrag_molecule_functions import *

_reload_loaded('modrag_property_functions')
from modrag_property_functions import *

_reload_loaded('modrag_protein_functions')
from modrag_protein_functions import *

_reload_loaded('modrag_task_graphs')
from modrag_task_graphs import *

_reload_loaded('tool_tests')
from tool_tests import *