In [1]:
!pip install rdkit -qqq

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.4/29.4 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Colab Notebooks/MDRinhib-test

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/MDRinhib-test


In [7]:
import pandas as pd
import numpy as np
from rdkit import Chem

In [8]:
def unique_ligands(list_of_structures):
    '''
    This function takes a list of structures that was docked in different conformations
    (ie. the same structure occurs several times) and returns a unique list of structures 
    '''

    unique_list_prop = []
    unique_list = []

    for x in list_of_structures:
        test = x.GetProp("FDA drugnames") + x.GetProp('s_i_glide_gridfile')
        if test not in unique_list_prop:
            unique_list_prop.append(test)
            unique_list.append(x)
    return unique_list


In [9]:
# Read sdf file with ligands docked to protein
suppl = Chem.SDMolSupplier('All_proteins_total.sdf')

# Put each ligand into list
mols_docked = [x for x in suppl]
print("Total number of FDA drug conformations successfully docked: " + str(len(mols_docked)))

unique_mols_docked = unique_ligands(mols_docked)
print("Number of FDA drugs successfully docked: " + str(len(unique_mols_docked)))

mollist = [] # list of uniquely docked mol objects for featurization

# Creates a list of lists with information on
# Transporter name and site
# Molecule name
# Docking score
for mol in unique_mols_docked:
    mollist.append([mol.GetProp('s_i_glide_gridfile'), mol.GetProp('FDA drugnames'), float(mol.GetProp('r_i_glide_gscore')), mol])

df = pd.DataFrame.from_records(mollist)
df = df.pivot(index=1, columns=0, values=[2,3])

receptors = []

# The following code fetches receptor names and lists them to be applied as columns in the dataFrame
for rec in df.keys():
    receptors.append(rec[1][11:15])

no_receptors = int(len(receptors)/2)
receptor_scores = []

for i in range(len(receptors)):
    if i >= no_receptors:
        receptors[i] = receptors[i] + "_mol"
    else:
        receptor_scores.append(receptors[i])

df.columns = receptors

# Rounds numbers in the dataFrame to 1 decimal
for rec in receptor_scores:
    df[rec] = pd.to_numeric(df[rec]).round(1)

# Replaces Nan values with 0
df = df.fillna(0)

Total number of FDA drug conformations successfully docked: 30861
Number of FDA drugs successfully docked: 10151


Raw data looks like this

In [10]:
mollist

[['glide-grid_MRP1_site_1',
  'DEPINAR; TANNIC ACID',
  -24.1806681857067,
  <rdkit.Chem.rdchem.Mol at 0x7f4edcd7ecf0>],
 ['glide-grid_MRP1_site_1',
  'ACARBOSE; ACARBOSE',
  -16.2161720456222,
  <rdkit.Chem.rdchem.Mol at 0x7f4edc8b89e0>],
 ['glide-grid_MRP1_site_1',
  'CRYSTODIGIN; DIGITOXIN',
  -11.7911479268545,
  <rdkit.Chem.rdchem.Mol at 0x7f4edc8b8c10>],
 ['glide-grid_MRP1_site_1',
  'DIGOXIN; DIGOXIN',
  -11.5762724541396,
  <rdkit.Chem.rdchem.Mol at 0x7f4edc8b8cf0>],
 ['glide-grid_MRP1_site_1',
  'OXYTOCIN 10 USP UNITS IN DEXTROSE 5%; OXYTOCIN',
  -11.3585063744803,
  <rdkit.Chem.rdchem.Mol at 0x7f4edc8b8d60>],
 ['glide-grid_MRP1_site_1',
  'VASOSTRICT; VASOPRESSIN',
  -11.1024767035676,
  <rdkit.Chem.rdchem.Mol at 0x7f4edc8b8dd0>],
 ['glide-grid_MRP1_site_1',
  'ABRAXANE; PACLITAXEL',
  -10.6113206699514,
  <rdkit.Chem.rdchem.Mol at 0x7f4edc8b8f90>],
 ['glide-grid_MRP1_site_1',
  'ERAXIS; ANIDULAFUNGIN',
  -10.5382027808328,
  <rdkit.Chem.rdchem.Mol at 0x7f4edc83e040>],
 ['gli

And this is how the DataFrame that I will work with look like.

The first 11 columns represents a docking score (computed dG) for 11 ABC-transporters. The last 11 columns represent the 3D representations of the molecules corresponding to how it's 'state' or 'bend' when it was docked.

It is likely that the 3D information is not used depending on what featurizer and model is implemented.


Shortly about the project
---

---

Multidrug resistance is a problem when treating patients with cytostatics for some types of cancer. This is especially prevalent when prolonged therapy is required. One proposed mechanism for this is upregulation of promiscuous efflux transporters of the family ABC*.

Through the last few decades three generations of antagonizing ligands for the identified receptors has been developed, but none have had clinical success.

Several issues has halted development in the area, but new technologies might pave way for opportunities to succeed.


---


*The ABC-gene family translate into different names for the individual transporter names (BCRP, MDR1, MRP1, etc...)

In [12]:
df

Unnamed: 0_level_0,BCRP,MDR1,MRP1,MRP2,MRP3,MRP4,MRP5,MRP6,MRP7,MRP8,...,MDR1_mol,MRP1_mol,MRP2_mol,MRP3_mol,MRP4_mol,MRP5_mol,MRP6_mol,MRP7_mol,MRP8_mol,MRP9_mol
1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8-HOUR BAYER; ASPIRIN,-5.3,-6.4,-5.1,-4.9,-4.1,-5.1,-9.3,-4.1,-3.5,-4.2,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc59f740>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc8677b0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc8267b0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc792120>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc753430>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc77c7b0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6e66d0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc648660>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc60cc80>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc63ac80>
8-MOP; METHOXSALEN,-8.0,-6.4,-4.8,-3.9,-4.7,-4.7,-3.8,-3.9,-4.7,-5.7,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5a0120>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc86e7b0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc83df20>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc785e40>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc75a200>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc735120>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6ee890>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc69fd60>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc674040>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5f0b30>
A-POXIDE; CHLORDIAZEPOXIDE HYDROCHLORIDE,-10.1,-10.7,-7.9,-6.9,-7.9,-6.9,-8.5,-6.2,-4.4,-9.7,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc57f9e0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc850120>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc8224a0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7db970>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7b0e40>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc709900>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6dce40>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6b3580>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc665430>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc635f20>
ABACAVIR AND LAMIVUDINE; LAMIVUDINE,-7.1,-6.3,-5.9,-6.7,-5.0,-5.0,-7.4,-5.6,-5.2,-4.6,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5a0900>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc856c80>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc816f20>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc780ba0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc755970>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc707e40>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6cf820>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6a6510>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc605b30>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc63a350>
"ABACAVIR SULFATE, LAMIVUDINE AND ZIDOVUDINE; ZIDOVUDINE",-6.8,-7.6,-5.5,-4.6,-5.3,-5.1,-6.6,-4.4,-5.0,-3.8,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc588820>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc85e6d0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc82d270>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7fcdd0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7534a0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc70d270>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6e0740>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6a93c0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6129e0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5da820>
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZONTIVITY; VORAPAXAR SULFATE,-10.1,-8.6,-5.2,-5.8,-5.9,-6.9,-7.5,-3.2,-3.9,-5.8,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc585ac0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc865820>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc81c970>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7efc80>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7b1a50>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7072e0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6facf0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6413c0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6727b0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5cb740>
ZYDELIG; IDELALISIB,-9.3,-8.5,-6.4,-5.2,-6.1,-8.2,-7.2,-6.0,-5.4,-6.3,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc586970>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc84fb30>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc823350>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7ed970>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7af350>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc701270>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6cd350>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6a2c10>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc66d970>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5c9dd0>
ZYFLO CR; ZILEUTON,-7.4,-7.6,-5.7,-7.1,-6.6,-5.9,-6.3,-5.5,-5.3,-5.6,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5899e0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc85a900>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc8285f0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7e8270>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc744c80>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc712120>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6d0ac0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6a50b0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc675740>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc63a510>
ZYKADIA; CERITINIB,-6.9,-10.0,-6.2,-6.7,-7.0,-7.8,-5.3,-5.7,-5.2,-7.3,...,<rdkit.Chem.rdchem.Mol object at 0x7f4edc580f90>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc851660>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc816eb0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7e55f0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc7b2cf0>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc723040>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6cf270>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc6a5430>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc667820>,<rdkit.Chem.rdchem.Mol object at 0x7f4edc5d0660>
