In [None]:
import json
import os
from functools import partial

import numpy as np
import pandas as pd
from joblib import delayed, Parallel
from matplotlib import pyplot as plt
from oddt.docking import autodock_vina
from oddt.scoring.descriptors import oddt_vina_descriptor
from oddt.toolkits import rdk, ob
from scipy import stats
from sklearn.ensemble import RandomForestRegressor

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also serialises NumPy arrays and NumPy scalars.

    Pass it as ``cls=NumpyEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Convert NumPy objects to native Python equivalents.

        Arrays become (nested) lists and NumPy scalars become the matching
        Python scalar. Anything else is delegated to the base class, which
        raises ``TypeError`` for unsupported objects.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # np.float32, np.int64, np.bool_, ... are not handled by the
            # stock encoder; .item() yields the plain Python value.
            return obj.item()
        return super().default(obj)

# Filesystem locations used by the notebook. Each can be overridden through an
# environment variable so the notebook is not tied to one machine; the
# defaults are the original hard-coded paths.
PDBBIND_DIR = os.environ.get('PDBBIND_DIR', '/home/fergus/pdbbind/v2019')
VINA = os.environ.get('VINA_BIN', '/home/fergus/autodock_vina_1_1_2_linux_x86/bin/vina')

In [None]:
T = 298  # temperature in kelvin
R = 0.001987  # gas constant in kcal/(mol*K)
LN10 = float(np.log(10))  # ln(10); converts a natural log into log10


def dg_to_pk(dg):
    """Convert a free energy ``dg`` into a pK value.

    Evaluates ``-dg / (R * T * ln 10)``, the closed form of
    ``-log10(exp(dg / (R * T)))``. The closed form is used because the
    exp-then-log10 route overflows to +/-inf for large ``|dg|``.

    Parameters
    ----------
    dg : scalar or array-like
        Free energy in the energy unit of ``R`` (kcal/mol).

    Returns
    -------
    The pK value(s), element-wise for array-like input.
    """
    return -np.divide(dg, R * T * LN10)


def dg_to_pk1(dg):
    """Same conversion as ``dg_to_pk``, written with plain arithmetic.

    The denominator uses the exact ln(10) rather than a rounded literal so
    that both helpers return the same value.
    """
    return -dg / (LN10 * R * T)

In [None]:
def _read_pdb_ids(path):
    """Return the whitespace-stripped, non-empty lines of ``path`` as a list.

    Blank lines are skipped so that they do not turn into empty IDs.
    """
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [pdb_id for pdb_id in stripped if pdb_id]


train_pdbs = _read_pdb_ids('../data/pdbbind_training_set.txt')
test_pdbs = _read_pdb_ids('../data/pdbbind_test_set.txt')

# Combined list: training IDs first, then test IDs.
pdbs = train_pdbs + test_pdbs

In [None]:
@delayed
def build(pdb):
    """Score the crystal, docked and minimized ligand poses of one PDB entry.

    Reads the protein and crystal ligand from ``PDBBIND_DIR`` and the docked /
    minimized poses from ``../pdbbind_docked_poses``, then runs an
    ``oddt_vina_descriptor`` on each pose against the protein.

    Each score is element ``[0][0]`` of the descriptor output, presumably the
    Vina affinity term (TODO confirm against the oddt descriptor layout).

    Because of ``@delayed``, calling ``build(pdb)`` only creates a joblib task;
    the body runs when the task is executed by ``Parallel``.

    Returns
    -------
    tuple
        ``(affinity_c, affinities_d, affinity_m)``: the crystal-pose score, a
        list with one score per docked pose, and the minimized-pose score.
    """
    scorer = oddt_vina_descriptor()

    receptor = next(ob.readfile('pdb', f'{PDBBIND_DIR}/{pdb}/{pdb}_protein.pdb'))
    receptor.protein = True

    def first_score(mol):
        # First value of the first descriptor row for this pose.
        return scorer.build(mol, receptor)[0][0]

    # crystal pose
    crystal_ligand = next(ob.readfile('sdf', f'{PDBBIND_DIR}/{pdb}/{pdb}_ligand.sdf'))
    affinity_c = first_score(crystal_ligand)

    # docked poses: read them all, then score them in file order
    docked_poses = list(ob.readfile('sdf', f'../pdbbind_docked_poses/{pdb}/{pdb}_ligand_docked.sdf'))
    affinities_d = [first_score(pose) for pose in docked_poses]

    # minimized pose
    minimized_ligand = next(ob.readfile('sdf', f'../pdbbind_docked_poses/{pdb}/{pdb}_ligand_minimized.sdf'))
    affinity_m = first_score(minimized_ligand)

    return (affinity_c, affinities_d, affinity_m)

In [None]:
# Run the delayed build tasks for every PDB ID with 8 parallel jobs.
with Parallel(n_jobs=8, verbose=10) as parallel:
    results = parallel(build(pdb_id) for pdb_id in pdbs)

# Each result is the (crystal, docked, minimized) tuple returned by build;
# split the tuples into one dict per pose type, keyed by PDB ID.
vina_crystal = dict(zip(pdbs, (scores[0] for scores in results)))
vina_docked = dict(zip(pdbs, (scores[1] for scores in results)))
vina_minimized = dict(zip(pdbs, (scores[2] for scores in results)))

In [None]:
# pK of the crystal pose for each PDB ID.
vina_crystal_pk = pd.Series({pdb: dg_to_pk(dg) for pdb, dg in vina_crystal.items()})

# pK of every docked pose, in pose order, for the PDB IDs of vina_crystal.
docked_pose_pks = {
    pdb: [dg_to_pk(dg) for dg in vina_docked[pdb]]
    for pdb in vina_crystal
}
# Three summaries of the docked poses: first pose, maximum, and mean.
vina_docked_pk = pd.Series({pdb: pose_pks[0] for pdb, pose_pks in docked_pose_pks.items()})
vina_docked_max_pk = pd.Series({pdb: np.max(pose_pks) for pdb, pose_pks in docked_pose_pks.items()})
vina_docked_mean_pk = pd.Series({pdb: np.mean(pose_pks) for pdb, pose_pks in docked_pose_pks.items()})

# pK of the minimized pose for each PDB ID.
vina_minimized_pk = pd.Series({pdb: dg_to_pk(vina_minimized[pdb]) for pdb in vina_crystal})

# Write each series to its own CSV file under ../results.
csv_outputs = {
    '../results/vina_crystal_predicted_pk.csv': vina_crystal_pk,
    '../results/vina_docked_predicted_pk.csv': vina_docked_pk,
    '../results/vina_docked_max_predicted_pk.csv': vina_docked_max_pk,
    '../results/vina_docked_mean_predicted_pk.csv': vina_docked_mean_pk,
    '../results/vina_minimized_predicted_pk.csv': vina_minimized_pk,
}
for csv_path, pk_series in csv_outputs.items():
    pk_series.to_csv(csv_path)

In [None]:
# Distribution of the predicted pK of the first docked pose per PDB entry.
fig, ax = plt.subplots()
ax.hist(vina_docked_pk)
ax.set(title='Predicted pK: first docked pose', xlabel='Predicted pK', ylabel='Count');

In [None]:
# Distribution of the maximum predicted pK over the docked poses per PDB entry.
fig, ax = plt.subplots()
ax.hist(vina_docked_max_pk)
ax.set(title='Predicted pK: maximum over docked poses', xlabel='Predicted pK', ylabel='Count');

In [None]:
# NOTE(review): this repeats the vina_docked_pk histogram from two cells
# above; vina_docked_mean_pk is computed but never plotted. Confirm which
# series was intended here.
fig, ax = plt.subplots()
ax.hist(vina_docked_pk)
ax.set(title='Predicted pK: first docked pose', xlabel='Predicted pK', ylabel='Count');

In [None]:
# Load the test-set binding affinities. The file is read without a header row
# and with its first column as the index. The read_csv `squeeze` keyword was
# removed in pandas 2.0, so the single remaining column is collapsed into a
# Series with DataFrame.squeeze instead.
pdbbind_test_set_affinity = pd.read_csv(
    '../data/pdbbind_test_set_binding_affinity.csv',
    index_col=0,
    header=None,
).squeeze('columns')

In [None]:
targets = ['AKT1', 'CP3A4', 'GCR', 'HIVPR', 'HIVRT', 'KIF11']

# For each target, map ligand label -> pK derived from the 'vina_affinity'
# of the top pose (the entries whose key ends in '_1').
dude_vina_scores = {}
for target in targets:
    with open(f'../data/{target}_KI_docked_features.json') as f:
        feats = json.load(f)
    # The label is the text before the first underscore of the key.
    # NOTE(review): assumes labels contain no underscore themselves - confirm
    # against the key format of the features files.
    vina_scores = {
        key.split('_')[0]: dg_to_pk(vals['vina_affinity'])
        for key, vals in feats.items()
        if key.endswith('_1')
    }
    dude_vina_scores[target] = vina_scores

In [None]:
# Write the per-target scores in dude_vina_scores to disk as JSON.
# json, np and NumpyEncoder all come from the first cell of the notebook, so
# they are not re-imported or re-defined here (a second class definition
# would silently shadow the first one).
with open('../results/dude_vina_scores.json', 'w') as f:
    json.dump(dude_vina_scores, f, cls=NumpyEncoder)