In [2]:
import sys, os
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from rdkit import Chem
from rdkit.Chem import AllChem


## Генерация и скоринг

In [3]:
from reward_function import score_smiles
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL = "seyonec/REINVENT-2.0-RNN-PubChemFingerprint-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL)

out = []
BATCH=64; N=256
for _ in range(N//BATCH):
    toks = tokenizer(['']*BATCH, return_tensors='pt', padding=True)
    outs = model.generate(**toks, max_length=128)
    sms = tokenizer.batch_decode(outs, skip_special_tokens=True)
    for s in sms:
        out.append((s, score_smiles(s)))

gen_df = pd.DataFrame(out, columns=['smiles','score']).sort_values('score',ascending=False)
gen_df.to_csv('../results/generated.csv', index=False)
gen_df.head(10)


ModuleNotFoundError: No module named 'sascorer'

Collecting git+https://github.com/rdkit/sascorer.git
  Cloning https://github.com/rdkit/sascorer.git to /tmp/pip-req-build-0wi5vy1q
  Running command git clone --filter=blob:none --quiet https://github.com/rdkit/sascorer.git /tmp/pip-req-build-0wi5vy1q
Username for 'https://github.com': [31mERROR: Operation cancelled by user[0m[31m
[0m^C


## Докинг топ-50

In [None]:
import subprocess
from rdkit.Chem import AllChem

top50 = gen_df.head(50)
RECP = '../data/ache.pdbqt'
os.makedirs('../results/docking', exist_ok=True)

for i,(s,sc) in enumerate(top50.values):
    m = Chem.MolFromSmiles(s)
    mh = Chem.AddHs(m)
    AllChem.EmbedMolecule(mh,AllChem.ETKDG())
    pdb = f"../results/docking/lig{i}.pdb"
    Chem.MolToPDBFile(mh,pdb)
    pqt = pdb.replace('.pdb','.pdbqt')
    subprocess.run(['obabel',pdb,'-O',pqt,'--partialcharge','gasteiger'])
    outp = pqt.replace('.pdbqt','_out.pdbqt')
    subprocess.run([
        'vina','--receptor',RECP,'--ligand',pqt,
        '--center_x','10','--center_y','25','--center_z','15',
        '--size_x','20','--size_y','20','--size_z','20',
        '--out',outp
    ])
print("Docking done")
