In [7]:
import os
import shutil
import re

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import reinvent
from reinvent.notebooks import load_tb_data, plot_scalars, get_image, create_mol_grid
from reinvent.scoring.transforms import ReverseSigmoid
from reinvent.scoring.transforms.sigmoids import Parameters as SigmoidParameters

import ipywidgets as widgets

%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [8]:
# Configure the run parameters for the sampling model.
# The TOML text below is written to disk unchanged and later handed to the
# `reinvent` command line tool as its configuration file.
sampling_filename = "New_drug.toml"

# Plain (non-f) string: the configuration contains no placeholders to interpolate.
sample_parameters = """
run_type = "sampling"
device = "cuda:0"
json_out_config = "_sampling.json"

[parameters]

model_file = "TL_reinvent.model.chkpt"
smiles_file = "stan_smi_1.smi"
sample_strategy = "beamsearch"

output_file = 'new_drug.csv'

num_smiles = 5
unique_molecules = true
randomize_smiles = true
"""

# Persist the configuration so the sampling run can read it.
with open(sampling_filename, "w") as toml_file:
    toml_file.write(sample_parameters)

In [9]:
# Run inference to generate molecules.
# IPython shell escape: `$sampling_filename` is expanded to the value of the Python
# variable of that name before the command is executed.
# NOTE(review): `-l sampling.log` presumably selects the log file — confirm against `reinvent --help`.
!reinvent -l sampling.log $sampling_filename

In [11]:
# Post-process the output of the generative model.
## On inspection, some generated molecules whose Tanimoto similarity to the input
## molecule is 1 are nevertheless not identical to it. Repeated molecules are therefore
## identified by checking whether the input and generated SMILES strings are exactly equal.

### Load the results and preview the first five molecules (`head()` defaults to 5 rows)
df_results = pd.read_csv('new_drug.csv')
df_results.head()

Unnamed: 0,SMILES,Input_SMILES,Tanimoto,NLL
0,Brc1c(NC2=NCCN2)ccc2nccnc12,Brc1c(NC2=NCCN2)ccc2nccnc12,1.0,0.32
1,Clc1c(NC2=NCCN2)ccc2nccnc12,Brc1c(NC2=NCCN2)ccc2nccnc12,0.785714,1.46
2,Cc1c(NC2=NCCN2)ccc2nccnc12,Brc1c(NC2=NCCN2)ccc2nccnc12,1.0,5.54
3,Clc1cnc2ccc(NC3=NCCN3)cc2n1,Brc1c(NC2=NCCN2)ccc2nccnc12,0.785714,5.87
4,C#CC(O)(/C=C/Cl)CC,C#CC(O)(/C=C/Cl)CC,0.470588,0.0


In [12]:
# Separate generated molecules that merely reproduce their input from the new ones.
# A generated SMILES counts as a repeat only when its string is exactly equal to the
# Input_SMILES on the same row (plain string comparison, no chemical canonicalisation).
#
# The comparison is done once for the whole column instead of row by row. This avoids
# chained label lookups such as df_results['SMILES'][n], which are slow and only work
# when the frame carries the default 0..n-1 index.
is_input_copy = df_results['SMILES'] == df_results['Input_SMILES']

# same_smile: generated SMILES identical to their input (discarded downstream).
same_smile = df_results.loc[is_input_copy, 'SMILES'].tolist()
# save_smiles: generated SMILES that differ from their input (kept for evaluation).
save_smiles = df_results.loc[~is_input_copy, 'SMILES'].tolist()

In [13]:
len(same_smile) # Number of generated molecules that are identical to their input molecule

500

In [14]:
# Evaluate drug-likeness properties of the generated pharmaceutical molecules.
# For every retained SMILES the four RDKit QED variants and the synthetic accessibility
# (SA) score are computed, and the table is written to 'results_properties.csv'.
from rdkit.Chem import QED
from rdkit import Chem
from rdkit.Contrib.SA_Score import sascorer

# Output columns, in order. Declared explicitly so the CSV still gets a header row when
# no molecule is scored.
# NOTE(review): 'wights' looks like a typo for 'weights'; the names are kept unchanged
# because they define the schema of the CSV that other code may read.
property_columns = [
    'smiles',
    'QED_default',
    'QED_wights_max',
    'QED_wights_mean',
    'QED_wights_none',
    'SA_score',
]

write_list = list()
invalid_smiles = list()  # entries that could not be turned into an RDKit molecule
for smi in save_smiles:
    # MolFromSmiles returns None instead of raising when a SMILES cannot be parsed;
    # non-string values (e.g. NaN from an empty CSV field) are treated the same way.
    mol = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    if mol is None:
        # Scoring None would raise, so skip the entry and remember it for the report.
        invalid_smiles.append(smi)
        continue

    # NOTE(review): QED.default appears to be an alias of QED.weights_mean in RDKit,
    # so these two columns may always be identical — confirm.
    write_list.append({
        'smiles': smi,
        'QED_default': QED.default(mol),
        'QED_wights_max': QED.weights_max(mol),
        'QED_wights_mean': QED.weights_mean(mol),
        'QED_wights_none': QED.weights_none(mol),
        'SA_score': sascorer.calculateScore(mol),
    })

if invalid_smiles:
    # Make the skipped entries visible rather than dropping them silently.
    print(f"Skipped {len(invalid_smiles)} SMILES that RDKit could not parse, e.g. {invalid_smiles[:5]}")

df = pd.DataFrame(write_list, columns=property_columns)
# index=False is the documented way to omit the row index from the CSV.
df.to_csv('results_properties.csv', index=False)

## Pesticide (agrochemical) molecules are instead evaluated with an internal prediction model.