### Yを予測するための効率的なトレーニングデータの抽出検討

In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pubchempy
import random
 
from ord_schema import message_helpers, validations
from ord_schema.proto import dataset_pb2
from rdkit import rdBase, Chem, DataStructs
from rdkit.Avalon import pyAvalonTools
from rdkit.Chem import AllChem, Draw, rdMHFPFingerprint
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem.AtomPairs import Pairs, Torsions

In [43]:
# Load the Buchwald-Hartwig reaction dataset.
pb = "./practice/ord_dataset-00005539a1e04c809a9a78647bea649c.pb.gz"
# NOTE(review): the original comment guessed this was a JSON file; the
# ".pb.gz" extension suggests a gzip-compressed protobuf instead -- confirm.
data = message_helpers.load_message(pb, dataset_pb2.Dataset)
# Flatten the reaction messages into a DataFrame, keeping constant columns.
df = message_helpers.messages_to_dataframe(
    data.reactions,
    drop_constant_columns=False,
)

# Keep only the columns describing A + B -> Y:
# the aryl halide, the amine and the first product of the first outcome.
reaction_columns = [
    'inputs["aryl halide"].components[0].identifiers[0].value',
    'inputs["amine"].components[0].identifiers[0].value',
    'outcomes[0].products[0].identifiers[0].value',
]
df = df[reaction_columns]

# Short, readable column labels: A, B and Y.
df.columns = ["A", "B", "Y"]

# Remove duplicated reactions and renumber the rows from 0.
df_buchwald_hardwig_smiles = df.drop_duplicates().reset_index(drop=True)

# Build an RDKit Mol object from a SMILES string.
def generate_mol(smiles):
    """Return the Mol parsed from *smiles*, or None when no Mol can be built.

    Args:
        smiles: SMILES text. Any non-string value (for example None or a NaN
            coming from an empty DataFrame cell) is rejected without being
            handed to RDKit.

    Returns:
        The value returned by ``Chem.MolFromSmiles`` for a string input, or
        None when *smiles* is not a string or the parser raises.
    """
    if not isinstance(smiles, str):
        return None
    try:
        return Chem.MolFromSmiles(smiles)
    except Exception:
        # Best-effort conversion, as in the original, but without a bare
        # ``except:`` so KeyboardInterrupt / SystemExit are no longer swallowed.
        return None

# Convert every SMILES cell (columns A, B and Y) into a Mol object.
df_mol_ABY = df_buchwald_hardwig_smiles.applymap(generate_mol)
df_mol_ABY = df_mol_ABY.rename(
    columns={"A": "mol_A", "B": "mol_B", "Y": "mol_Y"}
)

# Put the SMILES columns and the Mol columns side by side.
df_smiles_mol = pd.concat([df_buchwald_hardwig_smiles, df_mol_ABY], axis=1)
df_smiles_mol


Unnamed: 0,A,B,Y,mol_A,mol_B,mol_Y
0,CCOC1=C(C=C2C(=C1)N=CC(=C2NC3=C(C=C(C=C3)F)F)C...,CC(C)N1CCNCC1,CCOC1=C(C=C2C(=C1)N=CC(=C2NC3=C(C=C(C=C3)F)F)C...,<rdkit.Chem.rdchem.Mol object at 0x160819cb0>,<rdkit.Chem.rdchem.Mol object at 0x161e7e180>,<rdkit.Chem.rdchem.Mol object at 0x161c559a0>
1,C1=CC=C(C=C1)I,CN1C=NC2=C1C=C(C(=C2F)N)C(=O)OC,CN1C=NC2=C1C=C(C(=C2F)NC3=CC=CC=C3)C(=O)OC,<rdkit.Chem.rdchem.Mol object at 0x1608198c0>,<rdkit.Chem.rdchem.Mol object at 0x161e7e110>,<rdkit.Chem.rdchem.Mol object at 0x161c56180>
2,CC1=NC(=C(C=C1)OC2=CC(=NC=C2)Cl)C,C1=CC(=CC=C1N)S(=O)(=O)N,CC1=NC(=C(C=C1)OC2=CC(=NC=C2)NC3=CC=C(C=C3)S(=...,<rdkit.Chem.rdchem.Mol object at 0x16081a260>,<rdkit.Chem.rdchem.Mol object at 0x161e7f220>,<rdkit.Chem.rdchem.Mol object at 0x161c55fc0>
3,C1=C(C=NC=C1Br)Br,CC(=O)N1CCNCC1,CC(=O)N1CCN(CC1)C2=CC(=CN=C2)Br,<rdkit.Chem.rdchem.Mol object at 0x16081a6c0>,<rdkit.Chem.rdchem.Mol object at 0x161e7e0a0>,<rdkit.Chem.rdchem.Mol object at 0x161c54900>
4,CN1CC(OC2=C(C1)C=CC(=N2)Cl)C3=CC=CC=C3,C1=CC(=CC=C1N)N2C=CN=C2,CN1CC(OC2=C(C1)C=CC(=N2)NC3=CC=C(C=C3)N4C=CN=C...,<rdkit.Chem.rdchem.Mol object at 0x16081a420>,<rdkit.Chem.rdchem.Mol object at 0x161e7fae0>,<rdkit.Chem.rdchem.Mol object at 0x161c554d0>
...,...,...,...,...,...,...
468,C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3)N4C=NC...,C1CNCCN1,C1CN(CCN1)C2=CC=CC3=C2N=CN3C(C4=CC=CC=C4)(C5=C...,<rdkit.Chem.rdchem.Mol object at 0x161e7daf0>,<rdkit.Chem.rdchem.Mol object at 0x161c55d90>,<rdkit.Chem.rdchem.Mol object at 0x161cfba70>
469,C1=CC(=C(C=C1C(F)(F)F)Br)F,CC(C)(C)OC(=O)N1CCNCC1,CC(C)(C)OC(=O)N1CCN(CC1)C2=C(C=CC(=C2)C(F)(F)F)F,<rdkit.Chem.rdchem.Mol object at 0x161e7ca50>,<rdkit.Chem.rdchem.Mol object at 0x161c552a0>,<rdkit.Chem.rdchem.Mol object at 0x161cfb990>
470,C1=CN=C(C=C1C(F)(F)F)Cl,CC(C)(C)OC(=O)N1CCNCC1,CC(C)(C)OC(=O)N1CCN(CC1)C2=NC=CC(=C2)C(F)(F)F,<rdkit.Chem.rdchem.Mol object at 0x161e7fb50>,<rdkit.Chem.rdchem.Mol object at 0x161c56030>,<rdkit.Chem.rdchem.Mol object at 0x161cf84a0>
471,C1=CC2=C(C=CC(=C2N=C1)OS(=O)(=O)C(F)(F)F)Cl,CC(C)(C)OC(=O)N1CCNCC1,CC(C)(C)OC(=O)N1CCN(CC1)C2=C3C(=C(C=C2)Cl)C=CC=N3,<rdkit.Chem.rdchem.Mol object at 0x161e7e490>,<rdkit.Chem.rdchem.Mol object at 0x161c575a0>,<rdkit.Chem.rdchem.Mol object at 0x161cf9000>


## Buchwald-Hartwig Reaction データセットの Fingerprint と類似性

タニモト係数

Tanimoto Coefficient = c / (a + b - c)

・aは分子Aのビット配列で1が立っている数

・bは分子Bのビット配列で1が立っている数

・cは分子AとBで共通に1が立っている数

In [27]:
# 以下の化合物をテストデータとしてデータセットの各化合物とタニモト係数を計算する
#reactant_A = "CC1=NN(C=C1NC2=NC=C(C(=C2)I)C(F)(F)F)C"
#reactant_B = "CONC(=O)C1=CC=CC=C1N"

#mol_A = random.choice(sr_mol_A)
#mol_B = random.choice(sr_mol_B)

# Tanimoto coefficient between the test molecule's fingerprint and another one.
def calc_fps_tanimoto(test_fps, fps):
    """Return the Tanimoto similarity of *test_fps* and *fps*.

    Both arguments are forwarded unchanged, in this order, to
    ``DataStructs.TanimotoSimilarity``.
    """
    return DataStructs.TanimotoSimilarity(test_fps, fps)

1. MACCS Keys

    AllChem.GetMACCSKeysFingerprint(mol)

    166個の部分構造それぞれについて、その部分構造を有する場合は1、無い場合は0が格納される

In [104]:
# Build the MACCS keys fingerprint of a molecule.
def generate_maccs_fps(mol):
    """Return ``AllChem.GetMACCSKeysFingerprint(mol)`` for the given Mol."""
    return AllChem.GetMACCSKeysFingerprint(mol)

# Compute the MACCS fingerprint of every Mol in df_mol_ABY.
df_maccs_fps_ABY = df_mol_ABY.applymap(generate_maccs_fps)
df_maccs_fps_ABY = df_maccs_fps_ABY.rename(
    columns={
        "mol_A": "maccs_fps_A",
        "mol_B": "maccs_fps_B",
        "mol_Y": "maccs_fps_Y",
    }
)

# SMILES columns next to their fingerprints.
df_smiles_maccs_fps = pd.concat(
    [df_buchwald_hardwig_smiles, df_maccs_fps_ABY], axis=1
)
df_smiles_maccs_fps.head()

Unnamed: 0,A,B,Y,maccs_fps_A,maccs_fps_B,maccs_fps_Y
0,CCOC1=C(C=C2C(=C1)N=CC(=C2NC3=C(C=C(C=C3)F)F)C...,CC(C)N1CCNCC1,CCOC1=C(C=C2C(=C1)N=CC(=C2NC3=C(C=C(C=C3)F)F)C...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,C1=CC=C(C=C1)I,CN1C=NC2=C1C=C(C(=C2F)N)C(=O)OC,CN1C=NC2=C1C=C(C(=C2F)NC3=CC=CC=C3)C(=O)OC,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,CC1=NC(=C(C=C1)OC2=CC(=NC=C2)Cl)C,C1=CC(=CC=C1N)S(=O)(=O)N,CC1=NC(=C(C=C1)OC2=CC(=NC=C2)NC3=CC=C(C=C3)S(=...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,C1=C(C=NC=C1Br)Br,CC(=O)N1CCNCC1,CC(=O)N1CCN(CC1)C2=CC(=CN=C2)Br,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,CN1CC(OC2=C(C1)C=CC(=N2)Cl)C3=CC=CC=C3,C1=CC(=CC=C1N)N2C=CN=C2,CN1CC(OC2=C(C1)C=CC(=N2)NC3=CC=C(C=C3)N4C=CN=C...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [143]:
# Take the reaction at row position 45 as the test case. (The original note
# said "random", but the position is fixed.)
test_data_row = df_smiles_maccs_fps.iloc[45, :]
test_A_maccs_fps = test_data_row["maccs_fps_A"]
test_B_maccs_fps = test_data_row["maccs_fps_B"]
test_Y_maccs_fps = test_data_row["maccs_fps_Y"]
li_test_AB = []

# Work on a copy so df_smiles_maccs_fps itself keeps its original columns.
df_smiles_maccs_fps_copy = df_smiles_maccs_fps.copy()

# For A, B and Y: Tanimoto coefficient of every row's fingerprint against the
# test row's fingerprint of the same role, stored as tnmt_A / tnmt_B / tnmt_Y.
for role, reference_fps in (
    ("A", test_A_maccs_fps),
    ("B", test_B_maccs_fps),
    ("Y", test_Y_maccs_fps),
):
    df_smiles_maccs_fps_copy[f"tnmt_{role}"] = df_maccs_fps_ABY[
        f"maccs_fps_{role}"
    ].apply(
        # Bind the reference as a default so each lambda keeps its own value.
        lambda maccs_fps, reference_fps=reference_fps: calc_fps_tanimoto(
            reference_fps, maccs_fps
        )
    )
df_smiles_maccs_fps_tnmt = df_smiles_maccs_fps_copy

#df = df_smiles_maccs_fps_tnmt
#df_test_AB_row = df.loc[(df["maccs_fps_A"] == test_A_maccs_fps) \
                                         # & (df["maccs_fps_B"] == test_B_maccs_fps)]

df_smiles_maccs_fps_tnmt.head()


Unnamed: 0,A,B,Y,maccs_fps_A,maccs_fps_B,maccs_fps_Y,tnmt_A,tnmt_B,tnmt_Y
0,CCOC1=C(C=C2C(=C1)N=CC(=C2NC3=C(C=C(C=C3)F)F)C...,CC(C)N1CCNCC1,CCOC1=C(C=C2C(=C1)N=CC(=C2NC3=C(C=C(C=C3)F)F)C...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.212766,0.717949,0.529412
1,C1=CC=C(C=C1)I,CN1C=NC2=C1C=C(C(=C2F)N)C(=O)OC,CN1C=NC2=C1C=C(C(=C2F)NC3=CC=CC=C3)C(=O)OC,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.461538,0.327586,0.473684
2,CC1=NC(=C(C=C1)OC2=CC(=NC=C2)Cl)C,C1=CC(=CC=C1N)S(=O)(=O)N,CC1=NC(=C(C=C1)OC2=CC(=NC=C2)NC3=CC=C(C=C3)S(=...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.3125,0.126984,0.190476
3,C1=C(C=NC=C1Br)Br,CC(=O)N1CCNCC1,CC(=O)N1CCN(CC1)C2=CC(=CN=C2)Br,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.5,0.590909,0.596154
4,CN1CC(OC2=C(C1)C=CC(=N2)Cl)C3=CC=CC=C3,C1=CC(=CC=C1N)N2C=CN=C2,CN1CC(OC2=C(C1)C=CC(=N2)NC3=CC=C(C=C3)N4C=CN=C...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.183673,0.340909,0.421875


In [None]:
# Leftover Series-based variant of the MACCS workflow.
# sr_mol_A / sr_mol_B are still read by the RDKit and Morgan fingerprint cells.
sr_mol_A = df_buchwald_hardwig_smiles["A"].apply(generate_mol)
sr_mol_B = df_buchwald_hardwig_smiles["B"].apply(generate_mol)

# MACCS fingerprint of every molecule in each Series.
sr_maccs_fps_A = sr_mol_A.apply(generate_maccs_fps)
sr_maccs_fps_B = sr_mol_B.apply(generate_maccs_fps)


def _maccs_tnmt_vs_test_A(fps):
    # Fingerprints equal to the test fingerprint yield None (dropped below).
    if fps != test_A_maccs_fps:
        return calc_fps_tanimoto(test_A_maccs_fps, fps)
    return None


def _maccs_tnmt_vs_test_B(fps):
    # Fingerprints equal to the test fingerprint yield None (dropped below).
    if fps != test_B_maccs_fps:
        return calc_fps_tanimoto(test_B_maccs_fps, fps)
    return None


# Tanimoto coefficient of every fingerprint against the test fingerprint,
# with the entries identical to the test fingerprint removed.
sr_maccs_fps_tnmt_A = sr_maccs_fps_A.apply(_maccs_tnmt_vs_test_A).dropna()
sr_maccs_fps_tnmt_B = sr_maccs_fps_B.apply(_maccs_tnmt_vs_test_B).dropna()

#print(sr_mol_A.describe())
#print(sr_mol_B.describe())


#buchwald_hardwig_smiles_df.insert(0, ":", ":")
#buchwald_hardwig_smiles_df.insert(2, "+", "+")
#buchwald_hardwig_smiles_df.insert(4, "→", "→")
#buchwald_hardwig_smiles_df.insert(6, "\", "\")
#buchwald_hardwig_smiles_df

2. Topologicalフィンガープリント (RDKitフィンガープリント)

    Chem.RDKFingerprint(mol)

    一定の結合数までの経路(部分グラフ)を列挙し、そこに含まれる原子と結合の種類をハッシュ化してビットとして格納する

In [29]:
# Build the topological (RDKit) fingerprint of a molecule.
def generate_rdkit_fps(mol):
    """Return ``Chem.RDKFingerprint(mol)`` for the given Mol."""
    return Chem.RDKFingerprint(mol)

# RDKit fingerprint of every molecule in sr_mol_A / sr_mol_B.
sr_rdkit_fps_A = sr_mol_A.apply(generate_rdkit_fps)
sr_rdkit_fps_B = sr_mol_B.apply(generate_rdkit_fps)
#rdkit_fps_df

# Draw the test fingerprints at random; A and B are drawn independently.
test_A_rdkit_fps = random.choice(sr_rdkit_fps_A)
test_B_rdkit_fps = random.choice(sr_rdkit_fps_B)


def _rdkit_tnmt_vs_test_A(fps):
    # Fingerprints equal to the test fingerprint yield None (dropped below).
    if fps != test_A_rdkit_fps:
        return calc_fps_tanimoto(test_A_rdkit_fps, fps)
    return None


def _rdkit_tnmt_vs_test_B(fps):
    # Fingerprints equal to the test fingerprint yield None (dropped below).
    if fps != test_B_rdkit_fps:
        return calc_fps_tanimoto(test_B_rdkit_fps, fps)
    return None


# Tanimoto coefficient of every fingerprint against the test fingerprint,
# with the entries identical to the test fingerprint removed.
sr_rdkit_fps_tnmt_A = sr_rdkit_fps_A.apply(_rdkit_tnmt_vs_test_A).dropna()
sr_rdkit_fps_tnmt_B = sr_rdkit_fps_B.apply(_rdkit_tnmt_vs_test_B).dropna()

# Summary statistics of the similarity distributions.
for tnmt_series in (sr_rdkit_fps_tnmt_A, sr_rdkit_fps_tnmt_B):
    print(tnmt_series.describe())

count    464.000000
mean       0.206330
std        0.166590
min        0.028202
25%        0.102653
50%        0.155676
75%        0.262780
max        0.952273
Name: A, dtype: float64
count    471.000000
mean       0.061096
std        0.055793
min        0.000000
25%        0.019829
50%        0.051607
75%        0.087338
max        0.427184
Name: B, dtype: float64


3. Morganフィンガープリント (Circularフィンガープリント)

    AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits)

    基準となる原子からある距離にある部分構造を数え上げていく

In [147]:
# Build the Morgan (circular) bit-vector fingerprint of a molecule.
def generate_morgan_fps(mol, radius, nBits):
    """Return the Morgan fingerprint of *mol* as a bit vector.

    *mol*, *radius* and *nBits* are forwarded positionally, in this order,
    to ``AllChem.GetMorganFingerprintAsBitVect``.
    """
    return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits)

# Morgan fingerprint (radius 2, 2048 bits) of every molecule in
# sr_mol_A / sr_mol_B.
sr_morgan_fps_A = sr_mol_A.apply(lambda mol: generate_morgan_fps(mol, 2, 2048))
sr_morgan_fps_B = sr_mol_B.apply(lambda mol: generate_morgan_fps(mol, 2, 2048))
#morgan_fps_df

# Draw the test fingerprints at random; A and B are drawn independently.
test_A_morgan_fps = random.choice(sr_morgan_fps_A)
test_B_morgan_fps = random.choice(sr_morgan_fps_B)


def _morgan_tnmt_vs_test_A(fps):
    # Fingerprints equal to the test fingerprint yield None (dropped below).
    if fps != test_A_morgan_fps:
        return calc_fps_tanimoto(test_A_morgan_fps, fps)
    return None


def _morgan_tnmt_vs_test_B(fps):
    # Fingerprints equal to the test fingerprint yield None (dropped below).
    if fps != test_B_morgan_fps:
        return calc_fps_tanimoto(test_B_morgan_fps, fps)
    return None


# Tanimoto coefficient of every fingerprint against the test fingerprint,
# with the entries identical to the test fingerprint removed.
sr_morgan_fps_tnmt_A = sr_morgan_fps_A.apply(_morgan_tnmt_vs_test_A).dropna()
sr_morgan_fps_tnmt_B = sr_morgan_fps_B.apply(_morgan_tnmt_vs_test_B).dropna()

# Summary statistics of the similarity distributions.
for tnmt_series in (sr_morgan_fps_tnmt_A, sr_morgan_fps_tnmt_B):
    print(tnmt_series.describe())

count    470.000000
mean       0.161393
std        0.090315
min        0.053571
25%        0.112903
50%        0.145161
75%        0.188679
max        0.775510
Name: A, dtype: float64
count    449.000000
mean       0.078788
std        0.103687
min        0.000000
25%        0.000000
50%        0.025000
75%        0.151515
max        0.416667
Name: B, dtype: float64


## プロンプトの改善

In [146]:
# SMILES of the two reactants (A and B) of the test reaction row.
test_A_smiles, test_B_smiles = test_data_row["A"], test_data_row["B"]

print(test_A_smiles, test_B_smiles, sep="\n")

C1=CC(=C(C=C1Cl)Br)C#N
CN(C)[C@H]1CCNC1


In [139]:
import os

import openai
from context import training_dataset

# SECURITY: a secret API key used to be hard-coded in this cell. Secrets must
# not live in source files, so the key is now read from the environment.
# The key that was previously committed should be treated as leaked and revoked.
_openai_api_key = os.environ.get("OPENAI_API_KEY")
if not _openai_api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )
openai.api_key = _openai_api_key

# Prompt context: the training examples followed by the test reaction whose
# product is replaced by '?'. The pieces are concatenated without newlines,
# which reproduces the original backslash-continued string exactly.
context = (
    f"{training_dataset}"
    "    test data:"
    f"    {test_A_smiles} + {test_B_smiles} = '?'"
    "    "
)

question = "Answer at least five candidates for '?'."

# Question-Answering
# NOTE(review): the legacy Completion endpoint and "text-davinci-003" appear to
# have been retired by OpenAI -- confirm against the installed openai version.
response = openai.Completion.create(
  engine="text-davinci-003",
  prompt=f"Question answering:\nContext: {context}\nQuestion: {question}",
  max_tokens=160
)

# Text of the first completion, with surrounding whitespace removed.
answer = response.choices[0].text.strip()
print(response)


IndexingError: Too many indexers

In [170]:
import re

# Locate the test reaction (same A and B SMILES) in the similarity table.
df = df_smiles_maccs_fps_tnmt
df_test_AB_row = df.loc[(df["A"] == test_A_smiles) & (df["B"] == test_B_smiles)]

# Use the test row's own index label instead of a hard-coded 45, so that
# changing the test row elsewhere does not silently break this lookup.
test_row_label = test_data_row.name
test_Y_smiles = df_test_AB_row.loc[test_row_label, "Y"]
test_Y_maccs_fps2 = df_test_AB_row.loc[test_row_label, "maccs_fps_Y"]

# Extract the candidate SMILES from the completion text.
text = response["choices"][0]["text"]
marker = "Candidates for '?' :"
marker_pos = text.find(marker)
# If the model did not echo the marker, parse the whole text instead of
# failing with ValueError as str.index() did.
candidate_text = text[marker_pos + len(marker):] if marker_pos != -1 else text

values = []
for line in candidate_text.splitlines():
    # Strip an optional list prefix such as "1. ", "12) " or "- ". The original
    # fixed [3:] slice corrupted unnumbered lines and two-digit numbering.
    candidate = re.sub(r"^\s*(?:\d+[.)]|[-*])\s*", "", line).strip()
    if candidate:
        values.append(candidate)

df_product_Y_candidates = pd.DataFrame({"Y_candidates": values})

# Mol object per candidate; None when the SMILES could not be converted.
df_product_Y_candidates["Y_candidates_mol"] = (
    df_product_Y_candidates["Y_candidates"].apply(generate_mol)
)

# MACCS fingerprint per candidate; model output is often not valid SMILES, so
# unparseable candidates keep None instead of crashing the whole cell.
df_product_Y_candidates["Y_candidates_maccs_fps"] = (
    df_product_Y_candidates["Y_candidates_mol"].apply(
        lambda mol: AllChem.GetMACCSKeysFingerprint(mol) if mol is not None else None
    )
)

# Tanimoto similarity of each candidate to the true product Y (NaN when the
# candidate has no fingerprint).
df_product_Y_candidates["Y_candidates_tnmt"] = (
    df_product_Y_candidates["Y_candidates_maccs_fps"].apply(
        lambda maccs_fps: DataStructs.TanimotoSimilarity(test_Y_maccs_fps2, maccs_fps)
        if maccs_fps is not None
        else float("nan")
    )
)
df_product_Y_candidates


Unnamed: 0,Y_candidates,Y_candidates_mol,Y_candidates_maccs_fps,Y_candidates_tnmt
0,CN1C2=C(C(=CC=C2)C#N)C(=O)CCC1,<rdkit.Chem.rdchem.Mol object at 0x1426f1070>,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.5
1,CN1CCC(C1)C2=C3C=C(C=CC3=NC=C2)C#N,<rdkit.Chem.rdchem.Mol object at 0x1426f10e0>,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.591837
2,CN1C2=C(C=CC2=C1)C(=O)CC#N,<rdkit.Chem.rdchem.Mol object at 0x1426f11c0>,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, ...",0.285714
3,CN1C2=C(CCC1)C(=O)C=C2C#N,<rdkit.Chem.rdchem.Mol object at 0x1426f3bc0>,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.438596
4,CN1C2=C(C=CC2=C1)NC(=O)CC#N,<rdkit.Chem.rdchem.Mol object at 0x1426f07b0>,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, ...",0.316667
