In [1]:

# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [45]:
# NOTE(review): `QM9` is not imported in any visible cell, so this line would
# raise NameError on a fresh kernel. It is presumably torch_geometric's
# built-in QM9 dataset (the notebook already depends on torch_geometric and
# the constructor takes the dataset root as first argument) — confirm.
from torch_geometric.datasets import QM9

data = QM9(r"../data/qm9/")

In [53]:
from rdkit import Chem

import pandas as pd
import numpy as np

# Build a toy table: every row of the SMILES CSV gets two synthetic targets,
# and the result is written to "qm9_target.csv".
path = "qm9_smiles.csv"

# NOTE(review): `suppl` is not used by any visible cell; it is kept only in
# case a cell outside this view relies on it.
suppl = Chem.SmilesMolSupplier(path)

df = pd.read_csv(path)

# Draw the targets from a locally seeded generator so that re-running this
# cell writes the same file every time. The previous unseeded
# np.random.randn calls produced different targets on each run.
rng = np.random.default_rng(42)
df["target1"] = rng.standard_normal(len(df))
df["target2"] = rng.standard_normal(len(df))

# Index by the "smiles" column so it is written as the first CSV column
# (to_csv emits the index first).
df = df.set_index("smiles")

df.to_csv("qm9_target.csv")

## Testing SmilesDataset

In [7]:
from smiles_dataset import SmilesDataset
from smiles_lightning_data_module import SmilesDataModule
from lightning_model import LightningClassicGNN
import pytorch_lightning as pl
from torch_geometric.transforms import distance
from torch_geometric.loader import DataLoader
import os
import shutil
import torch
import numpy as np

# Make sure we are as deterministic as possible.
torch.use_deterministic_algorithms(True)

# Delete previously processed dataset caches. shutil.rmtree with
# ignore_errors=True mirrors `rm -rf` (a missing directory is not an error)
# without depending on a POSIX shell.
# NOTE(review): presumably this forces SmilesDataset to re-process the raw
# CSV (the recorded output shows "Processing...") — confirm.
shutil.rmtree("../data/test/processed", ignore_errors=True)
shutil.rmtree("../data/test_hydrogen/processed", ignore_errors=True)


seed = 42
# PyTorch Lightning takes care of seeding everything.
pl.seed_everything(seed=seed, workers=True)
# Create a PyTorch dataset from the CSV.
dataset = SmilesDataset(root=r"../data/test", filename="qm9_target.csv", add_hydrogen=True, transform=distance.Distance())
# From the torch dataset, create a Lightning data module so that the
# training splits are always done the same way (it receives the same seed).
data_module = SmilesDataModule(dataset=dataset, seed=seed)


num_node_features = data_module.num_node_features
num_edge_features = data_module.num_edge_features

# Regression model with two outputs.
# NOTE(review): presumably one output per target column (target1, target2)
# of qm9_target.csv — confirm.
gnn_model = LightningClassicGNN(classification=False, output_dim=2, num_node_features=num_node_features, num_edge_features=num_edge_features)

num_epochs = 1


# from pytorch_lightning import loggers
# logger = loggers.WandbLogger()

# Trainer notes:
# - default_root_dir is where the logs and weights are written
# - limit_train_batches is useful when debugging
# - TensorBoardLogger is used by default and can be replaced
# - plugins allow connecting to an arbitrary cluster
# - max_epochs bounds the training length
# - precision selects the floating-point width to reduce the memory footprint
# - accumulate_grad_batches can speed up training too
# NOTE(review): the recorded run logs "GPU available: True (cuda), used: False";
# pass an explicit accelerator if training on CPU is not intended.
trainer = pl.Trainer(deterministic=True, auto_lr_find=True, default_root_dir=os.getcwd(), precision="bf16", max_epochs=num_epochs)



Global seed set to 42
Processing...
100%|██████████| 100/100 [00:00<00:00, 105.91it/s]
Done!
Global seed set to 42
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


In [8]:
# Run the tuner to search for an initial learning rate (the Trainer was
# created with auto_lr_find=True).
# NOTE(review): in the recorded run the LR finder stopped after 2 steps and
# could not compute a suggestion.
trainer.tune(model=gnn_model, datamodule=data_module)

  rank_zero_warn(


Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
LR finder stopped early after 2 steps due to diverging loss.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at /home/harold/epfl/chem-ml-repr/code/.lr_find_ef32d5bb-2f32-433c-adc9-58cebf7beeee.ckpt


{'lr_find': <pytorch_lightning.tuner.lr_finder._LRFinder at 0x7fcce733e460>}

In [5]:

# Train the model using the data module.
# To resume from a checkpoint instead, pass
# ckpt_path="some/path/to/my_checkpoint.ckpt" to trainer.fit.
trainer.fit(model=gnn_model, datamodule=data_module)

(5, 5)

In [3]:
from datasets import QM9Dataset
import os
import shutil

root = "../data/qm9"
# Remove the processed cache under `root`. The path is derived from `root`
# so the two cannot drift apart, and shutil.rmtree(ignore_errors=True)
# behaves like `rm -rf` without requiring a POSIX shell.
shutil.rmtree(os.path.join(root, "processed"), ignore_errors=True)

# Load only a slice of QM9; the recorded output lists molecules
# 100000 through 100999 for these bounds.
data = QM9Dataset(root=root, begin_index=100000, end_index=101000)


Processing...


Index(['A', 'B', 'C', 'mu', 'alpha', 'homo', 'lumo', 'gap', 'r2', 'zpve', 'u0',
       'u298', 'h298', 'g298', 'cv', 'u0_atom', 'u298_atom', 'h298_atom',
       'g298_atom'],
      dtype='object')


6it [00:00, 53.69it/s]

100000 CCC(C)(C)C(C)C=O
100001 CCC(C)(C)C(O)C=O
100002 CCC(C)(C)N(C)C=O
100003 CCC(C)(O)C(C)C=O
100004 CCC(C)(O)C(O)C=O
100005 CC(C)(CO)N(C)C=O
100006 COC(C)(C)C(C)C=O
100007 COC(C)(C)C(O)C=O
100008 CC1(CC1)C(C)(C)CO
100009 CC(C)(CO)C1(C)CO1
100010 CC(C)(CO)C1(CC1)O
100011 CC(O)(CO)C1(C)CC1
100012 CC(O)(CO)C1(C)CN1


23it [00:00, 75.43it/s]

100013 CC(O)(CO)C1(C)CO1
100014 CC(O)(CO)C1(O)CC1
100015 CCC(C)(C)C1(CC1)C
100016 CCC(C)(C)C1(C)CO1
100017 CCC(C)(C)C1(CC1)O
100018 CCC(C)(O)C1(C)CC1
100019 CCC(C)(O)C1(C)CN1
100020 CCC(C)(O)C1(C)CO1
100021 CCC(C)(O)C1(O)CC1
100022 CC1(CC1)C(C)(C)OC
100023 COC(C)(C)C1(C)CN1
100024 COC(C)(C)C1(C)CO1
100025 CC(C)(C1(CC1)O)OC
100026 CC(C)(CO)C1CC1O
100027 CC(O)(CO)C1CC1O
100028 CC1CC1C(C)(C)CO
100029 CC1CC1C(C)(O)CO
100030 CC1CN1C(C)(C)CO


42it [00:00, 72.04it/s]

100031 CC1NC1C(C)(O)CO
100032 CC1OC1C(C)(C)CO
100033 CC1OC1C(C)(O)CO
100034 CCC(C)(C)C1CC1C
100035 CCC(C)(C)C1CC1O
100036 CCC(C)(C)C1CN1C
100037 CCC(C)(C)C1OC1C
100038 CCC(C)(O)C1CC1C
100039 CCC(C)(O)C1CC1O
100040 CCC(C)(O)C1CN1C
100041 CCC(C)(O)C1NC1C
100042 CCC(C)(O)C1OC1C
100043 CN1CC1C(C)(C)CO


50it [00:00, 67.22it/s]

100044 CN1CC1C(C)(O)CO
100045 COC(C)(C)C1CC1C
100046 COC(C)(C)C1CC1O
100047 COC(C)(C)C1CN1C
100048 COC(C)(C)C1NC1C
100049 COC(C)(C)C1OC1C
100050 CC(C)(CO)C1CCC1
100051 CC(C)(CO)C1CCO1
100052 CC(C)(CO)C1COC1
100053 CC(O)(CO)C1CCC1
100054 CC(O)(CO)C1CCO1
100055 CC(O)(CO)C1COC1
100056 CCC(C)(C)C1CCC1


66it [00:00, 72.30it/s]

100057 CCC(C)(C)C1CCO1
100058 CCC(C)(C)C1COC1
100059 CCC(C)(O)C1CCC1
100060 CCC(C)(O)C1CCO1
100061 CCC(C)(O)C1COC1
100062 CC(C)(C1CCC1)OC
100063 COC(C)(C)C1CCO1
100064 CC(C)(C1COC1)OC
100065 CC#CCC(C)(C)CO
100066 CC#CCC(C)(O)CO
100067 CCC(C)(C)CC#CC
100068 CCC(C)(O)CC#CC
100069 CC#CCC(C)(C)OC
100070 CC(=N)OC(C)(C)CO
100071 CC(=O)CC(C)(C)CO
100072 CC(=O)CC(C)(O)CO
100073 CC(=O)NC(C)(C)CO
100074 CC(=O)OC(C)(C)CO


84it [00:01, 78.02it/s]

100075 CC(C)(C[NH3+])CC([O-])=O
100076 CC(C)(CC(=O)N)CO
100077 CC(C)(CO)NC(=O)N
100078 CC(C)(CO)OC(=O)N
100079 CC([NH3+])(CO)CC([O-])=O
100080 CC(O)(C[NH3+])CC([O-])=O
100081 CC(O)(CO)CC(N)=O
100082 CCC(C)(C)CC(=O)C
100083 CCC(C)(C)CC(=O)N
100084 CCC(C)(C)NC(=O)C
100085 CCC(C)(C)NC(=O)N
100086 CCC(C)(C)OC(=O)C
100087 CCC(C)(C)OC(=O)N
100088 CCC(C)([NH3+])CC([O-])=O
100089 CCC(C)(O)CC(C)=O
100090 CCC(C)(O)CC(N)=O
100091 C[NH2+]C(C)(C)CC([O-])=O


102it [00:01, 79.39it/s]

100092 CC(=O)CC(C)(C)OC
100093 CC(C)(CC(=O)N)OC
100094 CC(C)CC(C)(C)CO
100095 CC(C)CC(C)(O)CO
100096 CC(C)OC(C)(C)CO
100097 CC(O)CC(C)(C)CO
100098 CC(O)CC(C)(O)CO
100099 CCC(C)(C)CC(C)C
100100 CCC(C)(C)CC(C)O
100101 CCC(C)(C)OC(C)C
100102 CCC(C)(O)CC(C)C
100103 CCC(C)(O)CC(C)O
100104 CC(C)CC(C)(C)OC
100105 COC(C)(C)CC(C)O
100106 CC(C)(CC1CC1)CO
100107 CC(C)(CO)CC1CO1
100108 CC(C)(CN1CC1)CO


120it [00:01, 79.67it/s]

100109 CC(C)(CO)OC1CC1
100110 CC(O)(CO)CC1CC1
100111 CC(O)(CO)CC1CN1
100112 CC(O)(CO)CC1CO1
100113 CC(O)(CO)CN1CC1
100114 CCC(C)(C)CC1CC1
100115 CCC(C)(C)CC1CO1
100116 CCC(C)(C)CN1CC1
100117 CCC(C)(C)OC1CC1
100118 CCC(C)(O)CC1CC1
100119 CCC(C)(O)CC1CO1
100120 CCC(C)(O)CN1CC1
100121 CC(C)(CC1CC1)OC
100122 COC(C)(C)CC1CO1
100123 CC(C)(CN1CC1)OC
100124 CCC(C)(C)OC=NC
100125 CC(C)(CO)OC=NC
100126 CC(C)(CCC#C)CO


139it [00:01, 79.39it/s]

100127 CC(C)(CCC#N)CO
100128 CC(C)(CO)NCC#N
100129 CC(C)(CO)OCC#C
100130 CC(C)(CO)OCC#N
100131 CC(O)(CO)CCC#C
100132 CC(O)(CO)CCC#N
100133 CCC(C)(C)CCC#C
100134 CCC(C)(C)CCC#N
100135 CCC(C)(C)NCC#N
100136 CCC(C)(C)OCC#C
100137 CCC(C)(C)OCC#N
100138 CCC(C)(O)CCC#C
100139 CCC(C)(O)CCC#N
100140 CC(C)(CCC#C)OC
100141 CC(C)(CCC#N)OC


161it [00:02, 92.09it/s]

100142 CC(C)(CCC=O)CO
100143 CC(C)(CNC=O)CO
100144 CC(C)(CO)COC=O
100145 CC(C)(CO)OCC=O
100146 CC(O)(CO)CCC=O
100147 CC(O)(CO)CNC=O
100148 CC(O)(CO)COC=N
100149 CC(O)(CO)COC=O
100150 CCC(C)(C)CCC=O
100151 CCC(C)(C)CNC=O
100152 CCC(C)(C)COC=O
100153 CCC(C)(C)OCC=O
100154 CCC(C)(O)CCC=O
100155 CCC(C)(O)CNC=O
100156 CCC(C)(O)COC=O
100157 CC(C)(CCC=O)OC
100158 CC(C)(CNC=O)OC
100159 CC(C)(COC=O)OC
100160 CC#CC(C)(CO)C=O
100161 CC#CC(O)(CO)C=O
100162 CCC(C)(C=O)C#CC
100163 CCC(O)(C=O)C#CC
100164 CNC(C)(C=O)C#CC


186it [00:02, 106.84it/s]

100165 COC(C)(C=O)C#CC
100166 CC(=O)C(C)(CO)C=O
100167 CC(=O)C(O)(CO)C=O
100168 CC(CO)(C=O)C(N)=O
100169 CCC(C)(C=O)C(C)=O
100170 CCC(C)(C=O)C(N)=O
100171 CCC(O)(C=O)C(C)=O
100172 CCC(O)(C=O)C(N)=O
100173 CNC(C)(C=O)C(C)=O
100174 CNC(C)(C=O)C(N)=O
100175 COC(C)(C=O)C(C)=O
100176 COC(C)(C=O)C(N)=O
100177 NC(=O)C(O)(CO)C=O
100178 CC(C)C(C)(CO)C=O
100179 CC(C)C(O)(CO)C=O
100180 CC(O)C(C)(CO)C=O
100181 CC(O)C(O)(CO)C=O
100182 CCC(C)(C=O)C(C)C
100183 CCC(C)(C=O)C(C)O
100184 CCC(O)(C=O)C(C)C
100185 CCC(O)(C=O)C(C)O
100186 CN(C)C(C)(CO)C=O
100187 COC(C)(C=O)C(C)C
100188 COC(C)(C=O)C(C)O
100189 CC(CO)(C=O)C1CC1
100190 CC(CO)(C=O)C1CN1


211it [00:02, 115.28it/s]

100191 CC(CO)(C=O)C1CO1
100192 CC(CO)(C=O)N1CC1
100193 CCC(C)(C=O)C1CC1
100194 CCC(C)(C=O)C1CO1
100195 CCC(C)(C=O)N1CC1
100196 CCC(O)(C=O)C1CC1
100197 CCC(O)(C=O)C1CN1
100198 CCC(O)(C=O)C1CO1
100199 COC(C)(C=O)C1CC1
100200 COC(C)(C=O)C1CN1
100201 COC(C)(C=O)C1CO1
100202 OCC(O)(C=O)C1CC1
100203 OCC(O)(C=O)C1CN1
100204 OCC(O)(C=O)C1CO1
100205 CC(CN)(CC#N)C#N
100206 CC(CO)(CC#C)C#C
100207 CC(CO)(CC#C)C#N
100208 CC(CO)(CC#N)C#C
100209 CC(CO)(CC#N)C#N
100210 CCC(C)(CC#C)C#C
100211 CCC(C)(CC#C)C#N
100212 CCC(C)(CC#N)C#C
100213 CCC(C)(CC#N)C#N


223it [00:02, 98.56it/s] 

100214 CCC(N)(CC#C)C#N
100215 CCC(N)(CC#N)C#C
100216 CCC(N)(CC#N)C#N
100217 CCC(O)(CC#C)C#C
100218 CCC(O)(CC#C)C#N
100219 CCC(O)(CC#N)C#C
100220 CCC(O)(CC#N)C#N
100221 CNC(C)(CC#C)C#N
100222 CNC(C)(CC#N)C#C
100223 CNC(C)(CC#N)C#N
100224 COC(C)(CC#C)C#C
100225 COC(C)(CC#C)C#N
100226 COC(C)(CC#N)C#C
100227 COC(C)(CC#N)C#N
100228 NC(CO)(CC#C)C#N
100229 NC(CO)(CC#N)C#C
100230 NC(CO)(CC#N)C#N
100231 NCC(N)(CC#C)C#N
100232 NCC(N)(CC#N)C#N
100233 NCC(O)(CC#C)C#N
100234 NCC(O)(CC#N)C#N


246it [00:02, 103.62it/s]

100235 OCC(O)(CC#C)C#C
100236 OCC(O)(CC#C)C#N
100237 OCC(O)(CC#N)C#C
100238 OCC(O)(CC#N)C#N
100239 CC(CO)(CC#C)C=O
100240 CC(CO)(CC#N)C=O
100241 CCC(C)(CC#C)C=O
100242 CCC(C)(CC#N)C=O
100243 CCC(O)(CC#C)C=O
100244 CCC(O)(CC#N)C=O
100245 CNC(C)(CC#N)C=O
100246 COC(C)(CC#C)C=O
100247 COC(C)(CC#N)C=O
100248 OCC(O)(CC#C)C=O
100249 OCC(O)(CC#N)C=O
100250 CC#CC(C)(CO)CO
100251 CC#CC(CO)(CO)O
100252 CCC(C)(CC)C#CC
100253 CCC(C)(CO)C#CC
100254 CCC(C)(OC)C#CC
100255 CCC(CC)(C#CC)O


269it [00:03, 104.68it/s]

100256 CCC(O)(CO)C#CC
100257 COC(C)(CO)C#CC
100258 CC(=O)C(C)(CO)CO
100259 CC(=O)C(CO)(CO)O
100260 CC(C[NH3+])(CO)C([O-])=O
100261 CC(CO)(CO)C(=O)N
100262 CCC(C)(CC)C(=O)C
100263 CCC(C)(CC)C(=O)N
100264 CCC(C)(C[NH3+])C([O-])=O
100265 CCC(C)(CO)C(C)=O
100266 CCC(C)(CO)C(N)=O
100267 CCC(C)([NH2+]C)C([O-])=O
100268 CCC(C)(OC)C(C)=O
100269 CCC(C)(OC)C(N)=O
100270 CCC([NH3+])(CC)C([O-])=O
100271 CCC(N)(C[NH3+])C([O-])=O
100272 CCC([NH3+])(CO)C([O-])=O
100273 CCC(CC)(C(=O)C)O
100274 CCC(CC)(C(=O)N)O
100275 CCC(O)(C[NH3+])C([O-])=O
100276 CCC(O)(CO)C(C)=O
100277 CCC(O)(CO)C(N)=O
100278 C[NH2+]C(C)(CO)C([O-])=O
100279 COC(C)(C[NH3+])C([O-])=O


294it [00:03, 109.38it/s]

100280 COC(C)(CO)C(C)=O
100281 COC(C)(CO)C(N)=O
100282 C(C(CO)(C(=O)N)O)O
100283 [NH3+]C(CO)(CO)C([O-])=O
100284 NC(C[NH3+])(CO)C([O-])=O
100285 [NH3+]CC(O)(CO)C([O-])=O
100286 CC(C)C(C)(CO)CO
100287 CC(C)C(CO)(CO)O
100288 CC(O)C(C)(CO)CO
100289 CC(O)C(O)(CO)CO
100290 CCC(C)(CC)C(C)C
100291 CCC(C)(CC)C(C)O
100292 CCC(C)(CO)C(C)C
100293 CCC(C)(CO)C(C)O
100294 CCC(C)(OC)C(C)C
100295 CCC(C)(OC)C(C)O
100296 CCC(CC)(C(C)C)O
100297 CCC(O)(CC)C(C)O
100298 CCC(O)(CO)C(C)C
100299 CCC(O)(CO)C(C)O
100300 COC(C)(CO)C(C)C


317it [00:03, 105.74it/s]

100301 COC(C)(CO)C(C)O
100302 CC(CO)(CO)C1CC1
100303 CC(CO)(CO)C1CN1
100304 CC(CO)(CO)C1CO1
100305 CC(CO)(CO)N1CC1
100306 CCC(C)(CC)C1CC1
100307 CCC(C)(CC)C1CO1
100308 CCC(C)(CO)C1CC1
100309 CCC(C)(CO)C1CO1
100310 CCC(C)(CO)N1CC1
100311 CCC(C)(OC)C1CC1
100312 CCC(C)(OC)C1CN1
100313 CCC(C)(OC)C1CO1
100314 CCC(CC)(C1CC1)O
100315 CCC(O)(CC)C1CO1
100316 CCC(O)(CO)C1CC1
100317 CCC(O)(CO)C1CN1
100318 CCC(O)(CO)C1CO1
100319 COC(C)(CO)C1CC1
100320 COC(C)(CO)C1CN1
100321 COC(C)(CO)C1CO1
100322 C1CC1C(CO)(CO)O
100323 OCC(O)(CO)C1CN1


340it [00:03, 101.53it/s]

100324 OCC(O)(CO)C1CO1
100325 CC(CC#C)(CO)CO
100326 CC(CC#N)(CO)CO
100327 CCC(C)(CC#C)OC
100328 CCC(C)(CC#N)OC
100329 CCC(C)(CC)CC#C
100330 CCC(C)(CC)CC#N
100331 CCC(C)(CO)CC#C
100332 CCC(C)(CO)CC#N
100333 CCC(CC)(CC#C)O
100334 CCC(CC)(CC#N)O
100335 CCC(O)(CO)CC#C
100336 CCC(O)(CO)CC#N
100337 COC(C)(CO)CC#C
100338 COC(C)(CO)CC#N
100339 C(C#N)C(CO)(CO)N
100340 C#CCC(CO)(CO)O
100341 C(C#N)C(CO)(CO)O
100342 CC(CC=O)(CO)CO


365it [00:03, 109.55it/s]

100343 CC(CO)(CO)NC=O
100344 CC(CO)(CO)OC=N
100345 CC(CO)(CO)OC=O
100346 CCC(C)(CC)CC=O
100347 CCC(C)(CC)NC=O
100348 CCC(C)(CC)OC=O
100349 CCC(C)(CC=O)OC
100350 CCC(C)(CO)CC=O
100351 CCC(C)(CO)NC=O
100352 CCC(C)(CO)OC=N
100353 CCC(C)(CO)OC=O
100354 CCC(CC)(CC=O)O
100355 CCC(O)(CO)CC=O
100356 COC(C)(CO)CC=O
100357 C(C=O)C(CO)(CO)O
100358 CC(CN)(NC=O)C#N
100359 CC(CN)(OC=N)C#N
100360 CC(CN)(OC=O)C#N
100361 CC(CO)(CC=O)C#C
100362 CC(CO)(CC=O)C#N
100363 CC(CO)(NC=O)C#C
100364 CC(CO)(NC=O)C#N
100365 CC(CO)(OC=N)C#C
100366 CC(CO)(OC=N)C#N


391it [00:04, 114.81it/s]

100367 CC(CO)(OC=O)C#C
100368 CC(CO)(OC=O)C#N
100369 CCC(C)(CC=O)C#C
100370 CCC(C)(CC=O)C#N
100371 CCC(C)(NC=O)C#C
100372 CCC(C)(NC=O)C#N
100373 CCC(C)(OC=N)C#C
100374 CCC(C)(OC=N)C#N
100375 CCC(C)(OC=O)C#C
100376 CCC(C)(OC=O)C#N
100377 CCC(O)(CC=O)C#C
100378 CCC(O)(CC=O)C#N
100379 CNC(C)(CC=O)C#N
100380 COC(C)(CC=O)C#C
100381 COC(C)(CC=O)C#N
100382 OCC(O)(CC=O)C#C
100383 OCC(O)(CC=O)C#N
100384 CC(CO)(CC=O)C=O
100385 CC(CO)(NC=O)C=O
100386 CC(CO)(OC=N)C=O
100387 CC(CO)(OC=O)C=O
100388 CCC(C)(CC=O)C=O
100389 CCC(C)(NC=O)C=O
100390 CCC(C)(OC=N)C=O


403it [00:04, 110.58it/s]

100391 CCC(C)(OC=O)C=O
100392 CCC(O)(CC=O)C=O
100393 COC(C)(CC=O)C=O
100394 OCC(O)(CC=O)C=O
100395 CC(=NC(C)(C)C)OC
100396 CC(N=C(N)CO)C#C
100397 CC(N=C(N)CO)C#N
100398 COC(C)=NC(C)C#C
100399 COC(C)=NC(C)C#N
100400 COC(N)=NC(C)C#C
100401 COC(N)=NC(C)C#N
100402 CC(C=O)N=C(N)CO
100403 COC(C)=NC(C)C=O
100404 COC(N)=NC(C)C=O
100405 CC(=NC1(CC1)C)OC
100406 COC(C)=NC1CC1C
100407 COC(C)=NC1CC1O
100408 COC(N)=NC1CC1O
100409 CC(=NC1CCC1)OC
100410 CC(=NC1COC1)OC
100411 COC(=NC1COC1)N
100412 C1C(CO1)N=C(CO)N


428it [00:04, 115.07it/s]

100413 CC#CCN=C(CO)N
100414 CC#CCN=C(C)OC
100415 CC#CCN=C(N)OC
100416 CC(=O)CN=C(CO)N
100417 CCC(N)=[NH+]CC([O-])=O
100418 CNC(C)=[NH+]CC([O-])=O
100419 CNC(N)=[NH+]CC([O-])=O
100420 CC(=O)CN=C(C)OC
100421 CC(=NCC(=O)N)OC
100422 CC(=O)CN=C(N)OC
100423 COC(=NCC(=O)N)N
100424 COC(N)=[NH+]CC([O-])=O
100425 C(C(=O)N)N=C(CO)N
100426 NC(CO)=[NH+]CC([O-])=O
100427 NCC(N)=[NH+]CC([O-])=O
100428 CC(C)CN=C(C)OC
100429 COC(C)=NCC(C)O
100430 CC(=NCC1CC1)OC
100431 COC(C)=NCC1CN1
100432 COC(C)=NCC1CO1
100433 CC(=NCCC#C)OC
100434 CC(=NCCC#N)OC
100435 CC(=NCCC=O)OC


451it [00:04, 103.33it/s]

100436 CC#CC#CC(C)CO
100437 CC#CC#CC(O)CO
100438 CCC(C)C#CC#CC
100439 CCC(O)C#CC#CC
100440 COC(C)C#CC#CC
100441 CC(=O)C#CC(O)CO
100442 CC(C[NH3+])C#CC([O-])=O
100443 CC(CO)C#CC(C)=O
100444 CC(CO)C#CC(N)=O
100445 CCC(C)C#CC(C)=O
100446 CCC(C)C#CC(N)=O
100447 CCC([NH3+])C#CC([O-])=O
100448 CCC(O)C#CC(C)=O
100449 CCC(O)C#CC(N)=O
100450 C[NH2+]C(C)C#CC([O-])=O
100451 COC(C)C#CC(C)=O
100452 COC(C)C#CC(N)=O
100453 NC(=O)C#CC(O)CO
100454 [NH3+]C(CO)C#CC([O-])=O


462it [00:04, 100.50it/s]

100455 NC(C[NH3+])C#CC([O-])=O
100456 [NH3+]CC(O)C#CC([O-])=O
100457 CC(C)C#CC(C)CO
100458 CC(C)C#CC(O)CO
100459 CC(O)C#CC(C)CO
100460 CC(O)C#CC(O)CO
100461 CCC(C)C#CC(C)C
100462 CCC(C)C#CC(C)O
100463 CCC(O)C#CC(C)C
100464 CCC(O)C#CC(C)O
100465 COC(C)C#CC(C)C
100466 COC(C)C#CC(C)O
100467 CC(CO)C#CC1CC1
100468 CC(CO)C#CC1CN1
100469 CC(CO)C#CC1CO1
100470 CCC(C)C#CC1CC1
100471 CCC(C)C#CC1CN1
100472 CCC(C)C#CC1CO1


483it [00:05, 96.02it/s] 

100473 CCC(O)C#CC1CC1
100474 CCC(O)C#CC1CN1
100475 CCC(O)C#CC1CO1
100476 COC(C)C#CC1CC1
100477 COC(C)C#CC1CN1
100478 COC(C)C#CC1CO1
100479 OCC(O)C#CC1CC1
100480 OCC(O)C#CC1CN1
100481 OCC(O)C#CC1CO1
100482 CC(CO)C#CCC#C
100483 CC(CO)C#CCC#N
100484 CCC(C)C#CCC#C
100485 CCC(C)C#CCC#N
100486 CCC(O)C#CCC#C
100487 CCC(O)C#CCC#N
100488 COC(C)C#CCC#C
100489 COC(C)C#CCC#N
100490 OCC(O)C#CCC#C
100491 OCC(O)C#CCC#N
100492 CC(CO)C#CCC=O


504it [00:05, 99.82it/s]

100493 CCC(C)C#CCC=O
100494 CCC(O)C#CCC=O
100495 COC(C)C#CCC=O
100496 OCC(O)C#CCC=O
100497 CC#CC(=O)C(C)CO
100498 CC#CC(=O)C(O)CO
100499 CCC(C)C(=O)C#CC
100500 CCC(O)C(=O)C#CC
100501 CCN(C)C(=O)C#CC
100502 COC(C)C(=O)C#CC
100503 CC(=O)C(=O)C(O)CO
100504 CC(CO)C(=O)C(C)=O
100505 CC(CO)C(=O)C(N)=N
100506 CC(CO)C(=O)C(N)=O
100507 CCC(C)C(=O)C(C)=O
100508 CCC(C)C(=O)C(N)=N
100509 CCC(C)C(=O)C(N)=O
100510 CCC(O)C(=O)C(C)=O
100511 CCC(O)C(=O)C(N)=N
100512 CCC(O)C(=O)C(N)=O
100513 CCN(C)C(=N)C(=O)C
100514 CCN(C)C(=[NH2+])C([O-])=O
100515 CCN(C)C(=O)C(=O)C
100516 CCN(C)C(=O)C(=N)N
100517 CCN(C)C(=O)C(=O)N


531it [00:05, 114.76it/s]

100518 C[NH2+]C(C)C(=O)C([O-])=O
100519 COC(C)C(=O)C(C)=O
100520 COC(C)C(=O)C(N)=N
100521 COC(C)C(=O)C(N)=O
100522 NC(=N)C(=O)C(O)CO
100523 NC(=O)C(=O)C(O)CO
100524 CC(C)C(=O)C(C)CO
100525 CC(C)C(=O)C(O)CO
100526 CC(CO)C(=O)N(C)C
100527 CC(O)C(=O)C(C)CO
100528 CC(O)C(=O)C(O)CO
100529 CCC(C)C(=O)C(C)C
100530 CCC(C)C(=O)C(C)O
100531 CCC(C)C(=O)N(C)C
100532 CCC(O)C(=O)C(C)C
100533 CCC(O)C(=O)C(C)O
100534 CCC(O)C(=O)N(C)C
100535 CCN(C)C(=O)C(C)C
100536 CCN(C)C(=O)C(C)O
100537 CCN(C)C(=O)N(C)C
100538 CN(C)C(=O)C(O)CO
100539 COC(C)C(=O)C(C)C
100540 COC(C)C(=O)C(C)O
100541 COC(C)C(=O)N(C)C


555it [00:05, 113.57it/s]

100542 CC(CO)C(=O)C1CC1
100543 CC(CO)C(=O)C1CN1
100544 CC(CO)C(=O)C1CO1
100545 CC(CO)C(=O)N1CC1
100546 CCC(C)C(=O)C1CC1
100547 CCC(C)C(=O)C1CN1
100548 CCC(C)C(=O)C1CO1
100549 CCC(C)C(=O)N1CC1
100550 CCC(O)C(=O)C1CC1
100551 CCC(O)C(=O)C1CN1
100552 CCC(O)C(=O)C1CO1
100553 CCC(O)C(=O)N1CC1
100554 CCN(C)C(=O)C1CC1
100555 CCN(C)C(=O)C1CN1
100556 CCN(C)C(=O)C1CO1
100557 CCN(C)C(=O)N1CC1
100558 COC(C)C(=O)C1CC1
100559 COC(C)C(=O)C1CN1
100560 COC(C)C(=O)C1CO1
100561 COC(C)C(=O)N1CC1
100562 OCC(O)C(=O)C1CC1
100563 OCC(O)C(=O)C1CN1
100564 OCC(O)C(=O)C1CO1
100565 OCC(O)C(=O)N1CC1
100566 CC(CO)C(=O)CC#C


581it [00:05, 115.16it/s]

100567 CC(CO)C(=O)CC#N
100568 CCC(C)C(=O)CC#C
100569 CCC(C)C(=O)CC#N
100570 CCC(O)C(=O)CC#C
100571 CCC(O)C(=O)CC#N
100572 CCN(C)C(=O)CC#C
100573 CCN(C)C(=O)CC#N
100574 COC(C)C(=O)CC#C
100575 COC(C)C(=O)CC#N
100576 OCC(O)C(=O)CC#C
100577 OCC(O)C(=O)CC#N
100578 CC(CO)C(=N)NC=O
100579 CC(CO)C(=O)CC=O
100580 CC(CO)C(=O)NC=N
100581 CC(CO)C(=O)NC=O
100582 CC(CO)C(=O)OC=N
100583 CCC(C)C(=N)NC=O
100584 CCC(C)C(=O)CC=O
100585 CCC(C)C(=O)NC=N
100586 CCC(C)C(=O)NC=O
100587 CCC(C)C(=O)OC=N
100588 CCC(N)C(=O)NC=O


608it [00:06, 123.04it/s]

100589 CCC(N)C(=O)OC=N
100590 CCC(O)C(=N)NC=O
100591 CCC(O)C(=O)CC=O
100592 CCC(O)C(=O)NC=N
100593 CCC(O)C(=O)NC=O
100594 CCC(O)C(=O)OC=N
100595 CCN(C)C(=O)CC=O
100596 CCN(C)C(=O)NC=N
100597 CCN(C)C(=O)NC=O
100598 CCN(C)C(=O)OC=N
100599 CNC(C)C(=O)OC=N
100600 COC(C)C(=N)NC=O
100601 COC(C)C(=O)CC=O
100602 COC(C)C(=O)NC=N
100603 COC(C)C(=O)NC=O
100604 COC(C)C(=O)OC=N
100605 NC(CO)C(=O)NC=O
100606 NC(CO)C(=O)OC=N
100607 OCC(O)C(=N)NC=O
100608 OCC(O)C(=O)CC=O
100609 OCC(O)C(=O)NC=N
100610 OCC(O)C(=O)NC=O
100611 OCC(O)C(=O)OC=N
100612 CCN(C)C(=NC)C#N
100613 CC(CO)C(C#C)C#C
100614 CC(CO)C(C#C)C#N
100615 CCC(C)C(C#C)C#C


633it [00:06, 110.94it/s]

100616 CCC(C)C(C#C)C#N
100617 CCC(C)C(C#N)C#N
100618 CCC(O)C(C#C)C#C
100619 CCC(O)C(C#C)C#N
100620 CCN(C)C(C#C)C#N
100621 CCN(C)C(C#N)C#N
100622 COC(C)C(C#C)C#C
100623 COC(C)C(C#C)C#N
100624 NC(CO)C(C#C)C#N
100625 NC(C[NH3+])[C-](C#N)C#N
100626 [NH3+]CC(O)[C-](C#N)C#N
100627 OCC(O)C(C#C)C#C
100628 OCC(O)C(C#C)C#N
100629 CC(C)(C#C)C(O)CO
100630 CC(C)(C#N)C(O)CO
100631 CC(CO)C(C)(C)C#C
100632 CC(CO)C(C)(C)C#N
100633 CC(CO)C(C)(N)C#N
100634 CC(CO)C(C)(O)C#C
100635 CC(CO)C(C)(O)C#N
100636 CC(N)(C#N)C(N)CO


645it [00:06, 104.60it/s]

100637 CC(N)(C#N)C(O)CO
100638 CC(O)(C#C)C(O)CO
100639 CC(O)(C#N)C(N)CO
100640 CC(O)(C#N)C(O)CO
100641 CCC(C)C(C)(C)C#C
100642 CCC(C)C(C)(C)C#N
100643 CCC(C)C(C)(N)C#N
100644 CCC(C)C(C)(O)C#C
100645 CCC(C)C(C)(O)C#N
100646 CCC(O)C(C)(C)C#C
100647 CCC(O)C(C)(C)C#N
100648 CCC(O)C(C)(N)C#N
100649 CCC(O)C(C)(O)C#C
100650 CCC(O)C(C)(O)C#N
100651 CCN(C)C(C)(C)C#N
100652 COC(C)C(C)(C)C#C
100653 COC(C)C(C)(C)C#N
100654 COC(C)C(C)(N)C#N


668it [00:06, 103.47it/s]

100655 COC(C)C(C)(O)C#C
100656 COC(C)C(C)(O)C#N
100657 CC(C)(C=O)C(O)CO
100658 CC(CO)C(C)(C)C=O
100659 CC(CO)C(C)(O)C=O
100660 CC(O)(C=O)C(O)CO
100661 CCC(C)C(C)(C)C=O
100662 CCC(C)C(C)(O)C=O
100663 CCC(O)C(C)(C)C=O
100664 CCC(O)C(C)(O)C=O
100665 COC(C)C(C)(C)C=O
100666 COC(C)C(C)(O)C=O
100667 CC(C)(CO)C(O)CO
100668 CC(CO)C(C)(C)CO
100669 CC(CO)C(C)(O)CO
100670 CC(O)(CO)C(O)CO
100671 CCC(C)(C)C(C)CO
100672 CCC(C)(C)C(C)OC
100673 CCC(C)(C)C(O)CO
100674 CCC(C)(O)C(C)CO
100675 CCC(C)(O)C(C)OC


689it [00:06, 97.48it/s] 

100676 CCC(C)(O)C(O)CO
100677 CCC(C)C(C)(C)CC
100678 CCC(C)C(C)(C)CO
100679 CCC(C)C(C)(C)OC
100680 CCC(C)C(C)(O)CC
100681 CCC(C)C(C)(O)CO
100682 CCC(O)C(C)(C)CC
100683 CCC(O)C(C)(C)CO
100684 CCC(O)C(C)(C)OC
100685 CCC(O)C(C)(O)CC
100686 CCC(O)C(C)(O)CO
100687 COC(C)(C)C(C)CO
100688 COC(C)(C)C(O)CO
100689 COC(C)C(C)(C)CO
100690 COC(C)C(C)(C)OC
100691 COC(C)C(C)(O)CO
100692 CC#CC(C)C(C)CO
100693 CC#CC(C)C(O)CO
100694 CC#CC(O)C(C)CO


711it [00:07, 98.51it/s]

100695 CC#CC(O)C(O)CO
100696 CCC(C)C(C)C#CC
100697 CCC(C)C(O)C#CC
100698 CCC(O)C(C)C#CC
100699 CCC(O)C(O)C#CC
100700 COC(C)C(C)C#CC
100701 COC(C)C(O)C#CC
100702 CC(=O)C(O)C(O)CO
100703 CC(C([NH3+])CN)C([O-])=O
100704 CC(C([NH3+])CO)C([O-])=O
100705 CC(C(O)C[NH3+])C([O-])=O
100706 CC(C(O)CO)C(C)=O
100707 CC(C(O)CO)C(N)=O
100708 CC(C[NH3+])C(C)C([O-])=O
100709 CC(C[NH3+])C(O)C([O-])=O
100710 CC(CO)C(C)C(C)=O
100711 CC(CO)C(C)C(N)=O
100712 CC(CO)C([NH3+])C([O-])=O
100713 CC(CO)C(O)C(C)=O
100714 CC(CO)C(O)C(N)=O
100715 CC(CO)N(C)C(C)=O


734it [00:07, 104.32it/s]

100716 CC(CO)N(C)C(N)=O
100717 CCC(C)C(C)C(C)=O
100718 CCC(C)C(C)C(N)=O
100719 CCC(C)C([NH3+])C([O-])=O
100720 CCC(C)C(O)C(C)=O
100721 CCC(C)C(O)C(N)=O
100722 CCC(C)N(C)C(C)=O
100723 CCC(C)N(C)C(N)=O
100724 CCC([NH3+])C(C)C([O-])=O
100725 CCC([NH3+])C(N)C([O-])=O
100726 CCC([NH3+])C(O)C([O-])=O
100727 CCC(O)C(C)C(C)=O
100728 CCC(O)C(C)C(N)=O
100729 CCC(O)C([NH3+])C([O-])=O
100730 CCC(O)C(O)C(C)=O
100731 CCC(O)C(O)C(N)=O
100732 CC[NH+](C)C(C)C([O-])=O
100733 C[NH2+]C(C)C(C)C([O-])=O
100734 C[NH2+]C(C)C(N)C([O-])=O
100735 C[NH2+]C(C)C(O)C([O-])=O
100736 COC(C)C(C)C(C)=O
100737 COC(C)C(C)C(N)=O
100738 COC(C)C([NH3+])C([O-])=O


748it [00:07, 112.16it/s]

100739 COC(C)C(O)C(C)=O
100740 COC(C)C(O)C(N)=O
100741 NC(=O)C(O)C(O)CO
100742 [NH3+]C(C(O)CO)C([O-])=O
100743 NC(C([NH3+])CO)C([O-])=O
100744 [NH3+]C(CO)C(O)C([O-])=O
100745 NCC([NH3+])C(O)C([O-])=O
100746 [NH3+]CC(O)C(O)C([O-])=O
100747 CC(C)C(C)C(C)CO
100748 CC(C)C(C)C(O)CO
100749 CC(C)C(O)C(C)CO
100750 CC(C)C(O)C(O)CO
100751 CC(O)C(C)C(C)CO
100752 CC(O)C(C)C(O)CO
100753 CC(O)C(O)C(C)CO
100754 CC(O)C(O)C(O)CO
100755 CCC(C)C(C)C(C)C
100756 CCC(C)C(C)C(C)O
100757 CCC(C)C(O)C(C)C
100758 CCC(C)C(O)C(C)O
100759 CCC(O)C(C)C(C)C


771it [00:07, 102.05it/s]

100760 CCC(O)C(C)C(C)O
100761 CCC(O)C(O)C(C)C
100762 CCC(O)C(O)C(C)O
100763 COC(C)C(C)C(C)C
100764 COC(C)C(C)C(C)O
100765 COC(C)C(O)C(C)C
100766 COC(C)C(O)C(C)O
100767 CC(C(O)CO)C1CC1
100768 CC(C(O)CO)C1CN1
100769 CC(C(O)CO)C1CO1
100770 CC(C(O)CO)N1CC1
100771 CC(CO)C(C)C1CC1
100772 CC(CO)C(C)C1CO1
100773 CC(CO)C(C)N1CC1
100774 CC(CO)C(O)C1CC1
100775 CC(CO)C(O)C1CN1
100776 CC(CO)C(O)C1CO1
100777 CCC(C)C(C)C1CC1
100778 CCC(C)C(C)C1CO1
100779 CCC(C)C(O)C1CC1


793it [00:07, 102.35it/s]

100780 CCC(C)C(O)C1CN1
100781 CCC(C)C(O)C1CO1
100782 CCC(O)C(C)C1CC1
100783 CCC(O)C(C)C1CO1
100784 CCC(O)C(C)N1CC1
100785 CCC(O)C(O)C1CC1
100786 CCC(O)C(O)C1CN1
100787 CCC(O)C(O)C1CO1
100788 COC(C)C(C)C1CC1
100789 COC(C)C(C)C1CO1
100790 COC(C)C(C)N1CC1
100791 COC(C)C(O)C1CC1
100792 COC(C)C(O)C1CN1
100793 COC(C)C(O)C1CO1
100794 OCC(O)C(O)C1CC1
100795 OCC(O)C(O)C1CN1
100796 OCC(O)C(O)C1CO1
100797 CC(CC#C)C(O)CO
100798 CC(CC#N)C(O)CO
100799 CC(CO)C(C)CC#C
100800 CC(CO)C(C)CC#N


815it [00:08, 93.03it/s] 

100801 CC(CO)C(O)CC#C
100802 CC(CO)C(O)CC#N
100803 CC(CO)N(C)CC#N
100804 CCC(C)C(C)CC#C
100805 CCC(C)C(C)CC#N
100806 CCC(C)C(O)CC#C
100807 CCC(C)C(O)CC#N
100808 CCC(C)N(C)CC#N
100809 CCC(O)C(C)CC#C
100810 CCC(O)C(C)CC#N
100811 CCC(O)C(O)CC#C
100812 CCC(O)C(O)CC#N
100813 COC(C)C(C)CC#C
100814 COC(C)C(C)CC#N
100815 COC(C)C(O)CC#C
100816 COC(C)C(O)CC#N
100817 NC(CC#N)C(O)CO


839it [00:08, 102.36it/s]

100818 OCC(O)C(O)CC#C
100819 OCC(O)C(O)CC#N
100820 CC(CC=O)C(O)CO
100821 CC(CO)C(C)CC=O
100822 CC(CO)C(C)NC=O
100823 CC(CO)C(C)OC=O
100824 CC(CO)C(O)CC=O
100825 CC(CO)N(C)CC=O
100826 CC(NC=O)C(O)CO
100827 CC(OC=N)C(O)CO
100828 CC(OC=O)C(O)CO
100829 CCC(C)C(C)CC=O
100830 CCC(C)C(C)NC=O
100831 CCC(C)C(C)OC=O
100832 CCC(C)C(O)CC=O
100833 CCC(O)C(C)CC=O
100834 CCC(O)C(C)NC=O
100835 CCC(O)C(C)OC=O
100836 CCC(O)C(O)CC=O
100837 COC(C)C(C)CC=O
100838 COC(C)C(C)NC=O
100839 COC(C)C(C)OC=O
100840 COC(C)C(O)CC=O
100841 OCC(O)C(O)CC=O


865it [00:08, 115.09it/s]

100842 CCN(C)C(=NC)C=O
100843 CC(CO)C(C=O)C#C
100844 CC(CO)C(C=O)C#N
100845 CCC(C)C(C=O)C#C
100846 CCC(C)C(C=O)C#N
100847 CCC(O)C(C=O)C#C
100848 CCC(O)C(C=O)C#N
100849 CCN(C)C(C=O)C#C
100850 CCN(C)C(C=O)C#N
100851 COC(C)C(C=O)C#C
100852 COC(C)C(C=O)C#N
100853 OCC(O)C(C=O)C#C
100854 OCC(O)C(C=O)C#N
100855 CC(CO)C(C=O)C=O
100856 CC(CO)N(C=N)C=O
100857 CC(CO)N(C=O)C=O
100858 CCC(C)C(C=O)C=O
100859 CCC(C)N(C=N)C=O
100860 CCC(C)N(C=O)C=O
100861 CCC(O)C(C=O)C=O
100862 CCN(C)C(C=O)C=O
100863 COC(C)C(C=O)C=O
100864 OCC(O)C(C=O)C=O
100865 CC(CO)C(CO)C#C
100866 CC(CO)C(CO)C#N


877it [00:08, 103.61it/s]

100867 CCC(C#C)C(C)CO
100868 CCC(C#C)C(C)OC
100869 CCC(C#C)C(O)CO
100870 CCC(C#N)C(C)CO
100871 CCC(C#N)C(C)OC
100872 CCC(C#N)C(O)CO
100873 CCC(C#N)N(C)CC
100874 CCC(C)C(CC)C#C
100875 CCC(C)C(CC)C#N
100876 CCC(C)C(CO)C#C
100877 CCC(C)C(CO)C#N
100878 CCC(C)C(NC)C#N
100879 CCC(C)C(OC)C#C
100880 CCC(C)C(OC)C#N
100881 CCC(N)C(NC)C#N
100882 CCC(O)C(CC)C#C
100883 CCC(O)C(CC)C#N


898it [00:09, 94.20it/s] 

100884 CCC(O)C(CO)C#C
100885 CCC(O)C(CO)C#N
100886 CCC(O)C(NC)C#N
100887 CCC(O)C(OC)C#C
100888 CCC(O)C(OC)C#N
100889 CCN(C)C(CN)C#N
100890 CCN(C)C(CO)C#N
100891 CNC(C#N)C(C)CO
100892 CNC(C#N)C(C)OC
100893 CNC(C#N)C(N)CO
100894 CNC(C#N)C(O)CO
100895 CNC(C)C(NC)C#N
100896 CNC(C)C(OC)C#N
100897 COC(C#C)C(C)CO
100898 COC(C#C)C(O)CO
100899 COC(C#N)C(C)CO
100900 COC(C#N)C(N)CO
100901 COC(C#N)C(O)CO


908it [00:09, 90.07it/s]

100902 COC(C)C(CO)C#C
100903 COC(C)C(CO)C#N
100904 COC(C)C(OC)C#C
100905 COC(C)C(OC)C#N
100906 NC(CO)C(CO)C#N
100907 NCC(C#N)C(O)CO
100908 OCC(O)C(CO)C#C
100909 OCC(O)C(CO)C#N
100910 CC(CO)C(CO)C=O
100911 CCC(C)C(CC)C=O
100912 CCC(C)C(CO)C=O
100913 CCC(C)C(OC)C=O
100914 CCC(C)N(CC)C=O
100915 CCC(C=O)C(C)CO
100916 CCC(C=O)C(C)OC
100917 CCC(C=O)C(O)CO


927it [00:09, 87.11it/s]

100918 CCC(O)C(CC)C=O
100919 CCC(O)C(CO)C=O
100920 CCC(O)C(OC)C=O
100921 CCN(C)C(CO)C=O
100922 CCN(C=O)C(C)CO
100923 COC(C)C(CO)C=O
100924 COC(C)C(OC)C=O
100925 COC(C=O)C(C)CO
100926 COC(C=O)C(O)CO
100927 OCC(O)C(CO)C=O
100928 CC(CO)C(CO)CO
100929 CCC(C)C(CC)CC
100930 CCC(C)C(CC)CO
100931 CCC(C)C(CC)OC
100932 CCC(C)C(CO)CO


944it [00:09, 69.80it/s]

100933 CCC(C)C(CO)OC
100934 CCC(CC)C(C)CO
100935 CCC(CC)C(C)OC
100936 CCC(CC)C(O)CO
100937 CCC(CO)C(C)CO
100938 CCC(CO)C(C)OC
100939 CCC(CO)C(O)CO
100940 CCC(O)C(CC)CC
100941 CCC(O)C(CC)CO
100942 CCC(O)C(CC)OC
100943 CCC(O)C(CO)CO
100944 CCC(O)C(CO)OC


952it [00:09, 68.11it/s]

100945 CCC(OC)C(C)CO
100946 CCC(OC)C(C)OC
100947 CCC(OC)C(O)CO
100948 COC(C)C(CO)CO
100949 COC(C)C(CO)OC
100950 COC(CO)C(C)CO
100951 COC(CO)C(O)CO
100952 OCC(O)C(CO)CO
100953 CC(CO)C1(C)CC1C
100954 CC(CO)C1(C)CC1O
100955 CC(CO)C1(C)CN1C
100956 CC(CO)C1(C)OC1C
100957 CC(CO)C1(O)CC1C


966it [00:10, 65.87it/s]

100958 CC(CO)C1(O)CC1O
100959 CC1(CC1O)C(O)CO
100960 CC1CC1(C)C(O)CO
100961 CC1CC1(O)C(O)CO
100962 CC1NC1(C)C(O)CO
100963 CC1OC1(C)C(O)CO
100964 CCC(C)C1(C)CC1C
100965 CCC(C)C1(C)CC1O
100966 CCC(C)C1(C)CN1C
100967 CCC(C)C1(C)OC1C
100968 CCC(C)C1(O)CC1C
100969 CCC(C)C1(O)CC1O
100970 CCC(O)C1(C)CC1C


980it [00:10, 64.43it/s]

100971 CCC(O)C1(C)CC1O
100972 CCC(O)C1(C)CN1C
100973 CCC(O)C1(C)NC1C
100974 CCC(O)C1(C)OC1C
100975 CCC(O)C1(O)CC1C
100976 CCC(O)C1(O)CC1O
100977 CN1CC1(C)C(O)CO
100978 COC(C)C1(C)CC1C
100979 COC(C)C1(C)CC1O
100980 COC(C)C1(C)CN1C
100981 COC(C)C1(C)NC1C
100982 COC(C)C1(C)OC1C
100983 COC(C)C1(O)CC1C
100984 COC(C)C1(O)CC1O


995it [00:10, 66.57it/s]

100985 OCC(O)C1(O)CC1O
100986 CC(CO)C1(C)CCC1
100987 CC(CO)C1(C)CCO1
100988 CC(CO)C1(C)COC1
100989 CC(CO)C1(O)CCC1
100990 CC(CO)C1(O)COC1
100991 CC1(CCC1)C(O)CO
100992 CC1(CCO1)C(O)CO
100993 CC1(COC1)C(O)CO
100994 CCC(C)C1(C)CCC1
100995 CCC(C)C1(C)CCO1
100996 CCC(C)C1(C)COC1
100997 CCC(C)C1(O)CCC1
100998 CCC(C)C1(O)COC1


1000it [00:10, 94.68it/s]

100999 CCC(O)C1(C)CCC1
NUM MOLECULES SKIPPED 0, 0.00% of the data



Done!


In [2]:
from datasets import BaceDataset
import os
import shutil


root = "../data/bace"
# Remove the processed cache under `root`. The path is derived from `root`
# so the two cannot drift apart, and shutil.rmtree(ignore_errors=True)
# behaves like `rm -rf` without requiring a POSIX shell.
shutil.rmtree(os.path.join(root, "processed"), ignore_errors=True)

# NOTE(review): the recorded run of this cell processed 0 molecules and ended
# with "IndexError: list index out of range"; the cause is inside
# BaceDataset, which is not visible here.
data = BaceDataset(root=root)

Processing...


Index(['Class', 'pIC50'], dtype='object')


0it [00:00, ?it/s]

NUM MOLECULES SKIPPED 0, 0.00% of the data





IndexError: list index out of range

In [1]:
import pandas as pd
import torch


%load_ext autoreload
%autoreload 2

df = pd.read_csv("../data/bace/raw/bace.csv")

BACE_REGRESSION_TASKS = "pIC50"
BACE_CLASSIFICATION_TASKS = "Class"

df = df[["mol", BACE_CLASSIFICATION_TASKS, BACE_REGRESSION_TASKS]]
df.set_index("mol", drop=True, inplace=True)

torch.tensor(df.values)

tensor([[1.0000, 9.1549],
        [1.0000, 8.8539],
        [1.0000, 8.6990],
        ...,
        [0.0000, 2.9531],
        [0.0000, 2.7333],
        [0.0000, 2.5445]], dtype=torch.float64)

array([[1.       , 9.1549015],
       [1.       , 8.8538723],
       [1.       , 8.6989698],
       ...,
       [0.       , 2.9531147],
       [0.       , 2.7332981],
       [0.       , 2.5445461]])

In [7]:
# Encode the Class labels as integer codes; the result is a
# (codes, uniques) pair.
df["Class"].factorize()

(array([0, 0, 0, ..., 1, 1, 1]), Int64Index([1, 0], dtype='int64'))