# 0 - Minimal Example

This example illustrates the core functionality of Oloren Chem Engine. In-depth
tutorials on these features and the core classes are provided in the notebooks prefixed
by '1'. Advanced usage is covered in the notebooks prefixed by '2'. Custom
implementations of functionality are covered in the notebooks prefixed by '3'.

In [1]:
import olorenchemengine as oce
import pandas as pd

# Fetch the public VEGFR1 (CHEMBL1868) activity table as a DataFrame.
df = pd.read_csv("https://storage.googleapis.com/oloren-public-data/CHEMBL%20Datasets/997_2298%20-%20VEGFR1%20(CHEMBL1868).csv")

# Assemble the dataset pipeline one stage at a time: wrap the CSV text in a
# BaseDataset (structures from "Smiles", target from "pChEMBL Value"), then
# apply structure cleaning, then the scaffold split, in that order.
dataset = oce.BaseDataset(
    data=df.to_csv(),
    structure_col="Smiles",
    property_col="pChEMBL Value",
)
dataset = dataset + oce.CleanStructures()
dataset = dataset + oce.ScaffoldSplit()

2022-12-29 03:58:25.997255: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-29 03:58:26.767565: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-12-29 03:58:26.767653: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


0 structure(s) were removed.


In [2]:
# Component models handed to oce.BaseBoosting, in this order: two random
# forests (1000 estimators each) over different molecular representations,
# followed by a ChemProp model trained for 20 epochs with batch size 64.
base_learners = [
    oce.RandomForestModel(
        oce.DescriptastorusDescriptor("morgan3counts"),
        n_estimators=1000,
    ),
    oce.RandomForestModel(
        oce.OlorenCheckpoint("default"),
        n_estimators=1000,
    ),
    oce.ChemPropModel(epochs=20, batch_size=64),
]
model = oce.BaseBoosting(base_learners)

# Train on the training split produced by the dataset pipeline.
model.fit(*dataset.train_dataset)

790it [00:02, 289.55it/s]
790it [00:02, 272.57it/s]
100%|██████████| 13/13 [00:00<00:00, 238.13it/s]


In [3]:
# Evaluate the fitted model on the held-out test split. The cell's value is a
# dict of regression metrics (r2, Spearman, Explained Variance, Max Error,
# Mean Absolute Error, Mean Squared Error, Root Mean Squared Error).
model.test(*dataset.test_dataset)

100it [00:00, 301.36it/s]
100%|██████████| 2/2 [00:00<00:00, 238.63it/s]


{'r2': 0.5452633756506161,
 'Spearman': 0.7784959345185842,
 'Explained Variance': 0.5949236200632468,
 'Max Error': 2.5975678196477157,
 'Mean Absolute Error': 0.5007694841205477,
 'Mean Squared Error': 0.5537952864718954,
 'Root Mean Squared Error': 0.7441742312603248}

In [4]:
# Cross-validated fit on the training split, attaching a KernelRegressionError
# error model (kernel "sdc"). The cell's value is a list of per-fold scores.
# NOTE(review): ci=0.8 is presumably the confidence level used for the
# intervals returned later by predict(..., return_ci=True) -- confirm against
# the oce documentation.
model.fit_cv(*dataset.train_dataset, error_model = oce.KernelRegressionError(kernel = "sdc", ci=0.8))

790it [00:02, 291.79it/s]
790it [00:02, 296.16it/s]
100%|██████████| 13/13 [00:00<00:00, 244.67it/s]
790it [00:02, 293.12it/s]
100%|██████████| 13/13 [00:00<00:00, 240.45it/s]
159it [00:00, 286.77it/s]
159it [00:00, 293.94it/s]
100%|██████████| 3/3 [00:00<00:00, 245.58it/s]
631it [00:02, 296.19it/s]
100%|██████████| 10/10 [00:00<00:00, 234.71it/s]
159it [00:00, 286.95it/s]
100%|██████████| 3/3 [00:00<00:00, 237.70it/s]
631it [00:00, 1572.79it/s]
159it [00:00, 287.26it/s]
159it [00:00, 299.01it/s]
100%|██████████| 3/3 [00:00<00:00, 249.55it/s]
631it [00:02, 294.94it/s]
100%|██████████| 10/10 [00:00<00:00, 228.73it/s]
159it [00:00, 295.53it/s]
100%|██████████| 3/3 [00:00<00:00, 244.52it/s]
631it [00:00, 1586.26it/s]
159it [00:00, 301.55it/s]
159it [00:00, 307.25it/s]
100%|██████████| 3/3 [00:00<00:00, 249.41it/s]
631it [00:02, 293.37it/s]
100%|██████████| 10/10 [00:00<00:00, 233.27it/s]
159it [00:00, 300.40it/s]
100%|██████████| 3/3 [00:00<00:00, 228.17it/s]
631it [00:00, 1590.54it/s]
15

[0.46459183983941876,
 0.4263721628711872,
 0.40946986973384736,
 0.4449925997238581,
 0.4749022516025433]

In [9]:
# Serialize the trained model to the file "tmp.oce" (a relative path).
oce.save(model, "tmp.oce")

In [10]:
# Predict on the first element of the test split (presumably the structures --
# confirm), additionally requesting confidence intervals and per-prediction
# visualization objects. The result is indexed by column name below
# (results["vis"]), so it is presumably a DataFrame -- verify.
results = model.predict(dataset.test_dataset[0], return_ci = True, return_vis = True)

100it [00:00, 292.72it/s]
100%|██████████| 2/2 [00:00<00:00, 221.04it/s]
100it [00:00, 296.97it/s]
100%|██████████| 2/2 [00:00<00:00, 240.62it/s]
100it [00:00, 611.24it/s]


In [11]:
# Render the visualization for a single test prediction inline in the notebook.
# NOTE(review): positional index 32 looks like an arbitrary example row; any
# valid row position of the "vis" column should work -- confirm.
results["vis"].iloc[32].render_ipynb()

In [8]:
# Build the VisualizeModelSim visualization for the model on the dataset's
# test set (eval_set="test") and render it inline in the notebook.
vis = oce.VisualizeModelSim(dataset, model, eval_set="test")
vis.render_ipynb()

100it [00:00, 249.96it/s]
100%|██████████| 2/2 [00:00<00:00, 195.20it/s]
