In [1]:
from csv import DictReader
from ecnet.datasets import QSPRDataset

# Read the terpene table. "utf-8-sig" strips a leading byte-order mark when the
# file has one, so the first column header is plain "SMILES" regardless of the
# platform's default text encoding. newline="" is what the csv module expects.
with open("terpenes.csv", "r", encoding="utf-8-sig", newline="") as csv_file:
    reader = DictReader(csv_file)
    terpenes = list(reader)

# One SMILES string per CSV row.
smiles = [t["SMILES"] for t in terpenes]
print(len(smiles))

# The second argument is a list of zeros, one per SMILES string
# (presumably placeholder target values -- TODO confirm against ecnet docs).
dataset = QSPRDataset(smiles, [0] * len(smiles), backend="alvadesc")

64


In [2]:
from copy import deepcopy
from ecnet.model import load_model
import numpy
import os
import pickle
import torch
from typing import Tuple

# Model directories, one per predicted property. Each constant is passed to
# predict_for(), which reads the saved models and the "trf.pca" file from it.
FP_MODEL_PATH = "../training/models/fp/"
KV_MODEL_PATH = "../training/models/kv/"
LHV_MODEL_PATH = "../training/models/lhv/"
MON_MODEL_PATH = "../training/models/mon/"
RON_MODEL_PATH = "../training/models/ron/"
YSI_MODEL_PATH = "../training/models/ysi/"
OS_MODEL_PATH = "../training/models/os/"

def predict_for(model_folder: str) -> Tuple[numpy.ndarray, numpy.ndarray]:
    """Predict with every model in ``model_folder`` and aggregate the results.

    Every directory entry whose name does not contain ``"pca"`` is loaded with
    ``load_model``. The pickled object in ``trf.pca`` (same folder) is used to
    transform the descriptor values of the module-level ``dataset`` before they
    are passed to each model.

    Args:
        model_folder: Directory holding the saved models and ``trf.pca``.
            A trailing slash is accepted but not required.

    Returns:
        A ``(mean, std)`` pair: the mean and standard deviation of the model
        outputs, both taken along axis 0 (i.e. across models).

    Raises:
        FileNotFoundError: If ``model_folder`` contains no model files.
    """
    # Sort so the load order does not depend on the arbitrary os.listdir order.
    model_names = sorted(
        name for name in os.listdir(model_folder) if "pca" not in name
    )
    if not model_names:
        raise FileNotFoundError(f"no model files found in {model_folder!r}")
    models = [load_model(os.path.join(model_folder, name)) for name in model_names]

    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # model folders produced by a trusted training run.
    with open(os.path.join(model_folder, "trf.pca"), "rb") as pca_file:
        pca = pickle.load(pca_file)

    # Work on a copy so the shared module-level dataset keeps its original
    # (untransformed) descriptor values for the next call.
    ds = deepcopy(dataset)
    ds.desc_vals = torch.as_tensor(pca.transform(ds.desc_vals.detach().numpy()))
    preds = [m(ds.desc_vals).detach().numpy() for m in models]
    return (numpy.mean(preds, axis=0), numpy.std(preds, axis=0))

In [3]:
# Run predict_for() once per model directory, in the order listed, and bind
# each returned (mean, std) pair to its own pair of names.
(
    (preds_fp, std_fp),
    (preds_kv, std_kv),
    (preds_lhv, std_lhv),
    (preds_mon, std_mon),
    (preds_ron, std_ron),
    (preds_ysi, std_ysi),
    (preds_os, std_os),
) = [
    predict_for(model_path)
    for model_path in (
        FP_MODEL_PATH,
        KV_MODEL_PATH,
        LHV_MODEL_PATH,
        MON_MODEL_PATH,
        RON_MODEL_PATH,
        YSI_MODEL_PATH,
        OS_MODEL_PATH,
    )
]

In [4]:
# dOS: the OS value derived from the RON and MON predictions (RON minus MON),
# one entry per row of preds_ron.
preds_dos = [
    ron_row[0] - preds_mon[idx][0]
    for idx, ron_row in enumerate(preds_ron)
]

In [5]:
from csv import DictWriter

# One output row per compound: the SMILES string followed by the mean
# prediction and the across-model standard deviation for each property,
# plus the RON/MON-derived dOS value.
rows = [{
    # smiles is a list of strings, so smiles[i] is the full SMILES string
    # (indexing it again with [0] would keep only its first character).
    "SMILES": smiles[i],
    "FP": preds_fp[i][0],
    "FP std": std_fp[i][0],
    "KV": preds_kv[i][0],
    "KV std": std_kv[i][0],
    "LHV": preds_lhv[i][0],
    "LHV std": std_lhv[i][0],
    "MON": preds_mon[i][0],
    "MON std": std_mon[i][0],
    "RON": preds_ron[i][0],
    "RON std": std_ron[i][0],
    "YSI": preds_ysi[i][0],
    "YSI std": std_ysi[i][0],
    "OS": preds_os[i][0],
    "OS std": std_os[i][0],
    "dOS": preds_dos[i]
} for i in range(len(smiles))]

# newline="" stops the text layer from translating the writer's "\n" line
# terminator (e.g. into "\r\n" on Windows). The with-block closes the file.
with open("terpene_predictions.csv", "w", encoding="utf-8", newline="") as csv_file:
    writer = DictWriter(csv_file, list(rows[0].keys()), delimiter=",", lineterminator="\n")
    writer.writeheader()
    writer.writerows(rows)