In [1]:
from ecnet.datasets import load_lhv

# load_lhv() returns a pair that is unpacked into `smiles` and `prop`.
# Presumably these are SMILES strings and their LHV target values --
# TODO confirm against the ecnet documentation.
(smiles, prop) = load_lhv()

# Sanity check: print both lengths side by side so a mismatch is obvious.
print(*map(len, (smiles, prop)))

388 388


In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. random_state is pinned so the
# same partition is produced on every run.
smiles_train, smiles_test, prop_train, prop_test = train_test_split(
    smiles,
    prop,
    test_size=0.2,
    random_state=24,
)

# One line per partition: number of inputs, then number of targets.
print(*map(len, (smiles_train, prop_train)))
print(*map(len, (smiles_test, prop_test)))

310 310
78 78


In [3]:
from ecnet.datasets import QSPRDataset
from sklearn.decomposition import PCA
import torch

# Build one dataset per partition using the 'alvadesc' backend. The cell
# rebuilds both datasets from the split lists every time it runs, so
# re-executing it does not stack PCA on top of already-projected data.
ds_train = QSPRDataset(smiles_train, prop_train, backend='alvadesc')
ds_test = QSPRDataset(smiles_test, prop_test, backend='alvadesc')

# Tensor.type(dtype) returns a *new* tensor; it does not cast in place.
# The result has to be assigned back or the cast is silently discarded.
ds_train.target_vals = ds_train.target_vals.type(torch.float32)
ds_test.target_vals = ds_test.target_vals.type(torch.float32)

# Descriptor matrix shapes before dimensionality reduction.
print(ds_train.desc_vals.shape, ds_test.desc_vals.shape)

# PCA cannot extract more components than min(n_samples, n_features).
# When there are at least as many descriptors as training samples (the
# recorded run: 310 x 5305) this equals the number of training samples.
desc_train = ds_train.desc_vals.detach().numpy()
desc_test = ds_test.desc_vals.detach().numpy()
pca = PCA(n_components=min(desc_train.shape))

# Fit on the training descriptors only, then apply the same projection to
# both partitions so no information from the test set leaks into the fit.
pca.fit(desc_train)
ds_train.desc_vals = torch.tensor(pca.transform(desc_train), dtype=torch.float32)
ds_test.desc_vals = torch.tensor(pca.transform(desc_test), dtype=torch.float32)

# Descriptor matrix shapes after projection.
print(ds_train.desc_vals.shape, ds_test.desc_vals.shape)

torch.Size([310, 5305]) torch.Size([78, 5305])
torch.Size([310, 310]) torch.Size([78, 310])


In [4]:
import os

from ecnet import ECNet
from sklearn.metrics import median_absolute_error, r2_score

# Checkpoints are written under models/. Create the directory up front so
# a fresh checkout does not train a model and only then fail on save.
# (Assumes ECNet.save does not create parent directories itself; creating
# it here is harmless either way.)
os.makedirs('models', exist_ok=True)

# Per-model metrics, one entry appended per trained model.
# NOTE: "mae" here is the *median* absolute error (median_absolute_error),
# not the mean absolute error.
maes_train, maes_test = [], []
r2s_train, r2s_test = [], []

# NOTE(review): no RNG seed is set in this notebook before training, so
# the metrics presumably vary from run to run -- confirm whether seeding
# is wanted.
for i in range(10):

    print('Model: {}'.format(i))

    # Input width = number of (PCA-projected) descriptor columns; output
    # width = number of target columns. The trailing 512 and 2 are
    # presumably the hidden-layer width and the number of hidden layers --
    # TODO confirm against the ECNet constructor signature.
    _model = ECNet(ds_train.desc_vals.shape[1], ds_train.target_vals.shape[1], 512, 2)
    _model.fit(dataset=ds_train, valid_size=0.33, shuffle=True, patience=16, epochs=512, lr=0.001)

    # Predict on both partitions and detach from the autograd graph so the
    # results can be handed to scikit-learn as NumPy arrays.
    pred_train = _model(ds_train.desc_vals).detach().numpy()
    pred_test = _model(ds_test.desc_vals).detach().numpy()

    maes_train.append(median_absolute_error(prop_train, pred_train))
    maes_test.append(median_absolute_error(prop_test, pred_test))
    r2s_train.append(r2_score(prop_train, pred_train))
    r2s_test.append(r2_score(prop_test, pred_test))

    _model.save('models/lhv_{}.pt'.format(i))

Model: 0
Model: 1
Model: 2
Model: 3
Model: 4
Model: 5
Model: 6
Model: 7
Model: 8
Model: 9


In [5]:
import numpy as np

# Each entry pairs a report line template with the per-model scores it
# summarises. The "MAE" lists were filled with median_absolute_error values
# in the training cell, so MAE here means median (not mean) absolute error.
_report_rows = (
    ('Training MAE: {:.4f} +/- {:.4f}', maes_train),
    ('Training R2: {:.4f} +/- {:.4f}', r2s_train),
    ('Test MAE: {:.4f} +/- {:.4f}', maes_test),
    ('Test R2: {:.4f} +/- {:.4f}', r2s_test),
)

# Report mean +/- standard deviation across the trained models.
for _template, _scores in _report_rows:
    print(_template.format(np.mean(_scores), np.std(_scores)))

Training MAE: 0.3341 +/- 0.2408
Training R2: 0.9960 +/- 0.0047
Test MAE: 0.7668 +/- 0.2357
Test R2: 0.9644 +/- 0.0102
