In [1]:
from data.dataset import Dataset
from data.feature.coordinate import CoordinateFeaturizer
from data.dataloader import DataLoader
from methods.ridge_method import RidgeMethod

import numpy as np

In [2]:
from itertools import combinations

In [3]:
featurizer = CoordinateFeaturizer()
#trimer_dataset = Dataset.from_file('xe3_50.xyz', 3, featurizer)
#energy_base = trimer_dataset[-1][1][0]
dimer_dataset = Dataset.from_file('dataset/xe2_50.xyz', 1, featurizer)
trimer_dataset = Dataset.from_file('dataset/xe3_50.xyz', 1, featurizer)
rand_trimer_dataset = Dataset.from_file('dataset/xe3_dataset_dft.xyz', 1, featurizer)

In [4]:
n_dimer_train = 40
n_trimer_train = 40
n_rand_trimer_train = 5000

dimer_train, dimer_val = dimer_dataset.split(
    [list(range(n_dimer_train)),
     list(range(n_dimer_train, len(dimer_dataset)))])
trimer_train, trimer_val = trimer_dataset.split(
    [list(range(n_trimer_train)),
     list(range(n_trimer_train, len(dimer_dataset)))])
rand_trimer_train, rand_trimer_val = rand_trimer_dataset.split(
    [list(range(n_rand_trimer_train)), 
     list(range(n_rand_trimer_train, len(rand_trimer_dataset)))])
dimer_val = DataLoader([dimer_val])
trimer_val = DataLoader([trimer_val])
rand_trimer_val = DataLoader([rand_trimer_val])

In [5]:
train_dimer = DataLoader([dimer_train])
val_dimer = DataLoader([dimer_val])
train_trimer = DataLoader([trimer_train, rand_trimer_train])
val_trimer = DataLoader([trimer_val, rand_trimer_val])

In [6]:
train_dimer.X = np.array([np.concatenate(dimer) for dimer in train_dimer.X])
method_dimer = RidgeMethod(train_dimer)
method_dimer.train()
for i in range(len(train_trimer.X)):
    train_trimer.y[i] -= np.sum(method_dimer.predict(
        [np.concatenate(list(pair)) for pair in combinations(train_trimer.X[i], 2)]))
train_trimer.X = np.array([np.concatenate(trimer) for trimer in train_trimer.X])
method_trimer = RidgeMethod(train_trimer)
method_trimer.train()

In [7]:
train_dimer = DataLoader([dimer_train])
val_dimer = DataLoader([dimer_val])
train_trimer = DataLoader([trimer_train, rand_trimer_train])
val_trimer = DataLoader([trimer_val, rand_trimer_val])

# Train %RMSE

In [8]:
result = np.zeros((1, len(dimer_train.X)))

for i in range(len(dimer_train.X)):
    result[0, i] += np.sum(method_dimer.predict(
        [np.concatenate(list(pair)) for pair in combinations(dimer_train.X[i], 2)]))
rmse = np.sqrt(np.mean((result - dimer_train.y)**2)) / np.std(dimer_train.y)

In [9]:
rmse

1.0051882704311188

In [10]:
result = np.zeros((1, len(trimer_train.X)))

for i in range(len(trimer_train.X)):
    result[0, i] += np.sum(method_dimer.predict(
        [np.concatenate(list(pair)) for pair in combinations(trimer_train.X[i], 2)]))
trimer_train.X = np.array([np.concatenate(trimer) for trimer in trimer_train.X])
result += method_trimer.predict(trimer_train.X).T

rmse = np.sqrt(np.mean((result - trimer_train.y)**2)) / np.std(train_trimer.y[:40])

In [11]:
rmse

21.737807064896135

In [12]:
result = np.zeros((1, len(rand_trimer_train.X)))

for i in range(len(trimer_val.X)):
    result[0, i] += np.sum(method_dimer.predict(
        [np.concatenate(list(pair)) for pair in combinations(rand_trimer_train.X[i], 2)]))
rand_trimer_train.X = np.array([np.concatenate(trimer) for trimer in rand_trimer_train.X])
result += method_trimer.predict(rand_trimer_train.X).T

rmse = np.sqrt(np.mean((result - rand_trimer_train.y)**2)) / np.std(train_trimer.y[40:])

In [13]:
rmse

1067282.135641071

# Val %RMSE

In [14]:
result = np.zeros((1, len(dimer_val.X)))

for i in range(len(dimer_val.X)):
    result[0, i] += np.sum(method_dimer.predict(
        [np.concatenate(list(pair)) for pair in combinations(dimer_val.X[i], 2)]))
rmse = np.sqrt(np.mean((result - dimer_val.y)**2)) / np.std(train_dimer.y)

In [15]:
rmse

0.24686048159676532

In [16]:
result = np.zeros((1, len(trimer_val.X)))

for i in range(len(trimer_val.X)):
    result[0, i] += np.sum(method_dimer.predict(
        [np.concatenate(list(pair)) for pair in combinations(trimer_val.X[i], 2)]))
trimer_val.X = np.array([np.concatenate(trimer) for trimer in trimer_val.X])
result += method_trimer.predict(trimer_val.X).T

rmse = np.sqrt(np.mean((result - trimer_val.y)**2)) / np.std(train_trimer.y[:40])

In [17]:
rmse

36.502844685312226

In [18]:
result = np.zeros((1, len(rand_trimer_val.X)))

for i in range(len(trimer_val.X)):
    result[0, i] += np.sum(method_dimer.predict(
        [np.concatenate(list(pair)) for pair in combinations(rand_trimer_val.X[i], 2)]))
rand_trimer_val.X = np.array([np.concatenate(trimer) for trimer in rand_trimer_val.X])
result += method_trimer.predict(rand_trimer_val.X).T

rmse = np.sqrt(np.mean((result - rand_trimer_val.y)**2)) / np.std(train_trimer.y[40:])

In [19]:
rmse

1062995.817932185