In [2]:
import numpy as np

from src.dataset import *
from src.model4 import *
from src.train import *

import torch
import src.templates
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "template_TNR"

In [3]:
dataset = QM9Dataset(root="./dataset/qm9",
                     target_y="gap",
                     max_atoms=100,
                     force_reload=False
                     )

In [4]:
model3 = E3GG(node_attr_dim = dataset.node_attr.shape[1],
              edge_dim   = dataset.edge_attr.shape[1],
              hidden_dim = 64,
              num_layers=7,
              equivariant=True,
              )

In [5]:
from torch_geometric.loader import DataLoader
from torch.utils.data import random_split
import torch

# split
n = len(dataset)
n_train = int(0.90 * n)
n_test = n - n_train
train_ds, test_ds = random_split(dataset, [n_train, n_test], generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_ds, batch_size=64, shuffle=False)

In [6]:
n_epochs = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

optimizer = torch.optim.Adam(model3.parameters(), lr=1e-3, weight_decay=1e-16)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)

train_losses, val_losses = [], []

In [7]:
model3.to(device)

_train_losses, _val_losses = train( model = model3,
                                  train_loader = train_loader,
                                  val_loader = test_loader,
                                  optimizer = optimizer,
                                  scheduler = lr_scheduler,
                                  epochs = n_epochs,
                                  device = device,
                                  )
train_losses += _train_losses
val_losses += _val_losses

100%|██████████| 20/20 [22:12<00:00, 66.65s/it]


In [10]:
fig = go.Figure(layout={
        'plot_bgcolor': 'white',
        'paper_bgcolor' : 'white',})

fig.update_layout(width = 600,
                  height = 600,
                  legend = dict(x = 0.95, y = 0.9)
                  )

fig.update_xaxes(title = "epoch")
fig.update_yaxes(title = "MSE loss")

fig.add_trace(go.Scatter(y=val_losses, mode='lines', name = 'Validation loss'))
fig.add_trace(go.Scatter(y=train_losses, mode='lines', name = 'Training loss'))
fig.show()

In [11]:
import src.templates
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default = "template_TNR"

model3.to("cpu")

fig = go.Figure(layout={
        'plot_bgcolor': 'white',
        'paper_bgcolor' : 'white',})

fig.update_layout(width = 600,
                  height = 600
                  )
fig.update_xaxes(title = "model",
                 range=[0, 15]
)
fig.update_yaxes(title = "dataset",
                 range=[0, 15])

fig.add_trace(go.Scatter(x = [-15, 15], y = [-15, 15], mode = 'lines', line=dict(color = "lightgrey"), showlegend=False))

for batch in tqdm(train_loader):
    fig.add_trace(go.Scatter(x = model3(batch).detach(),
                             y = batch.y,
                             mode = 'markers',
                             marker = dict(color = "blue"),
                             showlegend=False,
                             )
                  )

for batch in tqdm(test_loader):
    fig.add_trace(go.Scatter(x = model3(batch).detach(),
                             y = batch.y,
                             mode = 'markers',
                             marker = dict(color = "green"),
                             showlegend=False,
                             )
                  )

fig.show()

100%|██████████| 1840/1840 [00:32<00:00, 56.53it/s]
100%|██████████| 205/205 [00:03<00:00, 60.05it/s]


In [9]:
torch.save(model3.state_dict(), "model.pt")

In [12]:
MAE_train = []
MAE_valid = []

for batch in train_loader:
    MAE_train.append( (np.abs(model3(batch).detach() -  batch.y)).numpy() )

for batch in test_loader:
    MAE_valid.append( (np.abs(model3(batch).detach() -  batch.y)).numpy() )


__array_wrap__ must accept context and return_scalar arguments (positionally) in the future. (Deprecated NumPy 2.0)


__array_wrap__ must accept context and return_scalar arguments (positionally) in the future. (Deprecated NumPy 2.0)



In [13]:
print(f"MAE (train): {1000 * np.mean(np.concatenate(MAE_train)):.2f} meV")
print(f"MAE (valid): {1000 * np.mean(np.concatenate(MAE_valid)):.2f} meV")

MAE (train): 250.65 meV
MAE (valid): 254.31 meV


In [15]:
np.sum([p.numel() for p in model3.parameters()])


np.int64(219087)

In [25]:
_model3 = E3GG(node_attr_dim = dataset.node_attr.shape[1],
              edge_dim   = dataset.edge_attr.shape[1],
              hidden_dim = 64,
              num_layers=7,
              equivariant=True,
              )

In [26]:
np.sum([p.numel() for p in _model3.parameters()])


np.int64(219087)