In [50]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
from torch_geometric.nn import global_add_pool, GATConv, CGConv, VGAE,GCNConv
import pickle
from itertools import combinations
from sklearn.model_selection import train_test_split
from torch_geometric.data import DataLoader


In [32]:
input_dim = 10
hidden_dim = 5
num_heads = 3

In [66]:
class VAEGeneratorEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_heads):
        super().__init__()
        #self.conv1 = GATConv(input_dim, hidden_dim, heads=num_heads)
        self.conv1 = GCNConv(input_dim, 2 * hidden_dim)
        self.conv_mu = GCNConv(2 * hidden_dim, hidden_dim)
        self.conv_logstd = GCNConv(2 * hidden_dim, hidden_dim)
        self.relu = nn.ReLU()

    def forward(self, x, edge_index, edge_attr):
        x = x.float()
        x = self.relu(self.conv1(x, edge_index))
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)

model = VGAE(VAEGeneratorEncoder(input_dim, hidden_dim, num_heads))


In [23]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


In [24]:
with open("../data/combined_data.pkl", "rb") as file:
    combined_data = pickle.load(file)

atomic_num_tensors = torch.load("../data/atomic_number_tensors.pt")
bond_tensors = torch.load("../data/bond_tensors.pt")

In [10]:
print(combined_data[0])
print(len(combined_data))

Data(x=[8, 10], edge_index=[2, 7], edge_attr=[7, 5], atom_data=[8, 35])
3208


In [46]:
train_data, test_data = train_test_split(combined_data, test_size=0.15, random_state=48)

batch_size = 1

train_data, valid_data = train_test_split(train_data, test_size=0.1, random_state=48)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size)




In [9]:
print(atomic_num_tensors.shape)

torch.Size([3208, 63, 11])


In [12]:
print(bond_tensors.shape)

torch.Size([3208, 63, 63, 5])


In [47]:
def create_pos_edge_index(data):
    return data.edge_index

def create_neg_edge_index(num_nodes, pos_edge_index):
    all_possible_edges = list(combinations(range(num_nodes), 2))

    pos_edges = [(src.item(), dst.item()) for src, dst in pos_edge_index.t()]

    neg_edges = [edge for edge in all_possible_edges if edge not in pos_edges]

    neg_edge_index = torch.tensor(neg_edges).t()

    return neg_edge_index

In [69]:
# Train model
num_epochs = 200
losses = []
valid_losses = []
for epoch in range(num_epochs):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        output = model.encode(batch.x, batch.edge_index, batch.edge_attr)
        pos_edge = create_pos_edge_index(batch)
        neg_edge = create_neg_edge_index(len(batch.x), pos_edge)
        loss = model.recon_loss(output, pos_edge, neg_edge)
        loss = loss + (1 / batch.x.shape[0]) * model.kl_loss()
        #loss.requires_grad = True
        loss.backward()
        optimizer.step()
        #scheduler.step()
    #model.eval()
    #with torch.no_grad():
    #    valid_loss = 0
    #    for graph in valid_loader:
    #        output_valid = model.encode(graph.x, graph.edge_index)
    #        pos_edge = create_pos_edge_index(batch)
    #        neg_edge = create_neg_edge_index(batch)
    #        valid_loss_calc = model.recon_loss(output, pos_edge, neg_edge)
    #        loss = loss + (1 / graph.x.shape[0]) * model.kl_loss()
    #        valid_loss += valid_loss_calc.item()
    #    valid_loss /= len(valid_loader)
    #    valid_losses.append(valid_loss)

    if epoch % 10 == 0:
        losses.append(loss.item())
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')



IndexError: index 0 is out of bounds for dimension 0 with size 0