In [9]:
import sys
from os import path

import networkx    as nx
import torch
import torch.nn    as nn
import torch.optim as optim

from torch.utils.data       import random_split
from torch_geometric.utils  import convert
from torch_geometric.data   import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn     import GraphConv, Linear

import GM_library  as GML

# The MP package lives one directory up, so it has to be on sys.path before importing it
sys.path.append('../')
import MP.MP_library as MPL

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [5]:
# Extract cell, composition, concentration and positions from the POSCAR file
# stored in the folder of material mp-3332
structure_folder = '../MP/Loaded_PHONON/mp-3332-20180417'
structure_file   = 'POSCAR'

cell, composition, concentration, positions = MPL.information_from_VASPfile(structure_folder, structure_file)

In [27]:
# Three equal entries, handed to the encoder as its last argument
# NOTE(review): presumably the box size along each axis -- confirm units in GM_library
L = [15] * 3

# Per-particle descriptors computed by GM_library from the structure read above
(particle_types,
 atomic_masses,
 charges,
 electronegativities,
 ionization_energies) = GML.graph_POSCAR_encoding(cell, composition, concentration, positions, L)

In [42]:
# Reload GM_library so that edits made to the module on disk are picked up
# without restarting the kernel; the reloaded module is echoed as the cell output
import importlib
importlib.reload(GML)

<module 'GM_library' from '/Users/cibran/Work/UPC/GenerativeModels/GM_library.py'>

In [None]:
# Positional arguments of GML.get_atoms_in_box, in the order the function expects them
box_inputs = (
    particle_types,
    composition,
    cell,
    atomic_masses,
    charges,
    electronegativities,
    ionization_energies,
    positions,
    L,
)

# Nodes and positions returned by GM_library for the given box
all_nodes, all_positions = GML.get_atoms_in_box(*box_inputs)

In [None]:
# Display the nodes and positions returned by GML.get_atoms_in_box for visual inspection
all_nodes, all_positions

In [None]:
# Build the edges (and their attributes) between the atoms found in the box.
# The nodes come from the get_atoms_in_box cell above, which names them `all_nodes`;
# the previous `nodes` was never defined and raised a NameError on a fresh kernel.
edges, attributes = GML.get_edges_in_box(all_nodes, all_positions)

In [2]:
# Optimisation hyper-parameters
n_epochs, batch_size = 1000, 128
learning_rate        = 1e-4

# Lengths of the diffusing and denoising chains (kept as two names because they may differ)
n_diffusing_steps = n_denoising_steps = 10

# Dropout rates of the node and edge models (kept as two names because they are independent)
dropout_node = dropout_edge = 0.2

# Target used to generate new crystals
target = 'GM_D'

# Where the model and its datasets are stored
input_folder  = 'models'
target_folder = '/'.join((input_folder, target))
model_name    = '/'.join((target_folder, 'model.pt'))

# Generation of graph database for training

Load the datasets, using the already standardized version if it is available.

In [3]:
labels_name         = f'{target_folder}/labels.pt'
dataset_name        = f'{target_folder}/dataset.pt'
dataset_name_std    = f'{target_folder}/standardized_dataset.pt'
parameters_name_std = f'{target_folder}/standardized_parameters.pt'  # Parameters for rescaling the predictions

if path.exists(dataset_name_std) and path.exists(labels_name) and path.exists(parameters_name_std):
    # Load the standardized dataset, with corresponding labels and parameters
    dataset    = torch.load(dataset_name_std)
    labels     = torch.load(labels_name)
    parameters = torch.load(parameters_name_std)
else:
    if path.exists(dataset_name) and path.exists(labels_name):
        # Load the raw dataset, with corresponding labels
        dataset = torch.load(dataset_name)
        labels  = torch.load(labels_name)
    else:
        # TODO: generate the raw dataset (and its labels) from scratch here.
        # Until that is implemented, fail with an explicit message instead of
        # a NameError on the undefined `dataset` / `labels`.
        raise NotImplementedError(
            f'Neither {dataset_name_std} nor {dataset_name} (with {labels_name}) were found, '
            'and generating the dataset from scratch is not implemented yet.'
        )

    # Standardize the raw dataset and cache the result for the next run
    dataset, parameters = GML.standardize_dataset(dataset, labels)
    torch.save(dataset,    dataset_name_std)
    torch.save(parameters, parameters_name_std)

# Assigning parameters accordingly. This is done after the branches so that the
# names below exist no matter whether the parameters were loaded or just computed.
target_mean, feat_mean, edge_mean, target_std, edge_std, feat_std, scale = parameters

# Defining target factor
target_factor = target_std / scale

# Generation of diffusing and denoising Markov chains

In [6]:
# In GM-library

# Generation of Graph Neural Network models

In [12]:
# In GM-library

# Definition of train-test datasets

In [9]:
# torch.manual_seed(12345)  # uncomment to make the random split reproducible

# 80 % of the graphs go to training, the remainder to testing
n_graphs   = len(dataset)
train_size = int(0.8 * n_graphs)
test_size  = n_graphs - train_size

# Randomly partition the dataset into the two subsets
split_sizes = [train_size, test_size]
train_dataset, test_dataset = random_split(dataset, split_sizes)

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of testing  graphs: {len(test_dataset)}')

Number of training graphs: 171
Number of testing  graphs: 43


# Training of the model

In [18]:
# Determine number of features in dataset
n_features = dataset[0].num_node_features

# Instantiate the models for nodes and edges
node_model = MPL.nGCNN(n_features, dropout_node).to(device)
edge_model = MPL.eGCNN(n_features, dropout_edge).to(device)
print('\nNode GCNN:')
print(node_model)
print('\nEdge GCNN:')
print(edge_model)


Nodes:
nGCNN(
  (conv1): GraphConv(5, 512)
  (conv2): GraphConv(512, 512)
  (linconv): Linear(512, 16, bias=True)
  (lin): Linear(16, 1, bias=True)
)

Edges:
eGCNN(
  (linear1): Linear(100, 64, bias=True)
  (linear2): Linear(64, 64, bias=True)
  (linear3): Linear(64, 100, bias=True)
)


In [None]:
# Both networks are trained jointly, so the optimizer needs the parameters of both
optimizer = torch.optim.Adam(
    list(node_model.parameters()) + list(edge_model.parameters()),
    lr=learning_rate,
)
criterion_node = nn.MSELoss()
criterion_edge = nn.MSELoss()


def compute_denoising_losses(graph):
    """Diffuse `graph`, denoise it with the node and edge models, and compare with the original.

    Args:
        graph: Graph from the dataset, with `x`, `edge_index` and `edge_attr`.

    Returns:
        Tuple `(loss_node, loss_edge)`: MSE between the denoised and the original
        node features, and between the denoised and the original edge attributes.
    """
    graph = graph.to(device)

    # Diffuse the graph with some noise
    diffused_graph = MPL.diffuse(graph, n_diffusing_steps)

    # Denoise step by step: every step starts from the output of the previous one
    denoised_graph = diffused_graph.clone()
    for t in range(n_denoising_steps):
        # Perform a single forward pass for predicting node features
        out_x = node_model(denoised_graph.x,
                           denoised_graph.edge_index,
                           denoised_graph.edge_attr)

        # Features of the two nodes connected by each edge
        # NOTE(review): follows the PyTorch Geometric convention (x_j = source,
        # x_i = target) and uses the current node features -- confirm that this
        # is what MPL.eGCNN expects
        source_index, target_index = denoised_graph.edge_index[0], denoised_graph.edge_index[1]
        x_j = denoised_graph.x[source_index]
        x_i = denoised_graph.x[target_index]

        # Perform a single forward pass for predicting edge attributes
        out_attr = edge_model(x_i, x_j)

        # Construct noise graph
        noise_graph = Data(x=out_x, edge_index=denoised_graph.edge_index, edge_attr=out_attr)

        # Denoise the graph with the predicted noise
        denoised_graph = MPL.denoising_step(denoised_graph, noise_graph, t, n_denoising_steps)

    # Losses for node features and for edge attributes, kept separate
    loss_node = criterion_node(denoised_graph.x,         graph.x)
    loss_edge = criterion_edge(denoised_graph.edge_attr, graph.edge_attr)
    return loss_node, loss_edge


# Training loop
for epoch in range(n_epochs):
    # Training (dropout active)
    node_model.train()
    edge_model.train()

    train_loss = 0.0
    for graph in train_dataset:
        optimizer.zero_grad()

        loss_node, loss_edge = compute_denoising_losses(graph)

        # TODO: monitor node and edge losses independently

        # Accumulate the total training loss
        loss = loss_node + loss_edge
        train_loss += loss.item()

        # Backpropagation and optimization step
        loss.backward()
        optimizer.step()

    # Compute the average train loss (max() guards against an empty split)
    train_loss = train_loss / max(len(train_dataset), 1)

    # Testing (dropout disabled, no gradients needed)
    node_model.eval()
    edge_model.eval()

    test_loss = 0.0
    with torch.no_grad():
        for graph in test_dataset:
            loss_node, loss_edge = compute_denoising_losses(graph)
            test_loss += (loss_node + loss_edge).item()

    # Compute the average test loss
    test_loss = test_loss / max(len(test_dataset), 1)

    print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')