In [1]:
import torch.nn    as nn
import torch.optim as optim
import GM_library  as GML
import numpy       as np
import torch

from os                     import path, listdir
from torch.utils.data       import random_split
from torch_geometric.utils  import convert
from torch_geometric.data   import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn     import GraphConv, Linear

import sys
sys.path.append('../')
import MP.MP_library as MPL

# Checking if pytorch can run in GPU, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
n_epochs      = 1000
batch_size    = 128
learning_rate = 0.0001

# Number of diffusing and denoising steps, which can be different
n_diffusing_steps = 10
n_denoising_steps = 10

# Dropouts for node and edge models (work independently)
dropout_node = 0.2
dropout_edge = 0.2

# Define box shape
L = [20, 20, 20]

# Target to generate new crystals
target = 'GM_EPA'

# In case database is created from scratch (otherwise, it is not being used)
DB_path = '../MP/Loaded_EMP'

input_folder  = 'models'
target_folder = f'{input_folder}/{target}'
model_name    = f'{target_folder}/model.pt'

# Generation of graph database for training

Load the datasets, already standarized if possible.

In [3]:
labels_name         = f'{target_folder}/labels.pt'
dataset_name        = f'{target_folder}/dataset.pt'
dataset_name_std    = f'{target_folder}/standardized_dataset.pt'
parameters_name_std = f'{target_folder}/standardized_parameters.pt'  # Parameters for rescaling the predictions

if path.exists(dataset_name_std) and path.exists(labels_name) and path.exists(parameters_name_std):
    # Load the standardized dataset, with corresponding labels and parameters
    dataset    = torch.load(dataset_name_std)
    labels     = torch.load(labels_name)
    parameters = torch.load(parameters_name_std)

    # Assigning parameters accordingly
    target_mean, feat_mean, edge_mean, target_std, edge_std, feat_std, scale = parameters
    
    # Defining target factor
    target_factor = target_std / scale

elif path.exists(dataset_name) and path.exists(labels_name):
    # Load the raw dataset, with corresponding labels, and standardize it
    dataset = torch.load(dataset_name)
    labels  = torch.load(labels_name)
    
    # Standardize dataset
    dataset, parameters = GML.standardize_dataset(dataset, labels)
    
    # Save standardized dataset
    torch.save(dataset,    dataset_name_std)
    torch.save(parameters, parameters_name_std)

else:
    # Generate the raw dataset from scratch, and standardize it
    
    # Read all mateials within the database
    materials = listdir(DB_path)
    
    dataset = []
    labels  = []
    for material in materials:
        try:
            # Try to read the polyforms
            polymorfs = listdir(f'{DB_path}/{material}')
        except:
            continue
        
        print(material)
        for polymorf in polymorfs:
            print(f'\t{polymorf}')
            
            # Path to folder containing the POSCAR
            path_to_POSCAR = f'{DB_path}/{material}/{polymorf}'
            
            # Extract parameters from POSCAR
            cell, composition, concentration, positions = MPL.information_from_VASPfile(path_to_POSCAR,
                                                                                        'POSCAR')
            
            # Generate POSCAR covering the box
            try:
                nodes, edges, attributes = GML.graph_POSCAR_encoding(cell, composition, concentration, positions, L)
            except:
                print(f'Error: {material} {polymorf} not loaded')
                continue
            
            # Load ground state energy per atom
            gs_energy = float(np.loadtxt(f'{path_to_POSCAR}/EPA'))
            
            # Construct temporal graph structure
            graph = Data(x=nodes,
                         edge_index=edges,
                         edge_attr=attributes,
                         y=torch.tensor([[gs_energy]], dtype=torch.float)
                        )
            
            # Append to dataset and labels
            dataset.append(graph)
            labels.append(f'{material}-{polymorf}')
    
    # Standardize dataset
    dataset, parameters = GML.standardize_dataset(dataset, labels)
    
    # Save standardized dataset
    torch.save(dataset,    dataset_name_std)
    torch.save(parameters, parameters_name_std)

Sr2LiCBr3N2
	Fd-3m
LiUNbO6
	P2_1-c
NaCo(PO3)3
	I-43d
Rb2NaTiF6
	Fm-3m
Na6TeMo6(H22O23)2
	P-1
Li2MgNi3O8
	C2-m
Na3Ti8O16
	Pm
	Pmmn
Na5Ca2Ti2P5SO24
	P1
Na2(ReS2)3
	C2-c
Li2MnAgF6
	Fm-3m
Li3Mn8(OF3)4
	C2-m
	P-1
Na2Ti3Cl8
	R-3m
	R3m
LiHo2Ag2(WO4)4
	C2-m
Li4Ti2Fe3Ni3O16
	Cm
NaCeO2
	I4_1-amd
	R-3m
Li9Bi5O13
	P2_1-c
Na3GaSi3Sn3(SO8)3
	P1
Na4ScSi2SnPO12
	Cc
Li9Al3P8O29
	P-3c1
Na6Zn3As4O19
	P2_13
Li6Mn15O32
	C2-m
	P-1
NaNb13O33
	C2-m
	P1
Li3VO4
	Pmn2_1
	Pnma
Li2VCrP2(HO5)2
	P-1
	P1
LiMnSO4F
	P-1
Li2V(CO3)3
	Ama2
BaLi2NiO3
	Pnma
	P2_1-c
NaPrYFeO6
	F-43m
Li8GeO6
	P6_3cm
Na5Hf2Si3Ge2(PO8)3
	P1
Ba3Na7Ti3Nb7O30
	Pm
Li8TeN2
	I4_1md
Na2LiYCl6
	Fm-3m
NaLi3Ti2Fe2(PO4)6
	P1
	R3
Na7Y3Zn(PO4)6
	P1
LiMnF6
	R-3
Sr3LiNbO6
	R-3c
Rb2LiTmCl6
	Fm-3m
Na4Ca2Nb2Si3(SO8)3
	P1
Li32Ti3Cr13O48
	P1
LiCeO2
	P2_1-c
Na3Y2Ge2P5SO24
	P1
Ba2Na2Zr2Si6O23
	P2_1-c
Na3HfMg(PO4)3
	Cc
Na2LiErCl6
	Fm-3m
Na3VO4
	Pmn2_1
	Pbca
	I-42m
NaAlH24(SO10)2
	Pa-3
Na42Ca8Ta16Si33(SO48)3
	P1
LiSb4P7O24
	P-1
Li5Ti(SiO4)2
	C2
Na2ZrO3
	C2-c
Na5Bi2As(

Li7Mn3Fe(PO4)6
	R3
LiSmO3
	Pm-3m
Na3Ca2Sc2PS5O24
	P1
NaLi3Fe4(SiO3)8
	P2
	P-1
	P1
Na(Ti3Se4)2
	P-3
Li34Sb8S7
	I4-m
Li3NbP3O11
	P-1
Li2Ni(WO4)2
	P-1
Na2Mg2Ti6Si3(S3O16)3
	P1
Li3Fe2Ni3O10
	P1
Na2LaTa3Si4(SO12)2
	P1
Na6Nb8Si11SO48
	P1
Na7Ca2Ga2P5SO24
	P1
Na2TaSi2SnPO12
	Cc
Li2ZnFe(PO4)2
	P2_1-m
NaVBP2H3O10
	C2-c
Li3OsN2
	Ia-3
	P2_1-c
Li2SnO3
	C2-c
Li3Nb4ZnO12
	P1
Li(Mg2Si)4
	Pm-3m
Li6Mn5Ni3O16
	Cm
NaSi6
	P6-m
	Cmcm
LiCr2Cu2(WO4)4
	P1
Cs2NaPrBr6
	Fm-3m
Na4Eu2P4Pb
	Cc
NaInI4
	P2_1-c
Na6Al4Si3(SO8)3
	P1
Li5(NiO2)4
	P4_332
	Cc
	Cmc2_1
Na5HfSc3Si3(SO8)3
	P1
NaWO2
	I4_1-a
Li4V3OF11
	P-1
	P1
Na14Zn5Si6Sn3(SO8)6
	P1
Ba5LiGaN3F5
	Pnma
Li4Ni8O9F7
	P1
Li2Co3SnO8
	P6_3mc
	R-3m
	P1
	P4_332
	R3m
	C2
NaPS3
	P2_1-m
Li2Fe3P9O28
	P-1
Na2CaSiSn(SO6)2
	Cc
Na9Mg2Ta10Si9(PO8)9
	P1
Na3LiCuPCO7
	P2_1-m
NaLa2TaTi2O12
	P1
K5Li2PrF10
	Pnma
NaMo3P3O13
	P2_1-m
NaTi2SiO9
	P4_2-mcm
Cs7NaSi8
	Pa-3
LiInI4
	P2_1-c
Ba3Na
	Pm-3m
	I4-mmm
Sr2LiOsO6
	Fm-3m
LiNdTi4O12
	P4-mmm
Na5Zr2Ta2Si5PO24
	P1
Li3Sb17S27
	P1
Li2MnP2HO8
	P2_1

NotADirectoryError: [Errno 20] Not a directory: '../MP/Loaded_EMP/Na16(WO3)19/.DS_Store/POSCAR'

# Generation of diffusing and denoising Markov chains

In [4]:
# In GM-library

# Generation of Graph Neural Network models

In [5]:
# In GM-library

# Definition of train-test datasets

In [6]:
# torch.manual_seed(12345)

# Define the sizes of the train and test sets
train_size = int(0.8 * len(dataset))
test_size  = len(dataset) - train_size

# Use random_split() to generate train and test sets
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of testing  graphs: {len(test_dataset)}')

Number of training graphs: 785
Number of testing  graphs: 197


In [7]:
train_loader = DataLoader(train_dataset, batch_size=batch_size,        shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=len(test_dataset), shuffle=True)

# Training of the model

In [63]:
# Determine number of features in dataset
n_features = dataset[0].num_node_features

# Instantiate the models for nodes and edges
node_model = GML.nGCNN(n_features, dropout_node).to(device)
edge_model = GML.eGCNN(n_features, dropout_edge).to(device)
print('\nNode GCNN:')
print(node_model)
print('\nEdge GCNN:')
print(edge_model)


Node GCNN:
nGCNN(
  (conv1): GraphConv(4, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 4)
)

Edge GCNN:
eGCNN(
  (linear1): Linear(in_features=4, out_features=32, bias=True)
  (linear2): Linear(in_features=32, out_features=1, bias=True)
)


In [62]:
import importlib
importlib.reload(GML)

<module 'GM_library' from '/Users/cibran/Work/UPC/GenerativeModels/GM_library.py'>

In [None]:
node_optimizer = torch.optim.Adam(node_model.parameters(), lr=learning_rate)
edge_optimizer = torch.optim.Adam(edge_model.parameters(), lr=learning_rate)
node_criterion = nn.MSELoss()
edge_criterion = nn.MSELoss()

# Training loop
for epoch in range(n_epochs):
    # Training
    
    
    train_loss = 0
    for graph in train_dataset:
        node_optimizer.zero_grad()
        edge_optimizer.zero_grad()
        
        # Diffuse the graph with some noise
        print('Diffusing')
        diffused_graph = GML.diffuse(graph, n_diffusing_steps)
        
        # Denoise the diffused graph
        denoised_graph = diffused_graph.clone()
        for t in range(n_denoising_steps):
            # Perform a single forward pass for predicting node features
            out_x = node_model(diffused_graph.x, 
                               diffused_graph.edge_index,
                               diffused_graph.edge_attr)
            
            # Define x_i and x_j as features of every corresponding pair of nodes (same order than attributes)
            x_i = diffused_graph.x[diffused_graph.edge_index[0]]
            x_j = diffused_graph.x[diffused_graph.edge_index[1]]
            
            # Perform a single forward pass for predicting edge attributes
            out_attr = edge_model(x_i, x_j)

            # Construct noise graph
            noise_graph = Data(x=out_x, edge_index=diffused_graph.edge_index, edge_attr=out_attr)

            # Denoise the graph with the predicted noise
            print('Denoising')
            denoised_graph = GML.denoising_step(denoised_graph, noise_graph, t, n_denoising_steps)
        
        print('Backpropagating')
        # Calculate the loss for node features
        loss_node = node_criterion(graph.x, denoised_graph.x)

        # Calculate the loss for edge attributes
        loss_edge = edge_criterion(graph.edge_attr, denoised_graph.edge_attr)
        
        
        ### I would independtly check node and edge losses
        
        
        # Accumulate the total training loss
        loss = loss_node + loss_edge
        train_loss = loss.item()

        # Backpropagation and optimization step
        loss.backward()
        node_optimizer.step()
        edge_optimizer.step()
    
    # Compute the average train loss
    train_loss = train_loss / len(train_loader)
    
    print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Diffusing
Denoising
Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372], y=[1, 1]) Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 1])
Denoising
Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 8372], y=[1, 1]) Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 1])
Denoising
Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 8372], y=[1, 1]) Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 1])
Denoising
Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 8372], y=[1, 1]) Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 1])
Denoising
Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 8372], y=[1, 1]) Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 1])
Denoising
Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 8372], y=[1, 1]) Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 1])
Denoising
Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 8372], y=[1, 1]) Data(x=[92, 4], edge_index=[2, 8372], edge_attr=[8372, 1])
Denoising

  return F.mse_loss(input, target, reduction=self.reduction)


Diffusing
Denoising
Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756], y=[1, 1]) Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 1])
Denoising
Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 21756], y=[1, 1]) Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 1])
Denoising
Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 21756], y=[1, 1]) Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 1])
Denoising
Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 21756], y=[1, 1]) Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 1])
Denoising
Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 21756], y=[1, 1]) Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 1])
Denoising
Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 21756], y=[1, 1]) Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 1])
Denoising
Data(x=[148, 4], edge_index=[2, 21756], edge_attr=[21756, 21756], y=[1, 1]) Data(x=[148, 4], edge_i

  return F.mse_loss(input, target, reduction=self.reduction)
