In [115]:
import torch.nn   as nn
import GM_library as GML
import numpy      as np
import torch

from os                   import path, listdir
from torch_geometric.data import Data

import sys
sys.path.append('../')
import MP.MP_library as MPL

# Run on the GPU whenever CUDA is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [116]:
# --- Sampling ---------------------------------------------------------------
# How many graphs are predicted, and how many diffusing/denoising steps are used
N_predictions, n_t_steps = 10, 2

# --- Noise schedule ---------------------------------------------------------
# Decay of parameter alpha, derived from the noise contribution as (1 - noise^2) / 2
noise_contribution = 0.15
alpha_decay = (1 - noise_contribution**2) / 2

# --- Models -----------------------------------------------------------------
# Node and edge models have their own dropout; both happen to share the same value
dropout_node = dropout_edge = 0.2

# --- Geometry ---------------------------------------------------------------
# Shape of the box, one length per direction
L = [20] * 3

# --- Data and model locations -----------------------------------------------
# Target for which new crystals are generated
target = 'GM_EPA'

# Only needed when the database is created from scratch (unused otherwise)
DB_path = '../MP/Loaded_EMP'

# Trained models are stored in one folder per target
input_folder    = 'models'
target_folder   = f'{input_folder}/{target}'
node_model_name = f'{target_folder}/node_model.pt'
edge_model_name = f'{target_folder}/edge_model.pt'

In [117]:
# Number of denoising steps. It is kept separate from `n_t_steps` because the
# number of diffusing and denoising steps can be different.
#
# Every other setting (N_predictions, dropouts, box shape L, target, DB_path and
# the model paths) is defined once in the configuration cell above. This cell
# used to re-assign all of them with identical values, which meant that editing
# a value there was silently overwritten here.
n_denoising_steps = 2

# Generation of graph database for training

Load the datasets, already standardized if possible.

In [118]:
# Files kept in the folder of the selected target
labels_name         = f'{target_folder}/labels.pt'
dataset_name        = f'{target_folder}/dataset.pt'
dataset_name_std    = f'{target_folder}/standardized_dataset.pt'
parameters_name_std = f'{target_folder}/standardized_parameters.pt'  # Parameters for rescaling the predictions

# Read the standardized graphs, then the parameters that go with them
dataset    = torch.load(dataset_name_std)
parameters = torch.load(parameters_name_std)

# Give a name to each of the seven stored parameters.
# NOTE(review): the means are unpacked as (target, feat, edge) but the stds as
# (target, edge, feat) -- confirm this matches the order used when they were saved
(target_mean, feat_mean, edge_mean,
 target_std, edge_std, feat_std,
 scale) = parameters

# Target factor: standard deviation of the target divided by the scale
target_factor = target_std / scale

In [119]:
# Number of nodes of every graph in the dataset
total_nodes = torch.tensor([g.num_nodes for g in dataset])

# Cast once to float, then take the mean and standard deviation of the counts
nodes_float = total_nodes.float()
mean_nodes  = nodes_float.mean().item()
std_nodes   = nodes_float.std().item()

mean_nodes, std_nodes

(620.1802978515625, 163.6297149658203)

# Loading the model

In [120]:
# Number of features seen by the models: the node features stored in the
# dataset plus one extra feature carrying the t_step information
n_features = dataset[0].num_node_features + 1

# Instantiate the models for nodes and edges and restore their trained weights.
# map_location=device remaps the stored tensors onto the selected device, so
# that weights saved on a GPU can still be loaded on a CPU-only machine
node_model = GML.nGCNN(n_features, dropout_node).to(device)
node_model.load_state_dict(torch.load(node_model_name, map_location=device))
node_model.eval()  # Evaluation mode: the models are only used for prediction

edge_model = GML.eGCNN(n_features, dropout_edge).to(device)
edge_model.load_state_dict(torch.load(edge_model_name, map_location=device))
edge_model.eval()

# Show both architectures
print('\nNode GCNN:')
print(node_model)
print('\nEdge GCNN:')
print(edge_model)


Node GCNN:
nGCNN(
  (conv1): GraphConv(5, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 5)
)

Edge GCNN:
eGCNN(
  (linear1): Linear(in_features=6, out_features=32, bias=True)
  (linear2): Linear(in_features=32, out_features=1, bias=True)
)


# Generating new crystals

In [121]:
# Predicting loop: every iteration generates one new graph
predicted_dataset = []
with torch.no_grad():  # Gradients are not needed when generating
    for i in range(N_predictions):
        # Get random number of nodes, following the statistics of the dataset.
        # A normal distribution can return zero or negative values, which are
        # not valid node counts, so the draw is repeated until it is positive
        n_nodes = 0
        while n_nodes < 1:
            n_nodes = int(np.random.normal(mean_nodes, std_nodes))

        # Start from a random graph. n_features includes the extra t_step
        # feature, hence n_features-1 node features are requested
        diffused_graph = GML.get_random_graph(n_nodes, n_features-1)  # CHECK THIS FUNCTION

        # Denoise the diffused graph (the second returned value is not used)
        #print(f'Denoising...')
        denoised_graph, _ = GML.denoise(diffused_graph, n_t_steps, node_model, edge_model,
                                        s=alpha_decay)

        # Append generated graph
        predicted_dataset.append(denoised_graph)

# Revert standardization
denoised_graphs = GML.revert_standardize_dataset(predicted_dataset, parameters)
denoised_graphs

[Data(x=[452, 4], edge_index=[2, 101926], edge_attr=[101926]),
 Data(x=[419, 4], edge_index=[2, 87571], edge_attr=[87571]),
 Data(x=[723, 4], edge_index=[2, 261003], edge_attr=[261003]),
 Data(x=[854, 4], edge_index=[2, 364231], edge_attr=[364231]),
 Data(x=[642, 4], edge_index=[2, 205761], edge_attr=[205761]),
 Data(x=[551, 4], edge_index=[2, 151525], edge_attr=[151525]),
 Data(x=[675, 4], edge_index=[2, 227475], edge_attr=[227475]),
 Data(x=[810, 4], edge_index=[2, 327645], edge_attr=[327645]),
 Data(x=[742, 4], edge_index=[2, 274911], edge_attr=[274911]),
 Data(x=[368, 4], edge_index=[2, 67528], edge_attr=[67528])]

In [123]:
# Write one POSCAR file (POSCAR-0, POSCAR-1, ...) per generated graph.
# Iterating over the list itself keeps the loop valid even if N_predictions
# was edited after the graphs were generated
for i, graph in enumerate(denoised_graphs):
    try:
        GML.POSCAR_graph_encoding(graph, L, POSCAR_name=f'POSCAR-{i}', POSCAR_directory='./')
    except SystemExit as error:
        # Presumably the encoder aborts through sys.exit for graphs it cannot
        # encode -- TODO confirm. Those graphs are still skipped, but reported
        # instead of being dropped silently
        print(f'Skipping graph {i}: POSCAR encoding aborted ({error})')
        continue

In [78]:
# Box used for the encoding, and folder holding the reference POSCAR.
# NOTE(review): this rebinds the global L defined in the configuration cell
L = [40, 40, 40]
path_to_POSCAR = '.'

# Read cell, composition, concentration and positions from the POSCAR file
cell, composition, concentration, positions = MPL.information_from_VASPfile(
    path_to_POSCAR, 'POSCAR'
)

# Encode the structure as a graph for the given box
(nodes, edges, attributes,
 all_nodes, all_positions, all_species) = GML.graph_POSCAR_encoding(
    cell, composition, concentration, positions, L
)

# Ground state energy per atom, read from the EPA file in the same folder
gs_energy = float(np.loadtxt(f'{path_to_POSCAR}/EPA'))

# Assemble the graph, storing the energy as a float target of shape (1, 1)
graph = Data(
    x=nodes,
    edge_index=edges,
    edge_attr=attributes,
    y=torch.tensor([[gs_energy]], dtype=torch.float),
)
print(graph)

# NOTE(review): append modifies the loaded dataset in place, so every re-run
# of this cell adds the graph once more
dataset.append(graph)

Data(x=[2413, 4], edge_index=[2, 2910078], edge_attr=[2910078], y=[1, 1])


In [85]:
# Recover composition and concentration from the list of species, together with
# the positions in the matching order (presumably sorted by species -- confirm)
positions_array = np.array(all_positions)
composition, concentration, positions_sorted = GML.composition_concentration_from_keys(
    all_species, positions_array
)

# Save the sorted positions as plain text
np.savetxt('CONTCAR', positions_sorted)

composition, concentration

(['Ba', 'Na', 'Si'], [345, 715, 1353])

In [98]:
# Round trip on a copy of the graph: standardize it and revert the standardization.
# NOTE(review): this rebinds the global `parameters` loaded with the dataset
graph_0 = graph.clone()
standardized = GML.standardize_dataset([graph_0])
graph_std, parameters = standardized
graph_dstd = GML.revert_standardize_dataset(graph_std, parameters)[0]

# Node and edge losses between the copy and the recovered graph
node_loss, edge_loss = GML.get_graph_losses(graph_0, graph_dstd)
node_loss, edge_loss

(tensor(0.), tensor(8.4791e-14))

In [114]:
# Development helper: reload GM_library so that edits made to the module are
# picked up without restarting the kernel
import importlib
importlib.reload(GML)

<module 'GM_library' from '/Users/cibran/Work/UPC/GenerativeModels/GM_library.py'>

In [113]:
# Encode the reference graph as a POSCAR file in the current directory.
# POSCAR_name=None presumably leaves the file name to the library -- confirm
GML.POSCAR_graph_encoding(
    graph,
    L,
    POSCAR_directory='./',
    POSCAR_name=None,
)

  # Calculate the area using Heron's formula


0 33 1492 40.763695 55.582813 51.303658 25.99203380784781 49.131082918601486


<_io.TextIOWrapper name='.//POSCAR' mode='w' encoding='UTF-8'>