In [1]:
import torch.nn   as nn
import GM_library as GML
import numpy      as np
import torch
import json

from os                   import path, listdir
from torch_geometric.data import Data

import sys
sys.path.append('../')
import MP.MP_library as MPL

# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
# Target used to generate new crystals; it also names the sub-folder of the model files
target = 'GM_EPA'

# Folder layout: <input_folder>/<target>/{node_model.pt, edge_model.pt}
input_folder    = 'models'
target_folder   = f'{input_folder}/{target}'
node_model_name = f'{target_folder}/node_model.pt'
edge_model_name = f'{target_folder}/edge_model.pt'

# Location of the database; only relevant when it is created from scratch
# (otherwise, it is not being used)
DB_path = '../MP/Loaded_EMP'

In [3]:
# Number of graphs to predict
N_predictions = 10

# Load the parameters stored alongside the model (JSON file -> dictionary)
with open(f'{target_folder}/model_parameters.json', 'r') as json_file:
    model_parameters = json.load(json_file)

# Number of diffusing and denoising steps
n_t_steps = model_parameters['n_t_steps']

# Decay of parameter alpha, derived from the stored noise contribution
noise_contribution = model_parameters['noise_contribution']
alpha_decay = 0.5 * (1 - noise_contribution**2)

# Dropouts for node and edge models (independent of each other)
dropout_node, dropout_edge = (
    model_parameters[name] for name in ('dropout_node', 'dropout_edge')
)

# Define box shape
L = model_parameters['L']

# Generation of graph database for training

Load the datasets, already standardized if possible.

In [4]:
# Paths of the serialized files stored in the target folder.
# Only the two standardized files are read in this cell; labels_name and
# dataset_name are defined here but not loaded.
labels_name         = f'{target_folder}/labels.pt'
dataset_name        = f'{target_folder}/dataset.pt'
dataset_name_std    = f'{target_folder}/standardized_dataset.pt'
parameters_name_std = f'{target_folder}/standardized_parameters.pt'  # Parameters for rescaling the predictions

# Load the standardized dataset and the corresponding standardization parameters.
# NOTE(review): torch.load unpickles the files, so only load files from a trusted source.
# NOTE(review): newer PyTorch releases changed the default of torch.load's weights_only
# argument; if loading these objects fails there, confirm whether weights_only=False is needed.
dataset    = torch.load(dataset_name_std)
parameters = torch.load(parameters_name_std)

# Unpack the seven standardization parameters.
# NOTE(review): the means are unpacked as (feat, edge) while the stds are unpacked as
# (edge, feat); confirm this matches the order in which the parameters were saved.
target_mean, feat_mean, edge_mean, target_std, edge_std, feat_std, scale = parameters

# Defining target factor (target standard deviation divided by the scale parameter)
target_factor = target_std / scale

In [5]:
# Number of nodes of every graph in the dataset
total_nodes = torch.tensor([graph.num_nodes for graph in dataset])

# Mean and standard deviation of the node counts, as plain Python floats
mean_nodes = total_nodes.float().mean().item()
std_nodes  = total_nodes.float().std().item()

mean_nodes, std_nodes

(554.6327514648438, 179.89730834960938)

# Loading the model

In [6]:
# Determine number of features in dataset, considering the t_step information
# (one extra input feature on top of the node features of the graphs)
n_features = dataset[0].num_node_features + 1

# Instantiate the node model, restore its trained weights and switch to evaluation mode.
# map_location remaps the stored tensors onto the selected device, so a checkpoint
# saved from a GPU run can still be loaded when falling back to the CPU.
node_model = GML.nGCNN(n_features, dropout_node).to(device)
node_model.load_state_dict(torch.load(node_model_name, map_location=device))
node_model.eval()

# Same procedure for the edge model
edge_model = GML.eGCNN(n_features, dropout_edge).to(device)
edge_model.load_state_dict(torch.load(edge_model_name, map_location=device))
edge_model.eval()

# Show both architectures
print('\nNode GCNN:')
print(node_model)
print('\nEdge GCNN:')
print(edge_model)


Node GCNN:
nGCNN(
  (conv1): GraphConv(5, 256)
  (conv2): GraphConv(256, 256)
  (conv3): GraphConv(256, 5)
)

Edge GCNN:
eGCNN(
  (linear1): Linear(in_features=6, out_features=32, bias=True)
  (linear2): Linear(in_features=32, out_features=64, bias=True)
  (linear3): Linear(in_features=64, out_features=1, bias=True)
)


# Generating new crystals

In [7]:
# Predicting loop: generate N_predictions graphs by denoising random graphs
# (inference only, so gradient tracking is disabled)
predicted_dataset = []
with torch.no_grad():
    for i in range(N_predictions):
        # Get random number of nodes from a normal distribution with the dataset's
        # mean and standard deviation. The normal has unbounded support, so a draw
        # can be zero or negative; resample until the graph has at least one node.
        n_nodes = int(np.random.normal(mean_nodes, std_nodes))
        while n_nodes < 1:
            n_nodes = int(np.random.normal(mean_nodes, std_nodes))

        # Start from a random graph (n_features-1: the t_step feature is not part of it)
        diffused_graph = GML.get_random_graph(n_nodes, n_features-1)

        # Denoise the random graph; the second returned value is not used here
        denoised_graph, _ = GML.denoise(diffused_graph, n_t_steps, node_model, edge_model,
                                        s=alpha_decay)

        # Append generated graph
        predicted_dataset.append(denoised_graph)

# Revert standardization
denoised_graphs = GML.revert_standardize_dataset(predicted_dataset, parameters)
denoised_graphs

[Data(x=[463, 4], edge_index=[2, 106953], edge_attr=[106953]),
 Data(x=[318, 4], edge_index=[2, 50403], edge_attr=[50403]),
 Data(x=[481, 4], edge_index=[2, 115440], edge_attr=[115440]),
 Data(x=[305, 4], edge_index=[2, 46360], edge_attr=[46360]),
 Data(x=[557, 4], edge_index=[2, 154846], edge_attr=[154846]),
 Data(x=[607, 4], edge_index=[2, 183921], edge_attr=[183921]),
 Data(x=[484, 4], edge_index=[2, 116886], edge_attr=[116886]),
 Data(x=[632, 4], edge_index=[2, 199396], edge_attr=[199396]),
 Data(x=[623, 4], edge_index=[2, 193753], edge_attr=[193753]),
 Data(x=[1012, 4], edge_index=[2, 511566], edge_attr=[511566])]

In [8]:
# Write every generated graph to a POSCAR file (POSCAR-<index>) in the current directory.
# Iterating over the list itself keeps the loop valid even if N_predictions was
# changed after the graphs were generated.
for i, generated_graph in enumerate(denoised_graphs):
    # Work on a copy (presumably so the encoder cannot alter the stored graph; verify)
    graph = generated_graph.clone()
    try:
        GML.POSCAR_graph_encoding(graph, L, file_name=f'POSCAR-{i}', POSCAR_directory='./')
    except SystemExit as exit_request:
        # The encoder may abort through SystemExit; skip that graph, but report it
        # instead of dropping it silently, and keep processing the remaining ones.
        print(f'Skipping graph {i}: POSCAR encoding aborted ({exit_request.code})')

Invalid graph, atoms overlapping. Applying brute force :)


  area = np.sqrt(s * (s - a) * (s - b) * (s - c))


0 15 32 19.489489 20.746012 29.056068 -0.8727253937743874 20.727647074733103
Invalid graph, atoms overlapping. Applying brute force :)
0 4 40 21.348904 21.022003 29.17965 1.083155583383329 20.99407991369532
Invalid graph, atoms overlapping. Applying brute force :)
0 3 71 20.378431 23.299988 24.351856 8.9593989487687 21.508570422724404
Invalid graph, atoms overlapping. Applying brute force :)
0 6 139 21.148945 23.96177 25.776806 8.440149810273859 22.426106433657413
Invalid graph, atoms overlapping. Applying brute force :)
0 2 344 19.34664 23.736185 24.501827 8.718811515964148 22.07688401041395
Invalid graph, atoms overlapping. Applying brute force :)
0 1 390 18.661749 23.336548 23.669731 8.911253894260138 21.56812508449216
Invalid graph, atoms overlapping. Applying brute force :)
0 9 366 19.882797 23.476297 23.177288 10.292200794778958 21.09993226068741
Invalid graph, atoms overlapping. Applying brute force :)
0 14 33 21.594126 24.692616 21.179533 14.52846289114611 19.96619710153983
Inv