In [2]:
import matplotlib.pyplot as plt
import numpy             as np
import pandas            as pd
import seaborn           as sns
import os
import torch
import json

from libraries.model   import Helmholtz_free_energy_function, make_predictions, GCNN, compute_coefficients
from libraries.dataset import load_atomic_masses, include_temperatures, create_predictions_dataset, standardize_dataset_from_keys

# Run on the GPU when CUDA is available; otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device      = torch.device(device_name)

# Apply seaborn's default theme to every figure drawn below
sns.set_theme()

# Compute phase transitions

In [3]:
# Folder holding the trained model, the (standardized) datasets and the split files.
# It can be overridden through the FV_MODEL_FOLDER environment variable so the notebook
# also runs on machines where the default absolute path does not exist.
target_folder = os.environ.get('FV_MODEL_FOLDER',
                               '/home/claudio/cibran/Work/UPC/MP/models/Fv-accurate-fulldata')

In [4]:
# Paths to the raw and standardized datasets, their labels and the standardization parameters
labels_name                 = f'{target_folder}/labels.pt'
dataset_name                = f'{target_folder}/dataset.pt'
dataset_name_std            = f'{target_folder}/standardized_dataset.pt'
labels_name_std             = f'{target_folder}/standardized_labels.pt'
dataset_parameters_name_std = f'{target_folder}/standardized_parameters.json'  # Parameters for rescaling the predictions

# Load the standardized dataset and its labels.
# NOTE: weights_only=False unpickles arbitrary Python objects, so only load trusted files.
dataset = torch.load(dataset_name_std, weights_only=False)
labels  = torch.load(labels_name_std,  weights_only=False)

# Attach to every graph the label stored at the same position
for i, data in enumerate(dataset):
    data.label = labels[i]

# Load the standardization parameters from the JSON file
with open(dataset_parameters_name_std, 'r') as json_file:
    numpy_dict = json.load(json_file)

# Convert the JSON values to PyTorch tensors where possible; values that torch.tensor
# cannot represent (e.g. strings) are kept as loaded. Only conversion errors are caught,
# so that KeyboardInterrupt and unrelated failures are not silently swallowed.
dataset_parameters = {}
for key, value in numpy_dict.items():
    try:
        dataset_parameters[key] = torch.tensor(value)
    except (TypeError, ValueError, RuntimeError):
        dataset_parameters[key] = value

# Defining target factor (ratio between the target standard deviation and the scale)
target_factor = dataset_parameters['target_std'] / dataset_parameters['scale']
target_factor = target_factor.to(device)

In [5]:
# Extend the module search path with '../../UPC' before importing get_datasets
# (presumably the folder that contains the GenerativeModels package; verify locally).
# NOTE: the path is relative, so it is resolved against the kernel's working directory.
import sys
sys.path.append('../../UPC')
from GenerativeModels.libraries.dataset import get_datasets

In [6]:
# Load the existing train/validation/test split from the label files in target_folder.
# NOTE: the split files must already exist; no random split is generated here.
path_to_train_labels = f'{target_folder}/train_labels.txt'
path_to_val_labels   = f'{target_folder}/validation_labels.txt'
path_to_test_labels  = f'{target_folder}/test_labels.txt'

# Copy labels
material_labels = labels.copy()


def _read_split_labels(path):
    """Read the labels (strings) stored in a split file and return them as a list.

    np.atleast_1d guarantees a list even when the file holds a single label, in which
    case np.genfromtxt returns a 0-d array whose .tolist() would be a bare string.
    """
    return np.atleast_1d(np.genfromtxt(path, dtype='str')).tolist()


# Read labels splitting (which are strings)
train_labels = _read_split_labels(path_to_train_labels)
val_labels   = _read_split_labels(path_to_val_labels)
test_labels  = _read_split_labels(path_to_test_labels)

# Build each split from the full dataset using the labels read above
# (presumably get_datasets selects the graphs whose material label is in the split; verify in its source)
train_dataset = get_datasets(train_labels, material_labels, dataset)
val_dataset   = get_datasets(val_labels,   material_labels, dataset)
test_dataset  = get_datasets(test_labels,  material_labels, dataset)

#del dataset  # Free up CUDA memory

print(f'Number of training   graphs: {len(train_dataset)}')
print(f'Number of validation graphs: {len(val_dataset)}')
print(f'Number of testing    graphs: {len(test_dataset)}')

Number of training   graphs: 58729
Number of validation graphs: 7348
Number of testing    graphs: 7337


In [7]:
# Load the standardization parameters from the JSON file
with open(f'{target_folder}/standardized_parameters.json', 'r') as json_file:
    numpy_dict = json.load(json_file)

# Convert the JSON values to PyTorch tensors where possible; values that torch.tensor
# cannot represent (e.g. strings) are kept as loaded. Only conversion errors are caught,
# so that KeyboardInterrupt and unrelated failures are not silently swallowed.
standardized_parameters = {}
for key, value in numpy_dict.items():
    try:
        standardized_parameters[key] = torch.tensor(value)
    except (TypeError, ValueError, RuntimeError):
        standardized_parameters[key] = value

# Load Graph Neural Network model (making room for temperature as node attribute) to device.
# The number of input channels is read from the first training graph.
# Dropout for initializing the model, not used at all while predicting
model = GCNN(features_channels=train_dataset[0].num_node_features,
             pdropout=0).to(device)

# Load the trained weights onto the selected device (device is already a torch.device)
# and switch the model to evaluation mode
model.load_state_dict(torch.load(f'{target_folder}/model.pt', map_location=device))
model.eval()

GCNN(
  (conv1): GraphConv(5, 512)
  (conv2): GraphConv(512, 512)
  (linconv1): Linear(in_features=512, out_features=64, bias=True)
  (linconv2): Linear(in_features=64, out_features=16, bias=True)
  (lin): Linear(in_features=16, out_features=1, bias=True)
)

In [15]:
# Sanity check: is the last feature of the first node of the first test graph negative?
# NOTE(review): presumably this last feature is the standardized temperature; confirm
test_dataset[0].x[0][-1] < 0

tensor(True)

In [11]:
# Pull out the three parameters used to map standardized targets back to their original scale
target_mean, scale, target_std = (
    standardized_parameters[name] for name in ('target_mean', 'scale', 'target_std')
)

In [26]:
# Both sets keep only the graphs whose first node has a last feature value below 10.
# NOTE(review): presumably that feature is the temperature in standardized units; confirm

# Reference set: filtered test graphs (use `reference_dataset = test_dataset` for the full set)
reference_dataset = [graph for graph in test_dataset if graph.x[0][-1] < 10]

# Prediction set: filtered training graphs (use `prediction_dataset = train_dataset` for the full set)
prediction_dataset = [graph for graph in train_dataset if graph.x[0][-1] < 10]

In [None]:
# Compute predictions and corresponding uncertainties.
# make_predictions receives the reference set, the set to predict on, the trained model
# and the standardization parameters; it returns one prediction and one uncertainty array-like.
# NOTE(review): presumably the uncertainties are estimated relative to reference_dataset; confirm in libraries.model
predictions, uncertainties = make_predictions(reference_dataset, prediction_dataset, model, standardized_parameters)

In [None]:
# Undo the standardization of every target in the prediction set:
# value = y * target_std / scale + target_mean (only the first entry of y is used)
ground_truths = [
    graph.y.cpu().numpy()[0] * target_std / scale + target_mean
    for graph in prediction_dataset
]

In [None]:
# Parity plot: predicted vs. computed values, with one vertical uncertainty bar per point.
# The plotted points belong to prediction_dataset, which is built from the training set,
# hence the legend label.
plt.plot(ground_truths, predictions, 'og', label='Training set')  # Predictions

# Lower and upper ends of the uncertainty bars, flattened to plain 1-D arrays
x_values = np.asarray(ground_truths).ravel()
lower    = np.asarray(predictions - uncertainties).ravel()
upper    = np.asarray(predictions + uncertainties).ravel()

# Draw all bars with a single vectorized call; one plt.plot call per point creates
# one artist per sample, which becomes extremely slow for large datasets
plt.vlines(x_values, lower, upper, colors='g', linestyles='dashed')

# Identity line spanning the full range of the data (bars included)
_min_ = np.min([np.min(x_values), np.min(lower)])
_max_ = np.max([np.max(x_values), np.max(upper)])
plt.plot([_min_, _max_], [_min_, _max_], '-r')  # Identity line
plt.xlabel('Computed')
plt.ylabel('Predicted')
plt.legend(loc='best')
plt.savefig('UQ-prediction-comparison.pdf', dpi=50, bbox_inches='tight')
plt.show()

In [None]:
# Calibration plot: actual absolute error of each prediction (x axis) against the
# uncertainty estimated for it (y axis). Points on the identity line are perfectly calibrated.
diff_real = np.abs(predictions - ground_truths)
diff_predictions = uncertainties

# semi-transparent points; they belong to prediction_dataset, which is built from the
# training set, hence the legend label
plt.plot(diff_real, diff_predictions, 'o', markeredgecolor='none',
         alpha=0.1, label='Training set')

# identity line
_min_ = np.min([np.min(diff_real), np.min(diff_predictions)])
_max_ = np.max([np.max(diff_real), np.max(diff_predictions)])
plt.plot([_min_, _max_], [_min_, _max_], '-r', label='Ideal calibration')

plt.xlabel('Computed uncertainty')
plt.ylabel('Predicted uncertainty')
plt.legend(loc='best')
plt.savefig('UQ-diff-comparison.pdf', dpi=150, bbox_inches='tight')
plt.show()
