Notebook for generating the samples for the GNN model.
For each dataset, the final output is the Dataset.dat file needed by the GNN model.

In [None]:
from functions import get_tuples, export_tuples, geometry_to_graph_analysis
from paths import paths
from constants import VORONOI_TOLERANCE, CORDERO, FG_RAW_GROUPS, CORDERO_ORIGINAL

# Create graph representations

Here we convert the molecule/adsorption geometries into a graph data structure. The final graphs are stored in a text format that contains all the information needed to generate the graph objects in a suitable PyTorch Geometric format. The conversion to a PyG-suitable format is performed within the HetGraphDataset class.

In [None]:
# Build and export the graph samples for every raw dataset group, then report
# how many of the counted samples were flagged as bad.
bad_samples = 0
tot_samples = 0
print("VORONOI TOLERANCE = {} Angstrom".format(VORONOI_TOLERANCE))
for dataset in FG_RAW_GROUPS:
    my_tuple = get_tuples(dataset, VORONOI_TOLERANCE, CORDERO)
    export_tuples(paths[dataset]['dataset'], my_tuple)
    x = geometry_to_graph_analysis(dataset)
    # Datasets whose name starts with "gas" are left out of the statistics.
    if not dataset.startswith("gas"):
        # NOTE(review): x[0] and x[2] are read as the bad-sample and
        # total-sample counts -- confirm against geometry_to_graph_analysis.
        bad_samples += x[0]
        tot_samples += x[2]
print("Voronoi tolerance: {} Angstrom".format(VORONOI_TOLERANCE))
print("Bad samples: {}".format(bad_samples))
print("Total samples: {}".format(tot_samples))
if tot_samples:
    print("Percentage of bad samples: {:.2f}%".format(bad_samples * 100/tot_samples))
else:
    # Nothing was counted (e.g. only "gas" datasets): avoid dividing by zero.
    print("Percentage of bad samples: n/a (no samples counted)")

## Sensitivity analysis of the bad representations with respect to the Voronoi tolerance parameter

In [None]:
# Sweep the Voronoi tolerance and record, for each value, the number of bad
# samples and the number of counted samples over all non-"gas" datasets.
# NOTE(review): each iteration calls export_tuples on the same
# paths[dataset]['dataset'] target used by the generation cell, so after this
# sweep the exported file presumably reflects the last tolerance in
# voronoi_array and CORDERO_ORIGINAL -- confirm, and re-run the generation
# cell before training if that is the case.
voronoi_array = [0, 0.25, 0.5, 0.75, 1.0]
bad_representations_array = []
tot_representations_array = []
for tolerance in voronoi_array:
    bad_samples = 0
    tot_samples = 0
    for dataset in FG_RAW_GROUPS:
        my_tuple = get_tuples(dataset, tolerance, CORDERO_ORIGINAL)
        export_tuples(paths[dataset]['dataset'], my_tuple)
        x = geometry_to_graph_analysis(dataset)
        # Datasets whose name starts with "gas" are left out of the statistics.
        if not dataset.startswith("gas"):
            # NOTE(review): x[0] and x[2] are read as the bad-sample and
            # total-sample counts -- confirm against geometry_to_graph_analysis.
            bad_samples += x[0]
            tot_samples += x[2]
    # One entry per tolerance value, in the same order as voronoi_array.
    bad_representations_array.append(bad_samples)
    tot_representations_array.append(tot_samples)
    

In [None]:
# Show the per-tolerance counts gathered by the sweep: bad samples first,
# then total samples.
for counts in (bad_representations_array, tot_representations_array):
    print(counts)


In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the number of bad representations against the Voronoi tolerance.
tolerance_axis = voronoi_array
bad_counts_axis = bad_representations_array
plt.plot(tolerance_axis, bad_counts_axis)
plt.ylabel("Bad representations")
plt.xlabel("Voronoi tolerance")
# Kept as the final statement so the cell output is the same as before.
plt.title("Missing M-A connections")