# Compute Decomposition Energies

The [GNoME](https://www.nature.com/articles/s41586-023-06735-9) dataset presents hundreds of thousands of stable crystals that are novel relative to prior datasets. While GNoME has updated the convex hulls of many chemical systems of interest, further research will likely continue to find low-energy structures and may update these convex hulls again.

In this colab, we provide examples for computing the decomposition energy of a new structure compared to the entire GNoME dataset. This strategy can be used to check if a new structure is stable or (if not) compute the distance to the convex hull.


# Import Libraries

In [None]:
!pip install pymatgen

In [None]:
import itertools
import json
import os
import subprocess

import pandas as pd
import pymatgen as mg
from pymatgen.analysis import phase_diagram
from pymatgen.core import Composition
from pymatgen.entries.computed_entries import ComputedEntry

## Download the Dataset

In [None]:
# Base URL and bucket name that are combined into the download links.
PUBLIC_LINK = "https://storage.googleapis.com/"
BUCKET_NAME = "gdm_materials_discovery"

# Bucket folder and file names for the GNoME summary data.
FOLDER_NAME = "gnome_data"
FILES = ("stable_materials_summary.csv",)

# Bucket folder and file names for the external (reference) summary data.
EXTERNAL_FOLDER_NAME = "external_data"
EXTERNAL_FILES = ("external_materials_summary.csv",)

def download_from_link(link: str, output_dir: str):
  """Download a file from a public link using wget.

  Args:
    link: URL of the file to fetch.
    output_dir: Directory handed to wget's `-P` option.

  Raises:
    FileNotFoundError: If the `wget` executable cannot be found.

  The exit status of wget itself is not checked.
  """
  # Pass an argument list instead of a shell string so that characters such as
  # `&`, `?`, `;` or spaces in the URL or directory are never interpreted by a
  # shell.
  subprocess.run(["wget", link, "-P", output_dir], check=False)

# Build URLs with "/" explicitly: os.path.join uses the platform's filesystem
# separator, which is not guaranteed to be "/" (e.g. on Windows).
parent_directory = f"{PUBLIC_LINK.rstrip('/')}/{BUCKET_NAME}"

# Download every listed file from each bucket folder into the working directory.
for folder_name, filenames in (
    (FOLDER_NAME, FILES),
    (EXTERNAL_FOLDER_NAME, EXTERNAL_FILES),
):
  for filename in filenames:
    public_link = "/".join((parent_directory, folder_name, filename))
    download_from_link(public_link, '.')

## Preprocess the GNoME Dataset



In [None]:
# Load the GNoME summary table; the first CSV column is used as the index.
gnome_crystals = pd.read_csv('stable_materials_summary.csv', index_col=0)
# A bare expression on the last line of a cell is displayed as its output.
gnome_crystals

In [None]:
# This set contains all other entries on the convex hull that are not included
# in the definition of GNoME structures as they have a matching composition in
# Materials Project / OQMD.
# Note: unlike the GNoME table above, this file is read without `index_col`.
reference_crystals = pd.read_csv('external_materials_summary.csv')
reference_crystals

In [None]:
def annotate_chemical_system(crystals: pd.DataFrame) -> pd.DataFrame:
  """Add a 'Chemical System' column derived from the 'Elements' column.

  Each 'Elements' value is a string holding a single-quoted list of element
  symbols. It is parsed and stored as a sorted tuple. The frame is modified in
  place and is also returned.
  """

  def _to_sorted_tuple(elements_repr):
    # Swapping single for double quotes makes the list valid JSON, which
    # avoids having to use python eval.
    symbols = json.loads(elements_repr.replace("'", '"'))
    # A sorted tuple is a canonical, hashable key for later lookups.
    return tuple(sorted(symbols))

  crystals['Chemical System'] = [
      _to_sorted_tuple(raw) for raw in crystals['Elements']
  ]
  return crystals

In [None]:
# Collect list of all convex hull entries
gnome_crystals = annotate_chemical_system(gnome_crystals)
reference_crystals = annotate_chemical_system(reference_crystals)
# ignore_index=True gives the combined table a fresh 0..N-1 index, so row
# labels are unique and coincide with row positions.
all_crystals = pd.concat([gnome_crystals, reference_crystals], ignore_index=True)

In [None]:
# Keep only the columns selected for building phase-diagram entries.
required_columns = ['Composition', 'NSites', 'Corrected Energy', 'Formation Energy Per Atom', 'Chemical System']
minimal_entries = all_crystals[required_columns]
# Group rows by their sorted element tuple so a chemical system can be looked
# up directly by key.
grouped_entries = minimal_entries.groupby('Chemical System')

## Choose a Structure

In [None]:
# @title Provide Entry Details
# @markdown To compute the decomposition energy of a provided structure, please
# @markdown fill out the composition and Corrected Energy in the form below.
# @markdown If no data is provided a random structure will be chosen.

composition = '' # @param {type:"string"}
energy = 0.0 # @param {type:"number"}

if not composition:
  # Nothing was entered in the form: fall back to one random GNoME crystal.
  print("No composition provided. Choosing a random crystal.")
  sample = gnome_crystals.sample()
  sample_entry = ComputedEntry(
      composition=sample['Composition'].item(),
      energy=sample['Corrected Energy'].item(),
  )
  chemsys = sample['Chemical System'].item()
else:
  # Use `pymatgen.core.Composition`: current pymatgen releases no longer
  # expose `Composition` on the root `pymatgen` module.
  composition = Composition(composition)
  sample_entry = ComputedEntry(
      composition=composition,
      energy=energy,
  )
  # Element symbols as strings, matching the keys used for chemical systems.
  chemsys = [str(el) for el in composition.elements]

## Gather Entries from the Chemical System

Computing the decomposition energy requires computing the convex hull of the associated chemical system. To do so, we gather all other crystals in the given
chemical system (and its subsystems) from the GNoME dataset, together with the previously known entries on the convex hull.

In [None]:
# Gather other entries on the convex hull

def gather_convex_hull(chemsys):
  """Collect phase-diagram entries for a chemical system and its subsystems.

  Every non-empty subset of `chemsys` is looked up in the module-level
  `grouped_entries` (keyed by sorted element tuples), and the matching rows of
  `minimal_entries` are converted to `ComputedEntry` objects.

  Args:
    chemsys: Element symbols of the chemical system, e.g. ('Li', 'O').

  Returns:
    A list of `ComputedEntry`: one per matching dataset row (built from its
    'Composition' and 'Corrected Energy'), followed by one zero-energy entry
    per element of `chemsys`. If no dataset row matches, only the elemental
    entries are returned.
  """
  elements = tuple(chemsys)
  groups = grouped_entries.groups

  # Start at length 1: the empty combination can never match a group.
  matching_frames = []
  for length in range(1, len(elements) + 1):
    for subsystem in itertools.combinations(elements, length):
      subsystem_key = tuple(sorted(subsystem))
      row_labels = groups.get(subsystem_key)
      if row_labels is not None and len(row_labels):
        # `groups` maps each key to index *labels*, so select with `.loc`;
        # positional `.iloc` is only right when labels equal positions.
        matching_frames.append(minimal_entries.loc[row_labels])

  # Convert to ComputedEntry objects for use with the phase_diagram tooling.
  # Iterating frame by frame avoids `pd.concat`, which raises ValueError on an
  # empty list (i.e. when the system has no entries in the dataset).
  mg_entries = []
  for frame in matching_frames:
    for composition, corrected_energy in zip(
        frame['Composition'], frame['Corrected Energy']
    ):
      mg_entries.append(ComputedEntry(composition, corrected_energy))

  # Add a zero-energy entry for every element.
  # NOTE(review): these are mixed with entries built from 'Corrected Energy';
  # presumably they only guarantee that each element has a terminal entry.
  # Confirm this against the dataset's energy convention.
  for element in elements:
    mg_entries.append(ComputedEntry(element, 0.0))

  return mg_entries

In [None]:
# Gather the entries for the chosen chemical system and its subsystems.
mg_entries = gather_convex_hull(chemsys)

## Compute Phase Diagram

In [None]:
# Compute the convex hull for the phase diagram from the gathered entries
diagram = phase_diagram.PhaseDiagram(mg_entries)

## Compute Decomposition Energies

In [None]:
# View the currently sampled entry (a bare expression on the last line of a
# cell is displayed as its output)
sample_entry

In [None]:
# Query the phase diagram for the entry's decomposition and its energy
# relative to the hull.
# NOTE(review): allow_negative=True presumably permits a negative value when
# the entry lies below the current hull; confirm against the pymatgen docs.
decomposition, decomposition_energy = diagram.get_decomp_and_e_above_hull(sample_entry, allow_negative=True)

In [None]:
# For a sample from GNoME, this number is likely to be <1e-3 as this was the
# threshold set for the data release.
# NOTE(review): presumably reported in eV/atom; confirm against the pymatgen
# documentation.
print(f"Decomposition Energy: {decomposition_energy}.")

In [None]:
# Show the decomposition that was returned alongside the energy.
print(f"Decomposition: {decomposition}")

# Run All Cells at Once

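The following cell combines the per-step logic above (choosing an entry, gathering the entries of its chemical system, building the phase diagram, and computing the decomposition energy), so it can be re-run for different inputs without stepping through the individual cells again. The import, download, and preprocessing cells, as well as the cell defining `gather_convex_hull`, must have been run first.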

In [None]:
# @title Provide Entry Details
# @markdown To compute the decomposition energy of a provided structure, please
# @markdown fill out the composition and Corrected Energy in the form below.
# @markdown A composition is required; this cell does not fall back to a random structure.

composition = '' # @param {type:"string"}
energy = 0.0 # @param {type:"number"}

# Validate explicitly instead of with `assert`, which is stripped when Python
# runs with -O.
if not composition:
  raise ValueError("Please provide a entry details in the form.")

# Use `pymatgen.core.Composition`: current pymatgen releases no longer expose
# `Composition` on the root `pymatgen` module.
composition = Composition(composition)
sample_entry = ComputedEntry(
    composition=composition,
    energy=energy,
)

# Gather the entries of the chemical system and build its phase diagram.
chemsys = [str(el) for el in composition.elements]
mg_entries = gather_convex_hull(chemsys)
diagram = phase_diagram.PhaseDiagram(mg_entries)

# NOTE(review): allow_negative=True presumably permits a negative value when
# the entry lies below the current hull; confirm against the pymatgen docs.
decomposition, decomposition_energy = diagram.get_decomp_and_e_above_hull(sample_entry, allow_negative=True)
print(f"Decomposition Energy: {decomposition_energy}.")
print(f"Decomposition: {decomposition}")