<a href="https://colab.research.google.com/github/Laere11/Laere11/blob/Material-Sciences/Evaluation_Metric_File.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Evaluation Metrics File Creation**

The evaluation metrics file is a critical component in the MatterGen workflow—it acts as a quantitative checkpoint that summarizes how well the generated materials meet the desired criteria. Here’s its significance in the big picture:

Quality Assessment:
It provides key numerical metrics (such as stability, novelty, uniqueness, and RMSD) that help you assess whether the generated structures are chemically plausible and stable.

Performance Benchmarking:
By comparing the metrics (like energy values before and after relaxation), you can determine if the model is effectively generating materials that are closer to thermodynamic equilibrium, which is essential for synthesizability.

Guiding Iterations:
The metrics help in making informed decisions about further refining the model, tuning parameters (e.g., diffusion guidance factor), or selecting candidates for more rigorous validation (e.g., using DFT).

Reporting and Analysis:
The metrics file serves as a standardized output that can be used to generate comprehensive reports, visualize property distributions, and compare model performance against benchmarks.

In essence, the evaluation metrics file bridges the gap between raw model output and actionable insights, ensuring that the generated materials not only look good on paper (or in CIF files) but also hold up to rigorous physical and chemical standards.

The 1st code example creates synthetic data just for demonstration purposes.  The 2nd code example uses mattersim but will need further development on the API calls to compute actual energy values.

In [2]:
# Install required dependency
!pip install pymatgen

import numpy as np
import json
from pymatgen.core import Lattice, Structure

def create_nacl_structure():
    """Creates a simple NaCl structure."""
    lattice = Lattice.cubic(5.64)  # approximate lattice parameter in Å
    structure = Structure(lattice, ["Na", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])
    return structure

def dummy_relaxation(structure, relaxation_factor=0.05):
    """
    Simulate a relaxation process by perturbing fractional coordinates slightly.
    """
    relaxed_structure = structure.copy()
    new_coords = []
    for site in relaxed_structure:
        displacement = np.random.uniform(-relaxation_factor, relaxation_factor, size=3)
        new_coord = (np.array(site.frac_coords) + displacement) % 1.0
        new_coords.append(new_coord)
    for i, site in enumerate(relaxed_structure):
        relaxed_structure.replace(i, site.specie, new_coords[i])
    return relaxed_structure

def compute_rmsd(struct1, struct2):
    """
    Compute the RMSD between the fractional coordinates of two structures.
    """
    coords1 = np.array([site.frac_coords for site in struct1])
    coords2 = np.array([site.frac_coords for site in struct2])
    return np.sqrt(np.mean((coords1 - coords2)**2))

def dummy_energy(structure):
    """
    Compute a dummy energy as the sum of squared fractional coordinates.
    """
    coords = np.array([site.frac_coords for site in structure])
    return np.sum(np.square(coords))

# Create the initial NaCl structure
initial_structure = create_nacl_structure()

# Perform a dummy relaxation
relaxed_structure = dummy_relaxation(initial_structure, relaxation_factor=0.05)
rmsd_value = compute_rmsd(initial_structure, relaxed_structure)

# Compute dummy energies before and after relaxation
energy_initial = dummy_energy(initial_structure)
energy_relaxed = dummy_energy(relaxed_structure)

# Create an evaluation metrics dictionary (similar to MatterSim's output)
metrics = {
    "stable_rate": 1.0 if energy_relaxed < energy_initial else 0.0,
    "rmsd": rmsd_value,
    "energy_initial": energy_initial,
    "energy_relaxed": energy_relaxed,
    "novelty": 1.0,    # Placeholder: in practice, computed by comparing to a reference dataset
    "uniqueness": 1.0  # Placeholder: computed by checking duplicates among generated structures
}

print("Evaluation Metrics (Dummy MatterSim-like):")
print(json.dumps(metrics, indent=2))


Evaluation Metrics (Dummy MatterSim-like):
{
  "stable_rate": 0.0,
  "rmsd": 0.6845152682283773,
  "energy_initial": 0.75,
  "energy_relaxed": 3.553301048390461,
  "novelty": 1.0,
  "uniqueness": 1.0
}


In [1]:
# Install required dependency
!pip install pymatgen

import numpy as np
import json
from pymatgen.core import Lattice, Structure
from IPython.display import display, HTML

# ---------------------------
# Step 1: Create and Relax NaCl Structure
# ---------------------------

def create_nacl_structure():
    """Creates a simple NaCl structure."""
    lattice = Lattice.cubic(5.64)  # approximate lattice parameter in Å
    structure = Structure(lattice, ["Na", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])
    return structure

def dummy_relaxation(structure, relaxation_factor=0.05):
    """
    Simulate a relaxation process by perturbing fractional coordinates slightly.
    """
    relaxed_structure = structure.copy()
    new_coords = []
    for site in relaxed_structure:
        displacement = np.random.uniform(-relaxation_factor, relaxation_factor, size=3)
        new_coord = (np.array(site.frac_coords) + displacement) % 1.0
        new_coords.append(new_coord)
    for i, site in enumerate(relaxed_structure):
        relaxed_structure.replace(i, site.specie, new_coords[i])
    return relaxed_structure

def compute_rmsd(struct1, struct2):
    """
    Compute the RMSD between the fractional coordinates of two structures.
    """
    coords1 = np.array([site.frac_coords for site in struct1])
    coords2 = np.array([site.frac_coords for site in struct2])
    return np.sqrt(np.mean((coords1 - coords2)**2))

def dummy_energy(structure):
    """
    Compute a dummy energy as the sum of squared fractional coordinates.
    """
    coords = np.array([site.frac_coords for site in structure])
    return np.sum(np.square(coords))

# Create the initial NaCl structure and perform dummy relaxation
initial_structure = create_nacl_structure()
relaxed_structure = dummy_relaxation(initial_structure, relaxation_factor=0.05)
rmsd_value = compute_rmsd(initial_structure, relaxed_structure)
energy_initial = dummy_energy(initial_structure)
energy_relaxed = dummy_energy(relaxed_structure)

# ---------------------------
# Step 2: Build the Evaluation Metrics Dictionary
# ---------------------------
metrics = {
    "stable_rate": 1.0 if energy_relaxed < energy_initial else 0.0,
    "rmsd": rmsd_value,
    "energy_initial": energy_initial,
    "energy_relaxed": energy_relaxed,
    "novelty": 1.0,    # Placeholder: typically computed by comparing to a reference database
    "uniqueness": 1.0  # Placeholder: computed by checking duplicates among generated structures
}

# Define interpretation for each metric
metric_definitions = {
    "stable_rate": "Fraction of structures deemed stable (1: stable, 0: unstable).",
    "rmsd": "Root-mean-square deviation between original and relaxed structure; lower values indicate less change.",
    "energy_initial": "Simulated energy of the unrelaxed structure (dummy metric).",
    "energy_relaxed": "Simulated energy after relaxation (lower usually indicates higher stability).",
    "novelty": "Fraction of structures that are novel compared to a reference database (placeholder).",
    "uniqueness": "Fraction of structures that are unique among generated candidates (placeholder)."
}

# ---------------------------
# Step 3: Create and Display an HTML Table for the Metrics
# ---------------------------
table_rows = ""
for label, value in metrics.items():
    # Format value to 4 decimals if it's a float, otherwise keep as is.
    if isinstance(value, float):
        value_str = f"{value:.4f}"
    else:
        value_str = str(value)
    definition = metric_definitions.get(label, "No definition available.")
    table_rows += f"""
      <tr>
        <td style="border: 1px solid #ccc; padding: 8px;">{label}</td>
        <td style="border: 1px solid #ccc; padding: 8px;">{value_str}</td>
        <td style="border: 1px solid #ccc; padding: 8px;">{definition}</td>
      </tr>
    """

html_content = f"""
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>Evaluation Metrics Report</title>
  <style>
    body {{
      font-family: Arial, sans-serif;
      background-color: #f5f5f5;
      color: #333;
      padding: 20px;
    }}
    h1 {{
      text-align: center;
      font-size: 32px;
      font-weight: bold;
      margin-bottom: 20px;
    }}
    table {{
      width: 100%;
      border-collapse: collapse;
      margin: 0 auto;
    }}
    td {{
      border: 1px solid #ccc;
      padding: 8px;
      text-align: left;
    }}
    th {{
      border: 1px solid #ccc;
      padding: 8px;
      background-color: #e0e0e0;
      font-weight: bold;
    }}
    .table-container {{
      background-color: #fff;
      padding: 20px;
      border-radius: 8px;
      box-shadow: 0 0 10px rgba(0,0,0,0.1);
      max-width: 800px;
      margin: auto;
    }}
  </style>
</head>
<body>
  <h1>Evaluation Metrics for Generated NaCl Structure</h1>
  <div class="table-container">
    <table>
      <tr>
        <th>Metric</th>
        <th>Value</th>
        <th>Interpretation</th>
      </tr>
      {table_rows}
    </table>
  </div>
</body>
</html>
"""

display(HTML(html_content))


Collecting pymatgen
  Downloading pymatgen-2025.2.18-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting monty>=2025.1.9 (from pymatgen)
  Downloading monty-2025.3.3-py3-none-any.whl.metadata (3.6 kB)
Collecting palettable>=3.3.3 (from pymatgen)
  Downloading palettable-3.3.3-py2.py3-none-any.whl.metadata (3.3 kB)
Collecting pybtex>=0.24.0 (from pymatgen)
  Downloading pybtex-0.24.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting ruamel.yaml>=0.17.0 (from pymatgen)
  Downloading ruamel.yaml-0.18.10-py3-none-any.whl.metadata (23 kB)
Collecting spglib>=2.5 (from pymatgen)
  Downloading spglib-2.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting uncertainties>=3.1.4 (from pymatgen)
  Downloading uncertainties-3.2.2-py3-none-any.whl.metadata (6.9 kB)
Collecting latexcodec>=1.0.4 (from pybtex>=0.24.0->pymatgen)
  Downloading latexcodec-3.0.0-py3-none-any.whl.metadata (4.9 kB)
Collecting ruamel.yaml.clib>=0.2.7 (fr

Metric,Value,Interpretation
stable_rate,0.0,"Fraction of structures deemed stable (1: stable, 0: unstable)."
rmsd,0.4028,Root-mean-square deviation between original and relaxed structure; lower values indicate less change.
energy_initial,0.75,Simulated energy of the unrelaxed structure (dummy metric).
energy_relaxed,1.6656,Simulated energy after relaxation (lower usually indicates higher stability).
novelty,1.0,Fraction of structures that are novel compared to a reference database (placeholder).
uniqueness,1.0,Fraction of structures that are unique among generated candidates (placeholder).


**Important Note- The table results are for demonstration purposes, not true calculations.**

The values in the evaluation metrics example are entirely synthetic placeholders meant only to illustrate what an evaluation metrics file might contain. In our dummy example, the energy is computed using a simple function (summing squared fractional coordinates) that doesn’t represent real physical energies. Therefore, the “stable_rate” (set as 1.0 if energy_relaxed < energy_initial else 0.0) might not reflect the actual chemical stability of NaCl.

In reality, sodium chloride (NaCl) is a very stable compound, and a proper relaxation using a realistic force field or DFT would yield an energy decrease upon relaxation, resulting in a stable_rate of 1.0. The discrepancy you observed (stable_rate = 0) is due solely to our simplistic dummy function. For real-world chemistry, you’d need to use a validated ML force field (like MatterSim) or DFT calculations to obtain meaningful energy values.

# The code below needs more development to run correctly using the mattersim API.  Once fully developed it will be able to generate realistic energy values.

Note: The provided MatterSim API calls are hypothetical. You will need to adjust them based on the actual MatterSim documentation and installation details. For full accuracy in energy and relaxation calculations, ensure that you have the proper pretrained model and that MatterSim is configured for the target materials.

Below is an explanation and example code that illustrate how you would update the dummy relaxation and energy functions with calls to a validated ML force field—such as MatterSim—to obtain more meaningful energy values and structure relaxations. (Note: MatterSim is a specialized ML-based force field developed by Microsoft Research. The following code is written using a hypothetical MatterSim API to demonstrate the concept. In practice, you would follow the MatterSim installation and API instructions from its repository.)

Explanation
What You Need to Do
Install MatterSim:
Instead of using a dummy function, you would install MatterSim (if available as a pip package or via cloning its repository) so that you have a validated ML force field. For example:

bash
Copy
!pip install mattersim
(If MatterSim is not available via pip, you would clone the repository and install it as described in its README.)

Import and Initialize the Model:
Use MatterSim’s functions to load a pretrained model (e.g., "MatterSim-v1-1M.pth"). This model is trained to rapidly relax structures and compute energies approximating DFT values.

Perform Structural Relaxation:
Call MatterSim’s relaxation function to obtain a relaxed (lower-energy) structure from the initial input. The model will iteratively adjust atomic positions, element types, and lattice parameters to minimize the energy.

Compute Energies:
Use MatterSim’s energy evaluation function to compute the energy of both the original and the relaxed structure. These energy values are much more meaningful than our dummy sum-of-squares function.

Recalculate Metrics:
With the realistic energy values, the evaluation metrics (such as the stability metric) will reflect the true chemistry. For example, a stable material like NaCl should show a decrease in energy upon relaxation (i.e. energy_relaxed < energy_initial), yielding a stable_rate of 1.

**Example Code Using a Hypothetical MatterSim API**
Below is a complete code snippet that replaces the dummy functions with calls to MatterSim. Copy and paste this code into a single Google Colab cell. (Remember that the MatterSim API here is assumed; you will need to adjust the code based on the actual MatterSim API if available.)

In [None]:
# Install required dependencies
!pip install pymatgen

# Hypothetical installation command for MatterSim (if available via pip)
!pip install mattersim

import numpy as np
import json
from pymatgen.core import Lattice, Structure
from IPython.display import display, HTML

# Import MatterSim (this is hypothetical; please refer to the official MatterSim documentation)
import mattersim

# ---------------------------
# Step 1: Create NaCl Structure
# ---------------------------
def create_nacl_structure():
    """Creates a simple NaCl structure."""
    lattice = Lattice.cubic(5.64)  # approximate lattice parameter in Å
    structure = Structure(lattice, ["Na", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])
    return structure

initial_structure = create_nacl_structure()

# ---------------------------
# Step 2: Use MatterSim for Structural Relaxation and Energy Calculation
# ---------------------------
# Load the pre-trained MatterSim model (adjust the path or model name as required)
ms_model = mattersim.load_pretrained_model("MatterSim-v1-1M.pth")  # Hypothetical API call

# Relax the structure using MatterSim's force field
relaxed_structure = ms_model.relax_structure(initial_structure)

# Compute energy values using MatterSim
energy_initial = ms_model.compute_energy(initial_structure)
energy_relaxed = ms_model.compute_energy(relaxed_structure)

# Compute RMSD between initial and relaxed structures using a helper function
def compute_rmsd(struct1, struct2):
    coords1 = np.array([site.frac_coords for site in struct1])
    coords2 = np.array([site.frac_coords for site in struct2])
    return np.sqrt(np.mean((coords1 - coords2)**2))

rmsd_value = compute_rmsd(initial_structure, relaxed_structure)

# ---------------------------
# Step 3: Build the Evaluation Metrics Dictionary
# ---------------------------
metrics = {
    "stable_rate": 1.0 if energy_relaxed < energy_initial else 0.0,  # Realistically, NaCl is stable
    "rmsd": rmsd_value,
    "energy_initial": energy_initial,
    "energy_relaxed": energy_relaxed,
    "novelty": 1.0,    # Placeholder: In practice, compare against a reference database
    "uniqueness": 1.0  # Placeholder: Check for duplicates among generated structures
}

# Define interpretation for each metric
metric_definitions = {
    "stable_rate": "Fraction of generated structures that are energetically relaxed (1 indicates energy decreased).",
    "rmsd": "Root-mean-square deviation between initial and relaxed structures (lower values indicate minimal distortion).",
    "energy_initial": "Energy of the original unrelaxed structure (computed by the ML force field).",
    "energy_relaxed": "Energy of the relaxed structure; lower energy indicates higher stability.",
    "novelty": "Fraction of structures that do not match any structure in a reference database (placeholder).",
    "uniqueness": "Fraction of structures that are unique among generated candidates (placeholder)."
}

# ---------------------------
# Step 4: Create and Display an HTML Table for the Metrics
# ---------------------------
table_rows = ""
for label, value in metrics.items():
    # Format value to 4 decimals if it's a float
    if isinstance(value, float):
        value_str = f"{value:.4f}"
    else:
        value_str = str(value)
    definition = metric_definitions.get(label, "No definition available.")
    table_rows += f"""
      <tr>
        <td style="border: 1px solid #ccc; padding: 8px;">{label}</td>
        <td style="border: 1px solid #ccc; padding: 8px;">{value_str}</td>
        <td style="border: 1px solid #ccc; padding: 8px;">{definition}</td>
      </tr>
    """

html_content = f"""
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>Evaluation Metrics Report</title>
  <style>
    body {{
      font-family: Arial, sans-serif;
      background-color: #f5f5f5;
      color: #333;
      padding: 20px;
    }}
    h1 {{
      text-align: center;
      font-size: 32px;
      font-weight: bold;
      margin-bottom: 20px;
    }}
    table {{
      width: 100%;
      border-collapse: collapse;
      margin: 0 auto;
    }}
    td, th {{
      border: 1px solid #ccc;
      padding: 8px;
      text-align: left;
    }}
    th {{
      background-color: #e0e0e0;
      font-weight: bold;
    }}
    .table-container {{
      background-color: #fff;
      padding: 20px;
      border-radius: 8px;
      box-shadow: 0 0 10px rgba(0,0,0,0.1);
      max-width: 800px;
      margin: auto;
    }}
  </style>
</head>
<body>
  <h1>Evaluation Metrics for NaCl using MatterSim</h1>
  <div class="table-container">
    <table>
      <tr>
        <th>Metric</th>
        <th>Value</th>
        <th>Interpretation</th>
      </tr>
      {table_rows}
    </table>
  </div>
</body>
</html>
"""

display(HTML(html_content))
