In [72]:
import pandas as pd
import numpy as np
from pymatgen.core import Structure
from pymatgen.util.testing import PymatgenTest

from lematerial_forgebench.benchmarks.stability_benchmark import StabilityBenchmark
from lematerial_forgebench.benchmarks.validity_benchmark import ValidityBenchmark
from lematerial_forgebench.metrics.stability_metrics import StabilityMetric
from lematerial_forgebench.preprocess.reference_energies import (
    get_energy_above_hull,
    get_formation_energy_from_composition_energy,
)
from lematerial_forgebench.preprocess.stability_preprocess import (
    EnergyAboveHull,
    OrbFormationEnergy,
    StabilityPreprocessor,
)
from datasets import load_dataset
from pymatgen.core import Composition, Element

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [73]:
test = PymatgenTest()

filename = 'CsBr.cif'
structure = Structure.from_file(filename)
structure = structure.remove_oxidation_states()

filename2 = 'CsPbBr3.cif'
structure2 = Structure.from_file(filename2)
structure2 = structure2.remove_oxidation_states()

structures = [structure, structure2, test.get_structure("Si"), test.get_structure("LiFePO4")]

Use MatSciTest in pymatgen.util.testing instead.
  test = PymatgenTest()


In [81]:
stability_preprocessor = StabilityPreprocessor()
preprocessor_result = stability_preprocessor(structures)

cpu
energy_above_hull : 0.0
energy_above_hull relaxed : 0.0
energy_above_hull : 0.0322531480375976
energy_above_hull relaxed : 0.028652646023437534
energy_above_hull : 0.010958068559570044
energy_above_hull relaxed : 0.009661548326415748
energy_above_hull : 0.24325226729073446
energy_above_hull relaxed : 0.24249804714564505


In [79]:
preprocessor_result.processed_structures[3].properties

{'relaxed_structure': Structure Summary
 Lattice
     abc : 4.755940218671973 6.076102612517623 10.484207495173267
  angles : 90.21749581353959 90.00226148791631 90.00675269060281
  volume : 302.9660478946557
       A : np.float64(-8.605307158745643e-05) np.float64(0.0001305365991170298) np.float64(-4.755940216102036)
       B : np.float64(-0.038241528037821375) np.float64(-6.0759822449411525) np.float64(0.0005500333633519402)
       C : np.float64(10.48417478779725) np.float64(-0.026187226979330545) np.float64(0.0002233985562976699)
     pbc : True True True
 PeriodicSite: Li (10.45, -6.103, 0.001331) [-0.0001172, 1.0, 0.9999]
 PeriodicSite: Li (-0.0192, -3.038, -4.756) [1.0, 0.5, 8.345e-07]
 PeriodicSite: Li (5.204, -6.089, -2.378) [0.5001, 1.0, 0.5]
 PeriodicSite: Li (5.223, -3.051, -2.377) [0.4999, 0.5001, 0.5]
 PeriodicSite: Fe (2.286, -1.527, -2.488) [0.5231, 0.2504, 0.219]
 PeriodicSite: Fe (2.917, -4.562, -0.1095) [0.02313, 0.7496, 0.281]
 PeriodicSite: Fe (7.528, -1.539, -4.64

In [122]:
benchmark = StabilityBenchmark()
benchmark_result= benchmark.evaluate(preprocessor_result.processed_structures)

0.25
<class 'lematerial_forgebench.evaluator.EvaluationResult'>
0.07161587097197553
<class 'lematerial_forgebench.evaluator.EvaluationResult'>
0.75
<class 'lematerial_forgebench.evaluator.EvaluationResult'>
Cs4 Br4 Formation Energy : -7.149936223230794
Cs4 Pb4 Br12 Formation Energy : -8.492938243082682
Si2 Formation Energy : 0.02191613711914009
Li4 Fe4 P4 O16 Formation Energy : -12.061907388726752
-6.920716429480272
<class 'lematerial_forgebench.evaluator.EvaluationResult'>
Cs4 Br4 Relaxation Stability RMSE : 0.030938683711402914
Cs4 Pb4 Br12 Relaxation Stability RMSE : 0.18214184543102457
Si2 Relaxation Stability RMSE : 0.022634037275015083
Li4 Fe4 P4 O16 Relaxation Stability RMSE : 0.05637670082592471
0.07302281681084181
<class 'lematerial_forgebench.evaluator.EvaluationResult'>
{'stable_ratio': 0.25, 'metastable_ratio': 0.75, 'mean_e_above_hull': 0.07161587097197553, 'mean_formation_energy': -6.920716429480272, 'mean_relaxation_RMSE': 0.07302281681084181}


In [123]:
benchmark_result.final_scores

{'stable_ratio': 0.25,
 'metastable_ratio': 0.75,
 'mean_e_above_hull': 0.07161587097197553,
 'mean_formation_energy': -6.920716429480272,
 'mean_relaxation_RMSE': 0.07302281681084181}

In [185]:
"""Tests for stability benchmark."""

from pymatgen.util.testing import PymatgenTest
import numpy as np
from lematerial_forgebench.benchmarks.stability_benchmark import StabilityBenchmark
from lematerial_forgebench.preprocess.stability_preprocess import (
    StabilityPreprocessor,
)


class TestStabilityBenchmark:
    """Test suite for StabilityBenchmark class."""

    def test_initialization_default(self):
        """Test initialization with default parameters."""
        benchmark = StabilityBenchmark()

        # Check name and properties
        assert benchmark.config.name == "StabilityBenchmark"
        assert "version" in benchmark.config.metadata
        assert benchmark.config.metadata["category"] == "stability"

        # Check correct evaluator
        assert len(benchmark.evaluators) == 5
        assert "stability" in benchmark.evaluators

    def test_initialization_custom(self):
        """Test initialization with custom relaxer configuration."""

        benchmark = StabilityBenchmark(
            name="Custom Stability Benchmark",
            description="Custom description",
            metadata={"test_key": "test_value"},
        )

        # Check custom values
        assert benchmark.config.name == "Custom Stability Benchmark"
        assert benchmark.config.description == "Custom description"
        assert benchmark.config.metadata["test_key"] == "test_value"

    def test_evaluate_with_mp_entries(self):
        """Test benchmark evaluation on structures"""
        benchmark = StabilityBenchmark()

        # Create test structures
        test = PymatgenTest()
        structures = [test.get_structure("Si"), test.get_structure("LiFePO4")]

        # first, we need to preprocess the structures
        stability_preprocessor = StabilityPreprocessor()
        preprocessor_result = stability_preprocessor(structures)
        structures = preprocessor_result.processed_structures

        # Run benchmark
        result = benchmark.evaluate(structures)

        # Check result format
        assert len(result.evaluator_results) == 5
        assert "stability" in result.evaluator_results
        assert "stable_ratio" in result.final_scores

        # Check score types
        assert isinstance(result.final_scores["stable_ratio"], (int, float))
        assert isinstance(result.final_scores["metastable_ratio"], (int, float))
        assert isinstance(result.final_scores["mean_e_above_hull"], (int, float))
        assert isinstance(result.final_scores["mean_formation_energy"], (int, float))
        assert isinstance(result.final_scores["mean_relaxation_RMSE"], (int, float))

    def test_empty_structures(self):
        """Test behavior with empty structure list."""
        benchmark = StabilityBenchmark()

        # Test behavior with no structures - should not raise error
        result = benchmark.evaluate([])

        # Should get default values        
        assert result.final_scores["stable_ratio"] is None
        assert result.final_scores["metastable_ratio"] is None
        assert result.final_scores["mean_e_above_hull"] is None
        assert result.final_scores["mean_formation_energy"] is None
        assert result.final_scores["mean_relaxation_RMSE"] is None

    def test_aggregate_evaluator_results(self):
        """Test result aggregation logic."""
        benchmark = StabilityBenchmark()

        # Mock evaluator_results as passed by BaseBenchmark.evaluate
        # It contains the evaluator's combined_value and the primary metric value
        # of each metric configured for that evaluator (e.g., "metric_name_value").
        mock_evaluator_results_from_base = {
            "stability": {  # Name of the evaluator
                "combined_value": 0.75,  # Evaluator's combined score
                "metric_results": {"stability": {"metrics": {"stable_ratio": 0.75}}}},
            "metastability": {"combined_value": 0.85},
            "formation_energy": {"combined_value": -6.7},
            "mean_e_above_hull": {"combined_value": 0.1},
            "relaxation_stability": {"combined_value": 0.01}}
        # Aggregate results
        scores = benchmark.aggregate_evaluator_results(mock_evaluator_results_from_base)
        # Check scores
        # aggregate_evaluator_results should pick up combined_value as stability_score
        # and stability_value as stable_ratio.
        # mean_e_above_hull and metastable_ratio will be defaults (nan, 0.0) because
        # they are not present in the input dict.
        assert scores["stable_ratio"] == 0.75
        assert scores["metastable_ratio"] == 0.85
        assert scores["mean_e_above_hull"] == 0.1
        assert scores["mean_formation_energy"] == -6.7
        assert scores["mean_relaxation_RMSE"] == 0.01

    def test_benchmark_metadata(self):
        """Test benchmark metadata structure."""
        benchmark = StabilityBenchmark()

        metadata = benchmark.config.metadata

        # Check required metadata fields
        assert metadata["version"] == "0.1.0"
        assert metadata["category"] == "stability"


def test_evaluator_configuration():
    """Test that evaluator is properly configured."""
    benchmark = StabilityBenchmark()

    # Check evaluator configuration
    stability_evaluator = benchmark.evaluators["stability"]
    # print(stability_evaluator)
    assert stability_evaluator.config.name == "stability"
    assert stability_evaluator.config.weights == {"stability": 1.0}
    assert stability_evaluator.config.aggregation_method == "weighted_mean"


In [186]:
test = TestStabilityBenchmark()

In [187]:
test.test_empty_structures()

{'stable_ratio': None, 'metastable_ratio': None, 'mean_e_above_hull': None, 'mean_formation_energy': None, 'mean_relaxation_RMSE': None}
None
