In [1]:
import datetime
import re
import traceback
import warnings

import pandas as pd

from pymatgen.core import Structure, Lattice, Composition
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.ext.matproj import MPRester
from m3gnet.models import Relaxer

In [2]:
# Experimental lattice constants: the first table on the Wikipedia page.
raw_table = pd.read_html("http://en.wikipedia.org/wiki/Lattice_constant")[0]

# Structure types excluded from the benchmark (the relaxation cell only builds
# cubic cells). NOTE(review): "Orthorombic" is kept exactly as written in the
# original filter; presumably it mirrors the spelling used in the source table.
excluded_structures = ["Hexagonal", "Wurtzite", "Wurtzite (HCP)",
                       "Orthorombic", "Tetragonal perovskite", "Orthorhombic perovskite"]

# Build the cleaned frame in a single chain so that `data` is a fresh DataFrame
# rather than a boolean-filtered slice that is then mutated in place (which
# raises SettingWithCopyWarning and makes the cell fragile on re-run).
data = (
    raw_table[~raw_table["Crystal structure"].isin(excluded_structures)]
    .rename(columns={'Lattice constant (Å)': 'a'})
    .drop(columns=["Ref."])
    .assign(a=lambda frame: frame["a"].map(float))
    # "NC0.99" is dropped explicitly; presumably its non-stoichiometric formula
    # cannot be handled downstream -- TODO confirm.
    .loc[lambda frame: frame["Material"] != "NC0.99",
         ["Material", "Crystal structure", "a"]]
)

# Supplementary FCC elements, stored as whitespace-separated triples
# "<number> <symbol> <lattice constant>" (two triples per text line).
# add_new() reads only the symbol and the constant of each triple; the leading
# number (presumably the atomic number) is ignored. The constants are presumably
# in angstroms, like column "a" -- TODO confirm.
additional_fcc = """10 Ne 4.43 54 Xe 6.20
13 Al 4.05 58 Ce 5.16
18 Ar 5.26 70 Yb 5.49
20 Ca 5.58 77 Ir 3.84
28 Ni 3.52 78 Pt 3.92
29 Cu 3.61 79 Au 4.08
36 Kr 5.72 82 Pb 4.95
38 Sr 6.08 47 Ag 4.09
45 Rh 3.80 89 Ac 5.31
46 Pd 3.89 90 Th 5.08"""

# Supplementary BCC elements, in the same triple format as additional_fcc.
additional_bcc = """3 Li 3.49 42 Mo 3.15
11 Na 4.23 55 Cs 6.05
19 K 5.23 56 Ba 5.02
23 V 3.02 63 Eu 4.61
24 Cr 2.88 73 Ta 3.31
26 Fe 2.87 74 W 3.16
37 Rb 5.59 41 Nb 3.30"""

def add_new(str_, structure_type, df):
    """Return ``df`` extended with the crystals in ``str_`` that it lacks.

    Args:
        str_: Whitespace-separated text read as consecutive triples
            ``<number> <symbol> <lattice constant>``. The first field of each
            triple is ignored; a trailing incomplete triple is dropped.
        structure_type: Value stored in the second column of every added row.
        df: Frame with a "Material" column and exactly three columns, filled
            in the order (material, structure type, lattice constant).

    Returns:
        A new DataFrame holding the rows of ``df`` followed by one row per
        symbol that is not already present in ``df["Material"]``. ``df`` itself
        is never modified.

    Raises:
        ValueError: If a lattice-constant field cannot be parsed as a float.
    """
    toks = str_.split()
    known = set(df["Material"].values)
    # toks[1::3] are the symbols and toks[2::3] the constants; zip() stops at
    # the last complete triple.
    new_crystals = [
        [symbol, structure_type, float(constant)]
        for symbol, constant in zip(toks[1::3], toks[2::3])
        if symbol not in known
    ]
    if not new_crystals:
        # Nothing to add: skip concatenating an empty frame, which newer pandas
        # versions warn about and which may change column dtypes.
        return df.copy()
    # Use the columns of the frame we were given, not the notebook-global `data`.
    df2 = pd.DataFrame(new_crystals, columns=df.columns)
    return pd.concat([df, df2])

# Fold the supplementary elemental tables into the scraped data (FCC first,
# then BCC), then key the rows by material name and show the result.
for table_text, lattice_type in ((additional_fcc, "FCC"), (additional_bcc, "BCC")):
    data = add_new(table_text, lattice_type, data)
data = data.set_index("Material")
print(data)

             Crystal structure         a
Material                                
C (diamond)      Diamond (FCC)  3.567000
Si               Diamond (FCC)  5.431021
Ge               Diamond (FCC)  5.658000
AlAs         Zinc blende (FCC)  5.660500
AlP          Zinc blende (FCC)  5.451000
...                        ...       ...
K                          BCC  5.230000
Ba                         BCC  5.020000
Eu                         BCC  4.610000
Cr                         BCC  2.880000
Rb                         BCC  5.590000

[92 rows x 2 columns]


In [3]:
# For every material: build an idealised cubic cell, relax it with M3GNet, and
# look up the matching Materials Project (MP) structure for comparison.
# `predicted`, `mp` and `relaxation_time` each receive exactly one entry per
# row of `data`, in row order.
predicted = []
mp = []
relaxation_time = []
mpr = MPRester()
relaxer = Relaxer()  # This loads the default pre-trained model

for formula, v in data.iterrows():
    with warnings.catch_warnings(record=True) as caught_warnings:  # just to capture all the annoying TF warnings.
        warnings.simplefilter("always")
        # The index may carry a qualifier such as "C (diamond)"; keep the formula only.
        formula = formula.split()[0]
        c = Composition(formula)
        # Site assignment below depends on this ordering (presumably pymatgen's
        # default Element ordering -- TODO confirm).
        els = sorted(c.elements)
        cs = v["Crystal structure"]

        # We initialize all the crystals with an arbitrary lattice constant of 5 angstroms.
        # Order matters: labels such as "Zinc blende (FCC)" and "Diamond (FCC)"
        # also contain "FCC", so the plain "FCC" test has to come last.
        if "Zinc blende" in cs:
            s = Structure.from_spacegroup("F-43m", Lattice.cubic(5), [els[0], els[1]], [[0, 0, 0], [0.25, 0.25, 0.75]])
        elif "Halite" in cs:
            s = Structure.from_spacegroup("Fm-3m", Lattice.cubic(5), [els[0], els[1]], [[0, 0, 0], [0.5, 0, 0]])
        elif "Caesium chloride" in cs:
            s = Structure.from_spacegroup("Pm-3m", Lattice.cubic(5), [els[0], els[1]], [[0, 0, 0], [0.5, 0.5, 0.5]])
        elif "Cubic perovskite" in cs:
            s = Structure(Lattice.cubic(5), [els[0], els[1], els[2], els[2], els[2]],
                          [[0., 0., 0.], [0.5, 0.5, 0.5], [0.5, 0.5, 0], [0., 0.5, 0.5], [0.5, 0, 0.5]])
        elif "Diamond" in cs:
            s = Structure.from_spacegroup("Fd-3m", Lattice.cubic(5), [els[0]], [[0.25, 0.75, 0.25]])
        elif "BCC" in cs:
            s = Structure(Lattice.cubic(5), [els[0]] * 2, [[0., 0., 0.], [0.5, 0.5, 0.5]])
        elif "FCC" in cs:
            s = Structure(Lattice.cubic(5), [els[0]] * 4, [[0., 0., 0.], [0.5, 0.5, 0], [0., 0.5, 0.5], [0.5, 0, 0.5]])
        else:
            # Unsupported structure type: store placeholders in *all three*
            # lists so they stay aligned with the rows of `data`.
            predicted.append(0)
            mp.append(0)
            relaxation_time.append(datetime.timedelta(0))
            continue

        start = datetime.datetime.now()

        relax_results = relaxer.relax(s)

        final_structure = relax_results['final_structure']

        timetaken = datetime.datetime.now() - start

        predicted.append(final_structure.lattice.a)
        relaxation_time.append(timetaken)

        # NOTE(review): 0 is kept as the "no MP match" placeholder for
        # compatibility, but it makes the "% error vs MP" computation in the
        # next cell divide by zero for that row.
        mp_a = 0
        try:
            entries = mpr.get_entries({"pretty_formula": s.composition.reduced_formula},
                                      inc_structure=True, property_data=["e_above_hull"])
            # Most stable polymorph first, so the first space-group match wins.
            entries = sorted(entries, key=lambda e: e.data["e_above_hull"])
            # The relaxed structure does not change per entry: analyse it once.
            target_sg = SpacegroupAnalyzer(final_structure).get_space_group_number()
            for e in entries:
                try:
                    sga = SpacegroupAnalyzer(e.structure)
                    if sga.get_space_group_number() == target_sg:
                        conv = sga.get_conventional_standard_structure()
                        mp_a = conv.lattice.a
                        break
                except Exception:
                    # Best effort: skip entries whose symmetry analysis fails,
                    # without swallowing KeyboardInterrupt/SystemExit.
                    continue
            else:
                raise RuntimeError(
                    f"No Materials Project entry for {s.composition.reduced_formula} "
                    "matches the space group of the relaxed structure"
                )
        except Exception:
            traceback.print_exc()
        mp.append(mp_a)
data["MP a"] = mp
data["Predicted a"] = predicted

2022-06-16 20:00:37.678599: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
Traceback (most recent call last):
  File "/var/folders/ql/m5k56v8n5sz5880n5sksmc9w0000gn/T/ipykernel_6696/1671824916.py", line 62, in <cell line: 7>
    raise RuntimeError
RuntimeError


In [4]:
# Signed relative deviation (in percent) of the relaxed lattice constant from
# the experimental value ("a") and from the Materials Project value ("MP a").
predicted_a = data["Predicted a"]
for reference_col, error_col in (("a", "% error vs Expt"), ("MP a", "% error vs MP")):
    data[error_col] = (predicted_a - data[reference_col]) / data[reference_col] * 100

In [5]:
# Render both error columns as strings with two decimals and a percent sign.
# The columns are overwritten, so they hold text (not numbers) from here on.
as_percent = '{:,.2f}%'.format
for error_col in ("% error vs Expt", "% error vs MP"):
    data[error_col] = data[error_col].map(as_percent)

In [6]:
# Emit the table, sorted by material name, as Markdown text.
markdown_table = data.sort_index().to_markdown()
print(markdown_table)

| Material    | Crystal structure   |       a |    MP a |   Predicted a | % error vs Expt   | % error vs MP   |
|:------------|:--------------------|--------:|--------:|--------------:|:------------------|:----------------|
| Ac          | FCC                 | 5.31    | 5.66226 |       5.6646  | 6.68%             | 0.04%           |
| Ag          | FCC                 | 4.079   | 4.16055 |       4.16702 | 2.16%             | 0.16%           |
| Al          | FCC                 | 4.046   | 4.03893 |       4.04108 | -0.12%            | 0.05%           |
| AlAs        | Zinc blende (FCC)   | 5.6605  | 5.73376 |       5.73027 | 1.23%             | -0.06%          |
| AlP         | Zinc blende (FCC)   | 5.451   | 5.50711 |       5.50346 | 0.96%             | -0.07%          |
| AlSb        | Zinc blende (FCC)   | 6.1355  | 6.23376 |       6.22817 | 1.51%             | -0.09%          |
| Ar          | FCC                 | 5.26    | 5.64077 |       5.62745 | 6.99%             | -0.24%    