### ASE Symmetry Equivalence Check

In [3]:
%%time

import os
from ase.io import read
from ase.utils.structure_comparator import SymmetryEquivalenceCheck
from collections import defaultdict

# Settings
folder = '123'  # set this to your folder path
output_file = f"{folder}_symmetry_groups.txt"

cif_files = sorted([f for f in os.listdir(folder) if f.endswith('.cif') and '_virtual_' in f])
check = SymmetryEquivalenceCheck(angle_tol=1.0, ltol=0.05, stol=0.05, vol_tol=0.1)

structures = [(f, read(os.path.join(folder, f))) for f in cif_files]

# Grouping logic
groups = []
group_map = {}  # file -> group id

for fname, atoms in structures:
    found_group = False
    for i, ref in enumerate(groups):
        if check.compare(atoms, ref[0][1]):
            ref.append((fname, atoms))
            group_map[fname] = i
            found_group = True
            break
    if not found_group:
        groups.append([(fname, atoms)])
        group_map[fname] = len(groups) - 1

# Write output to text file
with open(output_file, 'w') as f:
    f.write("Symmetry-equivalent groups:\n")
    for i, group in enumerate(groups):
        f.write(f"\nGroup {i+1}:\n")
        for fname, _ in group:
            f.write(f"  {fname}\n")

    degeneracy = len(cif_files) - len(groups)
    f.write(f"\nDegeneracy figure: {degeneracy} (out of {len(cif_files)} total cells)\n")

print(f"\nDegeneracy figure: {degeneracy} (out of {len(cif_files)} total cells)\n")
print(f"Written results to: {output_file}")


Degeneracy figure: 12 (out of 400 total cells)

Written results to: 123_symmetry_groups.txt
CPU times: total: 56min 9s
Wall time: 59min 2s


#### (0-tolerance)
Somehow, this doesn't work

In [None]:
import os
from ase.io import read
from ase.utils.structure_comparator import SymmetryEquivalenceCheck
from collections import defaultdict

# Settings
folder = '112'  # set this to your folder path
output_file = f"{folder}_symmetry_groups.txt"

cif_files = sorted([f for f in os.listdir(folder) if f.endswith('.cif') and '_virtual_' in f])
check = SymmetryEquivalenceCheck(angle_tol=0.0001, ltol=0.0001, stol=0.01, vol_tol=0.0001)

structures = [(f, read(os.path.join(folder, f))) for f in cif_files]

# Grouping logic
groups = []
group_map = {}  # file -> group id

for fname, atoms in structures:
    found_group = False
    for i, ref in enumerate(groups):
        if check.compare(atoms, ref[0][1]):
            ref.append((fname, atoms))
            group_map[fname] = i
            found_group = True
            break
    if not found_group:
        groups.append([(fname, atoms)])
        group_map[fname] = len(groups) - 1

# Write output to text file
with open(output_file, 'w') as f:
    f.write("Symmetry-equivalent groups:\n")
    for i, group in enumerate(groups):
        f.write(f"\nGroup {i+1}:\n")
        for fname, _ in group:
            f.write(f"  {fname}\n")

    degeneracy = len(cif_files) - len(groups)
    f.write(f"\nDegeneracy figure: {degeneracy} (out of {len(cif_files)} total cells)\n")

print(f"\nDegeneracy figure: {degeneracy} (out of {len(cif_files)} total cells)\n")
print(f"Written results to: {output_file}")

### Total Energy check (float)

In [138]:
import os
import pandas as pd
from collections import defaultdict

# Settings
folder = '122'  # set this to your folder path
output_file = f"{folder}_energy_groups_float.txt"

# Read CSV
df = pd.read_csv(os.path.join(folder, 'total_energies.csv'))

# Apply tolerance-based rounding
tolerance = 1e-6
df['Rounded Energy'] = df['Total Energy (eV)'].apply(lambda x: round(x / tolerance) * tolerance)

# Group by rounded energy
energy_groups = defaultdict(list)
for _, row in df.iterrows():
    energy_groups[row['Rounded Energy']].append(int(row['virtual-id']))

# Write output
with open(output_file, 'w') as f:
    f.write("Energy-equivalent groups (with tolerance):\n")
    for i, (energy, ids) in enumerate(energy_groups.items()):
        f.write(f"\nGroup {i+1} (Energy ~ {energy:.6f} eV):\n")
        for vid in ids:
            f.write(f"  virtual-id = {vid}\n")

    degeneracy = len(df) - len(energy_groups)
    f.write(f"\nDegeneracy figure: {degeneracy} (out of {len(df)} total entries)\n")

print(f"\nDegeneracy figure: {degeneracy} (out of {len(cif_files)} total cells)\n")
print(f"Written results to: {output_file}")


Degeneracy figure: 299 (out of 400 total cells)

Written results to: 122_energy_groups_float.txt


### Total Energy Check (strings)

In [1]:
import os
import pandas as pd
from collections import defaultdict

# Settings
folder = '123'  # set this to your folder path
output_file = f"{folder}_energy_groups_string.txt"

# Read CSV
df = pd.read_csv(os.path.join(folder, 'total_energies.csv'))

# Convert energy to string and group by exact matches
df['Energy String'] = df['Total Energy (eV)'].astype(str)

# Grouping
energy_groups = defaultdict(list)
for _, row in df.iterrows():
    energy_groups[row['Energy String']].append(row['virtual-id'])

# Write output to text file
with open(output_file, 'w') as f:
    f.write("Energy-equivalent groups (exact match):\n")
    for i, (energy_str, ids) in enumerate(energy_groups.items()):
        f.write(f"\nGroup {i+1} (Energy = {energy_str} eV):\n")
        for vid in ids:
            f.write(f"  virtual-id = {vid}\n")

    degeneracy = len(df) - len(energy_groups)
    f.write(f"\nDegeneracy figure: {degeneracy} (out of {len(df)} total entries)\n")

print(f"\nDegeneracy figure: {degeneracy} (out of {len(cif_files)} total cells)\n")
print(f"Written results to: {output_file}")

NameError: name 'cif_files' is not defined