# See matches from grouplist

### 1. Total Energy (string) comparison

In [23]:
import re
import itertools
import csv

# Read the whole energy-group report into memory.
with open("123_energy_groups_string.txt", "r") as src:
    text = src.read()

# Capture the body under every "Group N (Energy = ... eV):" header; a body
# runs until the next line starting with "Group" or the end of the text.
group_blocks = re.findall(r"Group \d+ \(Energy = .*?eV\):\n(.*?)(?=\nGroup|\Z)", text, re.DOTALL)

pairs = []

for body in group_blocks:
    # Every "virtual-id = <digits>" occurrence in this group, in file order.
    member_ids = [int(digits) for digits in re.findall(r"virtual-id\s*=\s*(\d+)", body)]
    if len(member_ids) < 2:
        # A group with a single member cannot contribute a pair.
        continue
    # All 2-element combinations of the group's members.
    pairs.extend(itertools.combinations(member_ids, 2))

# Write one pair per row: two columns, no header line.
with open("123_energy_groups_string_pairs.csv", "w", newline="") as out:
    csv.writer(out).writerows(pairs)


### 2. ASE symmetry equivalence comparison

In [33]:
import re
import itertools
import csv

# Read the whole symmetry-group report into memory.
with open("123_symmetry_groups.txt", "r") as src:
    text = src.read()

# Capture the body under every "Group N:" header; a body runs until the next
# line starting with "Group" or "Degeneracy", or the end of the text.
group_blocks = re.findall(r"Group \d+:\n(.*?)(?=\nGroup|\nDegeneracy|\Z)", text, re.DOTALL)

unique_pairs = set()

for body in group_blocks:
    # Numeric IDs taken from "_virtual_<digits>.cif" names, ascending, so
    # every generated pair satisfies n1 <= n2.
    member_ids = sorted(int(digits) for digits in re.findall(r"_virtual_(\d+)\.cif", body))
    # combinations() yields nothing for fewer than two members, so groups
    # with a single structure contribute no pairs.
    unique_pairs.update(itertools.combinations(member_ids, 2))

# Deterministic row order for the output file.
pairs = sorted(unique_pairs)

# Write one pair per row: two columns, no header line.
with open("123_symmetry_groups_pairs.csv", "w", newline="") as out:
    csv.writer(out).writerows(pairs)


# Statistics comparing the two methods

In [35]:
import csv

def load_pairs(filename):
    """Load a header-less CSV of integer IDs as a set of normalized tuples.

    Every non-empty row is converted to ints and sorted ascending, so rows
    such as ``5,3`` and ``3,5`` collapse to the same tuple ``(3, 5)``.
    Completely blank rows (e.g. a stray empty line in the file) are skipped
    instead of being recorded as an empty tuple.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A set of tuples of ints, one per distinct non-empty row.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a cell cannot be parsed by ``int``.
    """
    # newline="" leaves line-ending handling to the csv module, as the csv
    # documentation recommends for files passed to csv.reader.
    with open(filename, "r", newline="") as f:
        # csv.reader yields [] for blank lines; "if row" drops those.
        return {tuple(sorted(map(int, row))) for row in csv.reader(f) if row}

# Load the pair sets written by the two grouping methods:
# A = energy-string grouping, B = symmetry grouping.
pairs_A = load_pairs("123_energy_groups_string_pairs.csv")
pairs_B = load_pairs("123_symmetry_groups_pairs.csv")

# Set algebra between the two methods.
only_in_A = pairs_A - pairs_B
only_in_B = pairs_B - pairs_A
intersection = pairs_A & pairs_B
union = pairs_A | pairs_B

# Report
print(f"Total pairs in A (energy): {len(pairs_A)}")
print(f"Total pairs in B (symmetry): {len(pairs_B)}")
print(f"Pairs in both (A ∩ B): {len(intersection)}")
print(f"Pairs only in A: {len(only_in_A)}")
print(f"Pairs only in B: {len(only_in_B)}")
if union:
    # Jaccard index = |A ∩ B| / |A ∪ B|, shown as a percentage.
    print(f"Jaccard index: {len(intersection) / len(union) * 100:.2f}%")
else:
    # Both sets are empty: the ratio would be 0/0, so report that instead
    # of raising ZeroDivisionError.
    print("Jaccard index: undefined (both pair sets are empty)")

# Optional: print up to ten of the actual differences.
# Set iteration order is arbitrary, so these are samples, not a ranking.
print("\nSome pairs only in A:")
for p in list(only_in_A)[:10]:
    print(p)

print("\nSome pairs only in B:")
for p in list(only_in_B)[:10]:
    print(p)


Total pairs in A (energy): 15
Total pairs in B (symmetry): 13
Pairs in both (A ∩ B): 10
Pairs only in A: 5
Pairs only in B: 3
Jaccard index: 55.56%

Some pairs only in A:
(66, 271)
(94, 168)
(130, 182)
(107, 369)
(114, 154)

Some pairs only in B:
(94, 325)
(135, 381)
(104, 292)
