# Functions to evaluate total energies (for degeneracy analysis); initial runs

In [29]:
from virp import PermutativeFill
from pymatgen.core.structure import Structure
from chgnet.model.model import CHGNet
import warnings
from pathlib import Path
import pandas as pd
import numpy as np

# Load the CHGNet model once at module level; SVC uses this object as the
# default value of its `chgnetmodel` argument.
model = CHGNet.load()

def CIFSupercell(inputcif, outputcif, supercellsize, verbose=True):
    """Expand the structure stored in a CIF file into a supercell and save it.

    Args:
        inputcif: path of the CIF file to read.
        outputcif: path of the CIF file to write.
        supercellsize: three integers, the repeat count along each lattice
            vector (e.g. [2, 2, 2] gives a 2x2x2 supercell).
        verbose (bool): when true, print a confirmation after the file is
            written.
    """
    cell = Structure.from_file(inputcif)

    # Diagonal scaling matrix: one repeat count per lattice direction.
    repeat_a = supercellsize[0]
    repeat_b = supercellsize[1]
    repeat_c = supercellsize[2]
    diagonal_scaling = [
        [repeat_a, 0, 0],
        [0, repeat_b, 0],
        [0, 0, repeat_c],
    ]

    # The expanded cell is written straight from the same object afterwards.
    cell.make_supercell(diagonal_scaling)
    cell.to(fmt="cif", filename=outputcif)

    if verbose:
        print("Supercell created and saved as ", outputcif)


def EquivalentStructures(csv_path):
    """Count energy-degenerate structures listed in a CSV file.

    The rows are ordered by the 'Total Energy (eV)' column and every pair of
    neighbouring rows whose energies have exactly the same string
    representation is counted as one redundant structure. A one-line summary
    (count, total rows, percentage) is printed.

    Args:
        csv_path: path to a CSV file containing a 'Total Energy (eV)' column.

    Returns:
        int: number of adjacent pairs with identical energy strings; 0 when
        the file holds fewer than two rows.
    """
    energy_column = 'Total Energy (eV)'
    df = pd.read_csv(csv_path)
    number_rows = len(df)

    # Sorting puts equal energies next to each other; comparing the string
    # form means only exact matches are counted (no tolerance).
    energy_strings = df[energy_column].sort_values().astype(str).tolist()
    overlaps = sum(
        current == following
        for current, following in zip(energy_strings, energy_strings[1:])
    )

    # A header-only CSV has zero rows; avoid dividing by zero in that case.
    percentage = 100 * overlaps / number_rows if number_rows else 0.0

    print(f" - Number of redundant structures: {overlaps}/{number_rows} ({percentage:.2f}%)")
    return overlaps

def SVC(input_cif, supercell, fname, sample_size=400, chgnetmodel = model):
    """
    Given a disordered .cif file, create an output folder containing a number
    (sample_size) of virtual cells and a table of their predicted energies.

    The folder `fname` receives: the supercell CIF, one `_virtual_<i>.cif`
    per sample, `total_energies.csv`, and an empty `_JOBDONE` marker file
    written once everything else has finished.

    Args:
        input_cif (str): Path to .cif (disordered)
        supercell [int,int,int]: multiplicity of supercell
        fname (str): Output folder (created if missing). Its last path
            component is also used as the prefix of the supercell file name.
        sample_size (int): Number of virtual cells to generate (default is 400)
        chgnetmodel: Object whose `predict_structure(structure)` returns a
            mapping with an "e" entry; defaults to the module-level `model`.

    Returns:
        int: the number of redundant structures reported by
        EquivalentStructures for the generated energy table.
    """

    # Suppress warnings in this block
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # Make output folder directory; parents=True lets fname be a nested path
        out_dir = Path(fname)
        out_dir.mkdir(parents=True, exist_ok=True)

        # Make the supercell. Only the folder's own name is used as the file
        # prefix so that a nested fname does not produce a non-existent subpath.
        sc_file = out_dir / (out_dir.name + "_supercell.cif")
        CIFSupercell(input_cif, sc_file, supercell, verbose = False)

        data = []
        for i in range(sample_size):
            # Permutative fill only, no structure optimization
            pfill_file = out_dir / f"_virtual_{i}.cif"
            PermutativeFill(sc_file, pfill_file, verbose = False)
            print(f"\rGenerating virtual cell #{i} ({i+1}/{sample_size})", end="", flush=True)

            # Predict the energy with the model that was passed in (not the
            # global one), so callers can actually swap models.
            structure = Structure.from_file(pfill_file)
            prediction = chgnetmodel.predict_structure(structure)
            # NOTE(review): CHGNet's documentation describes "e" as eV/atom for
            # the default model, while the column below is labelled as total
            # eV -- confirm units. The degeneracy count is unaffected as long
            # as every virtual cell has the same number of atoms.
            total_energy = prediction["e"]

            data.append({
                "virtual-id": i,
                "Total Energy (eV)": total_energy,
            })

        # Save the energies as a CSV file
        df = pd.DataFrame(data)
        output_file = out_dir / "total_energies.csv"
        df.to_csv(output_file, index = False)

        # Count degenerate (identical-energy) structures
        overlaps = EquivalentStructures(output_file)

        # Make an empty file signalling completion
        with open(out_dir / "_JOBDONE", 'w'):
            pass

        return overlaps

CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on cuda


# Main Body of Code (serious runs)

In [8]:
# Input CIF passed to SVC by brass_degenerate for every run below
# (presumably the disordered Cu62Zn38 brass structure, judging by the file name).
brass_file = "197803-brass-Cu62Zn38.cif"

In [34]:
def brass_degenerate(name, sc, n_runs=30, sample_size=400):
    """Repeat the SVC degeneracy count for one supercell and summarise it.

    Runs SVC on `brass_file` `n_runs` times with the module-level `model`,
    collecting the number of redundant structures from each run, then prints
    and returns the mean and standard deviation of those counts.

    Every run uses the same output folder `name`, so files written by an
    earlier run are overwritten by the next one.

    Args:
        name (str): Output folder name handed to SVC; also used as the label
            in the printed summary.
        sc [int,int,int]: multiplicity of supercell.
        n_runs (int): Number of independent SVC runs (default is 30).
        sample_size (int): Virtual cells generated per run (default is 400).

    Returns:
        tuple: (mean, standard deviation) of the redundant-structure counts.
    """
    overlaps = [
        SVC(brass_file, sc, name, sample_size=sample_size, chgnetmodel=model)
        for _ in range(n_runs)
    ]
    npol = np.array(overlaps)
    mean_overlap = np.mean(npol)
    std_overlap = np.std(npol)
    print("Supercell:", name, "Mean: ", mean_overlap, "/ std: ", std_overlap)
    return mean_overlap, std_overlap

In [None]:
# 1x1x1 supercell; output files go to the folder "111".
name = "111"
sc = [1,1,1]
mean_111, std_111 = brass_degenerate(name, sc)

Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #399 (400/400) - Number of redundant structures: 398/400 (99.50%)
Generating virtual cell #358 (359/400)

In [None]:
# 2x1x1 supercell; output files go to the folder "211".
name = "211"
sc = [2,1,1]
mean_211, std_211 = brass_degenerate(name, sc)

In [None]:
# 2x2x1 supercell; output files go to the folder "221".
name = "221"
sc = [2,2,1]
mean_221, std_221 = brass_degenerate(name, sc)

In [None]:
# 2x2x2 supercell; output files go to the folder "222".
name = "222"
sc = [2,2,2]
mean_222, std_222 = brass_degenerate(name, sc)

In [None]:
# 3x2x2 supercell; output files go to the folder "322".
name = "322"
sc = [3,2,2]
mean_322, std_322 = brass_degenerate(name, sc)

In [None]:
# 3x3x2 supercell; output files go to the folder "332".
name = "332"
sc = [3,3,2]
mean_332, std_332 = brass_degenerate(name, sc)

In [None]:
# 3x3x3 supercell; output files go to the folder "333".
name = "333"
sc = [3,3,3]
mean_333, std_333 = brass_degenerate(name, sc)

In [None]:
# 1x2x3 supercell; output files go to the folder "123".
name = "123"
sc = [1,2,3]
mean_123, std_123 = brass_degenerate(name, sc)

In [None]:
# 1x1x3 supercell; output files go to the folder "113".
name = "113"
sc = [1,1,3]
mean_113, std_113 = brass_degenerate(name, sc)