## Init and functions

In [22]:
from pymatgen.core.structure import Structure
import os
import csv
import torch
import matgl
from chgnet.model.model import CHGNet
from chgnet.model import StructOptimizer
from chgnet.model.dynamics import MolecularDynamics

def calculate_properties_and_write_csv(folder_path, output_csv):
    # Load the MEGNet band gap model
    bandgap_model = matgl.load_model("MEGNet-MP-2019.4.1-BandGap-mfi")
    
    # Load the CHGNet model for total energy prediction
    chgnet = CHGNet.load()

    # Initialize data storage
    data = []

    for filename in os.listdir(folder_path):
        if filename.endswith(".cif"):
            filepath = os.path.join(folder_path, filename)
            try:
                # Load the structure
                structure = Structure.from_file(filepath)
                
                # Predict total energy
                total_energy = chgnet.predict_structure(structure)['e']

                # Calculate density and convert to float
                density = float(structure.density)

                # Predict band gaps for different methods
                bandgaps = {}
                for i, method in ((0, "PBE"), (1, "GLLB-SC"), (2, "HSE"), (3, "SCAN")):
                    graph_attrs = torch.tensor([i])
                    bandgap = bandgap_model.predict_structure(structure=structure, state_attr=graph_attrs)
                    bandgaps[method] = float(bandgap)
                
                # Append results to data
                data.append({
                    "File": filename,
                    "Total Energy (eV)": total_energy,
                    "Density": density,
                    "PBE Bandgap (eV)": bandgaps["PBE"],
                    "GLLB-SC Bandgap (eV)": bandgaps["GLLB-SC"],
                    "HSE Bandgap (eV)": bandgaps["HSE"],
                    "SCAN Bandgap (eV)": bandgaps["SCAN"],
                })
                
                print(f"Processed: {filename}")
            
            except Exception as e:
                print(f"Error processing {filename}: {e}")
    
    # Write results to CSV
    with open(output_csv, mode='w', newline='') as csvfile:
        fieldnames = ["File", "Total Energy (eV)", "Density", "PBE Bandgap (eV)", "GLLB-SC Bandgap (eV)", "HSE Bandgap (eV)", "SCAN Bandgap (eV)"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        
        writer.writeheader()
        writer.writerows(data)

    print(f"Results saved to {output_csv}")

In [24]:
# Example usage
folder_path = "1000-3x"
output_csv = "1000-3x.csv"
calculate_properties_and_write_csv(folder_path, output_csv)

CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on cuda:0




Processed: geller-wernick_supercell_virtual_843_stropt.cif




Processed: geller-wernick_supercell_virtual_464_stropt.cif
Processed: geller-wernick_supercell_virtual_78_stropt.cif




Processed: geller-wernick_supercell_virtual_603_stropt.cif




Processed: geller-wernick_supercell_virtual_738_stropt.cif
Processed: geller-wernick_supercell_virtual_222_stropt.cif
Processed: geller-wernick_supercell_virtual_469_stropt.cif
Processed: geller-wernick_supercell_virtual_433_stropt.cif
Processed: geller-wernick_supercell_virtual_23_stropt.cif
Processed: geller-wernick_supercell_virtual_508_stropt.cif
Processed: geller-wernick_supercell_virtual_351_stropt.cif
Processed: geller-wernick_supercell_virtual_553_stropt.cif
Processed: geller-wernick_supercell_virtual_140_stropt.cif
Processed: geller-wernick_supercell_virtual_667_stropt.cif
Processed: geller-wernick_supercell_virtual_457_stropt.cif
Processed: geller-wernick_supercell_virtual_173_stropt.cif
Processed: geller-wernick_supercell_virtual_566_stropt.cif
Processed: geller-wernick_supercell_virtual_863_stropt.cif
Processed: geller-wernick_supercell_virtual_801_stropt.cif
Processed: geller-wernick_supercell_virtual_629_stropt.cif
Processed: geller-wernick_supercell_virtual_470_stropt.ci



Processed: geller-wernick_supercell_virtual_343_stropt.cif
Processed: geller-wernick_supercell_virtual_999_stropt.cif




Processed: geller-wernick_supercell_virtual_277_stropt.cif
Processed: geller-wernick_supercell_virtual_188_stropt.cif
Processed: geller-wernick_supercell_virtual_324_stropt.cif
Processed: geller-wernick_supercell_virtual_998_stropt.cif
Processed: geller-wernick_supercell_virtual_403_stropt.cif
Processed: geller-wernick_supercell_virtual_984_stropt.cif
Processed: geller-wernick_supercell_virtual_755_stropt.cif
Processed: geller-wernick_supercell_virtual_598_stropt.cif
Processed: geller-wernick_supercell_virtual_735_stropt.cif
Processed: geller-wernick_supercell_virtual_899_stropt.cif
Processed: geller-wernick_supercell_virtual_787_stropt.cif
Processed: geller-wernick_supercell_virtual_772_stropt.cif
Processed: geller-wernick_supercell_virtual_386_stropt.cif
Processed: geller-wernick_supercell_virtual_751_stropt.cif
Processed: geller-wernick_supercell_virtual_677_stropt.cif
Processed: geller-wernick_supercell_virtual_498_stropt.cif
Processed: geller-wernick_supercell_virtual_196_stropt.c



Processed: geller-wernick_supercell_virtual_931_stropt.cif
Processed: geller-wernick_supercell_virtual_972_stropt.cif
Processed: geller-wernick_supercell_virtual_24_stropt.cif
Processed: geller-wernick_supercell_virtual_502_stropt.cif
Processed: geller-wernick_supercell_virtual_696_stropt.cif
Processed: geller-wernick_supercell_virtual_276_stropt.cif
Processed: geller-wernick_supercell_virtual_753_stropt.cif
Processed: geller-wernick_supercell_virtual_572_stropt.cif
Processed: geller-wernick_supercell_virtual_206_stropt.cif
Processed: geller-wernick_supercell_virtual_158_stropt.cif
Processed: geller-wernick_supercell_virtual_741_stropt.cif
Processed: geller-wernick_supercell_virtual_7_stropt.cif
Processed: geller-wernick_supercell_virtual_197_stropt.cif
Processed: geller-wernick_supercell_virtual_706_stropt.cif
Processed: geller-wernick_supercell_virtual_969_stropt.cif
Processed: geller-wernick_supercell_virtual_958_stropt.cif
Processed: geller-wernick_supercell_virtual_229_stropt.cif




Processed: geller-wernick_supercell_virtual_337_stropt.cif
Processed: geller-wernick_supercell_virtual_65_stropt.cif
Processed: geller-wernick_supercell_virtual_354_stropt.cif
Processed: geller-wernick_supercell_virtual_691_stropt.cif
Processed: geller-wernick_supercell_virtual_246_stropt.cif
Processed: geller-wernick_supercell_virtual_764_stropt.cif
Processed: geller-wernick_supercell_virtual_985_stropt.cif
Processed: geller-wernick_supercell_virtual_918_stropt.cif
Processed: geller-wernick_supercell_virtual_344_stropt.cif
Processed: geller-wernick_supercell_virtual_993_stropt.cif
Processed: geller-wernick_supercell_virtual_848_stropt.cif
Processed: geller-wernick_supercell_virtual_513_stropt.cif
Processed: geller-wernick_supercell_virtual_275_stropt.cif
Processed: geller-wernick_supercell_virtual_861_stropt.cif
Processed: geller-wernick_supercell_virtual_701_stropt.cif
Processed: geller-wernick_supercell_virtual_453_stropt.cif
Processed: geller-wernick_supercell_virtual_549_stropt.ci



Processed: geller-wernick_supercell_virtual_442_stropt.cif
Processed: geller-wernick_supercell_virtual_153_stropt.cif
Processed: geller-wernick_supercell_virtual_338_stropt.cif
Processed: geller-wernick_supercell_virtual_471_stropt.cif
Processed: geller-wernick_supercell_virtual_260_stropt.cif
Processed: geller-wernick_supercell_virtual_943_stropt.cif
Processed: geller-wernick_supercell_virtual_625_stropt.cif
Processed: geller-wernick_supercell_virtual_537_stropt.cif
Processed: geller-wernick_supercell_virtual_690_stropt.cif
Processed: geller-wernick_supercell_virtual_811_stropt.cif
Processed: geller-wernick_supercell_virtual_87_stropt.cif
Processed: geller-wernick_supercell_virtual_699_stropt.cif
Processed: geller-wernick_supercell_virtual_522_stropt.cif
Processed: geller-wernick_supercell_virtual_803_stropt.cif
Processed: geller-wernick_supercell_virtual_329_stropt.cif
Processed: geller-wernick_supercell_virtual_4_stropt.cif
Processed: geller-wernick_supercell_virtual_407_stropt.cif


In [1]:
import virp.matprop as mprop

# Example usage
folder_path = "test"
output_csv = "test.csv"
mprop.predict_properties(folder_path, output_csv)

  _check_ver(cls_, v)  # Check version of any subclasses too.


CHGNet v0.3.0 initialized with 412,525 parameters
CHGNet will run on cpu


  struct = parser.parse_structures(primitive=primitive)[0]


Error processing geller-wernick_supercell_virtual_0_stropt.cif: Buffer dtype mismatch, expected 'const int64_t' but got 'long'


  struct = parser.parse_structures(primitive=primitive)[0]


Error processing geller-wernick_supercell_virtual_1_stropt.cif: Buffer dtype mismatch, expected 'const int64_t' but got 'long'
Error processing geller-wernick_supercell_virtual_2_stropt.cif: Buffer dtype mismatch, expected 'const int64_t' but got 'long'
Error processing geller-wernick_supercell_virtual_3_stropt.cif: Buffer dtype mismatch, expected 'const int64_t' but got 'long'
Error processing geller-wernick_supercell_virtual_4_stropt.cif: Buffer dtype mismatch, expected 'const int64_t' but got 'long'


  struct = parser.parse_structures(primitive=primitive)[0]


Error processing geller-wernick_supercell_virtual_5_stropt.cif: Buffer dtype mismatch, expected 'const int64_t' but got 'long'
Results saved to test.csv


## Postprocessing

In [6]:
import pandas as pd
import numpy as np

def expectation_value(csv_path, temperature, properties):
    """
    Calculate Boltzmann-weighted expectation values for specified properties
    
    Args:
        csv_path (str): Path to CSV file
        temperature (float): Temperature in Kelvin
        properties (list): List of column names to calculate expectation values for
        
    Returns:
        tuple: (DataFrame, total_weights, dictionary of expectation values)
    """
    # Read the CSV file
    df = pd.read_csv(csv_path)
    
    # Boltzmann constant in eV/K = 0.00008617
    k_B = 0.00008617
    
    # Calculate weights using the Boltzmann distribution formula
    df['weights'] = np.exp(-df['Total Energy (eV)']/(k_B * temperature))
    
    # Calculate total weights
    total_weights = df['weights'].sum()
    
    # Dictionary to store expectation values
    expectation_values = {}
    
    # Calculate weighted properties and their expectation values
    for prop in properties:
        weighted_col_name = f'weighted_{prop}'
        df[weighted_col_name] = (df[prop] * df['weights']) / total_weights
        expectation_values[prop] = df[weighted_col_name].sum()
    
    return df, expectation_values

# Example usage
if __name__ == "__main__":
    # Replace with your CSV file path
    csv_file = "400-2x.csv"
    temperature = 300  # Set your desired temperature in Kelvin
    
    # List of properties to calculate expectation values for
    properties_list = ["Density", "GLLB-SC Bandgap (eV)", "HSE Bandgap (eV)"]
    
    df, exp_values = expectation_value(csv_file, temperature, properties_list)
    
    print("First few rows of the dataframe with weights and weighted properties:")
    print(df.head())
    print("\nTotal weights:", total_weights)
    print("\nExpectation values:")
    for prop, value in exp_values.items():
        print(f"{prop}: {value:.6f}")

First few rows of the dataframe with weights and weighted properties:
                                              File  Total Energy (eV)  \
0   geller-wernick_supercell_virtual_78_stropt.cif          -3.581327   
1  geller-wernick_supercell_virtual_222_stropt.cif          -3.584575   
2   geller-wernick_supercell_virtual_23_stropt.cif          -3.585701   
3  geller-wernick_supercell_virtual_351_stropt.cif          -3.585306   
4  geller-wernick_supercell_virtual_140_stropt.cif          -3.586442   

    Density  PBE Bandgap (eV)  GLLB-SC Bandgap (eV)  HSE Bandgap (eV)  \
0  7.002917         -0.007729              0.569314          0.004754   
1  7.033245         -0.007371              0.562357          0.012788   
2  6.914358         -0.007772              0.523779         -0.008054   
3  7.063052         -0.007512              0.565617          0.016543   
4  7.065182         -0.007460              0.569464          0.021321   

   SCAN Bandgap (eV)       weights  weighted_Density

In [5]:
exp_values

{'Density': 7.022685680031463,
 'GLLB-SC Bandgap (eV)': 0.5593840296483945,
 'HSE Bandgap (eV)': 0.014280765195099993}

In [9]:
import pandas as pd
import numpy as np

def expectation_value(csv_path, temperature):
    """
    Calculate Boltzmann-weighted expectation values for all numeric properties
    
    Args:
        csv_path (str): Path to CSV file
        temperature (float): Temperature in Kelvin
        
    Returns:
        tuple: (DataFrame, dictionary of expectation values)
    """
    # Read the CSV file
    df = pd.read_csv(csv_path)
    
    # Boltzmann constant in eV/K = 0.00008617
    k_B = 0.00008617
    
    # Calculate weights using the Boltzmann distribution formula
    df['weights'] = np.exp(-df['Total Energy (eV)']/(k_B * temperature))
    
    # Calculate total weights
    total_weights = df['weights'].sum()
    
    # Dictionary to store expectation values
    expectation_values = {}
    
    # Get all numeric columns except 'Total Energy (eV)' and 'weights'
    excluded_cols = ['File', 'Total Energy (eV)', 'weights']
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    properties = [col for col in numeric_cols if col not in excluded_cols]
    
    # Calculate weighted properties and their expectation values
    for prop in properties:
        weighted_col_name = f'weighted_{prop}'
        df[weighted_col_name] = (df[prop] * df['weights']) / total_weights
        expectation_values[prop] = df[weighted_col_name].sum()
    
    return df, expectation_values

# Example usage
if __name__ == "__main__":
    # Replace with your CSV file path
    csv_file = "400-2x.csv"
    temperature = 300  # Set your desired temperature in Kelvin
    
    df, exp_values = expectation_value(csv_file, temperature)
    
    print("First few rows of the dataframe with weights and weighted properties:")
    print(df.head())
    print("\nExpectation values:")
    for prop, value in exp_values.items():
        print(f"{prop}: {value:.6f}")

First few rows of the dataframe with weights and weighted properties:
                                              File  Total Energy (eV)  \
0   geller-wernick_supercell_virtual_78_stropt.cif          -3.581327   
1  geller-wernick_supercell_virtual_222_stropt.cif          -3.584575   
2   geller-wernick_supercell_virtual_23_stropt.cif          -3.585701   
3  geller-wernick_supercell_virtual_351_stropt.cif          -3.585306   
4  geller-wernick_supercell_virtual_140_stropt.cif          -3.586442   

    Density  PBE Bandgap (eV)  GLLB-SC Bandgap (eV)  HSE Bandgap (eV)  \
0  7.002917         -0.007729              0.569314          0.004754   
1  7.033245         -0.007371              0.562357          0.012788   
2  6.914358         -0.007772              0.523779         -0.008054   
3  7.063052         -0.007512              0.565617          0.016543   
4  7.065182         -0.007460              0.569464          0.021321   

   SCAN Bandgap (eV)       weights  weighted_Density

In [10]:
exp_values

{'Density': 7.022685680031463,
 'PBE Bandgap (eV)': -0.007460182489153363,
 'GLLB-SC Bandgap (eV)': 0.5593840296483945,
 'HSE Bandgap (eV)': 0.014280765195099993,
 'SCAN Bandgap (eV)': -0.00607662847607184}

In [12]:
from virp.matprop import expectation_values

# Example usage
if __name__ == "__main__":
    # Replace with your CSV file path
    csv_file = "400-2x.csv"
    temperature = 300  # Set your desired temperature in Kelvin
    
    df, exp_values = expectation_values(csv_file, temperature)
    
    print("First few rows of the dataframe with weights and weighted properties:")
    print(df.head())
    print("\nExpectation values:")
    for prop, value in exp_values.items():
        print(f"{prop}: {value:.6f}")

First few rows of the dataframe with weights and weighted properties:
                                              File  Total Energy (eV)  \
0   geller-wernick_supercell_virtual_78_stropt.cif          -3.581327   
1  geller-wernick_supercell_virtual_222_stropt.cif          -3.584575   
2   geller-wernick_supercell_virtual_23_stropt.cif          -3.585701   
3  geller-wernick_supercell_virtual_351_stropt.cif          -3.585306   
4  geller-wernick_supercell_virtual_140_stropt.cif          -3.586442   

    Density  PBE Bandgap (eV)  GLLB-SC Bandgap (eV)  HSE Bandgap (eV)  \
0  7.002917         -0.007729              0.569314          0.004754   
1  7.033245         -0.007371              0.562357          0.012788   
2  6.914358         -0.007772              0.523779         -0.008054   
3  7.063052         -0.007512              0.565617          0.016543   
4  7.065182         -0.007460              0.569464          0.021321   

   SCAN Bandgap (eV)       weights  weighted_Density