In [1]:
# Cell 1: Setup (RUN THIS ONCE PER NOTEBOOK SESSION)
import sys
import os
import pandas as pd

# The notebook is expected to be started from the project root, so the current
# working directory is taken as the root of the project tree.
project_root = os.getcwd()
src_path = os.path.join(project_root, 'src')

# Put the local source directory at the front of the import search path so the
# parser package can be imported. Skip this when the entry is already present,
# e.g. when this cell is executed a second time in the same session.
already_on_path = src_path in sys.path
if not already_on_path:
    sys.path.insert(0, src_path)
    print(f"Added to sys.path: {src_path}")

# Parser modules are imported from the `parsers` package, which is resolved
# through the `src` directory placed on sys.path earlier in this cell.
# Extend this list as new parser modules are written.
from parsers import band_parser
from parsers import scalar_property_parser
from parsers import elastic_parser
from parsers import optical_parser
from parsers import dos_parser
# Still disabled (the other previously commented-out imports are now active above):
# from parsers import outcar_parser

print("Setup complete. Parser modules can be imported.")
# Optional: uncomment so edits to the parser modules are picked up without
# restarting the kernel.
# %load_ext autoreload
# %autoreload 2

Added to sys.path: c:\Users\E1009134\OneDrive - ams OSRAM\Portfolio_ San\Cs2NaTlBr6-xClx\src
Setup complete. Parser modules can be imported.


In [2]:
# --- Cell 2: Load File Inventory (RUN THIS ONCE AFTER SETUP) ---
# Reads data/interim/file_inventory.csv (relative to project_root) into
# `inventory_df`. Whenever loading fails, `inventory_df` is bound to an empty
# DataFrame so that later cells can test `.empty` instead of hitting a NameError.
try:
    inventory_path = os.path.join(project_root, 'data', 'interim', 'file_inventory.csv')
    inventory_df = pd.read_csv(inventory_path)
    print(f"Successfully loaded inventory from: {inventory_path}")
    if not inventory_df.empty:
        # Show a few rows as a quick sanity check of what was loaded
        print("\nInventory sample (first 3 rows):")
        print(inventory_df.head(3))
    else:
        print("Warning: Inventory DataFrame is empty.")
except FileNotFoundError:
    # The inventory CSV is produced by an earlier pipeline step
    print(f"ERROR: file_inventory.csv not found at {inventory_path}.")
    print("Please ensure '01_unzip_and_inventory.py' has been run successfully.")
    inventory_df = pd.DataFrame()
except Exception as load_error:
    # Any other failure (parse error, permissions, ...) is reported, not raised
    print(f"An error occurred loading the inventory: {load_error}")
    inventory_df = pd.DataFrame()

Successfully loaded inventory from: c:\Users\E1009134\OneDrive - ams OSRAM\Portfolio_ San\Cs2NaTlBr6-xClx\data\interim\file_inventory.csv

Inventory sample (first 3 rows):
                                  file_path_absolute  \
0  C:\Users\E1009134\OneDrive - ams OSRAM\Portfol...   
1  C:\Users\E1009134\OneDrive - ams OSRAM\Portfol...   
2  C:\Users\E1009134\OneDrive - ams OSRAM\Portfol...   

     file_path_relative_to_content_root  material_x_value material_formula  \
0              Cs2NaTlCl6\BAND\BAND.dat                 6       Cs2NaTlCl6   
1              Cs2NaTlCl6\BAND\BAND_GAP                 6       Cs2NaTlCl6   
2  Cs2NaTlCl6\BAND\HIGH_SYMMETRY_POINTS                 6       Cs2NaTlCl6   

  potential_property_type             file_name  
0                    BAND              BAND.dat  
1                    BAND              BAND_GAP  
2                    BAND  HIGH_SYMMETRY_POINTS  


In [3]:
# --- Cell 3: Function to Get File Path from Inventory (HELPER FUNCTION) ---
def get_file_path_from_inventory(x_val, prop_type, file_name_target, inventory=None):
    """
    Search the inventory DataFrame for a specific file and return its absolute path.

    Parameters
    ----------
    x_val : value compared with the 'material_x_value' column (e.g., 0, 1, ..., 6).
    prop_type : str
        Value compared with the 'potential_property_type' column
        (e.g., 'BAND', 'HSE', 'OPTIC'). The comparison is case-insensitive.
    file_name_target : str
        Exact 'file_name' to look for (e.g., 'REFORMATTED_BAND.dat', 'BAND_GAP').
    inventory : pandas.DataFrame, optional
        Inventory to search. When omitted, the module-level ``inventory_df`` is
        looked up at call time, so re-running the inventory-loading cell is
        picked up without having to re-define this function.

    Returns
    -------
    str or None
        The 'file_path_absolute' value of the first matching row if that path
        exists on disk; otherwise None (a diagnostic message is printed).
    """
    if inventory is None:
        # A default of `inventory=inventory_df` would be evaluated once, when the
        # function is defined, and would keep pointing at a stale DataFrame
        # after the inventory is reloaded. Resolve the global lazily instead.
        inventory = globals().get('inventory_df')

    if inventory is None or inventory.empty:
        print("Inventory is empty. Cannot get file path.")
        return None

    # prop_type is matched case-insensitively; x value and file name must match exactly.
    filtered_files = inventory[
        (inventory['material_x_value'] == x_val) &
        (inventory['potential_property_type'].str.upper() == prop_type.upper()) &
        (inventory['file_name'] == file_name_target)
    ]

    if filtered_files.empty:
        print(f"File not found in inventory for: x={x_val}, prop_type='{prop_type}', file_name='{file_name_target}'")
        return None
    elif len(filtered_files) > 1:
        print(f"Warning: Multiple files found for: x={x_val}, prop_type='{prop_type}', file_name='{file_name_target}'. Using the first one.")
        # You might want to inspect 'filtered_files' here to see why there are duplicates.

    # Get the absolute path from the inventory
    file_path = filtered_files.iloc[0]['file_path_absolute']

    # A missing cell is read back as NaN (a float); os.path.exists would raise
    # TypeError on it, so treat any non-string path as "not on disk".
    if isinstance(file_path, str) and os.path.exists(file_path):
        return file_path

    print(f"ERROR: Path from inventory does not exist on disk: {file_path}")
    return None


In [5]:
# --- Cell 4: Test a SPECIFIC PARSER with a file found via Inventory ---
# ***** THIS IS THE CELL YOU WILL MODIFY MOST OFTEN FOR TESTING *****
# Looks up one file through the inventory helper, runs the selected parser on
# it, and prints a summary suited to the type of object the parser returned.

# 1. DEFINE WHAT YOU WANT TO TEST:
x_to_test = 6  # e.g., 0 for Cs2NaTlBr6, 6 for Cs2NaTlCl6, 3 for Cl3Br3 etc.
property_folder_to_test = 'BAND'  # e.g., 'BAND', 'HSE', 'OPTIC', 'DOS', 'SR', 'STRUC'
file_name_to_test = 'BAND_GAP' # e.g., 'REFORMATTED_BAND.dat', 'KLABELS', 'BAND_GAP', 'ABSORPTION.dat'
parser_function_to_test = band_parser.parse_band_gap_summary_vaspkit # CHANGE THIS to test different parsers
# Other parsers that can be assigned to parser_function_to_test:
#   elastic_parser.parse_elastic_info_simple
#   optical_parser.parse_vaspkit_optic_file
#   dos_parser.parse_dos

# 2. GET THE FILE PATH using our helper function
print(f"\n--- Testing: x={x_to_test}, Folder='{property_folder_to_test}', File='{file_name_to_test}' ---")
test_file_path = get_file_path_from_inventory(
    x_val=x_to_test,
    prop_type=property_folder_to_test,
    file_name_target=file_name_to_test
)

# 3. RUN THE PARSER AND INSPECT if the path was found
if test_file_path:
    print(f"Attempting to parse: {test_file_path}")
    # Call the selected parser function
    parsed_result = parser_function_to_test(test_file_path)

    # Inspect the result; the summary depends on the returned type
    if isinstance(parsed_result, pd.DataFrame):
        if not parsed_result.empty:
            print("\nSuccessfully parsed! DataFrame info:")
            parsed_result.info()
            print("\nDataFrame head:")
            print(parsed_result.head(20))
            print(parsed_result.shape)
            # Add more specific checks based on expected DataFrame structure
        else:
            print("\nParser returned an empty DataFrame. Check parser's internal print/error messages.")
    elif isinstance(parsed_result, dict):
        print("\nSuccessfully parsed! Dictionary result:")
        print(parsed_result)
    elif isinstance(parsed_result, list):
        print("\nSuccessfully parsed! List result (first 5 items):")
        print(parsed_result[:5])
        # A plain list has no `.shape` attribute; report its length instead
        print(len(parsed_result))
    else:
        print("\nParser returned an unexpected data type or None.")
        print(parsed_result)
else:
    print("Skipping parser test as file path was not retrieved.")


--- Testing: x=6, Folder='BAND', File='BAND_GAP' ---
Attempting to parse: C:\Users\E1009134\OneDrive - ams OSRAM\Portfolio_ San\Cs2NaTlBr6-xClx\data\raw\HBD_Mixed_anions\Cs2NaTlCl6\BAND\BAND_GAP
  DEBUG -> Matched 'gapfile_band_character' with value: Direct
  DEBUG -> Matched 'gapfile_band_gap_ev' with value: 1.7502
  DEBUG -> Matched 'gapfile_vbm_ev' with value: -0.5668
  DEBUG -> Matched 'gapfile_cbm_ev' with value: 1.1834
  DEBUG -> Matched 'gapfile_fermi_energy_ev' with value: -0.335
  DEBUG -> Matched 'gapfile_homo_lumo_bands' with value: None
DEBUG: Final parsed dictionary for C:\Users\E1009134\OneDrive - ams OSRAM\Portfolio_ San\Cs2NaTlBr6-xClx\data\raw\HBD_Mixed_anions\Cs2NaTlCl6\BAND\BAND_GAP: {'gapfile_band_character': 'Direct', 'gapfile_band_gap_ev': 1.7502, 'gapfile_vbm_ev': -0.5668, 'gapfile_cbm_ev': 1.1834, 'gapfile_fermi_energy_ev': -0.335, 'gapfile_homo_band_idx': 160, 'gapfile_lumo_band_idx': 161}

Successfully parsed! Dictionary result:
{'gapfile_band_character': 'Di