In [3]:
import os
import rarfile # For handling .rar archives
from pymatgen.core import Structure
from pymatgen.io.vasp.inputs import Poscar # For parsing POSCAR content
from typing import List, Dict, Optional

# --- Parse the text content of a single POSCAR file into a dictionary ---
def parse_single_poscar_content(poscar_string_content: str, original_filename: str) -> Optional[Dict]:
    """
    Parse the text of a single POSCAR file into a plain dictionary.

    Args:
        poscar_string_content (str): The string content of the POSCAR file.
        original_filename (str): The original filename; stored in the result
            and used for context in error messages.

    Returns:
        Optional[Dict]: A dictionary with the keys ``filename_in_archive``,
        ``comment``, ``formula``, ``natoms``, ``lattice_vectors``, ``sites``,
        ``selective_dynamics``, ``velocities`` and ``pymatgen_structure``.
        None is returned when the content is empty/whitespace-only or when
        parsing fails; in the failure case the error is printed, not raised.
    """
    def _as_plain_list(values):
        # NOTE(review): pymatgen appears to hand velocities back as nested
        # Python lists in some versions and as a numpy array in others --
        # confirm against the installed version. Calling `.tolist()`
        # unconditionally would raise AttributeError on the list form and make
        # an otherwise valid POSCAR look unparsable, so accept both shapes.
        if values is None:
            return None
        if hasattr(values, "tolist"):
            return values.tolist()
        return [row.tolist() if hasattr(row, "tolist") else list(row) for row in values]

    try:
        if not poscar_string_content.strip():
            # Nothing to parse; stay silent so empty archive members do not
            # flood the output.
            return None

        # `from_str` is the current pymatgen spelling; `from_string` is the
        # older, deprecated one. Prefer the new name and fall back so the
        # function works on both old and new installations.
        parse_poscar = getattr(Poscar, "from_str", None) or Poscar.from_string
        poscar_obj = parse_poscar(poscar_string_content)
        structure = poscar_obj.structure

        sites = [
            {
                "species": site.species_string,
                "xyz_frac": site.frac_coords.tolist(),
                "xyz_cart": site.coords.tolist(),
            }
            for site in structure.sites
        ]

        return {
            "filename_in_archive": original_filename,
            "comment": poscar_obj.comment,
            "formula": structure.composition.reduced_formula,
            "natoms": len(structure),
            "lattice_vectors": structure.lattice.matrix.tolist(),
            "sites": sites,
            "selective_dynamics": poscar_obj.selective_dynamics,
            "velocities": _as_plain_list(poscar_obj.velocities),
            # The live Structure object is kept for callers that want to
            # continue working with pymatgen directly.
            "pymatgen_structure": structure,
        }
    except Exception as e:
        # Best-effort by design: one malformed file must not abort a batch run.
        print(f"Error parsing content from '{original_filename}': {e}")
        return None

# --- Extract and parse POSCAR files from a .rar archive ---
def process_rar_archive_for_poscars(archive_filepath: str, parse_only_first_poscar: bool = False) -> List[Dict]:
    """
    Extract and parse POSCAR-like files from a .rar archive.

    Directory entries are skipped. A member is attempted when its path
    contains "poscar" or "contcar" (case-insensitive), ends with ".vasp" or
    ".pos", or when its base name contains no dot at all.

    All problems (missing archive, unreadable archive, unreadable member) are
    reported with print() and never raised to the caller.

    Args:
        archive_filepath (str): Path to the .rar file.
        parse_only_first_poscar (bool): If True, stops after successfully
            parsing the first POSCAR found. Members that fail to extract or
            parse do not count, so the scan continues past them.

    Returns:
        List[Dict]: One dictionary per successfully parsed member, in archive
        order, as produced by parse_single_poscar_content. Empty when the
        archive is missing, unreadable, or yields no parsable member.
    """
    parsed_structures_data: List[Dict] = []

    if not os.path.exists(archive_filepath):
        print(f"Error: Archive file not found at '{archive_filepath}'")
        return parsed_structures_data

    # The extraction-backend hint is printed at most once; an archive can
    # hold tens of thousands of members and each may fail the same way.
    backend_hint_shown = False

    try:
        with rarfile.RarFile(archive_filepath, 'r') as rf:
            # Build the member list once and reuse it for the count and loop.
            member_infos = rf.infolist()
            print(f"Opened archive: '{archive_filepath}'. Contains {len(member_infos)} total entries.")

            for member_info in member_infos:
                if member_info.is_dir():
                    continue

                filename_in_archive = member_info.filename
                lowered_name = filename_in_archive.lower()
                is_potential_poscar = (
                    "poscar" in lowered_name
                    or "contcar" in lowered_name
                    or lowered_name.endswith(('.vasp', '.pos'))
                    # Extension-less files (e.g. a bare "POSCAR") are candidates too.
                    or '.' not in os.path.basename(filename_in_archive)
                )
                if not is_potential_poscar:
                    continue

                print(f"Attempting to extract and parse: '{filename_in_archive}'")

                try:
                    file_content_bytes = rf.read(member_info)
                    try:
                        file_content_string = file_content_bytes.decode('utf-8')
                    except UnicodeDecodeError:
                        # latin-1 maps every byte, so this fallback cannot fail.
                        file_content_string = file_content_bytes.decode('latin-1', errors='ignore')

                    parsed_data = parse_single_poscar_content(file_content_string, filename_in_archive)
                except rarfile.Error as e:
                    print(f"An unexpected error occurred while processing entry '{filename_in_archive}': {e}")
                    # NOTE(review): per-member rarfile errors such as a short
                    # read are presumably caused by a missing or incompatible
                    # external extraction tool -- confirm. The outer handler's
                    # hint is never reached for them, so surface it here.
                    if not backend_hint_shown:
                        print("Please ensure the 'unrar' utility is installed and accessible.")
                        backend_hint_shown = True
                    continue
                except Exception as e:
                    print(f"An unexpected error occurred while processing entry '{filename_in_archive}': {e}")
                    continue

                if not parsed_data:
                    continue

                parsed_structures_data.append(parsed_data)
                if parse_only_first_poscar:
                    print(f"Successfully parsed the first POSCAR ('{filename_in_archive}'). Stopping as requested.")
                    return parsed_structures_data

    except rarfile.NeedFirstVolume:
        print(f"Error: '{archive_filepath}' appears to be part of a multi-volume RAR archive.")
    except rarfile.BadRarFile:
        print(f"Error: '{archive_filepath}' is not a valid RAR file or is corrupted.")
    except Exception as e:
        print(f"Failed to open or process RAR archive '{archive_filepath}': {e}")
        print("Please ensure the 'unrar' utility is installed and accessible.")

    return parsed_structures_data


if __name__ == '__main__':
    # Demo driver: open one archive and report the first POSCAR that parses.
    # IMPORTANT: point this at the .rar archive on your own machine.
    your_rar_file_path = "/Users/abiralshakya/Downloads/total_poscar_files.rar"

    # Only run when the path exists and is not the untouched placeholder.
    if os.path.exists(your_rar_file_path) and your_rar_file_path != "path/to/your/actual/file.rar":
        print(f"--- Processing YOUR RAR file ('{your_rar_file_path}') to get ONLY THE FIRST POSCAR structure ---")

        # With parse_only_first_poscar=True the result holds at most one entry.
        first_structure_info_list = process_rar_archive_for_poscars(
            your_rar_file_path,
            parse_only_first_poscar=True,
        )

        if not first_structure_info_list:
            print("No POSCAR structures were successfully parsed from the archive, or the first attempted file was not a valid POSCAR.")
        else:
            info = first_structure_info_list[0]
            print("\n--- Successfully Parsed First POSCAR From Your Archive ---")
            print(f"Original Filename in Archive: {info['filename_in_archive']}")
            print(f"Comment: {info['comment']}")
            print(f"Formula: {info['formula']}")
            print(f"Number of atoms: {info['natoms']}")
            print(f"Lattice Vectors (first vector): {info['lattice_vectors'][0]}")
            print(f"First site: {info['sites'][0]['species']} @ {info['sites'][0]['xyz_frac']}")
            # Selective-dynamics flags are optional; show the first atom's when present.
            if info.get('selective_dynamics'):
                print(f"Selective dynamics present for first atom (example): {info['selective_dynamics'][0]}")
    else:
        # Creating a sample .rar programmatically is not practical, so just
        # tell the user what to change and stop.
        print("Please update the 'your_rar_file_path' variable in the script with the correct path to your .rar file.")
        print("Skipping execution as a valid .rar path is not provided.")

--- Processing YOUR RAR file ('/Users/abiralshakya/Downloads/total_poscar_files.rar') to get ONLY THE FIRST POSCAR structure ---
Opened archive: '/Users/abiralshakya/Downloads/total_poscar_files.rar'. Contains 33808 total entries.
Attempting to extract and parse: 'total_poscar_files/1.POSCAR'
An unexpected error occurred while processing entry 'total_poscar_files/1.POSCAR': Failed the read enough data: req=2336 got=51
Attempting to extract and parse: 'total_poscar_files/10000.POSCAR'
An unexpected error occurred while processing entry 'total_poscar_files/10000.POSCAR': Failed the read enough data: req=438 got=51
Attempting to extract and parse: 'total_poscar_files/100001.POSCAR'
An unexpected error occurred while processing entry 'total_poscar_files/100001.POSCAR': Failed the read enough data: req=993 got=51
Attempting to extract and parse: 'total_poscar_files/100002.POSCAR'
An unexpected error occurred while processing entry 'total_poscar_files/100002.POSCAR': Failed the read enough d

KeyboardInterrupt: 