In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import tqdm

# Import v2 modules with abbreviated names
import src.data_management_v2 as dm
from src.structure_manager_v2 import StructureManager
import src.rdf_v2 as rdf
import src.ensembles_v2 as ens
import src.Birch_Murnaghan2 as bm
import src.entropy as ent
import quests

# Set random seed for reproducibility
np.random.seed(42)

# Collect one structure ID per POSCAR file found in the data folder.
folder_path = "data/vasps"
vasp_suffix = '.vasp'
vasp_files = [f for f in os.listdir(folder_path) if f.endswith(vasp_suffix)]

# Strip only the trailing extension. str.replace() would also delete any
# '.vasp' that appears earlier in a filename, producing an ID that no longer
# matches the file on disk.
struct_ids = [f[:-len(vasp_suffix)] for f in vasp_files]

# Plain string sort: IDs come out in lexicographic order
# (e.g. 'SiO2_24_10' sorts before 'SiO2_24_2').
struct_ids.sort()

['SiO2_24_0',
 'SiO2_24_1',
 'SiO2_24_10',
 'SiO2_24_100',
 'SiO2_24_1000',
 'SiO2_24_1001',
 'SiO2_24_1002',
 'SiO2_24_1003',
 'SiO2_24_1004',
 'SiO2_24_1005',
 'SiO2_24_1006',
 'SiO2_24_1007',
 'SiO2_24_1009',
 'SiO2_24_101',
 'SiO2_24_1010',
 'SiO2_24_1011',
 'SiO2_24_1012',
 'SiO2_24_1013',
 'SiO2_24_1014',
 'SiO2_24_1015',
 'SiO2_24_1016',
 'SiO2_24_1017',
 'SiO2_24_1018',
 'SiO2_24_1019',
 'SiO2_24_102',
 'SiO2_24_1020',
 'SiO2_24_1021',
 'SiO2_24_1022',
 'SiO2_24_1023',
 'SiO2_24_1024',
 'SiO2_24_1025',
 'SiO2_24_1026',
 'SiO2_24_1027',
 'SiO2_24_1028',
 'SiO2_24_1029',
 'SiO2_24_103',
 'SiO2_24_1030',
 'SiO2_24_1031',
 'SiO2_24_1032',
 'SiO2_24_1033',
 'SiO2_24_1034',
 'SiO2_24_1035',
 'SiO2_24_1036',
 'SiO2_24_1037',
 'SiO2_24_1038',
 'SiO2_24_1039',
 'SiO2_24_104',
 'SiO2_24_1041',
 'SiO2_24_1042',
 'SiO2_24_1043',
 'SiO2_24_1044',
 'SiO2_24_1045',
 'SiO2_24_1046',
 'SiO2_24_1047',
 'SiO2_24_1048',
 'SiO2_24_1049',
 'SiO2_24_105',
 'SiO2_24_1050',
 'SiO2_24_1051',
 'SiO2_24_1

In [6]:
# Populate every per-structure dataset from the POSCAR files.
# The smoothing step runs last because it works on RDFs that were already computed.
vasp_dir = "data/vasps"
radial_window = (0, 10)

# 1. Energies - read from the VASP files
dm.populate_energies(struct_ids, folder_path=vasp_dir)

# 2. Structures - VASP -> pymatgen structures
dm.populate_structures(struct_ids, folder_path=vasp_dir)

# 3. Densities - number densities
dm.populate_densities(struct_ids, folder_path=vasp_dir)

# 4. RDFs - radial distribution functions
dm.populate_rdfs(
    struct_ids,
    folder_path=vasp_dir,
    r_range=radial_window,
    bins=500,
)

# 5. Counting functions
dm.populate_counting_functions(
    struct_ids,
    folder_path=vasp_dir,
    r_range=radial_window,
    bins=200,
)

# 6. Qn distributions - silicon coordination environments
dm.populate_qn_distributions(struct_ids, folder_path=vasp_dir)

# 7. Bond angle distributions - Si-O-Si angles
dm.populate_bond_angle_distributions(
    struct_ids,
    folder_path=vasp_dir,
    angle_range=(60, 180),
    bins=120,
)

# 8. Ring statistics
dm.populate_ring_statistics(struct_ids, folder_path=vasp_dir, max_ring_size=20)

# 9. Smooth RDFs (optional; overwrite=False is passed, and the logged summary
#    reports already-existing outputs as "Skipped")
dm.smooth_all_rdfs(w=0.03, overwrite=False)


Populating energies: 100%|██████████| 3125/3125 [00:05<00:00, 541.93it/s]
Populating structures: 100%|██████████| 3125/3125 [00:04<00:00, 649.02it/s]
Calculating densities: 100%|██████████| 3125/3125 [00:04<00:00, 761.08it/s]
Populating RDFs: 100%|██████████| 3125/3125 [11:15<00:00,  4.63it/s]
Populating counting functions: 100%|██████████| 3125/3125 [09:06<00:00,  5.72it/s]
Calculating Qn distributions: 100%|██████████| 3125/3125 [00:44<00:00, 70.08it/s]
Calculating bond angle distributions: 100%|██████████| 3125/3125 [00:30<00:00, 103.34it/s]
Calculating ring statistics: 100%|██████████| 3125/3125 [00:40<00:00, 77.27it/s]


Found 12500 RDF files to smooth


Smoothing RDFs: 100%|██████████| 12500/12500 [06:05<00:00, 34.23it/s]


Smoothing complete!
Processed: 12500
Skipped (already exist): 0
Errors: 0
Smoothed RDFs saved to: data/smooth_rdfs





In [7]:
import shutil

def copy_and_rename(source_path, destination_path):
    """
    Copy a file from source to destination using shutil.copy2.

    Failures are reported on stdout rather than raised, so a batch loop can
    keep going when individual files are missing.

    Parameters:
    -----------
    source_path : str
        Path to the source file
    destination_path : str
        Path where the file should be copied (including new filename)

    Returns:
    --------
    bool
        True if the file was copied, False if the copy failed for any reason.
    """
    try:
        shutil.copy2(source_path, destination_path)
    except FileNotFoundError as e:
        # copy2 raises FileNotFoundError for a missing source AND for a missing
        # destination directory. Only the former is a harmless "skip"; the
        # latter must not be reported as a missing source.
        if os.path.exists(source_path):
            print(f"Error: {e}")
        else:
            print(f"Skipping: {source_path} not found")
        return False
    except Exception as e:
        print(f"Error: {e}")
        return False

    print(f"Copied: {source_path} -> {destination_path}")
    return True


# Copy the numbered POSCAR files out of 3k_poscar into the shared vasps folder,
# giving each one the SiO2_24_<n> name along the way.
source_dir = "/Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar"
dest_dir = "/Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps"

# The destination folder has to exist before anything is copied into it
os.makedirs(dest_dir, exist_ok=True)

# Build the (source, destination) pairs first, then copy them in index order;
# indices with no source file are reported and skipped by copy_and_rename.
copy_jobs = [
    (f"{source_dir}/{i}.vasp", f"{dest_dir}/SiO2_24_{i}.vasp")
    for i in range(3000)
]
for source_file, dest_file in copy_jobs:
    copy_and_rename(source_file, dest_file)


Copied: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar/0.vasp -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_24_0.vasp
Copied: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar/1.vasp -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_24_1.vasp
Copied: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar/2.vasp -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_24_2.vasp
Copied: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar/3.vasp -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_24_3.vasp
Copied: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar/4.vasp -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_24_4.vasp
Copied: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar/

In [None]:
def convert_results_to_vasp(results_file_path, output_path):
    """
    Convert a QE results file to VASP POSCAR format.

    Three pieces of information are read from the results file:

    * the energy, taken from the first line containing "Total Energy"
      (the text after the first colon on that line);
    * the cell, taken from the three lines after the first line containing
      "CELL_PARAMETERS";
    * the atoms, taken from the lines after the first line containing
      "ATOMIC_POSITIONS", up to a blank line, a line starting with '=',
      or the end of the file. Lines with fewer than four fields are ignored.

    Everything is parsed and validated before the output file is opened, so
    a malformed results file never leaves an empty or partial .vasp file.

    Parameters:
    -----------
    results_file_path : str
        Path to the results file (e.g., results_SiO2_36_0_iteration3)
    output_path : str
        Path where the .vasp file should be saved

    Raises:
    -------
    ValueError
        If the energy, the cell block or the atomic positions are missing
        or cannot be parsed.

    Notes:
    ------
    Lattice vectors and coordinates are written as read (scale factor 1.0,
    "Direct" coordinates); no unit conversion is performed.
    NOTE(review): this assumes the results file stores the cell in Angstrom
    and the positions as fractional coordinates - confirm against the QE output.
    """
    with open(results_file_path, 'r') as f:
        lines = f.readlines()

    def first_index_after(marker):
        """Return the index of the line following the first line containing marker."""
        for idx, text in enumerate(lines):
            if marker in text:
                return idx + 1
        raise ValueError(f"'{marker}' not found in {results_file_path}")

    # Extract energy (first line mentioning "Total Energy", value after the colon)
    energy = None
    for line in lines:
        if "Total Energy" in line:
            try:
                energy = float(line.split(':')[1].strip())
            except (IndexError, ValueError) as exc:
                raise ValueError(
                    f"Could not parse energy from {line.strip()!r} in {results_file_path}"
                ) from exc
            break
    if energy is None:
        raise ValueError(f"'Total Energy' not found in {results_file_path}")

    # Extract cell parameters (three rows directly below the header line)
    cell_start = first_index_after("CELL_PARAMETERS")
    cell_rows = [
        [float(x) for x in row.split()]
        for row in lines[cell_start:cell_start + 3]
    ]
    if len(cell_rows) != 3 or any(len(row) < 3 for row in cell_rows):
        raise ValueError(
            f"Expected 3 lattice vectors with 3 components each in {results_file_path}"
        )
    # Only the first three components of each row are ever written out
    cell_matrix = np.array([row[:3] for row in cell_rows])

    # Extract atomic positions
    pos_start = first_index_after("ATOMIC_POSITIONS")
    atoms = []
    positions = []
    for line in lines[pos_start:]:
        # A blank line or a '=' separator marks the end of the block
        if not line.strip() or line.startswith('='):
            break
        parts = line.split()
        if len(parts) >= 4:
            atoms.append(parts[0])
            positions.append([float(parts[1]), float(parts[2]), float(parts[3])])
    if not atoms:
        raise ValueError(f"No atomic positions found in {results_file_path}")

    positions = np.array(positions)

    # Count atoms by type, keeping species in order of first appearance
    # (dicts preserve insertion order)
    counts_by_type = {}
    for atom in atoms:
        counts_by_type[atom] = counts_by_type.get(atom, 0) + 1
    unique_atoms = list(counts_by_type)
    atom_counts = list(counts_by_type.values())

    # Write VASP file
    with open(output_path, 'w') as f:
        # Line 1: Comment with energy
        f.write(f"Generated from results file, Energy = {energy:.8f} Ry\n")

        # Line 2: Universal scaling factor
        f.write("1.0\n")

        # Lines 3-5: Lattice vectors
        for row in cell_matrix:
            f.write(f"   {row[0]:15.9f} {row[1]:15.9f} {row[2]:15.9f}\n")

        # Line 6: Atom types
        f.write(" ".join(unique_atoms) + "\n")

        # Line 7: Atom counts
        f.write(" ".join(map(str, atom_counts)) + "\n")

        # Line 8: Coordinate type
        f.write("Direct\n")

        # Lines 9+: Atomic positions, grouped by type so that the order
        # matches the species/count lines above
        for atom_type in unique_atoms:
            for atom, pos in zip(atoms, positions):
                if atom == atom_type:
                    f.write(f"    {pos[0]:15.12f} {pos[1]:15.12f} {pos[2]:15.12f}\n")

    print(f"Converted {results_file_path}")
    print(f"  -> {output_path}")
    print(f"  Energy: {energy:.8f} Ry")
    print(f"  Atoms: {dict(zip(unique_atoms, atom_counts))}")


# Convert the iteration3 results file of each SiO2_36 run (indices 0-500)
# into a .vasp file; indices with no results file are reported and skipped.
for i in range(501):
    results_file = f"/Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/copy_36/SiO2_36_{i}/results_SiO2_36_{i}_iteration3"
    output_file = f"/Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_36_{i}.vasp"

    if os.path.exists(results_file):
        convert_results_to_vasp(results_file, output_file)
    else:
        print(f"Skipping: {results_file} not found")

import shutil

def copy_and_rename(source_path, destination_path):
    """
    Copy a file from source to destination using shutil.copy2.

    Failures are reported on stdout rather than raised, so a batch loop can
    keep going when individual files are missing.

    Parameters:
    -----------
    source_path : str
        Path to the source file
    destination_path : str
        Path where the file should be copied (including new filename)

    Returns:
    --------
    bool
        True if the file was copied, False if the copy failed for any reason.
    """
    try:
        shutil.copy2(source_path, destination_path)
    except FileNotFoundError as e:
        # copy2 raises FileNotFoundError for a missing source AND for a missing
        # destination directory. Only the former is a harmless "skip"; the
        # latter must not be reported as a missing source.
        if os.path.exists(source_path):
            print(f"Error: {e}")
        else:
            print(f"Skipping: {source_path} not found")
        return False
    except Exception as e:
        print(f"Error: {e}")
        return False

    print(f"Copied: {source_path} -> {destination_path}")
    return True


# Copy the numbered POSCAR files out of 3k_poscar into the shared vasps folder,
# giving each one the SiO2_24_<n> name along the way.
source_dir = "/Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/3k_poscar"
dest_dir = "/Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps"

# The destination folder has to exist before anything is copied into it
os.makedirs(dest_dir, exist_ok=True)

# Build the (source, destination) pairs first, then copy them in index order;
# indices with no source file are reported and skipped by copy_and_rename.
copy_jobs = [
    (f"{source_dir}/{i}.vasp", f"{dest_dir}/SiO2_24_{i}.vasp")
    for i in range(3000)
]
for source_file, dest_file in copy_jobs:
    copy_and_rename(source_file, dest_file)


Converted /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/copy_36/SiO2_36_0/results_SiO2_36_0_iteration3
  -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_36_0.vasp
  Energy: -1563.59773174 Ry
  Atoms: {'O': 24, 'Si': 12}
Skipping: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/copy_36/SiO2_36_1/results_SiO2_36_1_iteration3 not found
Skipping: /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/copy_36/SiO2_36_2/results_SiO2_36_2_iteration3 not found
Converted /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/copy_36/SiO2_36_3/results_SiO2_36_3_iteration3
  -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/vasps/SiO2_36_3.vasp
  Energy: -1563.24453616 Ry
  Atoms: {'O': 24, 'Si': 12}
Converted /Users/raphaelzstone/Documents/CMU/Research/Ensemble-Analysis/data/copy_36/SiO2_36_4/results_SiO2_36_4_iteration3
  -> /Users/raphaelzstone/Documents/CMU/Research/Ensemble-An