In [1]:
import os 
from glob import glob
from ase.io import read,write
from ovito.io import import_file
import WarrenCowleyParameters as wc
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import warnings

def progress_print(i, total, comment=''):
    """
    Print progress percentage and current file number being processed.
    
    Parameters
    ----------
    i : int
        Current iteration index (0-based).
    total : int
        Total number of items to process.
    
    Returns
    -------
    None
        Prints progress to stdout with carriage return for dynamic updating.
    """
    progress = (i + 1) / total * 100
    print(f'{comment} Progress: {progress:.0f}% \t Processing File {i+1}/{total}', end='\r', flush=True)

simulation_folder = '/nfshome/winkelmann/running_sims/with_perfect_starting_cells/'

# truncate to only Li and X Atoms

In [2]:
#find Ovito files to process, excluding truncated files and files in the filelist
filelist_to_exclude = os.path.join(simulation_folder, './filelist_failed.txt')

# find all .XDATCAR files in all subfolders but not the base folder
pattern = os.path.join(simulation_folder, '*', '**', '*.XDATCAR')
path_all_ovito_files = sorted(glob(pattern, recursive=True))

user_defined_files_to_exclude = []
# exclude files on the basis of a filelist
if os.path.exists(filelist_to_exclude):
    with open(filelist_to_exclude, 'r') as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#'):
                user_defined_files_to_exclude.append(os.path.realpath(line))
else: user_defined_files_to_exclude = []  # If filelist does not exist, just use an empty list

# Find all truncated files and create list of their original files
path_truncated_files = [f for f in path_all_ovito_files if f.endswith('_truncated.XDATCAR')]
#[f.replace('_truncated.XDATCAR', '.XDATCAR') for f in truncated_files]

# Exclude both truncated files and their base files
path_all_ovito_files = [f for f in path_all_ovito_files if f not in path_truncated_files and 
                        f not in user_defined_files_to_exclude and 
                        f not in [t.replace('_truncated.XDATCAR', '.XDATCAR') for t in path_truncated_files]]
print(f"Found {len(path_all_ovito_files)} ovito files to process.")
print(f'excluded {len(path_truncated_files)} truncated files and their base files.')
print(f'excluded {len(user_defined_files_to_exclude)} user defined files from filelist.')

Found 0 ovito files to process.
excluded 233 truncated files and their base files.
excluded 55 user defined files from filelist.


In [None]:
#truncate the files to just Li and X
def truncate_ovito_files(path_ovito_file, wanted_species=['Li','X']):
    with open(path_ovito_file, 'r') as file:
        lines = file.readlines()
    step, time = [], []
    for line in lines:
        if 'step' in line:
            words = line.split(' ')
            step.append(int(words[1]))
            time.append(float(words[3]))
    
    all_frames = read(path_ovito_file, index=':')
    truncated_ovito_file = []
    for n,atoms in enumerate(all_frames):  
        indices = [i for i, atom in enumerate(atoms) if atom.symbol in wanted_species]
        new_atoms = atoms[indices]
        new_atoms.info = atoms.info
        new_atoms.write(filename=path_ovito_file.replace('.XDATCAR', f'_truncated.XDATCAR'),
                        format='vasp-xdatcar', append=True, label=f'step {step[n]} time {time[n]}')
        truncated_ovito_file.append(new_atoms)      
    return truncated_ovito_file

for i, path_ovito_file in enumerate(path_all_ovito_files):
    progress_print(i, len(path_all_ovito_files))
    truncate_ovito_files(path_ovito_file)

# calculate Warren Cowley and safe it to a CSV

In [4]:
# find all truncated files to process for Warren-Cowley analysis
pattern = os.path.join(simulation_folder, '*', '**', '*_truncated.XDATCAR')
path_truncated_files = sorted(glob(pattern, recursive=True))

# Exclude files that already have a corresponding _warren_cowley.csv
path_truncated_files = []
path_existing_wc_files = [f for f in path_truncated_files if os.path.exists(f.replace('_truncated.XDATCAR', '_warren_cowley.csv'))]
path_truncated_files = [f for f in path_truncated_files if f not in path_existing_wc_files]

print(f"Found {len(path_truncated_files)} truncated files to process.")
print(f"Skipped {len(path_existing_wc_files)} files that already have Warren-Cowley CSV files.")

Found 0 truncated files to process.
Skipped 0 files that already have Warren-Cowley CSV files.


In [5]:
def create_warren_cowley_csv(structure_file):
    try:
        with open(structure_file, 'r') as file:
            lines = file.readlines()
        step, time = [], []
        for line in lines:
            if 'step' in line:
                words = line.split(' ')
                step.append(int(words[1]))
                time.append(float(words[3]))
        ovito_file = import_file(structure_file)
        num_frames = ovito_file.source.num_frames
        if num_frames != len(step):
            warnings.warn(f"Number of frames does not match number of steps found. frames = {num_frames}, steps = {len(step)}")
        
        mod = wc.WarrenCowleyParameters(nneigh=[0, 6])
        ovito_file.modifiers.append(mod)
        
        data = ovito_file.compute(0)
        wc_names = list(data.attributes['Warren-Cowley parameters by particle name'][0].keys())
        
        wc_file = structure_file.replace('Ovito_truncated.XDATCAR', '_warren_cowley.csv')
        with open(wc_file, 'w') as f:
            f.write(f"{'step':<7},\t{'time':<15},\t{wc_names[0]:<20},\t{wc_names[1]:<20},\t{wc_names[2]:<20},\t{wc_names[3]:<20}\n")
        # Iterate through frames
        for frame in range(num_frames):
            data = ovito_file.compute(frame)
            wc_for_shells = data.attributes['Warren-Cowley parameters'][0]
            with open(wc_file, 'a') as f:
                f.write(
                    f"{step[frame]:7},\t{time[frame]:15},\t{wc_for_shells[0][0]:20},\t{wc_for_shells[0][1]:20},\t"
                    f"{wc_for_shells[1][0]:20},\t{wc_for_shells[1][1]:20}\n"
                )
    except Warning as w:
        warnings.warn(f"While processing {structure_file}: {w}")
    except Exception as e:
        warnings.warn(f"Error while processing {structure_file}: {e}")
    

for i, path in enumerate(path_truncated_files):
    progress_print(i, len(path_truncated_files))
    create_warren_cowley_csv(path)
    

# Plot Warren Cowley files

In [6]:
def read_csv_without_na(csv_file):
    try:
        # Read data
        dataset = pd.read_csv(path, sep=r',\s*', engine='python')
        dataset.columns = dataset.columns.str.strip()
        
        # Remove rows with NaN values and keep track of dropped indices
        original_indices = dataset.index
        dataset.dropna(inplace=True)
        dropped_indices = original_indices.difference(dataset.index)
        if not dropped_indices.empty:
            warnings.warn(f"Dropped {len(dropped_indices)} rows with NaN values from {csv_file}. Dropped indices: {dropped_indices.tolist()}")
        return dataset
    except Exception as e:
        warnings.warn(f"Error reading {csv_file}: {e}")
        return None

In [7]:
pattern = os.path.join(simulation_folder, '*', '**', '*_warren_cowley.csv')
path_wc_files = sorted(glob(pattern, recursive=True))

paths_wc_files_per_concentration = {} 
for path in path_wc_files:
    dir_name = os.path.dirname(os.path.dirname(path)).split('/')[-1]
    if dir_name not in paths_wc_files_per_concentration.keys():
        paths_wc_files_per_concentration[dir_name] = []
    paths_wc_files_per_concentration[dir_name].append(path)
print(f"Found {len(path_wc_files)} warren cowley files to process.")

Found 233 warren cowley files to process.


In [8]:
# plots for each warren cowley file
for i, path in enumerate(path_wc_files):
    try:
        progress_print(i, len(path_wc_files), 'single wc plots')
        
        # Read data
        dataset = read_csv_without_na(path)

        # Create figure
        plt.figure(figsize=(10, 6))
        
        # Seaborn regression plot with scatter and confidence interval
        # Using lowess=True for smooth non-linear curve (locally weighted regression)
        sns.regplot(data=dataset, 
                    x='time', 
                    y='Li-X', 
                    scatter_kws={'alpha':0.5, 's':20},
                    line_kws={'color':'red'},
                    lowess=True)
        
        plt.xlabel('Time')
        plt.ylabel('Li-X')
        plt.title('Warren-Cowley Parameter: Li-X vs Time')
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(path.replace('.csv', '_Li-X_warren_cowley.png'))
        plt.close()
    except Warning as w:
        warnings.warn(f"while processing {path}: {w}")
    except Exception as e:
        warnings.warn(f"Error while processing {path}: {e}")


single wc plots Progress: 100% 	 Processing File 233/233

In [None]:
# plots per concentration
for i, (conc, paths) in enumerate(paths_wc_files_per_concentration.items()):
    try:
        progress_print(i, len(paths_wc_files_per_concentration), f'wc plots')
        plt.figure(figsize=(10, 6))
        
        # Combine all datasets from the cluster into one
        collected_datasets = []
        for run_num, path in enumerate(paths):
            collected_dataset = read_csv_without_na(path)
            collected_dataset['run'] = f'{run_num+1}'
            collected_datasets.append(collected_dataset)
        
        # Concatenate all data into single dataframe
        combined_dataset_per_conc = pd.concat(collected_datasets, ignore_index=True)

        # Plot scatter with color by run
        sns.scatterplot(data=combined_dataset_per_conc,
                        x='time', 
                        y='Li-X', 
                        hue='run',  
                        alpha=0.5, 
                        s=20)
        
        # Add single regression line (without hue)
        sns.regplot(data=combined_dataset_per_conc, 
                    x='time', 
                    y='Li-X',
                    scatter=False,
                    #line_kws={'color':'red', 'linewidth':2},
                    lowess=True)
        plt.legend().remove()  # Remove legend for runs since it's to big
        plt.xlabel('Time')
        plt.ylabel('Li-X')
        plt.title(f'Warren-Cowley Parameter: Li-X vs Time for Li_{conc}%')
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig(os.path.join(simulation_folder, f'{conc}_warren_cowley.png'))
        plt.close()
    except Warning as w:
        warnings.warn(f"While processing {conc}: {w}")
    except Exception as e:
        warnings.warn(f"Error while processing {conc}: {e}")


In [10]:
# one plot for all concentrations
all_combined_datasets = []
for conc, paths in paths_wc_files_per_concentration.items():
    
    # Combine all datasets from the cluster into one
    collected_datasets = []
    for run_num, path in enumerate(paths):
        collected_dataset = read_csv_without_na(path)
        collected_dataset['run'] = f'{run_num+1}'
        collected_dataset['concentration'] = conc
        collected_datasets.append(collected_dataset)    
        
    # Concatenate all data into single dataframe
    combined_dataset_per_conc = pd.concat(collected_datasets, ignore_index=True)
    all_combined_datasets.append(combined_dataset_per_conc)

# Combine all concentrations
final_dataset = pd.concat(all_combined_datasets, ignore_index=True)


plt.figure(figsize=(10, 6))

# Add single regression line (without hue)
sns.lmplot(data=final_dataset,x='time', 
            y='Li-X',  
            hue='concentration', 
            scatter=False, 
            lowess=True)

plt.xlabel('Time')
plt.ylabel('Li-X')
plt.xlim(0, 1)
plt.title(f'Warren-Cowley Parameter: Li-X vs Time for all Concentrations')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig(os.path.join(simulation_folder, f'Li_all_concentrations_warren_cowley.png'))
plt.close()


<Figure size 1000x600 with 0 Axes>