# In [5]:
import os
import re

def extract_grain_sizes(file_path):
    """Extracts grain sizes from a given text file."""
    grain_sizes = []
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        lines = file.readlines()
        for line in lines:
            match = re.search(r'Group \d+: (\d+) elements, ([\d.]+) total area', line)
            if match:
                total_area = float(match.group(2))
                grain_sizes.append(total_area)
    return grain_sizes

def compute_average_grain_size(sample_folder):
    """Computes the average grain size for a given sample, excluding the file with the largest grain size,
       and removing values that are more than 50% smaller than the average in a second pass."""
    grain_sizes = []
    # outlier_file = find_outlier_file(sample_folder)
    
    for file_name in os.listdir(sample_folder):
        # if file_name == outlier_file:
            # continue  # Skip the file with the largest grain size
        file_path = os.path.join(sample_folder, file_name)
        if os.path.isfile(file_path):
            grain_sizes.extend(extract_grain_sizes(file_path))
    
    if not grain_sizes:
        return None
    
    # Initial average calculation
    avg_grain_size = sum(grain_sizes) / len(grain_sizes)
    
    # Remove values that are more than 50% smaller than the average
    # filtered_grain_sizes = [size for size in grain_sizes if size >= 0.5 * avg_grain_size]
    
    # if filtered_grain_sizes:
    #     avg_grain_size = sum(filtered_grain_sizes) / len(filtered_grain_sizes)
    
    return avg_grain_size

def main(root_folder, output_file):
    """Processes all samples and writes the average grain size to an output file in alphabetical order."""
    results = {}
    
    for sample_name in os.listdir(root_folder):
        sample_folder = os.path.join(root_folder, sample_name)
        if os.path.isdir(sample_folder):
            avg_grain_size = compute_average_grain_size(sample_folder)
            if avg_grain_size is not None:
                results[sample_name] = avg_grain_size
    
    with open(output_file, 'w') as out_file:
        for sample_name in sorted(results.keys()):
            out_file.write(f"{sample_name}: {results[sample_name]:.2f} pixels\n")


if __name__ == "__main__":
    root_folder = "voronoi_outputs/mixed_dots/"  # Replace with actual folder path
    output_file = "grain_size_summary_mixed.txt"
    main(root_folder, output_file)