# Welcome to the CILAS grain size processing script!

### <div style="text-align: right"> Last modified by A.A. Lehrmann 22 November 2024 </div>


### The script below will extract .MES data, process the bin sizes, and plot the grain size distribution curves

### Important instructions before you begin:

    1. NEVER edit raw data. Do not delete .MES files. 
    
    2. Make a /CORE_GrainsizeOutput/ folder to put all of your script's outputs, and a /CORE_GrainsizeProcessed/ folder to put your script's processed data

    3. When copying folder paths, make sure to remove quotation marks

    4. Always add the extension .csv to your output files

# Convert .MES files to .csv 


In [None]:
import os
import csv

def process_mes_file(input_file):
    """Parse one .MES file into rows ready for CSV export.

    The file is read as text and addressed purely by fixed 0-based line
    positions: line 4 is taken as the core name, line 53 as the date,
    lines 45-143 as the bin sizes (um) and lines 145-243 as the cumulative
    values.

    Parameters
    ----------
    input_file : str
        Path to the .MES file.

    Returns
    -------
    list of list
        One ``[core name, date, um, percent, indiv perc]`` row per bin.
        ``indiv perc`` is the difference between a cumulative value and the
        one before it; the first bin keeps its cumulative value unchanged.

    Raises
    ------
    IndexError
        If the file is too short to contain the expected lines.
    ValueError
        If a size or cumulative line cannot be parsed as a float.
    """
    with open(input_file, 'r') as handle:
        raw_lines = handle.readlines()

    # Header fields sit at fixed positions in the file.
    core_name = raw_lines[4].strip()
    # NOTE(review): index 53 lies inside the 45:144 size slice below, so the
    # same line is used as the date and parsed as a bin size - confirm the
    # intended position against the .MES layout.
    date = raw_lines[53].strip()

    # Bin sizes and cumulative values, one number per line.
    um_values = [float(text.strip()) for text in raw_lines[45:144]]
    percent_values = [float(text.strip()) for text in raw_lines[145:244]]

    # De-accumulate: the first value stands as is, every later one becomes
    # the step up from its predecessor.
    indiv_perc = [percent_values[0]]
    indiv_perc.extend(
        current - previous
        for previous, current in zip(percent_values, percent_values[1:])
    )

    return [
        [core_name, date, um, percent, indiv_percent]
        for um, percent, indiv_percent in zip(um_values, percent_values, indiv_perc)
    ]

def write_to_csv(output_file, rows):
    """Write the converted rows to a CSV file, preceded by a header line.

    Parameters
    ----------
    output_file : str
        Path of the CSV file to create (an existing file is overwritten).
    rows : iterable of sequence
        Data rows; each one is written after the header in the order given.
    """
    header = ["Core name", "Date", "um", "percent", "indiv perc"]
    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='') as sink:
        table = csv.writer(sink)
        table.writerow(header)
        for record in rows:
            table.writerow(record)

def process_folder(folder_path, output_folder):
    """Convert every .MES file in a folder to a CSV file of the same base name.

    Parameters
    ----------
    folder_path : str
        Folder whose entries are scanned; only names ending in '.MES' are
        converted. The source files are only read, never modified.
    output_folder : str
        Folder that receives one '<base name>.csv' per converted file. It is
        created (including missing parents) if it does not exist yet.

    Raises
    ------
    FileNotFoundError
        If ``folder_path`` does not exist.
    """
    # Create the destination up front so the first write cannot fail on a
    # missing directory. An empty string means "current directory" to
    # os.path.join, and os.makedirs rejects it, so it is skipped.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Sorted so files are processed (and reported) in a reproducible order;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.MES'):
            continue
        input_file = os.path.join(folder_path, filename)
        base_name = os.path.splitext(filename)[0]
        output_file = os.path.join(output_folder, base_name + '.csv')
        rows = process_mes_file(input_file)
        write_to_csv(output_file, rows)
        print(f"Processed {filename} and saved as {output_file}")

# Ask the user for the input and output folder paths.
# Surrounding whitespace is stripped so a pasted path with a stray space or
# newline still resolves.
input_folder = input("Enter the path to the input folder (e.g., /path/to/input_folder): ").strip()
output_folder = input("Enter the path to the output folder (e.g., /path/to/output_folder): ").strip()

# Convert every .MES file found in the input folder
process_folder(input_folder, output_folder)


# To double check the conversion worked correctly, check the D50 of the csv

In [None]:
import pandas as pd
import numpy as np

# Prompt for the CSV to inspect, then for the percentile to report
input_file_path = input("Please enter the path to the CSV file: ")
percentile = float(input("Please enter the percentile to calculate (e.g., 50 for the 50th percentile): "))

# Load the table and order it from the smallest to the largest grain size (um)
data = pd.read_csv(input_file_path).sort_values('um').reset_index(drop=True)

# Scale the 'percent' column by 100 to obtain the cumulative percentage
data['Cumulative (%)'] = data['percent'] * 100

# Show every grain size next to its cumulative percentage
print("Complete Cumulative Percentage Table:")
print(data[['um', 'percent', 'Cumulative (%)']])

# Keep the grain sizes whose cumulative percentage has reached the requested
# percentile; the first of them (smallest size) is the one reported.
reached = data['Cumulative (%)'] >= percentile
grain_size_percentile = data.loc[reached, 'um']

if grain_size_percentile.empty:
    print(f"No grain size found for the {percentile}th percentile.")
else:
    grain_size_value = grain_size_percentile.iloc[0]
    print(f"\nThe grain size at the {percentile}th percentile is approximately {grain_size_value:.2f} microns.")


# Individual sample: Process the grain size data and plot

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cmcrameri.cm as cmc  # Import the cmcrameri colormap

# Ask the user for the input CSV and the name of the processed CSV to write.
# Surrounding whitespace is stripped so a pasted path with a stray space or
# newline still resolves.
input_file_path = input("Enter the path to the input CSV file: ").strip()
output_file_path = input("Enter the name of the output CSV file (e.g., 'output_file12.csv'): ").strip()

# Read the data
data = pd.read_csv(input_file_path)

# Cumulative (%) Q3: the 'percent' column scaled by 100, computed on the whole
# column at once and stored as floats.
data['Cumulative (%) Q3'] = (data['percent'] * 100).astype(float)

# Computation: change in Cumulative (%) Q3 divided by the change in
# ln(grain size) between each row and the row before it. Rows 0 and 1 stay 0.0.
# NOTE(review): the pair (row 0, row 1) is never evaluated because the
# calculation starts at index 2 - confirm whether it was meant to start at
# the second row (index 1).
q3 = data['Cumulative (%) Q3'].to_numpy(dtype=float)
um = data['um'].to_numpy(dtype=float)
computation = np.zeros(len(data))
computation[2:] = (q3[2:] - q3[1:-1]) / np.log(um[2:] / um[1:-1])
data['Computation'] = computation

# Total Computation: the column sum, repeated on every row
data['Total Computation'] = data['Computation'].sum()

# Histogram (%) q3: each row's share of the total, in percent
data['Histogram (%) q3'] = (data['Computation'] / data['Total Computation']) * 100

# Plotting
fig, ax1 = plt.subplots(figsize=(10, 6))
cumulative_color = cmc.batlow(0.8)
histogram_color = cmc.batlow(0.5)

# First axis: Cumulative (%) Q3 on a logarithmic grain size axis
ax1.plot(data['um'], data['Cumulative (%) Q3'], linestyle='-', color=cumulative_color, linewidth=2, label='Cumulative (%) Q3')
ax1.set_xscale('log')
ax1.set_ylabel('Cumulative (%) Q3', fontsize=14, color=cumulative_color)
ax1.tick_params(axis='y', labelcolor=cumulative_color)

# The x label and grid are set on ax1 explicitly. twinx() makes the twin the
# current pyplot axes, and the twin's x-axis is invisible, so plt.xlabel() and
# plt.grid() would not show the x label or the vertical grid lines.
ax1.set_xlabel('Grain Size (µm)', fontsize=14)
ax1.grid(True, which='both', ls='--', linewidth=0.5)
ax1.set_title('Cumulative (%) Q3 and Histogram (%) q3 vs. Grain Size', fontsize=16)

# Second y-axis: Histogram (%) q3
ax2 = ax1.twinx()
ax2.plot(data['um'], data['Histogram (%) q3'], linestyle='-', color=histogram_color, linewidth=2, label='Histogram (%) q3')
ax2.set_ylabel('Histogram (%) q3', fontsize=14, color=histogram_color)
ax2.tick_params(axis='y', labelcolor=histogram_color)

# One legend per axis
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')

fig.tight_layout()

# Save the plot as a PNG in the current working directory, then display it
fig.savefig('cumulative_and_histogram_vs_grain_size.png')
plt.show()

# Save the updated DataFrame to a new CSV file
data.to_csv(output_file_path, index=False)
print('Calculations completed, plot saved, and data exported to', output_file_path)


# Plots for a folder of grain size data

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cmcrameri.cm as cmc  # Import the cmcrameri colormap

# Ask the user for the folder containing input CSV files and the output folder
input_folder_path = input("Enter the path to the folder containing input CSV files: ").strip()
output_folder_path = input("Enter the path to the folder to save processed files: ").strip()

# Ensure the output folder exists
os.makedirs(output_folder_path, exist_ok=True)

# Get all CSV files in the input folder
csv_files = [f for f in os.listdir(input_folder_path) if f.endswith('.csv')]

# Line colours shared by every plot
cumulative_color = cmc.batlow(0.8)
histogram_color = cmc.batlow(0.5)

# Process each CSV file in the folder
for file_name in csv_files:
    input_file_path = os.path.join(input_folder_path, file_name)
    output_file_path = os.path.join(output_folder_path, f"processed_{file_name}")
    # splitext drops only the trailing extension; str.replace would also
    # remove a '.csv' that happens to appear earlier in the name.
    plot_stem = os.path.splitext(file_name)[0]
    output_plot_path = os.path.join(output_folder_path, f"{plot_stem}_plot.png")

    # Read the data
    data = pd.read_csv(input_file_path)

    # Files without the required columns are reported and skipped
    if 'um' not in data.columns or 'percent' not in data.columns:
        print(f"Skipped {file_name} - missing required columns.")
        continue

    # Cumulative (%) Q3: the 'percent' column scaled by 100, computed on the
    # whole column at once and stored as floats.
    data['Cumulative (%) Q3'] = (data['percent'] * 100).astype(float)

    # Computation: change in Cumulative (%) Q3 divided by the change in
    # ln(grain size) between each row and the row before it. Rows 0 and 1
    # stay 0.0.
    # NOTE(review): the pair (row 0, row 1) is never evaluated because the
    # calculation starts at index 2 - confirm whether it was meant to start
    # at the second row (index 1).
    q3 = data['Cumulative (%) Q3'].to_numpy(dtype=float)
    um = data['um'].to_numpy(dtype=float)
    computation = np.zeros(len(data))
    computation[2:] = (q3[2:] - q3[1:-1]) / np.log(um[2:] / um[1:-1])
    data['Computation'] = computation

    # Total Computation: the column sum, repeated on every row
    data['Total Computation'] = data['Computation'].sum()

    # Histogram (%) q3: each row's share of the total, in percent
    data['Histogram (%) q3'] = (data['Computation'] / data['Total Computation']) * 100

    # Plotting
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # First axis: Cumulative (%) Q3 on a logarithmic grain size axis
    ax1.plot(data['um'], data['Cumulative (%) Q3'], linestyle='-', color=cumulative_color, linewidth=2, label='Cumulative (%) Q3')
    ax1.set_xscale('log')
    ax1.set_ylabel('Cumulative (%) Q3', fontsize=14, color=cumulative_color)
    ax1.tick_params(axis='y', labelcolor=cumulative_color)

    # The x label and grid are set on ax1 explicitly. twinx() makes the twin
    # the current pyplot axes, and the twin's x-axis is invisible, so
    # plt.xlabel() and plt.grid() would not show the x label or the vertical
    # grid lines.
    ax1.set_xlabel('Grain Size (µm)', fontsize=14)
    ax1.grid(True, which='both', ls='--', linewidth=0.5)
    ax1.set_title(f'Cumulative (%) Q3 and Histogram (%) q3 - {file_name}', fontsize=16)

    # Second y-axis: Histogram (%) q3
    ax2 = ax1.twinx()
    ax2.plot(data['um'], data['Histogram (%) q3'], linestyle='-', color=histogram_color, linewidth=2, label='Histogram (%) q3')
    ax2.set_ylabel('Histogram (%) q3', fontsize=14, color=histogram_color)
    ax2.tick_params(axis='y', labelcolor=histogram_color)

    # One legend per axis
    ax1.legend(loc='upper left')
    ax2.legend(loc='upper right')

    fig.tight_layout()

    # Save the plot as a PNG and close the figure so figures do not pile up
    # in memory across files
    fig.savefig(output_plot_path, bbox_inches='tight')
    plt.close(fig)

    # Save the updated DataFrame to a new CSV file
    data.to_csv(output_file_path, index=False)
    print(f"Processed and saved {file_name} -> CSV and plot saved.")


# Plot grain size distribution for one core

In [None]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cmcrameri.cm as cmc  # Import the cmcrameri colormap

# Collect the folder of processed CSV files and the core name used in the
# plot title and output file name; surrounding whitespace is removed from both
input_folder_path = input(
    "Enter the path to the folder containing processed CSV files: "
).strip()
core_name = input(
    "Enter the core name for the title of the graph: "
).strip()

# Helper function to extract the first number of the last 'N-M' pair in the filename
def extract_sort_key(file_name):
    """Return the sort key for a processed file name.

    The key is the first number of the last ``<digits>-<digits>`` pair found
    in the name (e.g. 10 for 'KC04_10-11.csv'). Taking the last pair rather
    than the first keeps an earlier pair in the name, such as 'NBP19-02',
    from being used as the key.

    Parameters
    ----------
    file_name : str
        File name to inspect.

    Returns
    -------
    int or float
        The extracted number, or ``float('inf')`` when the name contains no
        such pair so that those files sort to the end.
    """
    pairs = re.findall(r"(\d+)-(\d+)", file_name)  # Matches patterns like '0-1'
    if pairs:
        return int(pairs[-1][0])  # First number of the last pair
    return float('inf')  # Place files without matching pattern at the end

# Gather the CSV files in the folder, ordered by the number taken from each name
processed_files = sorted(
    (entry for entry in os.listdir(input_folder_path) if entry.endswith(".csv")),
    key=extract_sort_key,
)

# One batlow colour per file, sampled evenly across the colormap
colors = cmc.batlow(np.linspace(0, 1, len(processed_files)))

# Single figure that all curves are drawn onto
fig, ax = plt.subplots(figsize=(12, 8))

# Colours are handed out only to files that are actually drawn
color_index = 0

for file_name in processed_files:
    input_file_path = os.path.join(input_folder_path, file_name)
    data = pd.read_csv(input_file_path)

    # Files lacking either required column are reported and left out
    if 'um' not in data.columns or 'Histogram (%) q3' not in data.columns:
        print(f"Skipped {file_name} - missing required columns.")
        continue

    ax.plot(
        data['um'],
        data['Histogram (%) q3'],
        linestyle='-',
        linewidth=2,
        label=file_name,
        color=colors[color_index],
    )
    color_index += 1

# Axis scale, labels, title and grid
ax.set_xscale('log')
ax.set_xlabel('Grain Size (µm)', fontsize=14)
ax.set_ylabel('Histogram (%) q3', fontsize=14)
ax.set_title(f'Combined Histograms of Grain Size Data for Core {core_name}', fontsize=16)
ax.grid(True, which='both', linestyle='--', linewidth=0.5)

# Legend sits below the plot in three columns, entries in plotting order
ax.legend(
    fontsize=10,
    title="Files",
    title_fontsize=12,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.15),
    ncol=3,
)

fig.tight_layout()

# Write the combined plot next to the input files, then display it
output_plot_path = os.path.join(input_folder_path, f"combined_histograms_core_{core_name}.png")
fig.savefig(output_plot_path, bbox_inches='tight')
plt.show()

print(f"Combined histogram plot for core {core_name} saved as {output_plot_path}")
