In [1]:
## Surface Quantification

In [2]:
#------------#------------#
# Get them Libraries
#------------#------------#

import numpy as np
import pandas as pd
import os
import sys
import time
import math
import pathlib

import matplotlib as mpl
from scipy.ndimage import label

import h5py


#------------#------------#
# Font Settings
#------------#------------#

# Set global font to Times New Roman and font size to 10pt
mpl.rcParams['font.family'] = 'Times New Roman'
mpl.rcParams['font.size'] = 10
plot_font_size = 20


In [3]:
#------------#------------#
# File Names and Paths
#------------#------------#

# Did it Gall?? ## Only use for running in this file (not from main)
#did_it_gall = False

# Folder Name ## Only use for running in this file (not from main)
#xyz_folder_name = "A - 316L Rod"

# File Names ## Only use for running in this file (not from main)
#sample_name = 'A1 Pre-Galling Test.xyz'

# Code File Location
code_dir = os.getcwd()

# Source File relative to code file
csv_path = os.path.join(code_dir, "..", "xyz Files", xyz_folder_name, sample_name)

# Replace the file extension for the output CSV file
saving_h5_name = sample_name.replace('.xyz', '_processed.h5')

# Output Folder
target_folder = xyz_folder_name

# Find the file path within the target folder
saving_h5_path = os.path.join(code_dir, "..", "Processed Files", target_folder, saving_h5_name)

# Quantification file name
quantification_file_name = 'Surface Quantification Values.txt'

# Find the file path to save
quantification_file_path = os.path.join(code_dir, "..", "Processed Files", quantification_file_name)


In [4]:
#------------#------------#
# Profilometer Scan Parameters
#------------#------------#

%run Profilometer_Scan_Parameters.ipynb {csv_path}


Check parameter selecetion, skip_rows =  3
Edge size / mm:  0.1


In [5]:
#------------#------------#
# Surface Quantification
#------------#------------#


#------------#------------#
# Data File
#------------#------------#

start_time = time.time()

def check_and_write_headers(file_path, headers):
    # Check if the file exists
    if not os.path.isfile(file_path):
        # If file does not exist, create it and write the headers
        with open(file_path, 'w') as file:
            file.write(', '.join(headers) + '\n')
    else:
        # If file exists, check for missing headers
        with open(file_path, 'r+') as file:
            existing_headers = file.readline().strip().split(',')
            new_headers = [header for header in headers if header not in existing_headers]
            
            if new_headers:
                # Move to start of file and add new headers
                file.seek(0)
                updated_headers = existing_headers + new_headers
                file.write(','.join(updated_headers) + '\n')
                
                # Read rest of the data, skip first line, and rewrite the file
                data = file.readlines()[1:]
                file.seek(0)
                file.write(','.join(updated_headers) + '\n')
                file.writelines(data)


headers = ['File Name', 'Processing Date', 'Sq', 'Ssk (µm)', 'Mean Height (µm)', 'Max Height (μm)', 'Mean of Squares (µm²)', 
           'RSM (µm)', 'Sku (Kurtosis)', 'Displaced Volume (mm^3)',
           
           'Trough area (%)', 'Peak area (%)', 'Galled area (%)', 'Trough Volume (mm^3)', 
           'Peak Volume (mm^3)', 'Galled Volume (mm^3)', 'Average Galled Radius (mm)', 
           'Gall Track Width (mm)', 'Swept Galled Angle (˚)']

check_and_write_headers(quantification_file_path, headers)




def quanfitication(saving_h5_path):

#------------#------------#
# Load Data
#------------#------------#
    print('File name is: ', saving_h5_path)
# Load data from HDF5 file
    with h5py.File(saving_h5_path, 'r') as f:
        X = f['X_new'][:]
        Y = f['Y_new'][:]
        pixel_size = f.attrs['pixel_size']
        data = pd.DataFrame(f['translated_data'][:])
        raw = pd.DataFrame(f['raw_data'][:])
        tilted_data = pd.DataFrame(f['tilted_data_3'][:])

# Resolution
    X_res = X[1] - X[0] # mm
    Y_res = Y[1] - Y[0] # mm
    mean_res = (X_res + Y_res)/2 # mm
    
# Generate z_clean
    z_clean = data.to_numpy().flatten()[~np.isnan(data.to_numpy().flatten())]
# Generate Z_sort
    Z_sort_nan = np.sort(np.ndarray.flatten(np.asarray(data))) #Flattening the dataframe into a 1D array & ordering (doesn't matter but easy to do)
    Z_sort = Z_sort_nan[~np.isnan(Z_sort_nan)] #Removing the NaN cells (enabling further operations to be performed), -ve sign to remove NaN


#------------#------------#
# Quantify Bulk Surface data
#------------#------------#

#Finding Sq and Ssk (quantification of data skew)

    Z2 = Z_sort**2
    Z3 = Z_sort**3

    R_Area = (1000*X_res)*(1000*Y_res)*len(Z_sort) #Multiplied by 1000 (twice) since Z is in um andd X/Ynewsize are in mm


    Sq = np.sqrt(np.sum(Z2)/R_Area)
    print('Sq / - :', Sq)

    Ssk = (np.sum(Z3)/R_Area)/(Sq**3)
    print('Ssk / um:', Ssk)


# Calculate the mean (Sa)
    mean_value = Z_sort.mean()
    print(f"Mean of the array (Sa): {mean_value:.4g} µm")

# Calculate differences from the mean
    differences = (z_clean - mean_value)

# Calculate the max height
    max_height = max(differences) - min(differences)
    
# Calculate squared differences from the mean
    squared_differences = np.square(z_clean - mean_value)
    #print("Squared differences from the mean:", squared_differences)

# Calculate the mean of the squared differences
    mean_of_squared_differences = np.mean(squared_differences)
    print(f"Mean of the squared differences: {mean_of_squared_differences:.4g} µm²")

# Calculate the RMS value
    rms_value = np.sqrt(mean_of_squared_differences)
    print(f"The RMS value of the 2D array is: {rms_value:.4g} µm")

# Calculate the Sku (Kurtosis)
    Sku = (1/rms_value**4) * np.sqrt(np.sqrt(mean(np.square(np.square(z_clean - mean_value))))) # um
    
# Displaced Volume
    DV = X_res*Y_res*sum(abs(Z_sort))/1000 # mm^3
    
# Aspect Ratio needs polar coordinates to really mean anything
    #aspect_ratio =  # no units
    
#------------#------------#
# Quantify Galled Surface data
#------------#------------#
    if did_it_gall:
# Set a threshold range around the median
        threshold = 10  # um Adjust this value to your needs
    else:
        threshold = 1  # um Adjust this value to your needs

# Recalculate cumulative counts
    cumulative_counts = np.arange(1, len(Z_sort) + 1)
# Convert counts to normalized percentages
    percentage_below = cumulative_counts / len(Z_sort)

    lower_bound = -threshold
    upper_bound = threshold
    
# Filter values outside the acceptable range
    below_lower_bound_values = Z_sort[Z_sort < lower_bound]
    above_upper_bound_values = Z_sort[Z_sort > upper_bound]

# Calculate how many points are outside the acceptable range
    points_below_lower_bound = len(below_lower_bound_values)
    points_above_upper_bound = len(above_upper_bound_values)

    percentage_below_lower_bound = points_below_lower_bound / len(Z_sort) * 100
    percentage_above_upper_bound = points_above_upper_bound / len(Z_sort) * 100
    percentage_out_of_bounds = percentage_below_lower_bound + points_above_upper_bound

# Volume calculation for points in mm^3
    volume_below_lower_bound = np.sum(below_lower_bound_values/1000 * pixel_size)
    volume_above_upper_bound = np.sum(above_upper_bound_values/1000 * pixel_size)
    volume_out_of_bounds = volume_below_lower_bound + volume_above_upper_bound

#Finding the sample area and galled area
    sample_area = math.pi*(outer_radius**2 - inner_radius**2) #From ASTM G196 sample drawing
    trough_area = points_below_lower_bound * pixel_size
    peak_area = points_above_upper_bound * pixel_size
    galled_area = trough_area + peak_area
    
# Radius of Galled Area
    galled_data = np.where(pd.isna(data[data < lower_bound]), data[data > upper_bound], data[data < lower_bound] + np.nan_to_num(data[data > upper_bound]))  
    X_grid, Y_grid = np.meshgrid(X, Y)
    X_0 = np.where(X == 0)[0]
    Y_0 = np.where(Y == 0)[0]
    radius_data = np.sqrt(((X_grid - X_0) * X_res)**2 + ((Y_grid - Y_0) * Y_res)**2);
    mask = np.isnan(galled_data)
    radius_data = radius_data[~mask] # it auto converts to a 1D array??
    average_galled_radius = np.mean(radius_data)
# Length and Width of galled region
    min_radius = radius_data.min().min()
    max_radius = radius_data.max().max()
    gall_track_width = max_radius - min_radius

        
# Angle of Galled Area
    # Find points where height data is either above +threshold or below -threshold
    binary_mask = (data >= threshold) | (data <= -threshold)

# Label the connected components
    labeled, num_features = label(binary_mask)

# Optionally, filter out small features (e.g., islands)
    sizes = np.bincount(labeled.ravel())

    if len(sizes) < 3:
        print("Not enough components to find the second largest.")
        min_size = 100000  # Minimum size of connected component (in terms of number of pixels)
        max_size = 5000000
        mask_size = (sizes >= min_size) & (sizes <= max_size)
        valid_components = mask_size[labeled]
    else:
        largest_label = np.argmax(sizes[1:]) + 1  # Find the largest component label, ignoring background
        sizes[largest_label] = 0  # Temporarily set largest size to zero to find the second largest
        second_largest_label = np.argmax(sizes[1:]) + 1  # Find second-largest component label
    # Create a mask for the second-largest component
        second_largest_component = (labeled == second_largest_label) 
        valid_components = second_largest_component ### second_largest_component[labeled] ---- something in here may have killed the kernal ----####

# Now valid_components contains only the large connected regions
    valid_points = np.argwhere(valid_components)

# Assuming valid_points is an array of shape (N, 2) representing coordinates
    x_points = X[valid_points[:, 1]]
    y_points = Y[valid_points[:, 0]]
    curve_points = np.column_stack((x_points, y_points))

# Angle-based sorting for roughly circular/continuous curve
    centroid = [0,0]#np.mean(curve_points, axis = 0)
    angles = np.degrees(np.arctan2(curve_points[:, 1] - centroid[1], curve_points[:, 0] - centroid[0]))
    sorted_indices = np.argsort(angles)
    ordered_curve_points = curve_points[sorted_indices]

    min_angle = np.min(angles)
    max_angle = np.max(angles)
    angle_range = max_angle - min_angle

    if angle_range > 3.1:
# Split points into two halves based on `y = 0` relative to centroid
        upper_half = angles[curve_points[:, 1] >= centroid[1]]
        lower_half = angles[curve_points[:, 1] < centroid[1]]

# Calculate min and max angles for each half
        upper_min, upper_max = upper_half.min(), upper_half.max()
        lower_min, lower_max = lower_half.min(), lower_half.max()
        angle_range = upper_max - upper_min + upper_max - upper_min

    print("min angle: ", min_angle, " max angle: ", max_angle)

#------------#------------#
# Save quantified data
#------------#------------#

    file_name = os.path.basename(saving_h5_path)
    file_path = pathlib.Path(saving_h5_path)
    date = os.path.getmtime(file_path)

    
    
    quantified_data = [file_name, date, Sq, Ssk, mean_value, max_height, mean_of_squared_differences, 
                       rms_value, percentage_below_lower_bound, percentage_above_upper_bound, 
                       percentage_out_of_bounds, average_galled_radius, gall_track_width, 
                       angle_range]

    def append_data(file_path, data, headers):
# Load existing lines and check for duplicates of 'file_name' value
        file_name_index = headers.index('File Name')
        new_file_name = data[file_name_index]
    
# Read file data
        lines_to_keep = []
        duplicate_found = False
    
# Read all lines and filter out duplicates
        if os.path.isfile(file_path):
            with open(file_path, 'r') as file:
                lines = file.readlines()
                header_line = lines[0]  # Preserve headers
                for line in lines[1:]:
                    line_data = line.strip().split(', ')
                    if line_data[file_name_index] == new_file_name:
                        duplicate_found = True
                    else:
                        lines_to_keep.append(line)

# Write back the file without duplicates
        with open(file_path, 'w') as file:
            file.write(header_line)  # Write header line
            file.writelines(lines_to_keep)  # Write non-duplicate data lines
            file.write(', '.join(map(str, data)) + '\n')  # Append the new data

        if duplicate_found:
            print(f"Duplicate entry for file_name '{new_file_name}' found and removed before appending new data.")

    append_data(quantification_file_path, quantified_data, headers)


quanfitication(saving_h5_path)

end_time = time.time()
print('Time elapsed: ', end_time - start_time, ' seconds')

File name is:  /Users/kkir0008/Desktop/PhD/Imaging/Optical Profilometer/Profilometry Code/../Processed Files/A - 316L Rod/A1 Pre-Galling Test_processed.h5
Sq / - : 2.087844138755609
Ssk / um: -28.959705858558223
Mean of the array: 0.6632 µm
Squared differences from the mean: [18.47167131 19.48811992 19.92702073 ...  2.09787467  1.82031927
  1.60141478]
Mean of the squared differences: 28.49 µm²
The RMS value of the 2D array is: 5.337 µm
min angle:  -3.1408327755208876  max angle:  3.141592653589793
Duplicate entry for file_name '/Users/kkir0008/Desktop/PhD/Imaging/Optical Profilometer/Profilometry Code/../Processed Files/A - 316L Rod/A1 Pre-Galling Test_processed.h5' found and removed before appending new data.
Time elapsed:  2.946150064468384  seconds
