# Analysis Software comparison
This notebook investigates the output of the same powder batches viewed with different methods, mainly FlowCam image software, Scanning Electron Microscopy (SEM), and optical imaging.

In [1]:
# Import 
import plotly.graph_objects as go
import pandas as pd
import os
from diameter_operations import build_diameter_metrics_dataframe


In [2]:
# Assign file paths
flow_data_path = "/Users/Daniel/Desktop/results/particle_analysis/flowcam_results/Run_01/Results_3_Full_8_14_2024.csv"
imagej_data_path = "/Users/Daniel/Desktop/results/particle_analysis/pyImagej_results/Run_01/PyImageJ Results.csv"

# Warn (without stopping the notebook) about every input file that is missing.
for _path, _missing_message in (
    (flow_data_path, "FlowCam File path doesn't exist."),
    (imagej_data_path, "PyImageJ File path doesn't exist."),
):
    if not os.path.isfile(_path):
        print(_missing_message)

In [3]:
# Create pandas dataframes from file paths

# The FlowCam export is decoded as "ISO-8859-1" (latin1); its column names
# contain non-ASCII unit symbols such as µ, ², ³ and °.
try:
    flow_df = pd.read_csv(flow_data_path, encoding="latin1")
except UnicodeDecodeError as decode_error:
    # Report the actual exception (not the exception class) and re-raise:
    # continuing would leave flow_df undefined and make the next cell fail
    # with an unrelated NameError.
    print("Error caught: ", decode_error)
    raise

imagej_df = pd.read_csv(imagej_data_path)

In [4]:
# Collect the column labels of both dataframes as arrays
flow_column_names = flow_df.columns.values
imagej_column_names = imagej_df.columns.values

# Show each header followed by its column labels on the next line
print("FlowCam column names: ", flow_column_names, sep="\n")
print("\nPyImageJ column names: ", imagej_column_names, sep="\n")


FlowCam column names: 
['Name' 'Area (ABD) (µm²)' 'Area (Filled) (µm²)' 'Aspect Ratio'
 'Biovolume (Cylinder) (µm³)' 'Biovolume (P. Spheroid) (µm³)'
 'Biovolume (Sphere) (µm³)' 'Capture ID' 'Capture X (px)' 'Capture Y (px)'
 'Ch1 Area' 'Ch2 Area' 'Ch2/Ch1 Ratio' 'Circle Fit' 'Circularity'
 'Circularity (Hu)' 'Convex Perimeter (µm)' 'Convexity'
 'Diameter (ABD) (µm)' 'Diameter (ESD) (µm)' 'Diameter (FD) (µm)'
 'Edge Gradient' 'Elongation' 'Feret Angle Max (°)' 'Feret Angle Min (°)'
 'Fiber Straightness' 'Geodesic Aspect Ratio' 'Geodesic Length (µm)'
 'Geodesic Thickness (µm)' 'Length (µm)' 'Particles Per Chain'
 'Perimeter (µm)' 'Roughness' 'Sphere Complement (µm³)' 'Sphere Count'
 'Sphere Unknown (µm³)' 'Sphere Volume (µm³)' 'Sqrt Circularity'
 'Symmetry' 'Timestamp' 'Volume (ABD) (µm³)' 'Volume (ESD) (µm³)'
 'Width (µm)']

PyImageJ column names: 
[' ' 'Area' 'Mean' 'StdDev' 'Min' 'Max' 'X' 'Y' 'Perim.' 'BX' 'BY' 'Width'
 'Height' 'Major' 'Minor' 'Angle' 'Circ.' 'Feret' 'IntDen' 'Media

In [5]:
def get_column_indicies( is_flowcam = True):
    """Prompt the user for the positional indices of the measurement columns.

    Parameters
    ----------
    is_flowcam : bool, default True
        When true, the diameter column index is requested as well. When
        false, the diameter prompt is skipped.

    Returns
    -------
    tuple
        ``(area_index, perimeter_index, diameter_index)``. The first two are
        ints; ``diameter_index`` is an int for FlowCam data and ``None``
        otherwise.

    Raises
    ------
    ValueError
        If an answer cannot be converted with ``int``.
    """
    # The prompt text is passed positionally: the built-in input() does not
    # accept a ``prompt=`` keyword, so the positional form works both inside
    # a notebook kernel and in a plain Python interpreter.
    area_index = int(input("Enter the index of the area column: "))
    perimeter_index = int(input("Enter the index of the perimeter column: "))

    # Only FlowCam data has a diameter column to select.
    diameter_index = None
    if is_flowcam:
        diameter_index = int(input("Enter the index of the diameter column: "))

    return area_index, perimeter_index, diameter_index

### Unpacking index results for each dataframe

In [6]:
# Interactively choose the FlowCam area, perimeter and diameter column positions
# (the answers must be typed at the prompts when this cell runs).
flow_area_index, flow_perimeter_index, flow_diameter_index = get_column_indicies( is_flowcam=True)


In [12]:
# Interactively choose the PyImageJ area and perimeter column positions.
# With is_flowcam=False no diameter is requested, so imagej_diameter_index is None.
imagej_area_index, imagej_perimeter_index, imagej_diameter_index = get_column_indicies( is_flowcam=False)

In [8]:
# Translate the user-selected positions into the matching column names

flow_area, flow_perimeter, flow_diameter = (
    flow_column_names[position]
    for position in (flow_area_index, flow_perimeter_index, flow_diameter_index)
)

# PyImageJ: only area and perimeter are looked up (no diameter position)
imagej_area, imagej_perimeter = (
    imagej_column_names[position]
    for position in (imagej_area_index, imagej_perimeter_index)
)

In [15]:
# Echo the selected column names for a visual check. Each f-string interpolates
# a tuple, which is why the recorded output shows a doubled pair of parentheses.
print(f"FlowCam (area, perimeter, diameter) values are: ({flow_area, flow_perimeter,flow_diameter}).")

# For PyImageJ the third entry is the diameter *index* variable (None in the
# recorded run), not a column name.
print(f"PyImageJ (area, perimeter, diameter) values are: ({imagej_area, imagej_perimeter, imagej_diameter_index}).")


FlowCam (area, perimeter, diameter) values are: (('Area (Filled) (µm²)', 'Perimeter (µm)', 'Diameter (ABD) (µm)')).
PyImageJ (area, perimeter, diameter) values are: (('Area', 'Perim.', None)).


In [16]:
# Build the FlowCam diameter-metrics dataframe with the helper imported from
# diameter_operations, passing the area, perimeter and diameter column names
# selected above, an empty unit string and the suffix "d".
# NOTE(review): the recorded output prints the helper's report twice — confirm
# against diameter_operations whether that duplication is intended.
flow_diameter_df = build_diameter_metrics_dataframe(flow_df,
                                                    flow_area,
                                                    flow_perimeter,
                                                    flow_diameter,
                                                    unit="",
                                                    suf="d",
                                                    FlowCam=True)


                                             
                                                


Target column exist.

123     30.31
9       39.11
47      43.34
63      44.61
31      45.57
        ...  
30     107.11
35     110.99
53     114.57
15     144.95
8      180.76
Name: Diameter (ABD) (µm), Length: 144, dtype: float64

total_particles:  144
Target column exist.

123     30.31
9       39.11
47      43.34
63      44.61
31      45.57
        ...  
30     107.11
35     110.99
53     114.57
15     144.95
8      180.76
Name: Diameter (ABD) (µm), Length: 144, dtype: float64

total_particles:  144


In [17]:

# Build the PyImageJ diameter-metrics dataframe with the same helper, using the
# selected area and perimeter column names and FlowCam=False.
# "eff_diameter_microns" is passed as the diameter column name; presumably the
# helper derives that column itself, since no PyImageJ diameter column was
# selected — TODO confirm against diameter_operations.
# NOTE(review): the recorded output reports a caught KeyError from the helper
# before the results; verify that this is expected for single-file input.
imagej_diameter_df = build_diameter_metrics_dataframe(imagej_df,
                                                      imagej_area,
                                                      imagej_perimeter,
                                                      "eff_diameter_microns",
                                                      unit="microns",
                                                      suf="d",
                                                      FlowCam=False)

A KeyError has occured: <class 'KeyError'>. 
The input dataframe consist of one file.

Target column exist.

92      27.081100
107     33.405436
134     36.326870
73      37.401421
93      37.732804
          ...    
16      82.019708
22      84.947963
40      87.365022
117    108.439269
68     136.022615
Name: eff_diameter_microns, Length: 144, dtype: float64

total_particles:  144
Target column exist.

92      27.081100
107     33.405436
134     36.326870
73      37.401421
93      37.732804
          ...    
16      82.019708
22      84.947963
40      87.365022
117    108.439269
68     136.022615
Name: eff_diameter_microns, Length: 144, dtype: float64

total_particles:  144


In [39]:
# Define function to extract columns
def get_column_names(df, guess_name):
    """Return the labels of every column of ``df`` whose name contains ``guess_name``.

    The match is a substring match (``DataFrame.filter(like=...)``) and the
    result is the array of matching column labels, in dataframe order.
    """
    matching_columns = df.filter(like=guess_name).columns
    return matching_columns.values

def index_series(df, guess_name):
    """Return the column of ``df`` whose name contains ``guess_name``.

    If exactly one column matches it is returned directly. If several match,
    the candidates are listed and the user is asked to choose one by index.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe to select the column from.
    guess_name : str
        Substring to look for in the column names.

    Returns
    -------
    pandas.Series
        The selected column.

    Raises
    ------
    KeyError
        If no column name contains ``guess_name``.
    ValueError
        If the typed answer cannot be converted with ``int``.
    IndexError
        If the typed index does not refer to one of the listed columns.
    """
    names = get_column_names( df, guess_name )

    # Nothing matched: fail clearly instead of prompting for a choice that
    # cannot exist.
    if len(names) == 0:
        raise KeyError(f"No column name contains {guess_name!r}.")

    # If there is only one matching column, use it
    if len(names) == 1:
        return df[names[0]]

    # Several matching columns: let the user pick one
    print( "The following column names were found: " )
    for idx, name in enumerate( names ):
        print(f"{idx}: {name}")

    # The prompt is passed positionally; the built-in input() rejects keywords.
    user_index = int( input("Select the index of the column wanted") )

    # Valid positions are 0 .. len(names) - 1 (the upper bound is exclusive).
    if not 0 <= user_index < len(names):
        raise IndexError("Invalid index Selection.")

    return df[names[user_index]]


# For earlier steps
def index_column_name(df, guess_name):
    """Return the name of the column of ``df`` that contains ``guess_name``.

    If exactly one column name matches it is returned directly. If several
    match, the candidates are listed and the user is asked to choose one by
    index.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose column names are searched.
    guess_name : str
        Substring to look for in the column names.

    Returns
    -------
    str
        The selected column name.

    Raises
    ------
    KeyError
        If no column name contains ``guess_name``.
    ValueError
        If the typed answer cannot be converted with ``int``.
    IndexError
        If the typed index does not refer to one of the listed columns.
    """
    names = get_column_names( df, guess_name )

    # Nothing matched: fail clearly instead of prompting for a choice that
    # cannot exist.
    if len(names) == 0:
        raise KeyError(f"No column name contains {guess_name!r}.")

    # If there is only one matching column, use it
    if len(names) == 1:
        return names[0]

    # Several matching columns: let the user pick one
    print( "The following column names were found: " )
    for idx, name in enumerate( names ):
        print(f"{idx}: {name}")

    # The prompt is passed positionally; the built-in input() rejects keywords.
    user_index = int( input("Select the index of the column wanted") )

    # Valid positions are 0 .. len(names) - 1 (the upper bound is exclusive).
    if not 0 <= user_index < len(names):
        raise IndexError("Invalid index Selection.")

    # Return the column the user actually selected, not always the first match.
    return names[user_index]

# Invoke function : index_column_names
# flow_area_name1 = index_column_name(flow_df,"Area" )
# # flow_df.columns.tolist()
# print( flow_area_name1 )


In [42]:
# Select the x (diameter) and y (cumulative) series for each software.
# index_series prompts whenever more than one column matches; in the recorded
# run "cumulative" matched two columns in each dataframe, so the two y series
# depend on what was typed at those prompts.

# FlowCam Data
x1_data = index_series(flow_diameter_df, "Diameter (ABD)")
y1_data = index_series( flow_diameter_df, "cumulative")

# PyImageJ Data
x2_data = index_series( imagej_diameter_df, "diameter")
y2_data = index_series( imagej_diameter_df, "cumulative")


The following column names were found: 
0: cumulative frequency d
1: cumulative_%_particle d
The following column names were found: 
0: cumulative frequency d
1: cumulative_%_particle d


In [113]:
# Build one line trace per software from the selected series
trace1 = go.Scatter(x=x1_data, y=y1_data, mode="lines", name="FlowCam Series")
trace2 = go.Scatter(x=x2_data, y=y2_data, mode="lines", name="PyImageJ Series")

In [118]:
# Create Layout
# Tick and zero-line settings shared by both axes.
_axis_style = dict(
    ticks="outside",
    showticklabels=True,  # tick numbers
    zeroline=True,
    zerolinewidth=1,
    zerolinecolor="black",
)

mylayout = go.Layout(
    title="Virgin Ti64 Powder Run #1 - Count (%) vs Effective Diameter (microns)",
    width=600,
    height=400,
    plot_bgcolor="White",
    paper_bgcolor="white",
    xaxis=dict(
        title="Effective Diameter (microns)",
        range=[0, 200],
        dtick=20,
        **_axis_style,
    ),
    yaxis=dict(
        title="Cumulative Percentage (%)",
        range=[0, 110],
        dtick=10,
        **_axis_style,
    ),
    legend=dict(
        bordercolor="Black",
        title=dict(text="Software"),
    ),
)



ValueError: 
    Invalid value of type 'builtins.int' received for the 'autosize' property of layout
        Received value: 11

    The 'autosize' property must be specified as a bool
    (either True, or False)

In [115]:
# Create figure 1: both software traces drawn on the shared layout
figure1 = go.Figure(data=[trace1, trace2], layout=mylayout)

In [116]:
# Display figure 1 in the notebook output
figure1.show()

# save = True
