# Visualize FlowCam Exported Data

In [2]:
# Import Libraries, packages, and modules
import os
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import numpy as np
# from cum_relative_freq import cum_relative_freq
from frequency_operations import *
from diameter_operations import add_eff_diameter
from sphericity_operations import add_sphericity

In [3]:
# Absolute path of the FlowCam CSV export analysed in this notebook.
working_path = "/Users/Daniel/Desktop/particle_analysis/flowcam_results/Results_3_Full_8_14_2024.csv"

if os.path.isfile(working_path):
    print("Working path is a file:")
    # basename() applies the platform's path rules, so the file name is
    # extracted correctly even when the path is not "/"-separated.
    print(os.path.basename(working_path))
else:
    print("Error please provide a valid file path.")

Working path is a file:
Results_3_Full_8_14_2024.csv


## Create a pandas dataframe

In [4]:

# The export is not encoded with UTF-8.

# 1. Try encoding method "ISO-8859-1" (latin1)
method = "latin1"
try:
    df = pd.read_csv(working_path, encoding=method)
except UnicodeDecodeError:
    # The {method} placeholder is named, so it must be filled by keyword;
    # a positional argument would raise KeyError instead of printing.
    print("The file could not be decoded with {method} encoding. Try another encoding.".format(method=method))
    # df was never assigned in this branch: stop here with the decoding error
    # instead of failing on the shape report below with a NameError.
    raise

print(f"This dataframe has a shape of {df.shape}.\n")

This dataframe has a shape of (144, 43).



Information about the dataframe

In [5]:
# List the dtype pandas inferred for every column of the raw export.
df.dtypes


Name                              object
Area (ABD) (µm²)                 float64
Area (Filled) (µm²)              float64
Aspect Ratio                     float64
Biovolume (Cylinder) (µm³)       float64
Biovolume (P. Spheroid) (µm³)    float64
Biovolume (Sphere) (µm³)         float64
Capture ID                       float64
Capture X (px)                   float64
Capture Y (px)                   float64
Ch1 Area                         float64
Ch2 Area                         float64
Ch2/Ch1 Ratio                    float64
Circle Fit                       float64
Circularity                      float64
Circularity (Hu)                 float64
Convex Perimeter (µm)            float64
Convexity                        float64
Diameter (ABD) (µm)              float64
Diameter (ESD) (µm)              float64
Diameter (FD) (µm)               float64
Edge Gradient                    float64
Elongation                       float64
Feret Angle Max (°)              float64
Feret Angle Min 

* Since `Area (ABD)` and `Area (Filled)` are identical, I will omit one of them. Could this also explain why an `Area (ESD)` column doesn't exist?

In [6]:
# List every column label present in the raw export.
df.columns

Index(['Name', 'Area (ABD) (µm²)', 'Area (Filled) (µm²)', 'Aspect Ratio',
       'Biovolume (Cylinder) (µm³)', 'Biovolume (P. Spheroid) (µm³)',
       'Biovolume (Sphere) (µm³)', 'Capture ID', 'Capture X (px)',
       'Capture Y (px)', 'Ch1 Area', 'Ch2 Area', 'Ch2/Ch1 Ratio', 'Circle Fit',
       'Circularity', 'Circularity (Hu)', 'Convex Perimeter (µm)', 'Convexity',
       'Diameter (ABD) (µm)', 'Diameter (ESD) (µm)', 'Diameter (FD) (µm)',
       'Edge Gradient', 'Elongation', 'Feret Angle Max (°)',
       'Feret Angle Min (°)', 'Fiber Straightness', 'Geodesic Aspect Ratio',
       'Geodesic Length (µm)', 'Geodesic Thickness (µm)', 'Length (µm)',
       'Particles Per Chain', 'Perimeter (µm)', 'Roughness',
       'Sphere Complement (µm³)', 'Sphere Count', 'Sphere Unknown (µm³)',
       'Sphere Volume (µm³)', 'Sqrt Circularity', 'Symmetry', 'Timestamp',
       'Volume (ABD) (µm³)', 'Volume (ESD) (µm³)', 'Width (µm)'],
      dtype='object')

## Create subsets from the initial dataframe

In [7]:
# Size and shape descriptors carried forward from the raw export,
# in the order they should appear in the subset.
relevant_columns = [
    "Area (Filled) (µm²)",
    "Aspect Ratio",
    "Diameter (FD) (µm)",
    "Convexity",
    "Convex Perimeter (µm)",
    "Circularity",
    "Elongation",
    "Perimeter (µm)",
    "Roughness",
]

# .copy() keeps the subset independent of df, as a freshly built frame would be.
relavent_data = df[relevant_columns].copy()

In [8]:
# Show only the columns whose label contains "Perim".
relavent_data.filter( like="Perim", axis="columns" )

Unnamed: 0,Convex Perimeter (µm),Perimeter (µm)
0,316.46,342.68
1,332.28,368.14
2,291.09,318.11
3,226.76,245.22
4,239.80,268.72
...,...,...
139,258.07,285.77
140,176.78,193.23
141,167.69,184.39
142,170.84,186.98


In [9]:
# Materialise the raw frame's column labels as a plain Python list.
column_names = list(df.columns)
print(column_names)


['Name', 'Area (ABD) (µm²)', 'Area (Filled) (µm²)', 'Aspect Ratio', 'Biovolume (Cylinder) (µm³)', 'Biovolume (P. Spheroid) (µm³)', 'Biovolume (Sphere) (µm³)', 'Capture ID', 'Capture X (px)', 'Capture Y (px)', 'Ch1 Area', 'Ch2 Area', 'Ch2/Ch1 Ratio', 'Circle Fit', 'Circularity', 'Circularity (Hu)', 'Convex Perimeter (µm)', 'Convexity', 'Diameter (ABD) (µm)', 'Diameter (ESD) (µm)', 'Diameter (FD) (µm)', 'Edge Gradient', 'Elongation', 'Feret Angle Max (°)', 'Feret Angle Min (°)', 'Fiber Straightness', 'Geodesic Aspect Ratio', 'Geodesic Length (µm)', 'Geodesic Thickness (µm)', 'Length (µm)', 'Particles Per Chain', 'Perimeter (µm)', 'Roughness', 'Sphere Complement (µm³)', 'Sphere Count', 'Sphere Unknown (µm³)', 'Sphere Volume (µm³)', 'Sqrt Circularity', 'Symmetry', 'Timestamp', 'Volume (ABD) (µm³)', 'Volume (ESD) (µm³)', 'Width (µm)']


#### Find index of relevant columns

In [10]:

# Look up the position of the filled-area column within column_names
# and echo it so the value can be checked against the column listing.
area_filled_index = column_names.index("Area (Filled) (µm²)")
print( area_filled_index )

# Same lookup for the perimeter column.
perimeter_index = column_names.index("Perimeter (µm)")
print( perimeter_index )


2
31


### Effective Diameter

In [11]:


# Build the effective-diameter frame from the subset, using the area and
# perimeter labels looked up in column_names.
# NOTE(review): the recorded output shows the helper reporting a KeyError and
# returning only area, perimeter, diameter and radius columns — confirm that
# is the intended behaviour of add_eff_diameter.
diameter_data = add_eff_diameter(
    df=relavent_data,
    area_column_name=column_names[area_filled_index],
    perimeter_column_name=column_names[perimeter_index],
    unit="µm",
)
print("\nAdded Effective Diameter column\n")

# Report where the new column landed and what the resulting labels are.
diameter_columns = diameter_data.columns.tolist()
print("index of diameter data is: ", diameter_columns.index("eff_diameter_µm"))

print(diameter_columns)
print(type(diameter_data.columns[2]))

A KeyError has occured: <class 'KeyError'>. 
The input dataframe consist of one file.


Added Effective Diameter column

index of diameter data is:  2
['Area_µm^2', 'Perim_µm', 'eff_diameter_µm', 'eff_radius_µm']
<class 'str'>


In [12]:
# Display the frame returned by add_eff_diameter.
diameter_data

Unnamed: 0,Area_µm^2,Perim_µm,eff_diameter_µm,eff_radius_µm
0,7136.33,342.68,95.321863,47.660931
1,7262.97,368.14,96.163926,48.081963
2,5884.41,318.11,86.557862,43.278931
3,3542.34,245.22,67.158375,33.579188
4,3724.86,268.72,68.866821,34.433411
...,...,...,...,...
139,4408.07,285.77,74.916814,37.458407
140,2081.38,193.23,51.479077,25.739538
141,1833.77,184.39,48.320063,24.160031
142,1925.64,186.98,49.515664,24.757832


#### Determine cumulative frequency for Eff. Diameter 

In [13]:

# Add the cumulative-frequency columns for the effective diameter.
# The target column is addressed by its label rather than by position, so the
# call keeps working if the column order of diameter_data ever changes.
# NOTE(review): diameter_data is overwritten with the helper's return value;
# confirm the helper tolerates being re-run on the already-augmented frame.
diameter_data = add_cumulative_frequency(
    df=diameter_data,
    target_column_name="eff_diameter_µm",
    suffix="dia",
)
print("\nAdded Diamter-cumulative frequency column\n")





Target column exist.

123     30.312263
9       39.112786
47      43.343569
63      44.611380
31      45.572411
          ...    
30     107.107020
35     110.985542
53     114.568055
15     145.128775
8      180.918068
Name: eff_diameter_µm, Length: 144, dtype: float64

total_particles:  144

Added Diamter-cumulative frequency column



#### Determine cumulative percentage for Effective Diameter

In [14]:

# Add the cumulative-percentage column for the effective diameter.
# The target column is addressed by its label rather than by position, so the
# call keeps working if the column order of diameter_data ever changes.
diameter_data = add_cumulative_percentage(
    df=diameter_data,
    target_column_name="eff_diameter_µm",
    column_suffix="dia",
)
print("\nAdded Diamter-cumulative percentage column\n")

# Preview the first rows of the augmented frame.
diameter_data.head()


Target column exist.

123     30.312263
9       39.112786
47      43.343569
63      44.611380
31      45.572411
          ...    
30     107.107020
35     110.985542
53     114.568055
15     145.128775
8      180.918068
Name: eff_diameter_µm, Length: 144, dtype: float64

total_particles:  144

Added Diamter-cumulative percentage column



Unnamed: 0,Area_µm^2,Perim_µm,eff_diameter_µm,eff_radius_µm,Particle_number,cumulative_frequency_dia,cumulative_%_particle_dia
123,721.65,122.92,30.312263,15.156132,1,1,0.694444
9,1201.51,151.6,39.112786,19.556393,2,2,1.388889
47,1475.5,166.7,43.343569,21.671784,3,3,2.083333
63,1563.08,172.32,44.61138,22.30569,4,4,2.777778
31,1631.15,174.47,45.572411,22.786206,5,5,3.472222


### Sphericity

#### Add sphericity column

In [15]:

# Run add_sphericity on the subset, passing the filled-area and perimeter
# labels looked up in column_names; the result is stored in sph_data.
# NOTE(review): the sphericity formula lives in sphericity_operations and is
# not visible from this notebook.
sph_data = add_sphericity( 
    df= relavent_data, 
    area_column_name=column_names[area_filled_index], 
    perim_column_name=column_names[perimeter_index] )
print("\nAdded Sphericity column\n")



Added Sphericity column



#### Determine cumulative frequency for Sphericity

In [16]:
# Pass the "Sphericity" column to add_cumulative_frequency with suffix "sph".
# NOTE(review): sph_data is overwritten with the helper's return value, so
# re-running this cell feeds the already-augmented frame back in — confirm
# the helper tolerates that.
sph_data = add_cumulative_frequency(
    df= sph_data,
    target_column_name="Sphericity",
    suffix="sph"
)
print("\nAdded sphericity-cumulative frequency column\n")

Target column exist.

8      0.533987
16     0.709129
72     0.709143
106    0.732586
15     0.735095
         ...   
87     0.880727
52     0.881548
34     0.881871
60     0.882747
35     0.883450
Name: Sphericity, Length: 144, dtype: float64

total_particles:  144

Added sphericity-cumulative frequency column



#### Determine cumulative percentage for Sphericity 

In [17]:

# Pass the "Sphericity" column to add_cumulative_percentage; note that this
# helper takes `column_suffix`, whereas add_cumulative_frequency takes `suffix`.
sph_data = add_cumulative_percentage(
    df= sph_data,
    target_column_name="Sphericity",
    column_suffix="sph"
)
print("\nAdded sphericity-cumulative percentage column\n")

# Preview the first rows of the augmented frame.
sph_data.head()

Target column exist.

8      0.533987
16     0.709129
72     0.709143
106    0.732586
15     0.735095
         ...   
87     0.880727
52     0.881548
34     0.881871
60     0.882747
35     0.883450
Name: Sphericity, Length: 144, dtype: float64

total_particles:  144

Added sphericity-cumulative percentage column



Unnamed: 0,Area (Filled) (µm²),Aspect Ratio,Diameter (FD) (µm),Convexity,Convex Perimeter (µm),Circularity,Elongation,Perimeter (µm),Roughness,Sphericity,Particle_number,cumulative_frequency_sph,cumulative_%_particle_sph
8,25707.14,0.63,180.92,0.8,698.95,0.31,7.95,1064.39,1.52,0.533987,1,1,0.694444
16,8748.5,0.56,105.54,0.92,390.13,0.58,3.05,467.57,1.2,0.709129,2,2,1.388889
72,3763.82,0.97,69.23,0.92,258.49,0.63,2.6,306.68,1.19,0.709143,3,3,2.083333
106,2395.21,0.76,55.22,0.96,206.13,0.71,1.91,236.82,1.15,0.732586,4,4,2.777778
15,16542.34,0.55,145.13,0.93,524.53,0.6,2.86,620.24,1.18,0.735095,5,5,3.472222


## Plots

In [18]:
# Print the column labels of both derived frames, one frame per line of output.
for label, frame in (
    ("columns in daimeter dataframe: ", diameter_data),
    ("columns in sphericity datafram: ", sph_data),
):
    print(label, frame.columns)

columns in daimeter dataframe:  Index(['Area_µm^2', 'Perim_µm', 'eff_diameter_µm', 'eff_radius_µm',
       'Particle_number', 'cumulative_frequency_dia',
       'cumulative_%_particle_dia'],
      dtype='object')
columns in sphericity datafram:  Index(['Area (Filled) (µm²)', 'Aspect Ratio', 'Diameter (FD) (µm)',
       'Convexity', 'Convex Perimeter (µm)', 'Circularity', 'Elongation',
       'Perimeter (µm)', 'Roughness', 'Sphericity', 'Particle_number',
       'cumulative_frequency_sph', 'cumulative_%_particle_sph'],
      dtype='object')


### Relational plots

In [19]:
# Scatter of cumulative particle percentage against effective diameter.
# Columns are referenced by label (as hover_name already is) instead of by
# position, so the plot does not silently switch axes if columns are reordered.
fig_cpp_dia = px.scatter(
    diameter_data,
    x="eff_diameter_µm",
    y="cumulative_%_particle_dia",
    hover_name="Particle_number",
    title="Cumulative Percentage (particle) vs. Effective Diameter (µm)",
)
fig_cpp_dia.show()

In [20]:
# Confirm which column label sits at position 9 of sph_data.columns.
print( sph_data.columns[9] )

Sphericity


In [21]:
# Scatter of cumulative particle percentage against sphericity.
# Columns are referenced by label (as hover_name already is) instead of by
# position, so the plot does not silently switch axes if columns are reordered.
fig_cpp_sph = px.scatter(
    sph_data,
    x="Sphericity",
    y="cumulative_%_particle_sph",
    hover_name="Particle_number",
    title="Cumulative Percentage (particle) vs. Sphericity ",
)
fig_cpp_sph.show()

### Distributional Plots

#### Histograms

In [27]:
# Histogram of effective diameter with a rug marginal and per-bar counts.
# The column is referenced by label rather than by position so the plot
# stays correct if the column order of diameter_data changes.
fig_hist_dia = px.histogram(
    diameter_data,
    x="eff_diameter_µm",
    text_auto=True,
    marginal="rug",
    title="Histogram of Virgin Powder Effective Diameter (µm)",
    nbins=40,
)
fig_hist_dia.show()

# Expect most virgin particles to have sizes >= 40 and <= 100 microns.
# Expect reused powders to be larger in size, with fewer satellite particles (actually good): >= 45, <= 150.

In [23]:
# Histogram of sphericity with a rug marginal and per-bar counts.
# The column is referenced by label rather than by position so the plot
# stays correct if the column order of sph_data changes.
fig_hist_sph = px.histogram(
    sph_data,
    x="Sphericity",
    text_auto=True,
    marginal="rug",
    title="Histogram of Virgin Powder Sphericity",
)
fig_hist_sph.show()

# Expect nearly all of our virgin powder to be spherical (> 0.6).
# Expect most of our reused powder to fall between 0.2 and 0.8 on the sphericity histogram.

#### Strip plots

In [24]:

# One-dimensional strip plot of every particle's effective diameter.
fig_strip_dia = px.strip(
    data_frame=diameter_data,
    x="eff_diameter_µm",
    title="Strip plot of Virgin Powder Effective Diameter (µm)",
)
fig_strip_dia.show()


In [25]:

# One-dimensional strip plot of every particle's sphericity.
fig_strip_sph = px.strip(
    data_frame=sph_data,
    x="Sphericity",
    title="Strip plot of Virgin Powder Sphericity",
)
fig_strip_sph.show()
