# Air hydrate filtering

This notebook filters the air hydrates obtained from the segmentation step to produce the final segmentation result.

![title](Filtering_step.png)

#### Import necessary packages

In [None]:
import cv2
import skimage
###################################################
from skimage import color, filters, measure
from skimage import img_as_float, img_as_ubyte
from skimage.feature import canny
from skimage.morphology import closing, dilation, skeletonize
###################################################
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd
###################################################
from scipy import ndimage as ndi
###################################################
import glob
import time
import os
import sys
from pathlib import Path

#### Version control

In [None]:
from datetime import date

# Record the run date for provenance; str() of a date is its ISO format (YYYY-MM-DD).
today = str(date.today())

print(f"Notebook last run in {today}")

In [None]:
sys.version # Python interpreter version

In [None]:
cv2.__version__ # OpenCV version

In [None]:
skimage.__version__ # scikit-image version

In [None]:
pd.__version__ # pandas version

In [None]:
np.__version__ # NumPy version

#### Last edited:

29.10.2024

#### Set paths and name

In [None]:
# Date stamp in DD_MM_YYYY form. NOTE: this rebinds `today`, which the version-control
# cell above set to an ISO-format date; only the commented-out `path_write` cell uses it.
today = time.strftime("%d_%m_%Y")

In [None]:
### Image series (used as a folder name in img_path and path_read below)
series = "EDML C"

In [None]:
### Segmentation category (used as a folder name in the paths below and as prefix of the metadata file name)
category = "category_3"

In [None]:
### Insert the date when the segmentation step was performed (it forms part of path_read below)
seg_date = "29_10_2024"

In [None]:
### Insert the directory where your images are saved: replace the leading "..." with your base directory.
### Keep the trailing "/" - the filtering loop appends the file name directly to this string.
img_path = f".../{series}/{category}/"

In [None]:
### Insert the directory where the segmentation results were saved (i.e. save_path in the segmentation jupyter notebook).
### No trailing "/" here - sub-folders and file names are appended with a leading "/".
path_read = f".../{series}/{seg_date}/{category}"

#### Create paths for cleaned data

Create folders for an optional cleaning step

In [None]:
# path_write = f"{path_read}/cleaned_data/{today}"
# os.makedirs(path_write)

In [None]:
# path_data = f"{path_write}/data"
# os.mkdir(path_data)
# path_visual = f"{path_write}/visual_test"
# os.mkdir(path_visual)
# path_final = f"{path_write}/seg_final"
# os.mkdir(path_final)

#### Get names of images in folder

In [None]:
def load_image_names(img_path, ext=".tif"):
    """Return the sorted base names of all images with extension `ext` in a folder.

    Parameters
    ----------
    img_path : str or path-like
        Directory that contains the input images.
    ext : str, optional
        File extension (including the dot) of the images, e.g. ".tif" or ".png".
        Change it to match the type of your images. The match is case-sensitive.

    Returns
    -------
    list of str
        File names without the extension, in alphabetical order.

    Notes
    -----
    Entries that do not end in `ext` (other file types, sub-folders, hidden
    system files) are skipped, because the filtering loop later re-appends the
    extension to every returned name in order to read the image.
    The result is sorted because ``os.listdir`` returns entries in arbitrary
    order, which would make position-based selections irreproducible.
    """
    img_names = []
    for filename in sorted(os.listdir(img_path)):
        if not filename.endswith(ext):
            continue
        # Strip only the trailing extension, never an occurrence inside the name.
        img_names.append(filename[:len(filename) - len(ext)])
    return img_names

In [None]:
# Base names (without extension) of the images found in img_path.
names = load_image_names(img_path)

In [None]:
# Display the image names so the selection can be checked before filtering.
names

## Filtering routine

Create empty lists for saving metadata

In [None]:
# Per-image records, filled by the filtering loop (one entry per processed image).
filt_time, total_S1, total_S2 = [], [], []   # time stamp, object count in S1, object count in S2
ar_area_S1, ar_area_S2 = [], []              # objects removed by the area/aspect-ratio filter

#### Filter variables

Adjust the main filtering variables

In [None]:
# Main thresholds; every comparison in the filtering loop is strict (< or >).
area_min = 150       # objects must be larger than this area (pixels)
area_max = 10000     # objects must be smaller than this area (pixels)
intensity = 50       # objects must have a mean intensity above this value
circularity = 0.3    # minimum circularity 4*pi*area/perimeter**2; the loop applies it to the second object set only

Additional specific variables

In [None]:
# Objects that are both smaller than `size` AND more elongated than `aspect_ratio` are removed.
size = 320            # area in pixels
aspect_ratio = 2.3    # major axis length / minor axis length

#### Variables for cleaning

Optional cleaning step

In [None]:
# eq_diameterI
# circ_clean
# int_min
# int_max

In [None]:
### Use this to only filter certain files in each category. Put "selected" in place of "names" in the loop below.
### Positions that do not exist in "names" (fewer images than expected) are skipped,
### so this optional cell no longer raises an IndexError on small folders.
selected_positions = (1, 3, 5, 8)
selected = [names[i] for i in selected_positions if i < len(names)]
selected

#### Measure process time

In [None]:
# Wall-clock start time (seconds since the epoch); compared with end_all at the end of the notebook.
start_all = time.time()

#### Filtering loop

The following for-loop automatically loads and filters all the images in the chosen directory (or only the names selected above).

In [None]:
##############Helpers######################
# Region properties measured for both segmentation results before filtering.
_FILTER_PROPS = ("label","area","equivalent_diameter_area","perimeter","coords",
                 "axis_minor_length","axis_major_length","eccentricity",
                 "intensity_mean")


def _read_gray(path):
    """Read an image as grayscale; fail loudly if OpenCV cannot load it.

    cv2.imread returns None instead of raising when a file is missing or
    unreadable, which would otherwise surface later as an obscure TypeError.
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return image


def _write_image(path, image):
    """Write an image with OpenCV; cv2.imwrite only returns False on failure, so check it."""
    if not cv2.imwrite(path, image):
        raise OSError(f"Could not write image: {path}")


def _filter_objects(label_img, intensity_img, min_circularity=None):
    """Measure the labelled objects and apply the area, intensity, (circularity) and AR filters.

    Returns (kept_objects, n_objects_before_filtering, n_removed_by_AR_filter).
    The circularity filter is applied only when `min_circularity` is given.
    """
    table = pd.DataFrame(measure.regionprops_table(label_img, intensity_img,
                                                   properties=_FILTER_PROPS))
    n_total = len(table)
    # .copy() so that the new columns are added to an independent frame
    # (avoids pandas' SettingWithCopyWarning on a filtered slice).
    kept = table[(area_min < table["area"]) & (table["area"] < area_max)
                 & (table["intensity_mean"] > intensity)].copy()
    kept["circularity"] = 4*(np.pi)*(kept["area"]/np.square(kept["perimeter"]))
    if min_circularity is not None:
        kept = kept[kept["circularity"] > min_circularity].copy()
    ########Additional noise filter for cleaning step#########
    ### Optional: add a filter on the individual cleaning variables here, e.g.
    # kept.loc[(kept['equivalent_diameter_area'] < eq_diameterI) &
    #          (kept['circularity'] > circ_clean) &
    #          (kept['intensity_mean'] < int_max) & (kept['intensity_mean'] > int_min)]
    ######Filter for aspect ratio and area######
    kept["AR"] = (kept["axis_major_length"])/(kept["axis_minor_length"])
    small_and_elongated = (kept["area"] < size) & (kept["AR"] > aspect_ratio)
    n_ar = int(small_and_elongated.sum())  # Count the amount of filtered objects.
    return kept[~small_and_elongated], n_total, n_ar


def _paint_objects(coords_column, shape):
    """Return a uint8 mask of `shape` with the pixels of all given objects set to 255."""
    mask = np.zeros(shape, dtype=np.uint8)
    for coords in coords_column:
        # coords holds one (row, col) pair per pixel of the object.
        mask[coords[:, 0], coords[:, 1]] = 255
    return mask


##############Prepare output###############
# cv2.imwrite does not create missing folders, so make sure they exist.
for folder in ("seg_final", "visual_test", "data"):
    os.makedirs(f"{path_read}/{folder}", exist_ok=True)
# Start from empty records so that re-running this cell does not append to the
# results of a previous run (the metadata table expects one entry per image).
for record in (filt_time, total_S1, total_S2, ar_area_S1, ar_area_S2):
    record.clear()

a=0  # number of images processed so far
for img in names: ### Put "selected" instead of "names" here to filter above selected images.
    filt_time.append(time.strftime("%d%m%Y-%H%M"))
    ##############Load the images##############
    original=_read_gray(f"{img_path}{img}.tif") ### change .tif to .png etc... depending on the type of your images
    segmented_S1=_read_gray(f"{path_read}/segmented/{img}_segmented_S1.tif")
    # NOTE(review): the file is named "_S3" but everything derived from it is called "S2" - confirm the naming.
    segmented_S3=_read_gray(f"{path_read}/segmented/{img}_segmented_S3.tif")
    ##############Label the segmented image####
    img_label_S1=measure.label(segmented_S1>0)
    img_label_S2=measure.label(segmented_S3>0)
    ##############Filter the data S1###########
    # NOTE(review): S1 is not filtered by circularity (only S2 is) - behaviour kept as in the original.
    data_filtered_1, full_S1, ar_filt_S1 = _filter_objects(img_label_S1, original)
    total_S1.append(full_S1)
    ar_area_S1.append(ar_filt_S1)
    ##############Filter the data S2###########
    data_filtered_circ_2, full_S2, ar_filt_S2 = _filter_objects(img_label_S2, original,
                                                                min_circularity=circularity)
    total_S2.append(full_S2)
    ar_area_S2.append(ar_filt_S2)
    ############Plot data on image#############
    base_1=_paint_objects(data_filtered_1["coords"], original.shape)
    base_2=_paint_objects(data_filtered_circ_2["coords"], original.shape)
    ##############Merge data###################
    # Both masks hold only 0 and 255, so the element-wise maximum is their union (stays uint8).
    final=np.maximum(base_1, base_2)
    _write_image(f"{path_read}/seg_final/{img}_filtered_II.tif", final) ### FINAL binary segmented image.
    ##############Visual test##################
    # Overlay the final mask in the red channel on top of the grayscale original.
    base=np.zeros(original.shape, dtype=np.uint8)
    bgr=cv2.merge((base,base,final))
    new=cv2.cvtColor(original,cv2.COLOR_GRAY2BGR)
    visual_test = cv2.addWeighted(new,0.6,bgr,0.4,0)
    _write_image(f"{path_read}/visual_test/{img}_filtered_II.jpg",visual_test) ### Saves an image (jpg) for a FINAL visual assessment.
    ###########################################
    # Re-label the merged mask and measure the final objects.
    img_label_final=measure.label(final>0)
    props_final=measure.regionprops_table(img_label_final,original,
    properties=("label","area","equivalent_diameter_area","perimeter","centroid",
                "axis_minor_length","axis_major_length","feret_diameter_max","eccentricity",
                "orientation","intensity_mean","slice","coords"))
    data_final=pd.DataFrame(props_final)
    data_final["circularity"] = 4*(np.pi)*(data_final["area"]/np.square(data_final["perimeter"]))
    # arr = data_final["coords"].to_numpy()
    # np.save(f"{path_read}/data/{img}_coords.npy", arr)
    data_save=data_final.drop(columns=["coords"])  # pixel coordinates are not written to the csv
    data_save.to_csv(f"{path_read}/data/{img}_filtered_II.csv",sep=";") ### FINAL segmented data.
    a=a+1

## Create metadata

Save the metadata as a .csv file.

In [None]:
### Change "names" to "selected" as index if you chose to filter selected images.
# One row per image: time stamp, object counts, the thresholds used and the AR-filter counts.
metadata_columns = ['time','Nr_objects_S1','Nr_objects_S2',
                    '<area_min','>area_max','<intensity','<circularity',
                    '<size_ar','>AR','filt_AR_S1','filt_AR_S2']
metadata = pd.DataFrame(index=names, columns=metadata_columns)

In [None]:
# Per-image records collected in the filtering loop (one entry per row of `metadata`).
metadata['time'] = filt_time
## Number of labelled objects before filtering
metadata['Nr_objects_S1'] = total_S1
metadata['Nr_objects_S2'] = total_S2  # fixed: this column was previously filled with the S1 counts
## Main filter thresholds (identical for every image)
metadata.loc[:,'<area_min'] = area_min
metadata.loc[:,'>area_max'] = area_max
metadata.loc[:,'<intensity'] = intensity
metadata.loc[:,'<circularity'] = circularity
## Area/aspect-ratio filter: thresholds and number of objects it removed
metadata.loc[:,'<size_ar'] = size
metadata.loc[:,'>AR'] = aspect_ratio
metadata['filt_AR_S1'] = ar_area_S1
metadata['filt_AR_S2'] = ar_area_S2

In [None]:
# Preview the first rows of the metadata table.
metadata.head()

In [None]:
# Write the metadata next to the segmentation results (";"-separated, image names as index).
metadata.to_csv(f"{path_read}/{category}_metadata_filtered_II.csv",sep=";")

#### Total time elapsed

In [None]:
# Wall-clock time in seconds since start_all was set (includes every cell run in between).
end_all = time.time()
time_all = (end_all-start_all)

In [None]:
# Report the total processing time.
print("Elapsed time:", time_all, "seconds")

# Finished