# Image Pre-processing 


In [None]:
""" All the imports needed by the main file """
import cv2 as cv
import matplotlib.pyplot as plt 
import numpy as np
import os
from utils import *
from preprocessing import *

## Resizing of Images 

Images in our dataset are large and take a long time to process. An efficient way to address this is to resize each image to a smaller dimension. This helps us because:
1. Smaller images are easier (and faster) to process.
2. The original features are kept intact at the smaller size.


In [None]:
# Only run if you need to change the original size of the images.
# Writes a 1200x1200 copy of every dataset image into ./resized_dataset.
files = get_all_files("./dataset")
for file in files:
    img = cv.imread(file)
    if img is None:
        # cv.imread returns None (it does not raise) for unreadable or
        # non-image files; skip them instead of crashing inside cv.resize.
        print(f"Skipping unreadable image: {file}")
        continue

    # os.path.basename respects the platform's path separators, and only the
    # extension is normalised so a "JPG" inside the file stem stays untouched.
    stem, ext = os.path.splitext(os.path.basename(file))
    fileName = stem + (".jpg" if ext == ".JPG" else ext)

    # NOTE: a fixed (1200, 1200) target does not preserve the aspect ratio.
    updated = cv.resize(img, (1200, 1200))

    # Created lazily so the folder only appears once there is an image to
    # save; exist_ok makes the call safe to repeat on every iteration.
    os.makedirs("./resized_dataset", exist_ok=True)
    cv.imwrite(f"./resized_dataset/{fileName}", updated)

In [None]:
""" Global variables/constants """
# Prefer the resized copies when that folder exists; otherwise fall back to
# the original dataset folder.
files = get_all_files(
    "./resized_dataset" if os.path.isdir("./resized_dataset") else "./dataset"
)

# Image filtering and smoothing

We have to choose a filter that smooths our image while preserving the important edges. We can use either of the following two filters:
1. Gaussian Filter
2. Bilateral Filter

In [None]:
# Image pre-processing: compare Gaussian and bilateral smoothing side by side
# (one row per image, one column per filter) and save the figure to ./output.
P = PreProcessor()
fig = plt.figure(figsize=(12, 18))

# The subplot grid below has 6 rows, so only the first 6 images fit; asking
# plt.subplot for a position beyond the grid raises a ValueError.
for idx, file in enumerate(files[:6]):
    img = cv.imread(file)
    if img is None:
        # cv.imread returns None for unreadable files; leave that row blank
        # instead of crashing in the filters below.
        print(f"Skipping unreadable image: {file}")
        continue

    # Filters run on the colour image; the result is converted to grayscale
    # only for display.
    gaussian = P.apply_gaussian_filter(img, sigma=1)
    gaussian = cv.cvtColor(gaussian, cv.COLOR_BGR2GRAY)
    # NOTE(review): 9, 75, 75 are presumably diameter, sigmaColor and
    # sigmaSpace as in cv.bilateralFilter — confirm against PreProcessor.
    bilateral = P.apply_bilateral_filter(img, 9, 75, 75)
    bilateral = cv.cvtColor(bilateral, cv.COLOR_BGR2GRAY)

    panels = (("Gaussian Filter", gaussian), ("Bilateral Filter", bilateral))
    for col, (label, smoothed) in enumerate(panels, start=1):
        plt.subplot(6, 2, idx * 2 + col)
        plt.title(f"{label} - Image {idx+1}")
        plt.imshow(smoothed, cmap="gray")
        plt.axis('off')

plt.tight_layout()
# savefig does not create missing directories, so make sure ./output exists.
os.makedirs("./output", exist_ok=True)
plt.savefig("./output/smoothend_outputs.png")
plt.show()

In [None]:
# Show the Hue, Saturation and Value channels of each image side by side
# (one row per image) and save the figure to ./output.
fig = plt.figure(figsize=(18, 18))

# Loop through the first 6 images in the files array: the subplot grid has
# 6 rows and plt.subplot raises a ValueError for positions beyond it.
for idx, file in enumerate(files[:6]):
    img = cv.imread(file)
    if img is None:
        # cv.imread returns None for unreadable files; leave that row blank
        # instead of crashing in cv.cvtColor.
        print(f"Skipping unreadable image: {file}")
        continue

    hsv = cv.cvtColor(img, cv.COLOR_BGR2HSV)
    h, s, v = cv.split(hsv)

    channels = (
        # OpenCV stores 8-bit hue as degrees / 2 (0..179). Pinning the colour
        # range to 0..180 maps the cyclic "hsv" colormap onto true hues
        # instead of stretching it over each image's own min/max.
        ("Hue", h, {"cmap": "hsv", "vmin": 0, "vmax": 180}),
        ("Saturation", s, {"cmap": "gray"}),
        ("Value", v, {"cmap": "gray"}),
    )
    for col, (label, channel, show_kwargs) in enumerate(channels, start=1):
        plt.subplot(6, 3, idx * 3 + col)
        plt.title(f"{label} - Image {idx+1}")
        plt.imshow(channel, **show_kwargs)
        plt.axis('off')

plt.tight_layout()
# savefig does not create missing directories, so make sure ./output exists.
os.makedirs("./output", exist_ok=True)
plt.savefig("./output/hsv_comparison.png")
plt.show()


# Edges Detection

We will compare three edge detectors: `sobel`, `canny` and `laplacian`. So far, the best results come from the Laplacian filter.

In [None]:
# Compare Sobel, Canny and Laplacian edge maps against the original image
# (one row per image, first 6 images only) and save the figure to ./output.
P = PreProcessor()
fig = plt.figure(figsize=(24, 18))
for idx, file in enumerate(files[:6]):
    img = cv.imread(file)
    if img is None:
        # cv.imread returns None for unreadable files; leave that row blank
        # instead of crashing in cv.cvtColor.
        print(f"Skipping unreadable image: {file}")
        continue

    # All three detectors operate on the grayscale image.
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    sobel_edges = P.apply_sobel_filter(gray, direction='both', ksize=3)
    canny_edges = P.apply_canny_edge(gray, threshold1=100, threshold2=200)
    # A float64 output keeps the negative second-derivative responses;
    # convertScaleAbs then takes their magnitude and converts back to 8-bit.
    laplacian_edges = cv.Laplacian(gray, cv.CV_64F, ksize=3)
    laplacian_edges = cv.convertScaleAbs(laplacian_edges)

    panels = (
        # OpenCV loads BGR; matplotlib expects RGB. cmap=None is imshow's
        # default and is ignored for colour images.
        (f"Original - Image {idx+1}", cv.cvtColor(img, cv.COLOR_BGR2RGB), None),
        ("Sobel Edges", sobel_edges, 'gray'),
        ("Canny Edges", canny_edges, 'gray'),
        ("Laplacian Edges", laplacian_edges, 'gray'),
    )
    for col, (label, picture, colormap) in enumerate(panels, start=1):
        plt.subplot(6, 4, idx * 4 + col)
        plt.title(label)
        plt.imshow(picture, cmap=colormap)
        plt.axis('off')

plt.tight_layout()
# savefig does not create missing directories, so make sure ./output exists.
os.makedirs("./output", exist_ok=True)
plt.savefig("./output/edge_comparison.png")
plt.show()
