In [7]:
import cv2
import numpy as np
import glob
import pickle
import os
from google.colab.patches import cv2_imshow
from google.colab import drive

# Initialize the path to the shared folder globally
data_path = '/content/drive/MyDrive/ML final project'
os.makedirs(data_path + '/SerializedData/', exist_ok=True)  # Create SerializedData directory

# Mounts drive to google drive
drive.mount('/content/drive')

# Global definition for Cascade Classifier from google drive
eye_cascade = cv2.CascadeClassifier(data_path + '/code/haarcascade_eye.xml')

imgs_paths = []  # List to store file paths for all images
eye_detected_imgs_paths = []  # List to store file paths for images with detected eyes




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Pre-processing

The first block of code will load the previously saved serialized data from the shared drive.

The second block is the code used to create the serialized data from the dataset's images.

#### SerializedData - Images

In [None]:
## Run this if you are starting a new session to load all of the serialized data

# Load the file paths for imgs
with open(os.path.join(data_path, 'SerializedData', 'imgs_paths.pkl'), 'rb') as f:
    imgs_paths = pickle.load(f)

# Load the file paths for eye detected images
with open(os.path.join(data_path, 'SerializedData', 'eye_detected_imgs_paths.pkl'), 'rb') as f:
    eye_detected_imgs_paths = pickle.load(f)

print("File paths loaded successfully.")

In [6]:
## Does not need to be run each session

import os
import cv2
import glob

# Define data path and ensure cascade directory exists
os.makedirs(data_path + '/eye_cascade/', exist_ok=True)

# Function to apply thresholding and morphological transformations
def transform_image(img, threshold):
    if threshold == 0:
        _, threshold = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    else:
        retval, threshold = cv2.threshold(img, threshold, 255, cv2.THRESH_BINARY)

    # Apply morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    opening = cv2.morphologyEx(threshold, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(threshold, cv2.MORPH_CLOSE, kernel)
    open_close = cv2.bitwise_or(opening, closing, mask=None)

    return open_close, opening, closing

# Initialize lists for images and labels
imgs = []
label = 0
path = "/content/drive/MyDrive/ML final project/datasets/CLASSES_400_300_Part2"

for filepath in glob.iglob(path + '/**', recursive=True):
    num_in_folder = 0
    for filefilepath in glob.iglob(filepath + '/**/*.tiff', recursive=True):
        # Store the file path, folder number, label
        img_colored = cv2.imread(filefilepath)
        img_gray = cv2.cvtColor(cv2.resize(img_colored, (200, 150)), cv2.COLOR_BGR2GRAY)
        imgs_paths.append([filefilepath, num_in_folder, label])  # Save the file path and metadata
        num_in_folder += 1
    label += 1

# Eye detection and processing
eyes_num = 0
for img_path, j, L in imgs_paths:
    # Read and process the image
    img = cv2.imread(img_path)
    img_gray = cv2.cvtColor(cv2.resize(img, (400, 400)), cv2.COLOR_BGR2GRAY)

    # Detect eyes in the image using the eye cascade classifier
    eyes = eye_cascade.detectMultiScale(img_gray, scaleFactor=1.1, minNeighbors=3)

    if len(eyes) > 1:
        print(eyes_num)
        eye_detected_imgs_paths.append([img_path, j, L])  # Save the file path for detected images
        eyes_num += 1

        maxium_area = -3
        for (ex, ey, ew, eh) in eyes:
            area = ew * eh
            if area > maxium_area:
                maxium_area = area
                maxium_width = ew
                point_x = ex
                point_y = ey
                maxium_height = eh

        # Optionally, draw rectangle around largest eye (currently commented out)
        # cv2.rectangle(img, (point_x, point_y), (point_x + maxium_width, point_y + maxium_height), (255, 0, 0), 2)

# Print summary
print("Total eyes found: ", eyes_num)
print("Total images processed: ", len(imgs_paths))

# Save the file paths for imgs and eye_detected_imgs to disk
with open(os.path.join(data_path, 'SerializedData', 'imgs_paths.pkl'), 'wb') as f:
    pickle.dump(imgs_paths, f)

with open(os.path.join(data_path, 'SerializedData', 'eye_detected_imgs_paths.pkl'), 'wb') as f:
    pickle.dump(eye_detected_imgs_paths, f)

print("File paths saved successfully.")

Total eyes found:  0
Total images processed:  0
File paths saved successfully.


In [None]:
# Initialize a counter to track the number of iris images found
iris_num = 0

# Loop through each image and its associated metadata in the list of eye-detected images
for i, j, L, c in eye_detected_imgs:

    # Apply the Hough Circle Transform to detect circles in the grayscale image 'i'
    circles = cv2.HoughCircles(i, cv2.HOUGH_GRADIENT, 10, 100)

    # If circles were detected
    if circles is not None:
        # Round the coordinates and radii of the detected circles to integers
        circles = np.round(circles[0, :]).astype("int")

        # Initialize a very high maximum average intensity for comparison
        maxiumum_average = 10000000000000
        key = True  # A flag to indicate if no valid circles were found within the criteria

        # Loop through each detected circle
        for (x, y, r) in circles:
            # Check if the circle is entirely within the image boundaries and has a minimum radius
            if x + r <= max(i.shape) and y + r <= max(i.shape) and x - r > 0 and y - r > 0 and r > 20:
                key = False  # Set the flag to False since a valid circle was found

                # Define a Region of Interest (ROI) around the detected circle
                new_roi = i[y - r:y + r, x - r:x + r]
                # Calculate the average pixel intensity of the ROI
                average = np.average(new_roi)

                # Update if this circle has the lowest average intensity found so far
                if average < maxiumum_average:
                    maxiumum_r = r
                    point_x = x
                    point_y = y
                    maxiumum_average = average

        # If no circle met the criteria (i.e., key is still True)
        if key:
            # Set the average intensity to infinity to ensure this block is only for fallback cases
            average = float('inf')
            maxiumu_raduis = -4  # Initialize a variable to store the maximum radius found

            # Loop again through the circles as a fallback, choosing the largest radius
            for (x, y, r) in circles:
                if r > maxiumu_raduis:
                    maxiumum_r = r
                    point_x = x
                    point_y = y
                    maxiumum_average = average

        # Optionally, draw the detected iris circle on the original color image `c` (currently commented out)
        # cv2.circle(c, (point_x, point_y), maxiumum_r, (255, 255, 0), 4)

        # Save the annotated image to the specified directory on Google Drive with a unique name based on `L` and `j`
        cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/iris/" + str(L) + '.' + str(j) + '.jpg', c)

        # Add the detected iris image to the iris_eye_detected_imgs list
        iris_eye_detected_imgs.append(eye_detected_imgs[iris_num])
        # Increment the iris count
        iris_num += 1

# Print the total number of iris images found
print("total_iris_found = ", iris_num)

# Print the total number of images processed
print("total images number ", len(imgs))


In [None]:
# Copy the list of detected iris images to 'imgs' and shuffle them randomly
imgs = iris_eye_detected_imgs

# Define a 5x5 kernel for image transformations (e.g., dilation or erosion)
kernel = np.ones((5,5), np.uint8)

# Import the random module and shuffle 'imgs' to randomize the processing order
import random
random.shuffle(imgs)

# Initialize lists to store the final output, labels, and test images
test = []
final_output = []
lables = []

# Loop through each image and associated metadata in 'imgs'
for i, j, L, c in imgs:
    # Perform a transformation on the image 'i' with an initial threshold of 0
    # 'gold', 'siver', and 'diamond' represent different transformations of the image
    gold, siver, diamond = transform_image(i, 0)
    golden_refrence = sum(sum(gold))  # Sum the pixel values of the 'gold' transformed image

    # Loop through threshold values from 10 to 1000 in increments of 10
    for k in range(10, 1000, 10):
        # Apply transformations with the current threshold 'k'
        working_img, opening, closing = transform_image(i, k)
        suming = sum(sum(working_img))  # Sum the pixel values of the 'working_img'
        diffrence = suming - golden_refrence  # Calculate the difference from the golden reference

        # If the difference in pixel values is significant (threshold found)
        if diffrence > 800:
            print("The image threshold =", k)
            print("The image name", j)
            print(" ")

            # Save images with applied transformations for different thresholds
            cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/threshold/" + str(L) + '.' + str(j) + '.jpg', working_img)
            cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/opening/" + str(L) + '.' + str(j) + '.jpg', opening)
            cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/closing/" + str(L) + '.' + str(j) + '.jpg', closing)

            # Find contours in the thresholded image 'working_img'
            contours, _ = cv2.findContours(working_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

            # Draw bounding rectangles around the detected contours if they meet specific criteria
            for z in contours:
                x, y, w, h = cv2.boundingRect(z)
                if x + w < 150 and y + h < 200 and x - w // 4 > 0:
                    cv2.rectangle(working_img, (x, y), (x + w, y + h), (0, 255, 0), -2)
                    cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/contour/" + str(L) + '.' + str(j) + '.jpg', working_img)

            # Find contours again for further processing
            contours_2, _ = cv2.findContours(working_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

            # Initialize variables to store properties of the largest contour found
            maxium_area = 0
            maxium_width = 0
            point_x = 0
            point_y = 0
            maxium_height = 0

            # Loop through contours to find the largest one by area
            for z in contours_2:
                x, y, w, h = cv2.boundingRect(z)
                new_area = h * w
                if x + w < 150 and y + h < 200 and new_area > maxium_area and x - w // 4 > 0:
                    maxium_area = new_area
                    maxium_width = w
                    point_x = x
                    point_y = y
                    maxium_height = h

            # Define the center of the largest bounding box and set a radius for cropping the region of interest (ROI)
            center_x = point_x + maxium_width // 2
            center_y = point_y + maxium_height // 2
            radius = 40

            # Ensure the cropping region is within bounds and save the cropped ROI
            if center_y - radius > 0 and center_x - radius > 0 and center_y + radius < 200 and center_x + radius < 150:
                new_roi = c[center_y - radius:center_y + radius, center_x - radius:center_x + radius]
                new_roi = cv2.resize(new_roi, (200, 150))
                cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/final_iris/" + str(L) + '.' + str(j) + '.jpg', new_roi)
            else:
                # Default to using the center of the image if the region goes out of bounds
                center_y = c.shape[0] // 2
                center_x = c.shape[1] // 2
                new_roi = c[center_y - radius:center_y + radius, center_x - radius:center_x + radius]
                new_roi = cv2.resize(new_roi, (200, 150))
                cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/final_iris/" + str(L) + '.' + str(j) + '.jpg', new_roi)

            # Save the transformed original image and add data to the lists
            cv2.imwrite("/content/drive/MyDrive/ML final project/datasets/edging_5/" + str(L) + '_' + str(j) + '.jpg', i)
            test.append(i)
            final_output.append(new_roi)
            lables.append(L)

            # Break out of the threshold loop once a suitable threshold is found
            break

# Display the number of final images and labels generated
print("The length of final output =", len(final_output))
print("The number of labels =", len(lables))

# Convert lists to numpy arrays for easier storage and access
final_output = np.array(final_output)
print(final_output.shape)

test = np.array(test)
print(test.shape)

# Save the processed data and labels using pickle for later use
import pickle

pickle_out = open("test_ubiris.pickle", "wb")
pickle.dump(test, pickle_out)
pickle_out.close()

pickle_out = open("ubiris_features.pickle", "wb")
pickle.dump(final_output, pickle_out)
pickle_out.close()

pickle_out = open("ubiris_lables.pickle", "wb")
pickle.dump(lables, pickle_out)
pickle_out.close()
