In [8]:
import cv2
import numpy as np
import glob
import pickle
import os
from google.colab.patches import cv2_imshow
from google.colab import drive

# Initialize the path to the shared folder globally
data_path = '/content/drive/MyDrive/ML final project'
os.makedirs(data_path + '/SerializedData/', exist_ok=True)  # Create SerializedData directory

# Path to the serialized data stored in the drive
serialized_file_path = '/content/drive/MyDrive/ML final project/SerializedData/'


# Mounts drive to google drive
drive.mount('/content/drive')

# Global definition for Cascade Classifier from google drive
eye_cascade = cv2.CascadeClassifier(data_path + '/code/haarcascade_eye.xml')

imgs_paths = []  # List to store file paths for all images
eye_detected_imgs_paths = []  # List to store file paths for images with detected eyes




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Pre-processing

The first block of code will load the previously saved serialized data from the shared drive.

The second block is the code used to create the serialized data from the dataset's images.

#### Load All Previously Serialized Data

In [17]:
## Run this if you are starting a new session to load all of the serialized data

# Load the file paths for imgs
with open(os.path.join(data_path, 'SerializedData', 'imgs_paths.pkl'), 'rb') as f:
    imgs_paths = pickle.load(f)

# Load the file paths for eye detected images
with open(os.path.join(data_path, 'SerializedData', 'eye_detected_imgs_paths.pkl'), 'rb') as f:
    eye_detected_imgs_paths = pickle.load(f)

with open(serialized_file_path + 'processed_image_data.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

# Access the loaded variables
iris_eye_detected_imgs = loaded_data['iris_eye_detected_imgs']
images = loaded_data['images']
iris_num = loaded_data['iris_num']
total_images_processed = loaded_data['total_images_processed']


print("File paths loaded successfully.")

File paths loaded successfully.


In [9]:
## Does not need to be run each session

import os
import cv2
import glob

# Define data path and ensure cascade directory exists
os.makedirs(data_path + '/eye_cascade/', exist_ok=True)

# Function to apply thresholding and morphological transformations
def transform_image(img, threshold):
    if threshold == 0:
        _, threshold = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    else:
        retval, threshold = cv2.threshold(img, threshold, 255, cv2.THRESH_BINARY)

    # Apply morphological operations
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    opening = cv2.morphologyEx(threshold, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(threshold, cv2.MORPH_CLOSE, kernel)
    open_close = cv2.bitwise_or(opening, closing, mask=None)

    return open_close, opening, closing

# Initialize lists for images and labels
imgs = []
label = 0
path = "/content/drive/MyDrive/ML final project/datasets/CLASSES_400_300_Part2"

for filepath in glob.iglob(path + '/**', recursive=True):
    num_in_folder = 0
    for filefilepath in glob.iglob(filepath + '/**/*.tiff', recursive=True):
        # Store the file path, folder number, label
        img_colored = cv2.imread(filefilepath)
        img_gray = cv2.cvtColor(cv2.resize(img_colored, (200, 150)), cv2.COLOR_BGR2GRAY)
        imgs_paths.append([filefilepath, num_in_folder, label])  # Save the file path and metadata
        num_in_folder += 1
    label += 1

# Eye detection and processing
eyes_num = 0
for img_path, j, L in imgs_paths:
    # Read and process the image
    img = cv2.imread(img_path)
    img_gray = cv2.cvtColor(cv2.resize(img, (400, 400)), cv2.COLOR_BGR2GRAY)

    # Detect eyes in the image using the eye cascade classifier
    eyes = eye_cascade.detectMultiScale(img_gray, scaleFactor=1.1, minNeighbors=3)

    if len(eyes) > 1:
        print(eyes_num)
        eye_detected_imgs_paths.append([img_path, j, L])  # Save the file path for detected images
        eyes_num += 1

        maxium_area = -3
        for (ex, ey, ew, eh) in eyes:
            area = ew * eh
            if area > maxium_area:
                maxium_area = area
                maxium_width = ew
                point_x = ex
                point_y = ey
                maxium_height = eh

        # Optionally, draw rectangle around largest eye (currently commented out)
        # cv2.rectangle(img, (point_x, point_y), (point_x + maxium_width, point_y + maxium_height), (255, 0, 0), 2)

# Print summary
print("Total eyes found: ", eyes_num)
print("Total images processed: ", len(imgs_paths))

# Save the file paths for imgs and eye_detected_imgs to disk
with open(os.path.join(data_path, 'SerializedData', 'imgs_paths.pkl'), 'wb') as f:
    pickle.dump(imgs_paths, f)

with open(os.path.join(data_path, 'SerializedData', 'eye_detected_imgs_paths.pkl'), 'wb') as f:
    pickle.dump(eye_detected_imgs_paths, f)

print("File paths saved successfully.")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [13]:
import cv2
import numpy as np

# Initialize a counter to track the number of iris images found
iris_num = 0
iris_eye_detected_imgs = []

def is_valid_circle(x, y, r, img_shape, min_radius=20):
    """Check if the circle is within the image bounds and meets the minimum radius condition."""
    return (x + r <= img_shape[1] and y + r <= img_shape[0] and
            x - r > 0 and y - r > 0 and r > min_radius)

# Initialize an array to store the images
images = []

# Load all images into the array from the file paths
for img_path, j, L in eye_detected_imgs_paths:

    # Load the image in color for processing (or grayscale as required)
    c = cv2.imread(img_path)  # Load the color image (original)

    # Load the image in grayscale for Hough Circle detection
    i = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # Check if the image was loaded correctly
    if i is not None:
        images.append((i, img_path, j, L, c))  # Store the image array, metadata, and color image
    else:
        print(f"Error loading image: {img_path}")

# Loop through each loaded image for processing
for i, img_path, j, L, c in images:
    # Apply the Hough Circle Transform to detect circles in the grayscale image 'i'
    circles = cv2.HoughCircles(i, cv2.HOUGH_GRADIENT, dp=1.2, minDist=100, param1=50, param2=30, minRadius=20, maxRadius=100)

    if circles is not None:
        # Round the coordinates and radii of the detected circles to integers
        circles = np.round(circles[0, :]).astype("int")

        # Initialize a high maximum average intensity for comparison
        maximum_average = float('inf')
        best_circle = None  # To store the best circle

        # Loop through each detected circle and find the one with the lowest average intensity
        for (x, y, r) in circles:
            if is_valid_circle(x, y, r, i.shape):
                # Define a Region of Interest (ROI) around the detected circle
                new_roi = i[y - r:y + r, x - r:x + r]
                # Calculate the average pixel intensity of the ROI
                average = np.average(new_roi)

                # Update if this circle has the lowest average intensity found so far
                if average < maximum_average:
                    maximum_average = average
                    best_circle = (x, y, r)

        # If no valid circle found, select the circle with the largest radius
        if best_circle is None:
            best_circle = max(circles, key=lambda circle: circle[2])

        # Optionally, draw the detected iris circle on the original color image `c` (optional)
        # cv2.circle(c, (best_circle[0], best_circle[1]), best_circle[2], (255, 255, 0), 4)

        # Save the annotated image to the specified directory on Google Drive with a unique name
        output_filename = f"/content/drive/MyDrive/ML final project/datasets/iris/{L}.{j}.jpg"
        cv2.imwrite(output_filename, c)

        # Add the detected iris image to the iris_eye_detected_imgs list
        iris_eye_detected_imgs.append((img_path, j, L, c))

        # Increment the iris count
        iris_num += 1

# Print the total number of iris images found
print("total_iris_found =", iris_num)

# Print the total number of images processed
print("total images number", len(images))


# Variables to serialize
data_to_serialize = {
    'iris_eye_detected_imgs': iris_eye_detected_imgs,
    'images': images,
    'iris_num': iris_num,
    'total_images_processed': len(images),
}

# Open the file in write-binary mode and serialize the variables
with open(serialized_file_path + 'processed_image_data.pkl', 'wb') as f:
    pickle.dump(data_to_serialize, f)

print(f"Serialized data has been saved to {serialized_file_path + 'processed_image_data.pkl'}")


total_iris_found = 714
total images number 714


IsADirectoryError: [Errno 21] Is a directory: '/content/drive/MyDrive/ML final project/SerializedData/'

In [15]:
# Open the file in write-binary mode and serialize the variables
with open(serialized_file_path + 'processed_image_data.pkl', 'wb') as f:
    pickle.dump(data_to_serialize, f)

print(f"Serialized data has been saved to {serialized_file_path}")


Serialized data has been saved to /content/drive/MyDrive/ML final project/SerializedData/


In [21]:
import cv2
import numpy as np
import os
import random
import pickle

# Function to load image from file path
def load_image(image_path):
    # Check if the image file exists
    if not os.path.exists(image_path):
        print(f"Error: The file {image_path} does not exist.")
        return None

    # Load the image using OpenCV
    img = cv2.imread(image_path)

    if img is None:
        print(f"Error: Failed to load image from {image_path}.")
    return img

# Updated transform_image function
def transform_image(img, threshold):
    # Ensure the image is in grayscale
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    if threshold == 0:
        # Apply Otsu's thresholding
        _, thresholded_img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    else:
        # Apply threshold with provided value
        _, thresholded_img = cv2.threshold(img, threshold, 255, cv2.THRESH_BINARY)

    # Perform morphological operations (opening, closing)
    kernel = np.ones((5, 5), np.uint8)
    opening = cv2.morphologyEx(thresholded_img, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(thresholded_img, cv2.MORPH_CLOSE, kernel)

    return thresholded_img, opening, closing

# Assume 'imgs' is already defined as a list of tuples containing image path, label, and image data
imgs = iris_eye_detected_imgs

# Define a 5x5 kernel for image transformations (e.g., dilation or erosion)
kernel = np.ones((5,5), np.uint8)

# Shuffle the images to randomize the processing order
random.shuffle(imgs)

# Initialize lists to store the final output, labels, and test images
test = []
final_output = []
labels = []

# Loop through each image and associated metadata in 'imgs'
for i, j, L, c in imgs:
    # Load the image from the given path (assume 'i' is the image path)
    img = load_image(i)

    if img is None:
        continue  # Skip if the image cannot be loaded

    # Perform a transformation on the image 'i' with an initial threshold of 0
    gold, siver, diamond = transform_image(img, 0)
    golden_reference = np.sum(gold)  # Sum the pixel values of the 'gold' transformed image

    # Loop through threshold values from 10 to 1000 in increments of 10
    for k in range(10, 1000, 10):
        # Apply transformations with the current threshold 'k'
        working_img, opening, closing = transform_image(img, k)
        suming = np.sum(working_img)  # Sum the pixel values of the 'working_img'
        difference = suming - golden_reference  # Calculate the difference from the golden reference

        # If the difference in pixel values is significant (threshold found)
        if difference > 800:
            print(f"The image threshold = {k}")
            print(f"The image name {j}")
            print(" ")

            # Save images with applied transformations for different thresholds
            cv2.imwrite(f"/content/drive/MyDrive/ML final project/datasets/threshold/{L}.{j}.jpg", working_img)
            cv2.imwrite(f"/content/drive/MyDrive/ML final project/datasets/opening/{L}.{j}.jpg", opening)
            cv2.imwrite(f"/content/drive/MyDrive/ML final project/datasets/closing/{L}.{j}.jpg", closing)

            # Find contours in the thresholded image 'working_img'
            contours, _ = cv2.findContours(working_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

            # Draw bounding rectangles around the detected contours if they meet specific criteria
            for z in contours:
                x, y, w, h = cv2.boundingRect(z)
                if x + w < 150 and y + h < 200 and x - w // 4 > 0:
                    cv2.rectangle(working_img, (x, y), (x + w, y + h), (0, 255, 0), -2)
                    cv2.imwrite(f"/content/drive/MyDrive/ML final project/datasets/contour/{L}.{j}.jpg", working_img)

            # Find contours again for further processing
            contours_2, _ = cv2.findContours(working_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

            # Initialize variables to store properties of the largest contour found
            maximum_area = 0
            maximum_width = 0
            point_x = 0
            point_y = 0
            maximum_height = 0

            # Loop through contours to find the largest one by area
            for z in contours_2:
                x, y, w, h = cv2.boundingRect(z)
                new_area = h * w
                if x + w < 150 and y + h < 200 and new_area > maximum_area and x - w // 4 > 0:
                    maximum_area = new_area
                    maximum_width = w
                    point_x = x
                    point_y = y
                    maximum_height = h

            # Define the center of the largest bounding box and set a radius for cropping the region of interest (ROI)
            center_x = point_x + maximum_width // 2
            center_y = point_y + maximum_height // 2
            radius = 40

            # Ensure the cropping region is within bounds and save the cropped ROI
            if center_y - radius > 0 and center_x - radius > 0 and center_y + radius < 200 and center_x + radius < 150:
                new_roi = c[center_y - radius:center_y + radius, center_x - radius:center_x + radius]
                new_roi = cv2.resize(new_roi, (200, 150))
                cv2.imwrite(f"/content/drive/MyDrive/ML final project/datasets/final_iris/{L}.{j}.jpg", new_roi)
            else:
                # Default to using the center of the image if the region goes out of bounds
                center_y = c.shape[0] // 2
                center_x = c.shape[1] // 2
                new_roi = c[center_y - radius:center_y + radius, center_x - radius:center_x + radius]
                new_roi = cv2.resize(new_roi, (200, 150))
                cv2.imwrite(f"/content/drive/MyDrive/ML final project/datasets/final_iris/{L}.{j}.jpg", new_roi)

            # Save the transformed original image and add data to the lists
            cv2.imwrite(f"/content/drive/MyDrive/ML final project/datasets/edging_5/{L}_{j}.jpg", img)
            test.append(img)
            final_output.append(new_roi)
            labels.append(L)

            # Break out of the threshold loop once a suitable threshold is found
            break

# Display the number of final images and labels generated
print(f"The length of final output = {len(final_output)}")
print(f"The number of labels = {len(labels)}")

# Convert lists to numpy arrays for easier storage and access
final_output = np.array(final_output)
print(final_output.shape)

test = np.array(test)
print(test.shape)

# Save the images and labels to files with .pkl extension
with open('/content/drive/MyDrive/ML final project/SerializedData/final_output.pkl', 'wb') as f:
    pickle.dump(final_output, f)

with open('/content/drive/MyDrive/ML final project/SerializedData/test.pkl', 'wb') as f:
    pickle.dump(test, f)

with open('/content/drive/MyDrive/ML final project/SerializedData/labels.pkl', 'wb') as f:
    pickle.dump(labels, f)


The image threshold = 10
The image name 2916
 
The image threshold = 10
The image name 146
 
The image threshold = 10
The image name 2153
 
The image threshold = 10
The image name 139
 
The image threshold = 10
The image name 2648
 
The image threshold = 10
The image name 1699
 
The image threshold = 10
The image name 1907
 
The image threshold = 10
The image name 1242
 
The image threshold = 10
The image name 1202
 
The image threshold = 10
The image name 885
 
The image threshold = 10
The image name 2762
 
The image threshold = 10
The image name 1401
 
The image threshold = 10
The image name 2454
 
The image threshold = 10
The image name 1760
 
The image threshold = 10
The image name 924
 
The image threshold = 10
The image name 1556
 
The image threshold = 10
The image name 1744
 
The image threshold = 10
The image name 1389
 
The image threshold = 10
The image name 1209
 
The image threshold = 10
The image name 2560
 
The image threshold = 10
The image name 2513
 
The image thresho