# Clear Pre-Processing

1. Resize the images to make them consistent for training.
2. Define augmentation functions

In [15]:
import cv2 
from collections import Counter
from PIL import Image
import imagesize
from tqdm import tqdm
import os 
import random
import numpy as np

In [16]:
from utils import * 

In [17]:
IMAGE_ANNOTATION_PATHS = get_image_paths()
print(len(IMAGE_ANNOTATION_PATHS))
print(IMAGE_ANNOTATION_PATHS[0])

100%|██████████| 7393/7393 [00:00<00:00, 173573.11it/s]

7393
('dataset/images/Egyptian_Mau_167.jpg', 'dataset/annotations/trimaps/Egyptian_Mau_167.png')





## Resizing

### Exploring Dimensions to Determine Ideal Resize

In [18]:
def get_image_sizes(path):
    image_sizes = []
    image_areas = []
    for root, dirs, files in os.walk(path):
        for file in tqdm(files):
            if not os.path.isdir(file):
                if file.endswith('.mat'):
                    img = cv2.imread(os.path.join(root, file))
                    if img is not None:
                        h, w = img.shape[:2]
                    else:
                        continue
                else:
                    w, h = imagesize.get(os.path.join(root, file))

                image_sizes.append((w, h))
                image_areas.append(w * h)
    return image_sizes, image_areas

image_sizes, image_areas = get_image_sizes(ORIGINAL_IMAGE_PATH)
print(image_sizes, image_areas)

100%|██████████| 7393/7393 [00:01<00:00, 5114.29it/s]

[(183, 275), (500, 333), (374, 500), (333, 500), (327, 500), (500, 332), (500, 375), (500, 398), (392, 500), (500, 375), (333, 500), (500, 333), (225, 300), (500, 357), (225, 300), (500, 375), (428, 432), (280, 210), (500, 333), (334, 500), (500, 453), (500, 426), (500, 375), (500, 375), (500, 375), (500, 375), (300, 225), (207, 300), (467, 467), (378, 500), (500, 375), (500, 375), (500, 375), (500, 333), (222, 300), (500, 375), (500, 375), (500, 333), (500, 603), (500, 331), (500, 335), (500, 333), (300, 225), (500, 349), (500, 333), (500, 375), (500, 375), (500, 375), (500, 334), (285, 300), (335, 500), (233, 350), (500, 400), (500, 368), (600, 400), (500, 400), (500, 343), (500, 500), (500, 331), (333, 500), (500, 375), (500, 375), (500, 403), (500, 400), (300, 222), (300, 225), (332, 500), (500, 333), (425, 319), (500, 375), (500, 300), (500, 360), (500, 357), (500, 375), (500, 375), (500, 375), (375, 500), (600, 900), (500, 347), (300, 225), (500, 375), (333, 500), (500, 313), (37




In [19]:
# Top 5 most common dimensions
counter = Counter(image_sizes)

min_dim = min(min(image_sizes, key=lambda x: min(x[0],x[1])))
max_dim = max(max(image_sizes, key=lambda x: max(x[0],x[1])))
avg_dim = np.mean(image_sizes, axis=0)
avg_area = np.mean(image_areas)
sqrt_avg_area = int(np.sqrt(avg_area))

"""
print(f"Largest dimension: {max_dim}")
print(f"Smallest dimension: {min_dim}")
print(f"Average Dimension: {avg_dim}")
print(f"Top 5 most common dimensions: {counter.most_common(5)}")
print(f"Average Area: {avg_area}")
print(f"Square root of Average Area: {sqrt_avg_area}")
"""

# Top 5 most common dimensions: [((500, 375), 1425), ((500, 333), 1072), ((375, 500), 511), ((333, 500), 509), ((300, 225), 266)]
# Smallest dimension: 103
# Largest dimension: 3264
# Average Dimension: [436.74519621 390.91366712]
# Average Area: 174861.48240866035
# Square root of Average Area: 418



'\nprint(f"Largest dimension: {max_dim}")\nprint(f"Smallest dimension: {min_dim}")\nprint(f"Average Dimension: {avg_dim}")\nprint(f"Top 5 most common dimensions: {counter.most_common(5)}")\nprint(f"Average Area: {avg_area}")\nprint(f"Square root of Average Area: {sqrt_avg_area}")\n'


- Best resize dimension is 256x256
- Prevents excessive upscaling of small images.
- Keeps computational efficiency reasonable.
- Maintains a square format for neural networks.
- Avoids unnecessary interpolation artifacts.

### Resizing Images and Saving in Resized Images Folder

In [20]:
# Resizing Images to 256x256 Pixels and save them in a new folder

def resize_images(path, save_path):
    transferred_files = 0
    for root, dirs, files in os.walk(path):
        for file in tqdm(files):
            if not os.path.isdir(file):
                img = cv2.imread(os.path.join(root, file))
                if img is not None:
                    resized_img = cv2.resize(img, (256, 256))
                    cv2.imwrite(os.path.join(save_path, file), resized_img)
                    transferred_files += 1
    
    print(f"Transferred {transferred_files} images to {save_path}")

#resize_images(ORIGINAL_IMAGE_PATH, RESIZED_IMAGE_PATH)


## Augmentation

### Experimenting with Augmentation Techniques

In [21]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [22]:
# Define the augmentation pipeline using cv2 
import matplotlib.pyplot as plt
import scipy 

def augment_image(image, annotation):
    """Applies OpenCV-based augmentations to both the image and annotation (mask)."""
    
    # Convert images to numpy arrays
    image = np.array(image)
    annotation = np.array(annotation)

    # Random Horizontal Flip
    if random.random() > 0.5:
        image = cv2.flip(image, 1)
        annotation = cv2.flip(annotation, 1)

    # Convert image and annotation back to PIL format
    image = Image.fromarray(image)
    annotation = Image.fromarray(annotation)

    return image, annotation

""""
# Example usage
print("Image Cleaning")
for i in range(len(IMAGE_ANNOTATION_PATHS)):
    image_path = IMAGE_ANNOTATION_PATHS[i][0]

    if cv2.imread(image_path) is None:
        if image_path.endswith('.mat'):
            print(f"Mat file @ {image_path}")
            Image = Image.open(image_path).convert('RGB')

        else: 
            print(f"Unable to just open it normally, but still jpg {i}")
            img = Image.open(image_path).convert('RGB')
            img = np.array(img)

print("Annotation Cleaning")
for i in range(len(IMAGE_ANNOTATION_PATHS)):
    annotation_path = IMAGE_ANNOTATION_PATHS[i][1]

    if cv2.imread(annotation_path) is None:
        if image_path.endswith('.mat'):
            print(f"Mat file @ {annotation_path}")
        else: 
            print(f"Unable to just open it normally, but still jpg {i}")
            img = Image.open(annotation_path).convert('RGB')
            img = np.array(img)
"""

offending = "/Users/hashim/Desktop/Computer_Vision_Project/dataset/images/Abyssinian_100.mat"
# convert offending image to numy array from mat file
import scipy.io
mat = scipy.io.loadmat(offending)
# print all the keys in the dictionary
print(mat.keys())
mat = mat['binsa']
# view the image
"""
Image Cleaning
Unable to just open it normally, but still jpg 0
Corrupt JPEG data: premature end of data segment
Unable to just open it normally, but still jpg 1813
Unable to just open it normally, but still jpg 2274
Corrupt JPEG data: 240 extraneous bytes before marker 0xd9
Unable to just open it normally, but still jpg 4317
Unable to just open it normally, but still jpg 5514
Mat file @ 5915
Unable to just open it normally, but still jpg 5977
Mat file @ 5998
Mat file @ 6400
Annotation Cleaning
"""


dict_keys(['__header__', '__version__', '__globals__', 'frames', 'binsa'])


'\nImage Cleaning\nUnable to just open it normally, but still jpg 0\nCorrupt JPEG data: premature end of data segment\nUnable to just open it normally, but still jpg 1813\nUnable to just open it normally, but still jpg 2274\nCorrupt JPEG data: 240 extraneous bytes before marker 0xd9\nUnable to just open it normally, but still jpg 4317\nUnable to just open it normally, but still jpg 5514\nMat file @ 5915\nUnable to just open it normally, but still jpg 5977\nMat file @ 5998\nMat file @ 6400\nAnnotation Cleaning\n'

In [23]:
from PIL import Image

corrupt_images = []

for i in range(len(IMAGE_ANNOTATION_PATHS)):
    image_path, annotation_path = IMAGE_ANNOTATION_PATHS[i]

    try:
        with Image.open(image_path) as img:
            img.verify()  # Check if image is corrupt
    except (IOError, SyntaxError) as e:
        print(f"Corrupt Image at Index {i}: {image_path}")
        corrupt_images.append(image_path)

Corrupt Image at Index 5915: dataset/images/Abyssinian_102.mat
Corrupt Image at Index 5998: dataset/images/Abyssinian_100.mat
Corrupt Image at Index 6400: dataset/images/Abyssinian_101.mat


In [24]:
for img_path in corrupt_images:
    try:
        with Image.open(img_path) as img:
            img.save(img_path)  # Overwrites with a clean version
            print(f"Fixed: {img_path}")
    except Exception as e:
        print(f"Failed to fix: {img_path}, Error: {e}")

Failed to fix: dataset/images/Abyssinian_102.mat, Error: cannot identify image file 'dataset/images/Abyssinian_102.mat'
Failed to fix: dataset/images/Abyssinian_100.mat, Error: cannot identify image file 'dataset/images/Abyssinian_100.mat'
Failed to fix: dataset/images/Abyssinian_101.mat, Error: cannot identify image file 'dataset/images/Abyssinian_101.mat'
