# Imports

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from collections import Counter
import seaborn as sns
import yaml
from pathlib import Path

# Paths

In [6]:
# Useful constants for later access. The split folders are derived from the
# dataset root so only `dataset_path` needs editing if the data is moved.
dataset_path = "P:/Python programs/BNA-2025/src/Round1/dataset"
training_dataset_path = f"{dataset_path}/train"
testing_dataset_path = f"{dataset_path}/val"  # testing uses the "val" folder

# Dataset Preparation:
A function that renames all image files sequentially, starting from 1, so the file names are more intuitive and easier to recall later.

In [5]:
def rename_files_in_folder(folder_path):
    """Rename every file in a folder to ``1``, ``2``, ... while keeping its extension.

    Files are numbered in the order given by a plain (lexicographic) sort of
    their current names. Sub-directories are left untouched.

    The renaming is done in two passes (current name -> unique temporary name ->
    final name). Renaming directly can fail, or on some platforms silently
    overwrite a file, whenever a target such as ``1.jpg`` already exists in the
    folder and has not been renamed yet.

    Args:
        folder_path (string): The path to the folder containing the files to be renamed.
    """
    import uuid

    # Regular files only, in sorted order so the numbering is reproducible
    files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, f))
    )

    # Per-call marker so temporary names cannot clash with existing files
    marker = uuid.uuid4().hex

    # Pass 1: move every file out of the way under a temporary name
    staged = []
    for index, file in enumerate(files, start=1):
        file_extension = os.path.splitext(file)[1]
        temp_file_path = os.path.join(folder_path, f".renaming-{marker}-{index}{file_extension}")
        new_file_path = os.path.join(folder_path, f"{index}{file_extension}")
        os.rename(os.path.join(folder_path, file), temp_file_path)
        staged.append((temp_file_path, new_file_path))

    # Pass 2: no original name is left, so the numbered names are free
    for temp_file_path, new_file_path in staged:
        os.rename(temp_file_path, new_file_path)

In [4]:
# NOTE: "" is a placeholder -- replace it with the folder whose files should be renumbered
rename_files_in_folder("") # add path

# Image Manipulation
Multiple functions and call blocks to modify distorted images

In [4]:
def invert_color(image_path, destination_path):
    """
    Inverts the colors of an image and saves the result to the specified destination.

    Image inversion changes the value of every color channel by subtracting it from
    the maximum value (255), which yields the 'negative' of the image. Applying it to
    an image that was distorted by inversion restores the original colors.

    ImageOps.invert only accepts a few image modes, so other modes are handled here:
    RGBA images have their color channels inverted while the alpha channel is kept
    unchanged, and any remaining mode (e.g. palette images) is converted to RGB first.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the inverted image.

    Returns:
        None
    """
    from PIL import Image, ImageOps

    # The context manager closes the source file as soon as the pixels are processed
    with Image.open(image_path) as source_image:
        if source_image.mode in ("1", "L", "RGB"):
            # Passed straight through to ImageOps.invert, exactly as before
            restored_image = ImageOps.invert(source_image)
        elif source_image.mode == "RGBA":
            # Transparency is not a color: invert RGB only and re-attach the alpha
            alpha_channel = source_image.getchannel("A")
            restored_image = ImageOps.invert(source_image.convert("RGB"))
            restored_image.putalpha(alpha_channel)
        else:
            restored_image = ImageOps.invert(source_image.convert("RGB"))

    restored_image.save(destination_path)
    
def remove_random_noise(image_path, destination_path, kernel_size=3):
    """
    Removes random noise from an image using a median filter and saves the denoised image to the specified destination.
    Random noise refers to black or white pixels that appear on the screen, similar to old school TVs. This noise can occur due to
    electromagnetic interference, damaged image sensors, or data corruption. The median filter removes this noise by changing the
    value of each pixel to the median value of the pixels around it.

    Palette ('P') and 1-bit ('1') images are converted to RGB / grayscale before filtering, because a median
    over palette indices or boolean values is not a median over colors.

    Args:
        image_path (str): The file path of the noisy image.
        destination_path (str): The file path where the denoised image will be saved.
        kernel_size (int, optional): Side length of the square filter window. Must be odd and at least 3.
            Defaults to 3, the value that was previously hard-coded.
    Returns:
        None
    Raises:
        ValueError: If kernel_size is even or smaller than 3.
    """
    from PIL import Image
    import numpy as np
    import cv2

    # cv2.medianBlur only accepts odd window sizes greater than 1
    if kernel_size < 3 or kernel_size % 2 == 0:
        raise ValueError("kernel_size must be an odd integer >= 3")

    # Open the image with noise; the file is closed once the pixels are copied out
    with Image.open(image_path) as noisy_image:
        if noisy_image.mode == "P":
            noisy_image_array = np.array(noisy_image.convert("RGB"))
        elif noisy_image.mode == "1":
            noisy_image_array = np.array(noisy_image.convert("L"))
        else:
            noisy_image_array = np.array(noisy_image)

    # Apply median filter to remove noise
    denoised_image_array = cv2.medianBlur(noisy_image_array, kernel_size)
    # Convert back to PIL image and save it
    Image.fromarray(denoised_image_array).save(destination_path)
    
def change_brightness(image_path, destination_path, brightness_factor):
    """
    Adjusts the brightness of an image by a given factor and saves the result.

    Extreme brightness levels hurt an image either way: too dim and the dark regions
    become indistinguishable, too bright and details are overexposed and washed out.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the adjusted image.
        brightness_factor (float): A factor by which to adjust the brightness.
                                Values > 1.0 increase brightness,
                                values < 1.0 decrease brightness.

    Returns:
        None
    """
    from PIL import ImageEnhance, Image

    # Load the distorted image and scale its brightness in one step
    source_image = Image.open(image_path)
    adjusted_image = ImageEnhance.Brightness(source_image).enhance(brightness_factor)

    # Write the corrected image out
    adjusted_image.save(destination_path)
    
def fix_perspective(image_path, destination_path, resolution, top_inset=50):
    """
    Corrects the perspective of a skewed image, i.e. an image that is stretched in a particular direction.

    The source quadrilateral is a trapezoid: its top corners are moved inwards by `top_inset` pixels
    while its bottom corners are the bottom corners of the image. That trapezoid is warped onto the
    full rectangle of the requested resolution.

    Args:
        image_path (str): The file path of the skewed image to be corrected.
        destination_path (str): The file path where the restored image will be saved.
        resolution (tuple): The resolution (width, height), in whole pixels, of the restored image.
        top_inset (int, optional): Horizontal inset, in pixels, of the top corners of the source
            trapezoid. Defaults to 50, the value that was previously hard-coded.

    Returns:
        None

    Raises:
        FileNotFoundError: If the input image cannot be read.
        OSError: If the restored image cannot be written.
    """
    import cv2
    import numpy as np

    # cv2.imread returns None (no exception) when the file is missing or unreadable
    skewed_image = cv2.imread(image_path)
    if skewed_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    rows, cols = skewed_image.shape[:2]

    # Corner order: top-left, top-right, bottom-left, bottom-right
    src_points = np.float32([[top_inset, 0], [cols - top_inset, 0], [0, rows - 1], [cols - 1, rows - 1]])
    dst_width, dst_height = resolution
    dst_points = np.float32([[0, 0], [dst_width - 1, 0], [0, dst_height - 1], [dst_width - 1, dst_height - 1]])

    # Apply the reverse perspective warp
    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
    restored_image = cv2.warpPerspective(skewed_image, matrix, (dst_width, dst_height))

    # cv2.imwrite signals failure through its return value, so check it
    if not cv2.imwrite(destination_path, restored_image):
        raise OSError(f"Could not write image: {destination_path}")
    
def crop_image(image_path, destination_path, crop_box):
    """
    Cuts a rectangular region out of an image and saves that region as a new image.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the cropped image.
        crop_box (tuple): A tuple (left, upper, right, lower) defining the box to crop.

    Returns:
        None
    """
    from PIL import Image

    # Load the source, keep only the requested box, and write it out
    source_image = Image.open(image_path)
    source_image.crop(crop_box).save(destination_path)

def resize_image(image_path, destination_path, size):
    """
    Scales an image to the given dimensions and saves the scaled copy.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the resized image.
        size (tuple): The desired size as (width, height).

    Returns:
        None
    """
    from PIL import Image

    # Load the source, scale it to the target size, and write it out
    source_image = Image.open(image_path)
    source_image.resize(size).save(destination_path)

def rotate_image(image_path, destination_path, angle):
    """
    Turns an image by the given angle and saves the turned copy.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the rotated image.
        angle (float): The angle in degrees to rotate the image.

    Returns:
        None
    """
    from PIL import Image

    # Load the source, rotate it, and write it out
    source_image = Image.open(image_path)
    turned_image = source_image.rotate(angle)
    turned_image.save(destination_path)

def adjust_contrast(image_path, destination_path, contrast_factor):
    """
    Scales the contrast of an image by a given factor and saves the result.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the adjusted image.
        contrast_factor (float): The factor by which to adjust the contrast.
                                 Values > 1.0 increase contrast, values < 1.0 decrease contrast.

    Returns:
        None
    """
    from PIL import ImageEnhance, Image

    # Load the source and apply the contrast factor in one step
    source_image = Image.open(image_path)
    result_image = ImageEnhance.Contrast(source_image).enhance(contrast_factor)

    # Write the adjusted image out
    result_image.save(destination_path)

def mirror_image(image_path, destination_path):
    """
    Flips an image left-to-right and saves the flipped copy.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the mirrored image.

    Returns:
        None
    """
    from PIL import Image

    # Load the source, flip it horizontally, and write it out
    source_image = Image.open(image_path)
    source_image.transpose(Image.FLIP_LEFT_RIGHT).save(destination_path)

def convert_color_scheme(image_path, destination_path, color_mode):
    """
    Re-encodes an image in another color mode (e.g., 'RGB', 'L', 'CMYK') and saves the result.

    Parameters:
        image_path (str): The file path to the input image.
        destination_path (str): The file path to save the converted image.
        color_mode (str): The desired color mode ('RGB', 'L', 'CMYK', etc.).

    Returns:
        None
    """
    from PIL import Image

    # Load the source, switch it to the requested mode, and write it out
    source_image = Image.open(image_path)
    source_image.convert(color_mode).save(destination_path)

In [None]:
# Invert the colors of one image; both paths are placeholders to fill in first
img_path = '' # enter path
destination_path = '' # enter path
invert_color(img_path, destination_path)

In [None]:
img_path = 'P:/Python programs/BNA-2025/src/Round1/dataset/train/images/IMG_4902.jpg'  # enter path
# The destination must be a file name with an image extension, not a folder,
# otherwise the output format cannot be determined when saving.
destination_path = 'P:/Python programs/BNA-2025/src/Round1/dataset/IMG_4902_denoised.jpg'  # enter path
remove_random_noise(img_path, destination_path)

In [None]:
# Adjust the brightness of one image; both paths are placeholders to fill in first
img_path = ''  # enter path
destination_path = ''  # enter path
brightness_factor = 1.5  # enter desired brightness factor (e.g., 1.5 for brighter)
change_brightness(img_path, destination_path, brightness_factor)

In [None]:
# Correct the perspective of one image; both paths are placeholders to fill in first
img_path = ''  # enter path
destination_path = ''  # enter path
resolution = (800, 600)  # enter desired resolution (width, height)
fix_perspective(img_path, destination_path, resolution)

In [None]:
# Crop one image; both paths are placeholders to fill in first
img_path = ''  # enter path
destination_path = ''  # enter path
crop_box = (100, 100, 400, 400)  # enter the crop box (left, upper, right, lower)
crop_image(img_path, destination_path, crop_box)

In [None]:
# Resize one image; both paths are placeholders to fill in first
img_path = ''  # enter path
destination_path = ''  # enter path
size = (800, 600)  # enter desired size (width, height)
resize_image(img_path, destination_path, size)

In [None]:
# Adjust the contrast of one image; both paths are placeholders to fill in first
img_path = ''  # enter path
destination_path = ''  # enter path
contrast_factor = 1.5  # enter desired contrast factor (e.g., 1.5 for higher contrast)
adjust_contrast(img_path, destination_path, contrast_factor)

In [None]:
# Mirror one image horizontally; both paths are placeholders to fill in first
img_path = ''  # enter path
destination_path = ''  # enter path
mirror_image(img_path, destination_path)

In [None]:
# Convert the color mode of one image; both paths are placeholders to fill in first
img_path = ''  # enter path
destination_path = ''  # enter path
color_mode = 'L'  # enter desired color mode (e.g., 'L' for grayscale, 'RGB' for color)
convert_color_scheme(img_path, destination_path, color_mode)

# Image Labelling UI

In [2]:
! labelImg

# Pre-Training Data Analysis

In [5]:
def load_class_names(data_yaml_path):
    """Load class names from YOLO data.yaml file.

    The ``names`` entry may be written either as a list or as an ``id: name``
    mapping. A mapping is turned into a list ordered by class id, so the result
    can always be indexed by class id and iterated to obtain the names.
    (Assumes the ids of a mapping run 0..n-1 without gaps.)

    Args:
        data_yaml_path (str): Path to the dataset's data.yaml file.

    Returns:
        list: Class names, positioned by class id.
    """
    # Explicit encoding so non-ASCII class names do not depend on the OS default
    with open(data_yaml_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    names = data['names']
    if isinstance(names, dict):
        names = [names[class_id] for class_id in sorted(names)]
    return names

def plot_class_distribution(class_names, annotations_path):
    """Draw a bar chart of how many labelled instances each class has.

    Every non-blank line of every ``.txt`` file in `annotations_path` counts as
    one instance; its first field is the class id, which is translated to a
    name through `class_names`.
    """
    class_counts = Counter()

    label_names = [name for name in os.listdir(annotations_path) if name.endswith('.txt')]
    for label_name in label_names:
        with open(os.path.join(annotations_path, label_name), 'r') as label_file:
            for row in label_file:
                if not row.strip():
                    continue  # blank line
                class_id = int(row.split()[0])
                class_counts[class_names[class_id]] += 1

    # One bar per class, in the order the classes were first encountered
    plt.figure(figsize=(12, 6))
    plt.bar(class_counts.keys(), class_counts.values(), color='skyblue')
    plt.title('Class Distribution')
    plt.xlabel('Class Name')
    plt.ylabel('Instance Count')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

def plot_bounding_box_aspect_ratio(annotations_path):
    """Plot distribution of bounding box aspect ratios.

    Reads every ``.txt`` label file in `annotations_path`, takes the width and
    height fields (4th and 5th values) of each row and histograms width / height
    on a logarithmic count axis. Rows with fewer than five values and boxes with
    zero height (which have no defined aspect ratio) are skipped.
    """
    aspect_ratios = []

    for file_name in os.listdir(annotations_path):
        if not file_name.endswith('.txt'):
            continue
        with open(os.path.join(annotations_path, file_name), 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 5:  # blank or incomplete row
                    continue
                _, _, w, h = map(float, parts[1:5])
                if h == 0:  # degenerate box: avoid ZeroDivisionError
                    continue
                aspect_ratios.append(w / h)

    plt.figure(figsize=(12, 6))
    plt.hist(aspect_ratios, bins=50, color='salmon', edgecolor='black')
    plt.title('Bounding Box Aspect Ratio (Width/Height)')
    plt.xlabel('Aspect Ratio')
    plt.ylabel('Frequency')
    plt.yscale('log')
    plt.tight_layout()
    plt.show()

def plot_bounding_box_size(annotations_path):
    """Draw a histogram of bounding box areas (width * height as stored in the labels).

    Every ``.txt`` file in `annotations_path` is read; rows with fewer than five
    values are ignored. The count axis is logarithmic.
    """
    areas = []

    label_names = (name for name in os.listdir(annotations_path) if name.endswith('.txt'))
    for label_name in label_names:
        with open(os.path.join(annotations_path, label_name), 'r') as label_file:
            for row in label_file:
                if not row.strip():
                    continue  # blank line
                fields = row.split()
                if len(fields) < 5:
                    continue  # not a full "class x y w h" row
                _, _, box_w, box_h = map(float, fields[1:5])
                areas.append(box_w * box_h)

    plt.figure(figsize=(12, 6))
    plt.hist(areas, bins=50, color='lightgreen', edgecolor='black')
    plt.title('Bounding Box Size (Normalized Area)')
    plt.xlabel('Width * Height')
    plt.ylabel('Frequency')
    plt.yscale('log')
    plt.tight_layout()
    plt.show()

def plot_object_size_vs_image_size(annotations_path, images_path):
    """Plot object size vs image size (in pixels).

    For every ``.txt`` label file, the image with the same stem is looked up in
    `images_path`; label files without a matching image are skipped. Each box's
    width and height fields are multiplied by the image dimensions to obtain a
    pixel area, which is scattered against the image's pixel area on log-log axes.

    Args:
        annotations_path (str): Folder containing the label files.
        images_path (str): Folder containing the corresponding images.
    """
    from glob import escape

    img_areas, obj_areas = [], []

    for file_name in os.listdir(annotations_path):
        if not file_name.endswith('.txt'):
            continue

        # Find corresponding image. The stem is escaped so characters such as
        # '[' in a file name are matched literally, and the matches are sorted
        # so the same file is picked on every run.
        img_stem = Path(file_name).stem
        img_files = sorted(Path(images_path).glob(f"{escape(img_stem)}.*"))
        if not img_files:
            continue

        # Only the header is needed for the size; close the file right away
        with Image.open(img_files[0]) as img:
            img_w, img_h = img.size
        img_area = img_w * img_h

        # Read annotations
        with open(os.path.join(annotations_path, file_name), 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 5:  # blank or incomplete row
                    continue
                _, _, w_norm, h_norm = map(float, parts[1:5])
                obj_areas.append((w_norm * img_w) * (h_norm * img_h))
                img_areas.append(img_area)

    plt.figure(figsize=(12, 6))
    plt.scatter(img_areas, obj_areas, alpha=0.3, color='purple')
    plt.title('Object Size vs Image Size')
    plt.xlabel('Image Area (pixels)')
    plt.ylabel('Object Area (pixels)')
    plt.xscale('log')
    plt.yscale('log')
    plt.tight_layout()
    plt.show()

def plot_image_resolution_distribution(annotations_path, images_path):
    """Plot distribution of image resolutions.

    Only images that have a label file are considered: for every ``.txt`` file
    in `annotations_path`, the image with the same stem is looked up in
    `images_path` and its (width, height) is recorded. Widths and heights are
    drawn as two overlaid histograms.

    Args:
        annotations_path (str): Folder containing the label files.
        images_path (str): Folder containing the corresponding images.
    """
    from glob import escape

    resolutions = []

    for file_name in os.listdir(annotations_path):
        if not file_name.endswith('.txt'):
            continue
        # Escape the stem so glob metacharacters in file names match literally;
        # sort the matches so the same file is picked on every run.
        img_stem = Path(file_name).stem
        img_files = sorted(Path(images_path).glob(f"{escape(img_stem)}.*"))
        if img_files:
            # Only the header is needed for the size; close the file right away
            with Image.open(img_files[0]) as img:
                resolutions.append(img.size)

    widths, heights = zip(*resolutions) if resolutions else ([], [])
    plt.figure(figsize=(12, 6))
    plt.hist(widths, bins=30, alpha=0.5, label='Width')
    plt.hist(heights, bins=30, alpha=0.5, label='Height')
    plt.title('Image Resolution Distribution')
    plt.xlabel('Pixels')
    plt.ylabel('Count')
    plt.legend()
    plt.tight_layout()
    plt.show()

def plot_object_density(annotations_path, grid_size=100):
    """Plot heatmap of object density across all images.

    The box centre (2nd and 3rd value of each label row) is binned into a
    `grid_size` x `grid_size` grid of equally wide cells covering [0, 1] in both
    directions. Centres outside that range are counted in the nearest edge cell
    instead of raising an IndexError or wrapping around to the opposite side.

    Args:
        annotations_path (str): Folder containing the label files.
        grid_size (int, optional): Number of cells per axis. Defaults to 100.
    """
    density = np.zeros((grid_size, grid_size))
    last_cell = grid_size - 1

    for file_name in os.listdir(annotations_path):
        if not file_name.endswith('.txt'):
            continue
        with open(os.path.join(annotations_path, file_name), 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 5:  # blank or incomplete row
                    continue
                x_center, y_center = map(float, parts[1:3])
                # Cell i covers [i / grid_size, (i + 1) / grid_size); a centre of
                # exactly 1.0 (or any out-of-range value) is clamped to an edge cell
                x = min(max(int(x_center * grid_size), 0), last_cell)
                y = min(max(int(y_center * grid_size), 0), last_cell)
                density[y, x] += 1

    plt.figure(figsize=(12, 6))
    # extent maps the grid onto [0, 1] so the tick values match the axis labels;
    # y runs downwards, as in image coordinates
    plt.imshow(density, cmap='hot', interpolation='nearest', extent=(0, 1, 1, 0))
    plt.colorbar(label='Object Count')
    plt.title('Object Density Heatmap')
    plt.xlabel('Normalized X')
    plt.ylabel('Normalized Y')
    plt.tight_layout()
    plt.show()

def plot_aspect_ratio_vs_class(class_names, annotations_path):
    """Plot aspect ratio distribution per class using class names.

    Collects width / height of every labelled box, grouped by class, and draws
    one violin per class. Rows with fewer than five values and boxes with zero
    height (which have no defined aspect ratio) are skipped.

    Args:
        class_names (list | dict): Class names indexed by class id. An
            ``id: name`` mapping is also accepted and is ordered by id.
        annotations_path (str): Folder containing the label files.
    """
    # Normalise an id -> name mapping so iteration yields names, not ids
    if isinstance(class_names, dict):
        class_names = [class_names[class_id] for class_id in sorted(class_names)]

    aspect_ratios = {name: [] for name in class_names}

    for file_name in os.listdir(annotations_path):
        if not file_name.endswith('.txt'):
            continue
        with open(os.path.join(annotations_path, file_name), 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 5:  # blank or incomplete row
                    continue
                class_id = int(parts[0])
                w, h = map(float, parts[3:5])
                if h == 0:  # degenerate box: avoid ZeroDivisionError
                    continue
                aspect_ratios[class_names[class_id]].append(w / h)

    plt.figure(figsize=(12, 6))
    sns.violinplot(data=[aspect_ratios[name] for name in class_names])
    plt.xticks(ticks=range(len(class_names)), labels=class_names, rotation=45)
    plt.title('Aspect Ratio Distribution per Class')
    plt.xlabel('Class')
    plt.ylabel('Aspect Ratio (Width/Height)')
    plt.tight_layout()
    plt.show()

In [None]:
# Where the dataset config, the label files and the training images live
data_yaml_path = f"{dataset_path}/data.yaml"
annotations_path = f"{training_dataset_path}/labels"
images_path = f"{training_dataset_path}/images"

# Class names are read from the dataset config
class_names = load_class_names(data_yaml_path)

# Per-class statistics
plot_class_distribution(class_names, annotations_path)
# Box shape and size statistics
plot_bounding_box_aspect_ratio(annotations_path)
plot_bounding_box_size(annotations_path)
# Statistics that also need the image files
plot_object_size_vs_image_size(annotations_path, images_path)
plot_image_resolution_distribution(annotations_path, images_path)
# Spatial distribution and per-class box shape
plot_object_density(annotations_path)
plot_aspect_ratio_vs_class(class_names, annotations_path)

# Training

In [8]:
# Train the model
# Lower the batch size (batch=n) if you run out of memory
! yolo task=detect mode=train model=yolov8n.yaml data='P:/Python programs/BNA-2025/src/Round1/dataset/data.yaml' epochs=30 batch=8 imgsz=640 project="C:\Users\muahm\OneDrive\Desktop"

# Testing

In [None]:
# Test the model
# Fill in the data path, the trained model (.pt file) and the image source
# conf=n shows only predictions whose confidence is greater than n
! yolo task=detect mode=predict data="" model="" show=True conf=0.5 source=""

# Post-Training Analysis

In [None]:
# Image.open needs a path argument; fill in both placeholders before running
confusion_matrix_path = ''  # path to confusion matrix
results_path = ''  # path to results
confusion_matrix = Image.open(confusion_matrix_path)
results = Image.open(results_path)
print("Confusion Matrix:", confusion_matrix)
print("Results:", results)