**Experimenting with different image resolutions, from 28x28 upwards**

In [1]:
# Explicit %pip magic: runs pip within the current kernel and does not rely on
# IPython's automagic turning a bare `pip ...` line into a magic call.
%pip install pillow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


**Counting the Dataset**

In [3]:
import os

# Function to count the number of images in a directory
def count_images_in_directory(directory):
    """Return the number of ``.jpg`` files found anywhere under ``directory``.

    The tree is traversed with ``os.walk``, so files in nested subfolders are
    included. The suffix test is case-sensitive: only names ending in the
    lowercase ``.jpg`` are counted.

    Args:
        directory: Path of the folder to search.

    Returns:
        int: Count of matching files (0 when nothing matches or the path
        yields no entries).
    """
    return sum(
        1
        for _, _, names in os.walk(directory)
        for name in names
        if name.endswith(".jpg")
    )

# Image folders for the test, train and validation splits.
# NOTE(review): these are absolute paths under one user's home directory, so
# the counts below can only be reproduced on a machine with the same layout.
directories = [
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test/images',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train/images',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/valid/images'
]

# Print one line per split with its recursive .jpg count. `directory` and
# `image_count` are plain notebook-level names and stay bound after the loop.
for directory in directories:
    image_count = count_images_in_directory(directory)
    print(f"Directory: {directory}, Image Count: {image_count}")

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test/images, Image Count: 169
Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train/images, Image Count: 3631
Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/valid/images, Image Count: 348


**Difference Between Fractured and Non-Fractured Images**

In [4]:
import os

# Function to count the number of images with and without labels in a directory
def count_images_with_labels(directory):
    """Count JPEG images with and without a non-empty label file.

    Every folder at or below ``directory`` is treated as a possible split root
    that holds sibling ``images`` and ``labels`` subfolders. For each ``.jpg``
    found under ``<root>/images`` (searched recursively), the label is looked
    up directly in ``<root>/labels`` under the same file name with its trailing
    ``.jpg`` swapped for ``.txt``.

    An image counts as labeled only when that label file exists and contains
    something other than whitespace; a missing or blank label file makes the
    image unlabeled.

    Args:
        directory: Path of the split root (or a parent of several split roots).

    Returns:
        tuple[int, int]: ``(labeled_image_count, unlabeled_image_count)``.
    """
    labeled_image_count = 0
    unlabeled_image_count = 0
    image_suffix = ".jpg"
    for root, _, _ in os.walk(directory):
        image_dir = os.path.join(root, 'images')
        label_dir = os.path.join(root, 'labels')
        # os.walk yields nothing for a path that does not exist, so folders
        # without an `images` child contribute no files here.
        for _, _, files in os.walk(image_dir):
            for file in files:
                if not file.endswith(image_suffix):
                    continue
                # Strip only the trailing extension. str.replace would rewrite
                # every ".jpg" inside the name (e.g. "a.jpg.rf.1.jpg") and
                # point at a label file that does not exist.
                label_name = file[:-len(image_suffix)] + ".txt"
                label_path = os.path.join(label_dir, label_name)
                has_annotation = False
                if os.path.exists(label_path):
                    with open(label_path, 'r') as f:
                        # Whitespace-only label files carry no annotation.
                        has_annotation = bool(f.read().strip())
                if has_annotation:
                    labeled_image_count += 1
                else:
                    unlabeled_image_count += 1
    return labeled_image_count, unlabeled_image_count

# Split roots; each is expected to contain sibling `images` and `labels` folders.
# NOTE(review): absolute, user-specific paths — only valid on the original machine.
directories = [
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/valid'
]

# For every split, report how many images have a non-empty label file and how
# many do not (label file missing or blank).
for directory in directories:
    labeled_count, unlabeled_count = count_images_with_labels(directory)
    # The two counts partition the .jpg files, so their sum is the split size.
    total_count = labeled_count + unlabeled_count
    # Print the counts for each directory
    print(f"Directory: {directory}")
    print(f"Total Images: {total_count}")
    print(f"Images with Labels: {labeled_count}")
    print(f"Images without Labels: {unlabeled_count}\n")

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test
Total Images: 169
Images with Labels: 83
Images without Labels: 86

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train
Total Images: 3631
Images with Labels: 1804
Images without Labels: 1827

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/valid
Total Images: 348
Images with Labels: 173
Images without Labels: 175



**Analysing the Different Dimensions**

In [5]:
from collections import Counter
import os
from PIL import Image

# Function to retrieve dimensions of images in a directory
def get_image_dimensions(directory):
    """Collect the ``size`` of every ``.jpg`` image under ``directory``.

    The folder tree is walked recursively. Each matching file is opened with
    ``Image.open`` inside a ``with`` block and its ``size`` attribute is
    recorded.

    Args:
        directory: Path of the folder to search.

    Returns:
        list: One ``size`` value per image, in the order the files were visited.
    """
    sizes = []
    for folder, _, names in os.walk(directory):
        # Only names ending in lowercase ".jpg" are treated as images.
        jpeg_paths = [os.path.join(folder, name) for name in names if name.endswith(".jpg")]
        for path in jpeg_paths:
            with Image.open(path) as picture:
                sizes.append(picture.size)
    return sizes

# Function to count the occurrences of each image dimension
def count_image_dimensions(dimensions):
    """Tally how many times each entry of ``dimensions`` occurs.

    Args:
        dimensions: Iterable of hashable size values.

    Returns:
        collections.Counter: Mapping of each distinct value to its count.
    """
    tally = Counter()
    tally.update(dimensions)
    return tally

# Function to retrieve the top N dimensions with the highest counts
def top_n_dimensions(dimension_counts, n=5):
    """Return the ``n`` most frequent entries of ``dimension_counts``.

    Args:
        dimension_counts: Object providing ``most_common`` (the notebook
            passes a ``Counter``).
        n: How many entries to return; defaults to 5.

    Returns:
        Whatever ``dimension_counts.most_common(n)`` produces — for a
        ``Counter``, a list of ``(value, count)`` pairs, highest count first.
    """
    return dimension_counts.most_common(n)

# Function to count the number of dimensions with only one occurrence
def count_single_dimensions(dimension_counts):
    """Return how many distinct keys of ``dimension_counts`` have a count of exactly 1.

    Args:
        dimension_counts: Mapping from a value to its number of occurrences.

    Returns:
        int: Number of keys whose count equals 1.
    """
    singles = [key for key, occurrences in dimension_counts.items() if occurrences == 1]
    return len(singles)

# Image folders for the test, train and validation splits.
# NOTE(review): absolute, user-specific paths — only valid on the original machine.
directories = [
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test/images',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train/images',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/valid/images'
]

# Summarise the image-size distribution of each split.
for directory in directories:
    print(f"Directory: {directory}\n")
    # One size entry per .jpg file (every file is opened to read it)
    dimensions = get_image_dimensions(directory)
    # Size -> number of images that have that size
    dimension_counts = count_image_dimensions(dimensions)
    # The five most frequent sizes (default n=5)
    top_5 = top_n_dimensions(dimension_counts)
    # Number of distinct sizes that occur exactly once in this split
    single_count = count_single_dimensions(dimension_counts)
    
    # Print the ranked sizes, numbered from 1
    print("Top 5 dimensions with the highest counts:")
    for i, (dimension, count) in enumerate(top_5, 1):
        print(f"{i}. Dimension: {dimension}, Count: {count}")

    # NOTE(review): the value printed here is the count of sizes seen only once
    # (equivalently, of images whose size no other image in the split shares).
    print("\nThe number of images with one unique dimension:" f" {single_count}\n")

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test/images

Top 5 dimensions with the highest counts:
1. Dimension: (406, 512), Count: 20
2. Dimension: (420, 512), Count: 18
3. Dimension: (512, 406), Count: 15
4. Dimension: (379, 512), Count: 6
5. Dimension: (512, 420), Count: 6

The number of images with one unique dimension: 79

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train/images

Top 5 dimensions with the highest counts:
1. Dimension: (420, 512), Count: 518
2. Dimension: (406, 512), Count: 443
3. Dimension: (512, 406), Count: 255
4. Dimension: (512, 512), Count: 129
5. Dimension: (512, 420), Count: 117

The number of images with one unique dimension: 0

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/valid/images

Top 5 dimensions with the highest counts:
1. Dimension: (420, 512), Count: 48
2. Dimension: (406, 512), Count: 32
3. Dimension: (512, 406), Count: 30
4. Dimension: (512, 512), Count:

In [6]:
import os
from PIL import Image

def find_min_max_dimensions(directory):
    """Find the ``.jpg`` images with the smallest and largest pixel area.

    The tree under ``directory`` is walked recursively. Images are ranked by
    ``width * height``; when several images tie, the first one visited is
    kept because both comparisons are strict.

    Args:
        directory: Path of the folder to search.

    Returns:
        tuple: ``(min_width, min_height, min_path, max_width, max_height,
        max_path)``. When no ``.jpg`` file is found the result is
        ``(inf, inf, None, 0, 0, None)``.
    """
    # Each record is (width, height, path); the starting values guarantee that
    # the first image seen replaces both of them.
    smallest = (float('inf'), float('inf'), None)
    largest = (0, 0, None)

    for folder, _, names in os.walk(directory):
        for name in names:
            if not name.endswith(".jpg"):  # only JPEG files are considered
                continue
            path = os.path.join(folder, name)
            with Image.open(path) as picture:
                width, height = picture.size
            area = width * height
            if area < smallest[0] * smallest[1]:
                smallest = (width, height, path)
            if area > largest[0] * largest[1]:
                largest = (width, height, path)

    return smallest + largest

# Split roots; the search is recursive, so images under each root's `images`
# folder are picked up.
# NOTE(review): absolute, user-specific paths — only valid on the original machine.
directories = [
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train',
    '/home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/valid'
]

# Report the smallest and largest image (ranked by width * height) per split.
for directory in directories:
    # Despite their names, *_length holds the image width and *_width holds the
    # image height, so the printed "AxB" reads width x height.
    min_length, min_width, min_image_path, max_length, max_width, max_image_path = find_min_max_dimensions(directory)
    print(f"Directory: {directory}")
    print(f"Image with Smallest Dimensions: {min_image_path}, Dimensions: {min_length}x{min_width}")
    print(f"Image with Largest Dimensions: {max_image_path}, Dimensions: {max_length}x{max_width}\n")

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test
Image with Smallest Dimensions: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test/images/image1_4521_png.rf.9d460c80d2b830fc35b17c628a6d6290.jpg, Dimensions: 119x512
Image with Largest Dimensions: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/test/images/distal-humerus-fracture-1_jpg.rf.831cb137cfcbde1079f86abd5f5f2867.jpg, Dimensions: 801x2048

Directory: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train
Image with Smallest Dimensions: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train/images/image1_3_png.rf.4f3936b1954ddb019efef8efe3594f6e.jpg, Dimensions: 135x406
Image with Largest Dimensions: /home/elt/rw372/notebook/COMP8260 - A.I Systems/Project/Dataset/train/images/elbow-dislocation-with-coronoid-process-fracture_jpg.rf.75b67d2e40fe637ba2838e1283f970c5.jpg, Dimensions: 2048x2048

Directory: /home/elt/rw372/notebook/COMP8260 -