In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install tensorflow_addons

Collecting tensorflow_addons
  Downloading tensorflow_addons-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (612 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/612.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/612.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.3/612.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow_addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow_addons
Successfully installed tensorflow_addons-0.22.0 typeguard-2.13.3


In [3]:
# Import libraries for data augumentation

import cv2
import os
import numpy as np
import shutil
from skimage import exposure
from PIL import Image
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
import time

random.seed(123) # random data augumentation


In [4]:


# this function does the job of image zoom augumentation | input:numpy img outut:zoomed numpy img
def zoom_augmentation(image, zoom_factor):
    # Ensure the zoom factor is within valid range (0.0 to 1.0)
    if zoom_factor <= 0.0 or zoom_factor >= 1.0:
        raise ValueError("Zoom factor must be between 0.0 and 1.0")

    height, width, _ = image.shape

    # Calculate the cropping region
    crop_height = int(height * zoom_factor)
    crop_width = int(width * zoom_factor)

    # Calculate the starting point for cropping
    start_x = (width - crop_width) // 2
    start_y = (height - crop_height) // 2

    # Crop the image to achieve zoom
    zoomed_image = image[start_y:start_y + crop_height, start_x:start_x + crop_width]

    # Resize the cropped region back to the original image size
    zoomed_image = cv2.resize(zoomed_image, (width, height))
    return zoomed_image


#--> This function combines many agumentations and output an augumented image in numpy format
def augment_image(image,
                  rotation_angle=0,
                  horizontal_flip=False,
                  vertical_flip=False,
                  brightness_factor=None,
                  apply_hist_eq=False,
                  zoom_factor=1.0):

    augmented_image= image.copy()

    # Rotation
    if rotation_angle != 0:
        rows, cols, _ = augmented_image.shape
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), rotation_angle, 1)
        augmented_image = cv2.warpAffine(augmented_image, M, (cols, rows))


    # Horizontal flip
    if horizontal_flip:
        augmented_image = cv2.flip(augmented_image, 1)


    # Vertical flip
    if vertical_flip:
        augmented_image = cv2.flip(augmented_image, 0)


    # Brightness adjustment
    if brightness_factor is not None:
        augmented_image = cv2.convertScaleAbs(augmented_image, alpha=brightness_factor, beta=0)


    # histogram equilization
    if apply_hist_eq:
        augmented_image = exposure.equalize_adapthist(augmented_image, clip_limit=0.008)
        augmented_image = (augmented_image*250).astype('uint8')


    # apply zoom
    if zoom_factor!=1.0: # lower is no. higher is zoom | 0.0 to 1.0
        augmented_image = zoom_augmentation(augmented_image, zoom_factor)


    return augmented_image



# ## example use of image augumentation displaying below
# image = cv2.imread('test/0Normal/NormalG0 (433).png')
# image = augment_image(image, brightness_factor=1.0, zoom_factor=1.0, rotation_angle=10, apply_hist_eq=True, horizontal_flip=True, vertical_flip=True)
# display(Image.fromarray((image).astype('uint8')))



In [5]:
# --> Function to save augumented images

def auto_augument_img(image, generate_qty):
    augumented_images = []


    # Here is augumentation parameters | modify if needed
    rotation_angle = [ +12, 0, -12]
    brightness_factor = [1.0, 0.8]
    vertical_flip = [False, True]
    horizontal_flip = [True]
    apply_hist_eq = [False] # if change to [True], also uncomment hist eql. for original img (below code)
    zoom_factor = [1.0, 0.9]

    total_possible_img_count = len(rotation_angle)* len(brightness_factor)* len(vertical_flip)* len(horizontal_flip)*len(zoom_factor) # 240
    skip_factor = generate_qty/total_possible_img_count # 14/240 = 0.0058
    for rotatation in rotation_angle:
        for bright in brightness_factor:
            for v_flip in vertical_flip:
                for h_flip in horizontal_flip:
                    for zoom in zoom_factor:
                        if random.random() < skip_factor: # 0.0058 lower is number more images
                            aug_im = augment_image(image, brightness_factor=bright, zoom_factor=zoom, rotation_angle=rotatation, apply_hist_eq=apply_hist_eq, horizontal_flip=h_flip, vertical_flip=v_flip)
                            augumented_images.append(aug_im)


    return augumented_images


# # example use: takes an image_arr and gives list of images after applying different augumentation on them | input and output are np array img
# image = cv2.imread('archive/test/0/9003175L.png')
# all_imgs = auto_augument_img(image, generate_qty=10)
# print('Total no. of images generated: ', len(all_imgs))

## Balancing no. of images with data augmentation

In [6]:
# creating new image dataset with data augumentation


def data_balanceing(output_path, folder_path, max_image):
    os.makedirs(output_path, exist_ok=True)

    classes = ['0Normal', '1Doubtful', '2Mild', '3Moderate', '4Severe']  # class folder names

    for class_ in classes:
        img_folder_path = os.path.join(folder_path, str(class_))
        image_counter = 0
        for img_file in os.listdir(img_folder_path): # loop through every single image in class folder
            full_path_img_file = os.path.join(img_folder_path, img_file)
            output_folder_cls = os.path.join(output_path, str(class_))

            # print(full_path_img_file, output_folder_cls)
            os.makedirs(output_folder_cls, exist_ok=True)

            if image_counter>max_image:
                break # break from this class of image

            total_available_img = len(os.listdir(img_folder_path)) # eg: 200
            required_img = max(max_image-total_available_img, 0) # eg: 1000 - 200 = 800
            required_img = required_img/total_available_img # per img reqire img

            # # Original image apply histogram equilization and saving
            # original_img = exposure.equalize_adapthist(cv2.imread(full_path_img_file), clip_limit=0.008)
            # original_img = (original_img*255).astype('uint8')
            # cv2.imwrite(f'{output_folder_cls}/original_{image_counter}.jpg', original_img)
            cv2.imwrite(f'{output_folder_cls}/original_{image_counter}.jpg', cv2.imread(full_path_img_file))

            image_counter+=1

            all_imgs = auto_augument_img(cv2.imread(full_path_img_file), generate_qty=required_img)
            for img_arr in all_imgs:
                new_img_file_path = f'{output_folder_cls}/aug_{image_counter}.jpg'
                cv2.imwrite(new_img_file_path, img_arr)
                image_counter+=1

            if image_counter>max_image:
                break

        print(f'Done class {class_} for {folder_path}')



data_balanceing(
    output_path = "/content/drive/MyDrive/SHSU-Projects/Final_files/Knee_Dataset_2_Augmented/train_augmented",
    folder_path="/content/drive/MyDrive/SHSU-Projects/Final_files/KneeOA_Dataset_2_Original/train",
    max_image=1350
)



Done class 0Normal for /content/drive/MyDrive/SHSU-Projects/Final_files/KneeOA_Dataset_2_Original/train
Done class 1Doubtful for /content/drive/MyDrive/SHSU-Projects/Final_files/KneeOA_Dataset_2_Original/train
Done class 2Mild for /content/drive/MyDrive/SHSU-Projects/Final_files/KneeOA_Dataset_2_Original/train
Done class 3Moderate for /content/drive/MyDrive/SHSU-Projects/Final_files/KneeOA_Dataset_2_Original/train
Done class 4Severe for /content/drive/MyDrive/SHSU-Projects/Final_files/KneeOA_Dataset_2_Original/train


In [7]:
import os

# Specify the path to the test folder
test_folder_path = '/content/drive/MyDrive/SHSU-Projects/Final_files/Knee_Dataset_2_Augmented/train_augmented'

# Get a list of class folders within the test folder
class_folders = [folder for folder in os.listdir(test_folder_path) if os.path.isdir(os.path.join(test_folder_path, folder))]

# Count the number of images in each class folder
class_image_counts = {}
for class_folder in class_folders:
    class_path = os.path.join(test_folder_path, class_folder)
    image_count = len(os.listdir(class_path))
    class_image_counts[class_folder] = image_count

# Display the image counts for each class
for class_folder, count in class_image_counts.items():
    print(f"Class {class_folder}: {count} images")

Class 0Normal: 1351 images
Class 1Doubtful: 1347 images
Class 2Mild: 1338 images
Class 3Moderate: 1334 images
Class 4Severe: 1355 images
