<a href="https://colab.research.google.com/github/Jaseelkt007/ML/blob/master/Diabetic_Retinopathy_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Mount Google Drive at /content/drive (Colab-only API); the data paths used
# later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
from multiprocessing import Pool
import warnings
from tqdm import tqdm
import os
import torch
import matplotlib.pyplot as plt
import cv2

# Raw images are read from the sample folder on the mounted Drive; the
# trimmed/resized copies are written to a sub-folder of it.
sample_data_path = '/content/drive/MyDrive/sample'
output_folder = '/content/drive/MyDrive/sample/preprocessed_samples'

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race.
os.makedirs(output_folder, exist_ok=True)

# Random augmentation followed by tensor conversion and per-channel normalisation.
# ToTensor comes before Normalize because Normalize operates on tensors.
transform = transforms.Compose([
    #transforms.Resize((256,256)),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness= 0.2, contrast = 0.2),
    transforms.ToTensor(),
    # Mean/std are the values commonly used with ImageNet-pretrained models;
    # show_images() applies the inverse of this step with the same numbers.
    transforms.Normalize(mean=[0.485, 0.456,0.406], std= [0.229,0.224,0.225])
])

def trim(image):
    """Crop away the dark border around the image content.

    A pixel counts as content when its grey value exceeds 10% of the mean of
    all non-zero grey values. Rows/columns are kept when more than 2% of their
    pixels are content; the result is the bounding box of the kept rows and
    columns.

    Args:
        image: A PIL image (or anything ``np.array`` turns into an H x W or
            H x W x C array). Colour data is assumed to be in RGB(A) order,
            which is what PIL produces.

    Returns:
        A new ``PIL.Image`` with the cropped pixels. If no content can be
        detected (e.g. an all-black image) the uncropped image is returned.
    """
    percentage = 0.02
    img = np.array(image)

    # PIL arrays are RGB(A), not OpenCV's BGR, so use the RGB conversion codes;
    # otherwise the red and blue luminance weights are swapped.
    if img.ndim == 2:
        img_gray = img  # already single-channel
    elif img.shape[2] == 4:
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    else:
        img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # An all-black image has no non-zero pixels: the mean would be NaN and the
    # min/max below would fail, so return the image untouched.
    non_zero = img_gray[img_gray != 0]
    if non_zero.size == 0:
        return Image.fromarray(img)

    # Binary mask separating background from actual content.
    mask = img_gray > 0.1 * np.mean(non_zero)

    # Row-wise and column-wise content counts.
    row_sums = np.sum(mask, axis=1)
    col_sums = np.sum(mask, axis=0)
    # Keep rows/columns in which more than 2% of the pixels are content.
    rows = np.where(row_sums > img.shape[1] * percentage)[0]
    cols = np.where(col_sums > img.shape[0] * percentage)[0]
    if rows.size == 0 or cols.size == 0:
        # Content too sparse to define a bounding box.
        return Image.fromarray(img)

    # Bounding box of the retained rows and columns (inclusive).
    min_row, max_row = rows[0], rows[-1]
    min_col, max_col = cols[0], cols[-1]
    im_crop = img[min_row : max_row + 1, min_col : max_col + 1]
    return Image.fromarray(im_crop)

def resize_main_aspect(image, desired_size):
    """Resize ``image`` to fit a square canvas while keeping its aspect ratio.

    The longer side is scaled to ``desired_size``; the resized image is then
    pasted centred onto a new ``desired_size`` x ``desired_size`` RGB image.

    Args:
        image: PIL image to resize.
        desired_size: Side length, in pixels, of the square output.

    Returns:
        A new square RGB ``PIL.Image``.
    """
    old_size = image.size  # PIL reports (width, height)
    ratio = float(desired_size) / max(old_size)  # resize ratio
    # Clamp each side to at least 1 px: truncation would otherwise give a
    # zero-length side for very elongated inputs, which Image.resize rejects.
    new_size = tuple(max(1, int(side * ratio)) for side in old_size)
    # LANCZOS smooths the image when downscaling, which reduces artifacts.
    im = image.resize(new_size, Image.LANCZOS)
    new_im = Image.new("RGB", (desired_size, desired_size))
    # Centre the resized image on the square background.
    offset = ((desired_size - new_size[0]) // 2, (desired_size - new_size[1]) // 2)
    new_im.paste(im, offset)
    return new_im

def save_single(args): # single packed argument so it can be mapped over a Pool
    """Trim, resize and save one image.

    Args:
        args: Tuple ``(img_file, input_path_folder, output_path_folder,
            output_size)``. ``output_size`` may be an int or a sequence whose
            first element is the side length of the square output.

    The result is written to ``output_path_folder`` under the same file name.
    """
    img_file, input_path_folder, output_path_folder, output_size = args
    # Accept a bare int as well as a (size, size) sequence; the warning in
    # multi_image_resize suggests the int form.
    desired_size = output_size if isinstance(output_size, int) else output_size[0]
    # The context manager closes the source file; trim() returns a new image
    # built from a copied array, so it stays valid after the file is closed.
    with Image.open(os.path.join(input_path_folder, img_file)) as image_org:
        image = trim(image_org)
    image = resize_main_aspect(image, desired_size=desired_size)
    image.save(os.path.join(output_path_folder , img_file))



def multi_image_resize(input_path_folder, output_path_folder, output_size=None):
    """Trim and resize every file in a folder using a process pool.

    Args:
        input_path_folder: Folder whose regular files are processed
            (sub-directories are skipped).
        output_path_folder: Destination folder; created if missing.
        output_size: Side length of the square output, either as an int or as
            a sequence whose first element is the side length.

    Raises:
        ValueError: If ``output_size`` is missing or falsy.
    """
    if not output_size:
        # Raise instead of warn + exit(): exit() would shut down the notebook
        # kernel / interpreter for what is only a missing argument.
        raise ValueError("Need to specify output_size! For example: output_size=100")

    # Normalise the int form so every worker receives an indexable size.
    if isinstance(output_size, int):
        output_size = (output_size, output_size)

    os.makedirs(output_path_folder, exist_ok=True)

    # One job tuple per regular file; save_single unpacks it.
    jobs = [
        (file, input_path_folder, output_path_folder, output_size)
        for file in os.listdir(input_path_folder)
        if os.path.isfile(os.path.join(input_path_folder, file))
    ]

    with Pool() as p:
        # list() drains the lazy iterator so all jobs run; tqdm shows progress.
        list(tqdm(p.imap_unordered(save_single, jobs), total=len(jobs)))

# Entry point for the batch preprocessing run. The call is commented out, so
# this block currently does nothing (presumably to avoid re-processing the
# whole folder every time the cell is re-run - confirm before re-enabling).
if __name__ == "__main__":
    #multi_image_resize(sample_data_path, output_folder, output_size = (256,256))
    pass

def preprocess_images(data_path, transform):
    """Trim, resize to 256x256 and transform every image file in a folder.

    Args:
        data_path: Folder containing the images. Only regular files are
            processed; sub-directories are skipped.
        transform: Callable applied to each resized PIL image.

    Returns:
        List with one transformed result per file, in sorted file-name order.
    """
    processed_images = []
    # sorted() makes the output order reproducible (os.listdir order is arbitrary).
    for img_name in sorted(os.listdir(data_path)):
        img_path = os.path.join(data_path, img_name)
        # Skip directories: in this notebook the preprocessed output folder is
        # a sub-folder of the sample folder and cannot be opened as an image.
        if not os.path.isfile(img_path):
            continue
        # Close the source file as soon as the pixels have been copied by trim().
        with Image.open(img_path) as image:
            trimmed = trim(image)
        image_resized = resize_main_aspect(trimmed, desired_size=256)
        processed_images.append(transform(image_resized))
    return processed_images

#processed_images = preprocess_images(sample_data_path, transform)

def show_images(images, n=5):
    """Display up to ``n`` normalised image tensors side by side.

    Args:
        images: Sequence of tensors in (C, H, W) layout that were normalised
            with mean [0.485, 0.456, 0.406] and std [0.229, 0.224, 0.225].
        n: Maximum number of images to show.

    Nothing is drawn when there are no images to show.
    """
    count = min(n, len(images))
    if count < 1:
        return

    # Build the de-normalisation constants once instead of once per image.
    std = torch.tensor([0.229, 0.224, 0.225])
    mean = torch.tensor([0.485, 0.456, 0.406])

    # squeeze=False always yields a 2-D axes array, so a single image works too.
    fig, axs = plt.subplots(1, count, figsize=(15, 5), squeeze=False)
    for ax, img in zip(axs[0], images[:count]):
        img = img.permute(1, 2, 0) # change from C, H, W to H, W, C
        img = torch.clamp(img * std + mean, 0, 1) # denormalize
        ax.imshow(img)
        ax.axis("off")
    plt.show()

#show_images(processed_images,n=5)







100%|██████████| 413/413 [04:01<00:00,  1.71it/s]


In [None]:
import csv


file_path = '/content/train.csv'

# Image names grouped by retinopathy grade, keyed by the grade string
# ('0'..'4') as it appears in the CSV.
image_names_by_grade = {str(grade): [] for grade in range(5)}

# newline='' is what the csv module expects for files passed to csv.reader.
with open(file_path, encoding='utf-8', newline='') as csv_file:
    csv_reader = csv.reader(csv_file) # csv_reader is an iterator
    header = next(csv_reader)  # read the header

    # find the column index of "Image name" and "Retinopathy grade"
    image_name_index = header.index("Image name")
    grade_index = header.index("Retinopathy grade")
    last_needed_index = max(image_name_index, grade_index)

    for row in csv_reader:
        # Blank or truncated lines would otherwise raise IndexError.
        if len(row) <= last_needed_index:
            continue
        # Rows with a grade outside 0-4 are ignored.
        bucket = image_names_by_grade.get(row[grade_index].strip())
        if bucket is not None:
            bucket.append(row[image_name_index])

# Keep the per-grade variable names available for any cell that uses them.
filtered_image_names_0 = image_names_by_grade['0']
filtered_image_names_1 = image_names_by_grade['1']
filtered_image_names_2 = image_names_by_grade['2']
filtered_image_names_3 = image_names_by_grade['3']
filtered_image_names_4 = image_names_by_grade['4']

for grade, names in image_names_by_grade.items():
    print(f"filtered_image_names_{grade}: {names}")

filtered_image_names_0: ['IDRiD_118', 'IDRiD_138', 'IDRiD_139', 'IDRiD_140', 'IDRiD_141', 'IDRiD_142', 'IDRiD_143', 'IDRiD_144', 'IDRiD_145', 'IDRiD_146', 'IDRiD_147', 'IDRiD_148', 'IDRiD_149', 'IDRiD_150', 'IDRiD_151', 'IDRiD_152', 'IDRiD_153', 'IDRiD_154', 'IDRiD_155', 'IDRiD_156', 'IDRiD_157', 'IDRiD_158', 'IDRiD_159', 'IDRiD_160', 'IDRiD_161', 'IDRiD_162', 'IDRiD_163', 'IDRiD_164', 'IDRiD_165', 'IDRiD_166', 'IDRiD_167', 'IDRiD_168', 'IDRiD_169', 'IDRiD_170', 'IDRiD_171', 'IDRiD_172', 'IDRiD_173', 'IDRiD_174', 'IDRiD_175', 'IDRiD_176', 'IDRiD_177', 'IDRiD_179', 'IDRiD_181', 'IDRiD_182', 'IDRiD_184', 'IDRiD_190', 'IDRiD_193', 'IDRiD_195', 'IDRiD_197', 'IDRiD_199', 'IDRiD_200', 'IDRiD_202', 'IDRiD_204', 'IDRiD_205', 'IDRiD_206', 'IDRiD_209', 'IDRiD_210', 'IDRiD_211', 'IDRiD_212', 'IDRiD_213', 'IDRiD_214', 'IDRiD_217', 'IDRiD_218', 'IDRiD_219', 'IDRiD_220', 'IDRiD_221', 'IDRiD_222', 'IDRiD_223', 'IDRiD_225', 'IDRiD_226', 'IDRiD_227', 'IDRiD_228', 'IDRiD_229', 'IDRiD_230', 'IDRiD_233', 

In [8]:
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

# Random geometric and photometric augmentation applied to PIL images.
augmentation_transforms = transforms.Compose([
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness= 0.1, contrast=0.1),
])

preprocessed_folder = '/content/drive/MyDrive/sample/preprocessed_samples'
augmented_folder = '/content/drive/MyDrive/sample/aug_train_samples'
csv_file_path = '/content/train.csv'

# create output folder structure each class
# (makedirs also creates augmented_folder itself when it is missing)
for i in range(5):
    class_folder = os.path.join(augmented_folder, f'class_{i}')
    os.makedirs(class_folder, exist_ok=True)

# augmentation per class:
augmentation_counts = {
      '0' : 2, # fewer augumentation for class 0
      '1' : 10, # more augmentation
      '2' : 2,
      '3' : 5, # moderate augmentation for class 3 and 4
      '4' : 8
  }

with open(csv_file_path, encoding='utf-8', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    header = next(csv_reader)
    image_name_index = header.index("Image name")
    grade_index = header.index("Retinopathy grade")
    last_needed_index = max(image_name_index, grade_index)

    # Loop through each row in csv and augment images based on class
    for row in tqdm(csv_reader , desc='Augmenting images', unit='image'):
        # Blank or truncated lines would otherwise raise IndexError.
        if len(row) <= last_needed_index:
            continue
        image_name = row[image_name_index]
        label = row[grade_index].strip()

        # An unexpected grade has no augmentation count and no class folder.
        num_augmentation = augmentation_counts.get(label)
        if num_augmentation is None:
            print(f"Warning: unexpected grade {label!r} for {image_name}; skipping")
            continue

        # Load the preprocessed image
        img_path = os.path.join(preprocessed_folder, f"{image_name}.jpg")
        if not os.path.exists(img_path):
            print(f"Warning: {img_path} doesn't exist")
            continue # skip if the file doesn't exist

        # define where to save the image based on the class
        class_folder = os.path.join(augmented_folder, f'class_{label}')

        # The context manager closes each source file once its augmented
        # copies are written, instead of leaving one open handle per image.
        with Image.open(img_path) as image:
            for i in range(num_augmentation):
                augmented_image = augmentation_transforms(image)
                # save the image to corresponding class folder
                aug_image_name = f'{image_name}_aug_{i}.jpg'
                augmented_image.save(os.path.join(class_folder, aug_image_name))
print("Augmentation and saving completed")

Augmenting images: 413image [00:25, 16.38image/s]

Augmentation and saving completed





In [9]:
import os

# Root folder that holds one sub-folder of augmented images per class.
augmented_folder = '/content/drive/MyDrive/sample/aug_train_samples'

# Report the number of entries in each class folder, or that the folder is missing.
for i in range(5):
    class_folder = os.path.join(augmented_folder, f'class_{i}')
    if not os.path.exists(class_folder):
        print(f"{class_folder} does not exist.")
        continue
    files = os.listdir(class_folder)
    print(f"Number of files in {class_folder}: {len(files)}")

Number of files in /content/drive/MyDrive/sample/aug_train_samples/class_0: 268
Number of files in /content/drive/MyDrive/sample/aug_train_samples/class_1: 200
Number of files in /content/drive/MyDrive/sample/aug_train_samples/class_2: 272
Number of files in /content/drive/MyDrive/sample/aug_train_samples/class_3: 370
Number of files in /content/drive/MyDrive/sample/aug_train_samples/class_4: 392


In [None]:
pip install tensorflow-addons

In [None]:
# Code in tensorflow
import os
import csv
import tensorflow as tf
import tensorflow_addons as tfa
from PIL import Image
from tqdm import tqdm

# Define augmentation functions using TensorFlow
def augment_image(image):
    """Return a randomly rotated, flipped and colour-jittered copy of ``image``.

    Steps, in order: rotation by a random angle drawn from [-20, 20) degrees,
    random horizontal flip, random brightness shift (max delta 0.1) and random
    contrast scaling (factor in [0.9, 1.1]). No clipping is applied, so the
    caller is responsible for bringing values back into range.
    """
    # Draw the angle in degrees, then convert to radians before rotating.
    degrees = tf.random.uniform([], minval=-20, maxval=20, dtype=tf.float32)
    radians = degrees * (3.14159265 / 180.0)
    rotated = tfa.image.rotate(image, radians)

    # Mirror left/right at random.
    flipped = tf.image.random_flip_left_right(rotated)

    # Colour jitter: brightness first, contrast second.
    brightened = tf.image.random_brightness(flipped, max_delta=0.1)
    return tf.image.random_contrast(brightened, lower=0.9, upper=1.1)

preprocessed_folder = '/content/drive/MyDrive/sample/preprocessed_samples'
augmented_folder = '/content/drive/MyDrive/sample/aug_train_samples'
csv_file_path = '/content/train.csv'

# NOTE: this is the same output folder and file-name pattern as the torchvision
# augmentation cell, so running both overwrites the earlier results.

# Create output folder structure for each class
# (makedirs also creates augmented_folder itself when it is missing)
for i in range(5):
    class_folder = os.path.join(augmented_folder, f'class_{i}')
    os.makedirs(class_folder, exist_ok=True)

# Define the number of augmentations per class
augmentation_counts = {
    '0': 2,   # Fewer augmentations for class 0
    '1': 10,  # More augmentations for class 1
    '2': 2,   # Fewer augmentations for class 2
    '3': 5,   # Moderate augmentations for class 3
    '4': 8    # Moderate augmentations for class 4
}

# Read the CSV file
with open(csv_file_path, encoding='utf-8', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    header = next(csv_reader)
    image_name_index = header.index("Image name")
    grade_index = header.index("Retinopathy grade")
    last_needed_index = max(image_name_index, grade_index)

    # Loop through each row in CSV and augment images based on class
    for row in tqdm(csv_reader, desc='Augmenting images', unit='image'):
        # Blank or truncated lines would otherwise raise IndexError.
        if len(row) <= last_needed_index:
            continue
        image_name = row[image_name_index]
        label = row[grade_index].strip()

        # An unexpected grade has no augmentation count and no class folder.
        num_augmentation = augmentation_counts.get(label)
        if num_augmentation is None:
            print(f"Warning: unexpected grade {label!r} for {image_name}; skipping")
            continue

        # Load the preprocessed image
        img_path = os.path.join(preprocessed_folder, f"{image_name}.jpg")
        if not os.path.exists(img_path):
            print(f"Warning: {img_path} doesn't exist")
            continue  # Skip if the file doesn't exist

        # Copy the pixels out and close the file straight away.
        with Image.open(img_path) as pil_image:
            pixels = tf.keras.preprocessing.image.img_to_array(pil_image)  # float array, values 0..255

        # convert_image_dtype does NOT rescale float input, so divide explicitly
        # to reach the [0, 1] range that the brightness/contrast deltas in
        # augment_image are sized for.
        image = tf.image.convert_image_dtype(pixels / 255.0, dtype=tf.float32)

        # Define where to save the augmented images based on class label
        class_folder = os.path.join(augmented_folder, f'class_{label}')

        # Generate and save augmented images
        for i in range(num_augmentation):
            augmented_image = augment_image(image)
            # Back to uint8: saturate=True clips values pushed outside [0, 1]
            # by the brightness/contrast jitter instead of letting them wrap.
            augmented_image = tf.image.convert_image_dtype(
                augmented_image, dtype=tf.uint8, saturate=True
            )
            # scale=False: the default would min-max stretch every image and
            # change its contrast.
            augmented_image = tf.keras.preprocessing.image.array_to_img(
                augmented_image, scale=False
            )  # Convert back to PIL image
            aug_image_name = f"{image_name}_aug_{i}.jpg"
            augmented_image.save(os.path.join(class_folder, aug_image_name))

print("Augmentation and saving completed.")