<a href="https://colab.research.google.com/github/AndyMuloki/EarlyDetectionOfAMD/blob/main/Early_detection_of_AMD_1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Downloading the Data from Kaggle via API

In [None]:
!cp kaggle.json ~/.kaggle/
!kaggle datasets download -d andrewmvd/ocular-disease-recognition-odir5k
!unzip ocular-disease-recognition-odir5k.zip
!mkdir ODIR-5K/Validation_Images

### Importing the necessary libraries


In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os

In [None]:
from PIL import Image
import glob
import random

In [None]:
import matplotlib.pyplot as plt
import IPython.display as display

### Defining the Batch Size and the Class Names

In [None]:
# Batch size.
BATCH_SIZE = 32

# Commented-out eight-class alternative:
# CLASS_NAMES = ['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']
# Only one disease class is needed here: AMD, which is 'A'.
# Open questions: how to extract only the data required for 'A', and whether
# restricting the classes reduces accuracy through overfitting.
CLASS_NAMES = ['N', 'A']

# Dataset locations, relative to the working directory.
training_images_file = 'ODIR-5K/Training Images'
testing_images_file = 'ODIR-5K/Testing Images'
labels_file = 'ODIR-5K/data.xlsx'

# Maps a specific diagnostic keyword to its disease label.
# 'A' is AMD; the meaning of 'N' and 'NaN' is presumably "normal" and
# "neither class" respectively -- TODO confirm. 'DELETE' is a marker value.
diseases = {
    'age-related macular degeneration': 'A',
    'anterior segment image': 'DELETE',
    'choroidal nevus': 'NaN',
    'dry age-related macular degeneration': 'A',
    'lens dust': 'DELETE',
    'low image quality': 'DELETE',
    'low image quality,maculopathy': 'DELETE',
    'macular pigmentation disorder': 'NaN',
    'no fundus image': 'DELETE',
    'normal fundus': 'N',
    'optic disk photographically invisible': 'DELETE',
    'wet age-related macular degeneration': 'A',
}

# Load the label spreadsheet into pandas, using its first column as the index.
labels = pd.read_excel(labels_file, index_col=0)

In [None]:
# Quick look at one right-eye keyword entry (the one the Series returns for key 0).
print(labels['Right-Diagnostic Keywords'][0])

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

### Labelling images and renaming them. Filtering out low quality fundus images

In [None]:
file_path = 'ODIR-5K/Training Images'

# Snapshot of the images on disk before any renaming. The list is kept under
# its original name; the set gives constant-time membership checks below
# instead of scanning the list once per image.
all_paths = glob.glob(file_path + "/*.jpg")
_original_paths = set(all_paths)
paths = []


def _label_from_keywords(keyword_cell):
    """Turn one diagnostic-keyword cell into the label part of a file name.

    The cell is split on ", ". Each keyword found in ``diseases`` contributes
    its mapped code, joined to any text already collected with "&". A keyword
    that is not in ``diseases`` appends "DELETE" directly, without separator.
    """
    # NOTE(review): confirm ", " is the separator actually used in the
    # spreadsheet; an unsplit multi-keyword cell is not a key of ``diseases``
    # and therefore ends up labelled "DELETE".
    label = ""
    for keyword in keyword_cell.split(", "):
        code = diseases.get(keyword)
        if code is None:
            label += "DELETE"
        elif label:
            label += "&" + code
        else:
            label += code
    return label


def _tag_image(fundus_name, label):
    """Rename ``<stem>.jpg`` to ``<stem>-<label>.jpg`` and record it in ``paths``.

    Images that were not part of the initial directory snapshot are skipped.
    """
    source = file_path + "/" + fundus_name
    if source not in _original_paths:
        return
    stem = fundus_name.split(".")[0]
    os.rename(source, file_path + "/" + stem + "-" + label + ".jpg")
    paths.append(source + "/" + label)


# Adding label information to image names, one spreadsheet row at a time.
for u_id in labels.index:
    labelL = _label_from_keywords(labels['Left-Diagnostic Keywords'][u_id])
    labelR = _label_from_keywords(labels['Right-Diagnostic Keywords'][u_id])
    _tag_image(str(labels['Left-Fundus'][u_id]), labelL)
    _tag_image(str(labels['Right-Fundus'][u_id]), labelR)


In [None]:
# Deleting low quality images and marking images that carry several labels.
items_to_remove = []
add_mix_info = []

# Sort every training image by the label text that follows the last "-".
for image_path in glob.glob("ODIR-5K/Training Images/*.jpg"):
    label_part = image_path.rsplit("/", 1)[-1].rsplit("-", 1)[-1]
    if "DELETE" in label_part:
        items_to_remove.append(image_path)
        continue
    if "&" in label_part:
        add_mix_info.append(image_path)

# Images whose label contains DELETE are removed from disk.
for doomed in items_to_remove:
    os.remove(doomed)

# Images whose label contains "&" get "&X" appended to that label.
for mixed in add_mix_info:
    os.rename(mixed, mixed.partition(".")[0] + "&X" + ".jpg")

### Enhancing Images and resizing. Creating validation set

In [None]:
import cv2 as cv

def loadAndCropCenterResizeCV2(img, newSize):
    """Center-crop an image array to a square and resize it to ``newSize``.

    ``img.shape`` must have exactly three entries. The first two axes are
    cropped, centered, to the shorter of the two lengths, and the result is
    handed to ``cv.resize`` together with ``newSize``. An already square
    input is resized without cropping.
    """
    dim0, dim1, _ = img.shape
    side = min(dim0, dim1)
    if dim0 != dim1:
        start0 = (dim0 - side) // 2
        start1 = (dim1 - side) // 2
        end0 = (dim0 + side) // 2
        end1 = (dim1 + side) // 2
        img = img[start0:end0, start1:end1, :]
    return cv.resize(img, newSize)

def clahe_resize(impath):
    """Center-crop the image at ``impath`` to 250x250 and overwrite the file.

    Despite the name, no CLAHE or other contrast enhancement is applied here;
    the image is only cropped and resized.

    Raises:
        OSError: if ``cv.imread`` could not load the file.
    """
    img = cv.imread(impath)
    # cv.imread reports failure by returning None rather than raising; fail
    # here with the offending path instead of deep inside the crop helper.
    if img is None:
        raise OSError("could not read image: " + str(impath))
    # Resize to 250 by 250 pixels and write back to the same path.
    resized = loadAndCropCenterResizeCV2(img, (250, 250))
    cv.imwrite(impath, resized)

In [None]:
# Resize every training image in place (takes a while, ~10min).
all_paths = glob.glob("ODIR-5K/Training Images/*.jpg")
for image_path in all_paths:
    clahe_resize(image_path)

# Creating validation set: move a random sample of the training images
# into ODIR-5K/Validation_Images/.
# num_to_select = 1950
num_to_select = 1450
list_of_random_items = random.sample(all_paths, num_to_select)
for chosen in list_of_random_items:
    os.replace(chosen, "ODIR-5K/Validation_Images/" + chosen.rsplit("/", 1)[-1])

In [None]:
# Drop the test images and the label spreadsheet, then archive what is left of ODIR-5K/.
!rm -rf ODIR-5K/Testing\ Images/
!rm -rf ODIR-5K/data.xlsx
!zip -r ODIR-5K.zip ODIR-5K/

In [None]:
# Copy the archive to "/amd/My Drive".
# NOTE(review): presumably a mounted Google Drive -- confirm the mount point.
!cp -r ODIR-5K.zip /amd/My\ Drive

In [None]:
def show_class_distribution():
    """Print how many training images are labelled N and how many A.

    Scans ``ODIR-5K/Training Images/*.jpg`` and reads the label as the text
    after the last "-" in each file name. Only the exact labels ``N`` and
    ``A`` are counted; every other label, including mixed labels containing
    "&", is ignored. The two counts are printed as ``N A`` on one line.
    """
    normal_count = 0
    amd_count = 0
    for image_path in glob.glob("ODIR-5K/Training Images/*.jpg"):
        label_part = os.path.basename(image_path).split("-")[-1]
        if label_part == "N.jpg":
            normal_count += 1
        elif label_part == "A.jpg":
            amd_count += 1
        # Mixed-label files used to hit `X += 1` on a counter that was never
        # initialised (UnboundLocalError). The count was never printed, so
        # such files are simply skipped.
    print(normal_count, amd_count)

In [None]:
# Report the N and A image counts for the training folder.
show_class_distribution()