<a href="https://colab.research.google.com/github/AhmedNasser1601/Diabetic-Retinopathy-Detection/blob/Main/Diabetic_Retinopathy_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Name: Ahmed Nasser Ahmed Hassan***
> **CodeClause |> *Sep/2022***
>> **Data Science Intern |> *CC-OL-911***
>>> **Task2 >> *Diabetic Retinopathy Detection***
---

> ### |> ***Requirements***

>> #### |> ***Import Packages***

In [1]:
import sys
import os
import json
import csv
import numpy as np
import pandas as pd
import matplotlib
import random
import cv2
from datetime import datetime
from subprocess import check_output

from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split



---



In [2]:
def classes_to_int(label):
  label = label.strip()

  if label=="No DR": return 0
  if label=="Mild": return 1
  if label=="Moderate": return 2
  if label=="Severe": return 3
  if label=="Proliferative DR": return 4

  print("Invalid Label", label)
  return 5

In [3]:
def int_to_classes(i):
  if i==0: return "No DR"
  elif i==1: return "Mild"
  elif i==2: return "Moderate"
  elif i==3: return "Severe"
  elif i==4: return "Proliferative DR"
  
  print("Invalid class ", i)
  return "Invalid Class"

In [4]:
NUM_CLASSES = 5

Height = 128
Width = 128
Depth = 3
inputShape = (Height, Width, Depth)

EPOCHS = 15   #epochs to train
INIT_LR = 1e-3   #initial learning rate
BS = 32   #batch size

ImageNameDataHash = {}
uniquePatientIDList = []



---



In [5]:
def readTrainData(trainDir):
  global ImageNameDataHash

  images = os.listdir(trainDir)
  print("Number of files in " + trainDir + " is " + str(len(images)))

  for imageFileName in images:
    if (imageFileName == "trainLabels.csv"): continue

    imageFullPath = os.path.join(os.path.sep, trainDir, imageFileName)
    img = load_img(imageFullPath)

    arr = img_to_array(img)
    dim1 = arr.shape[0]
    dim2 = arr.shape[1]
    dim3 = arr.shape[2]

    if (dim1<Height or dim2<Width or dim3<Depth):
      print("Error image dimensions are less than expected " + str(arr.shape))
    
    arr = cv2.resize(arr, (Height, Width))
    dim1 = arr.shape[0]
    dim2 = arr.shape[1]
    dim3 = arr.shape[2]

    if (dim1 != Height or dim2 != Width or dim3 != Depth):
      print("Error after resize, image dimensions are not equal to expected " + str(arr.shape))

    #scale the raw pixel intensities to the range [0, 1] - TBD TEST
    arr = np.array(arr, dtype="float") / 255.0
    imageFileName = imageFileName.replace('.jpeg','')
    ImageNameDataHash[str(imageFileName)] = np.array(arr)
  return



---



In [6]:
!mkdir ~/.kaggle
!touch ~/.kaggle/kaggle.json

api_token = {"username":"ahmednasser1601","key":"fd950b67a38861322900a50fdc9f6881"}

with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(api_token, file)

!chmod 600 ~/.kaggle/kaggle.json

In [7]:
!kaggle competitions download diabetic-retinopathy-detection -f sample.zip
!kaggle competitions download diabetic-retinopathy-detection -f sampleSubmission.csv.zip
!kaggle competitions download diabetic-retinopathy-detection -f trainLabels.csv.zip


Downloading sample.zip to /content
  0% 0.00/10.4M [00:00<?, ?B/s] 96% 10.0M/10.4M [00:00<00:00, 103MB/s]
100% 10.4M/10.4M [00:00<00:00, 105MB/s]
Downloading sampleSubmission.csv.zip to /content
  0% 0.00/81.6k [00:00<?, ?B/s]
100% 81.6k/81.6k [00:00<00:00, 42.3MB/s]
Downloading trainLabels.csv.zip to /content
  0% 0.00/69.4k [00:00<?, ?B/s]
100% 69.4k/69.4k [00:00<00:00, 28.2MB/s]


In [8]:
!unzip sample.zip
!unzip sampleSubmission.csv.zip
!unzip trainLabels.csv.zip


Archive:  sample.zip
   creating: sample/
  inflating: sample/10_left.jpeg     
  inflating: sample/10_right.jpeg    
  inflating: sample/13_left.jpeg     
  inflating: sample/13_right.jpeg    
  inflating: sample/15_left.jpeg     
  inflating: sample/15_right.jpeg    
  inflating: sample/16_left.jpeg     
  inflating: sample/16_right.jpeg    
  inflating: sample/17_left.jpeg     
  inflating: sample/17_right.jpeg    
Archive:  sampleSubmission.csv.zip
  inflating: sampleSubmission.csv    
Archive:  trainLabels.csv.zip
  inflating: trainLabels.csv         


In [9]:
!rm sample.zip
!rm sampleSubmission.csv.zip
!rm trainLabels.csv.zip




---



In [10]:
def readTrainCsv():
  raw_df = pd.read_csv('trainLabels.csv', sep=',')
  print(type(raw_df)) #<class 'pandas.core.frame.DataFrame'>
  row_count=raw_df.shape[0] #gives number of row count row_count=35126 
  col_count=raw_df.shape[1] #gives number of col count col count=2
  print("row_count="+str(row_count)+" col count="+str(col_count))
  raw_df["PatientID"] = ''
  header_list = list(raw_df.columns)
  print(header_list) # ['image', 'level', 'PatientID']
  # double check if level of left and right are same or not
  ImageLevelHash = {}
  patientIDList = []

  for index, row in raw_df.iterrows():
    # 0 is image, 1 is level, 2 is PatientID, 3 is data
    key = row[0] + ''
    patientID = row[0] + ''
    patientID = patientID.replace('_right','')
    patientID = patientID.replace('_left','')
    #print("Adding patient ID"+ patientID)
    raw_df.at[index, 'PatientID'] = patientID
    patientIDList.append(patientID)
    ImageLevelHash[key] = str(row[1]) # level
              
  global uniquePatientIDList
  uniquePatientIDList = sorted(set(patientIDList))
  count=0;
  for patientID in uniquePatientIDList:
    left_level = ImageLevelHash[str(patientID+'_left')]
    right_level = ImageLevelHash[str(patientID+'_right')]
    #right_exists = str(patientID+'_right') in raw_df.values
    if (left_level != right_level):
      count = count+1
      #print("Warning for patient="+ str(patientID) + " left_level=" + left_level+ " right_level=" +right_level)

  print("count of images with both left and right eye level not matching="+str(count)) # 2240
  print("number of unique patients="+str(len(uniquePatientIDList))) # 17563
  return raw_df

In [11]:
random.seed(10)
print("Reading trainLabels.csv...")
df = readTrainCsv()

Reading trainLabels.csv...
<class 'pandas.core.frame.DataFrame'>
row_count=35126 col count=2
['image', 'level', 'PatientID']
count of images with both left and right eye level not matching=2240
number of unique patients=17563


In [12]:
for i in range(0,10):
  s = df.loc[df.index[i], 'PatientID'] # get patient id of patients
  print(str(i) + " patient's patientID="+str(s))

0 patient's patientID=10
1 patient's patientID=10
2 patient's patientID=13
3 patient's patientID=13
4 patient's patientID=15
5 patient's patientID=15
6 patient's patientID=16
7 patient's patientID=16
8 patient's patientID=17
9 patient's patientID=17


In [13]:
# df has 3 columns ['image', 'level', 'PatientID']
keepImages =  list(ImageNameDataHash.keys())
df = df[df['image'].isin(keepImages)]
print(len(df)) # 1000

0


In [19]:
#convert hash to dataframe
imageNameArr = []
dataArr = []

for index, row in df.iterrows():
  key = str(row[0])
  if key in ImageNameDataHash:
    imageNameArr.append(key)
    dataArr.append(np.array(ImageNameDataHash[key])) # np.array

df2 = pd.DataFrame({'image': imageNameArr, 'data': dataArr})
df2_header_list = list(df2.columns) 
print(df2_header_list) # ['image', 'data']
print(len(df2)) # 1000
#print(df2.describe(include='all'))
#print(df2.sample(3)) # 3 rows x 2 columns

['image', 'data']
0


In [15]:
if len(df) != len(df2):
  print("Error length of df != df2")
    
for idx in range(0,len(df)):
  if (df.loc[df.index[idx], 'image'] != df2.loc[df2.index[idx], 'image']):
    print("Error " + df.loc[df.index[idx], 'image'] +"==" + df2.loc[df2.index[idx], 'image'])
        
print(df2.dtypes)
print(df.dtypes)

image    float64
data     float64
dtype: object
image        object
level         int64
PatientID    object
dtype: object


In [16]:
df = pd.merge(df2, df, left_on='image', right_on='image', how='outer')
df_header_list = list(df.columns) 
print(df_header_list) # 'image', 'data', level', 'PatientID'
print(len(df)) # 1000
print(df.sample())

ValueError: ignored