# Project - Eye Disease Prediction

## Downloading The Dataset

In [4]:
pip install gdown




In [1]:
import gdown
!gdown --fuzzy https://drive.google.com/file/d/12-ueTXS7OBy7fK1GDrlofWMJkr9ZbM1V/view?usp=drive_link

Downloading...
From (original): https://drive.google.com/uc?id=12-ueTXS7OBy7fK1GDrlofWMJkr9ZbM1V
From (redirected): https://drive.google.com/uc?id=12-ueTXS7OBy7fK1GDrlofWMJkr9ZbM1V&confirm=t&uuid=23d22f93-e46f-422a-ac96-603f005bbd5e
To: C:\Users\Priyanka\Downloads\XAI Proj\archive.zip

  0%|          | 0.00/771M [00:00<?, ?B/s]
  0%|          | 524k/771M [00:00<15:56, 806kB/s]
  0%|          | 1.05M/771M [00:01<13:13, 971kB/s]
  0%|          | 1.57M/771M [00:01<11:55, 1.08MB/s]
  0%|          | 2.10M/771M [00:01<10:37, 1.21MB/s]
  0%|          | 2.62M/771M [00:02<10:07, 1.27MB/s]
  0%|          | 3.15M/771M [00:02<09:52, 1.30MB/s]
  0%|          | 3.67M/771M [00:02<08:42, 1.47MB/s]
  1%|          | 4.19M/771M [00:03<07:40, 1.67MB/s]
  1%|          | 4.72M/771M [00:03<07:04, 1.81MB/s]
  1%|          | 5.24M/771M [00:03<06:44, 1.89MB/s]
  1%|          | 5.77M/771M [00:03<06:30, 1.96MB/s]
  1%|          | 6.29M/771M [00:04<06:40, 1.91MB/s]
  1%|          | 6.82M/771M [00:04<07:27, 1.71MB/

In [6]:
!unzip archive.zip

'unzip' is not recognized as an internal or external command,
operable program or batch file.


## Splitting The Dataset

In [7]:
pip install split-folders

Note: you may need to restart the kernel to use updated packages.


In [9]:
import splitfolders
splitfolders.ratio("dataset", output="output", seed=1337, ratio=(.7, .15, .15), group_prefix=None)

Copying files: 0 files [00:00, ? files/s]


In [6]:
pip install scipy

Note: you may need to restart the kernel to use updated packages.


In [10]:
import numpy as np
import os
from glob import glob
import tensorflow as tf
import scipy
from tensorflow.keras.applications import VGG16, ResNet50, InceptionV3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model

In [11]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input)

test_val_datagen = ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=tf.keras.applications.vgg16.preprocess_input)

In [12]:
train_generator = train_datagen.flow_from_directory(
    'output/train',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

validation_generator = test_val_datagen.flow_from_directory(
    'output/val',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

test_generator = test_val_datagen.flow_from_directory(
    'output/test',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

Found 0 images belonging to 4 classes.
Found 0 images belonging to 4 classes.
Found 0 images belonging to 4 classes.


## Part 1 - Extracting Labels And Features

### Function For Extracting Features

In [10]:
def extract_features(model, generator, sample_count):
    batch_size = generator.batch_size
    feature_size = model.output_shape[1:]
    features = np.zeros((sample_count,) + feature_size)
    labels = np.zeros((sample_count, generator.num_classes))

    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = model.predict(inputs_batch)
        features[i * batch_size: (i + 1) * batch_size] = features_batch
        labels[i * batch_size: (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break
    return features, labels

### Part A - VGG16 Features

In [11]:
vgg16_model = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
vgg16_feature_extractor = Model(inputs=vgg16_model.input, outputs=vgg16_model.output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
vgg16_model.summary()

In [13]:
vgg16_train_features, train_labels = extract_features(vgg16_feature_extractor, train_generator, train_generator.samples)
vgg16_val_features, val_labels = extract_features(vgg16_feature_extractor, validation_generator, validation_generator.samples)
vgg16_test_features, test_labels = extract_features(vgg16_feature_extractor, test_generator, test_generator.samples)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 29s/step


I0000 00:00:1710699967.104459     178 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36

In [16]:
os.mkdir('data')

In [17]:
np.save('data/vgg16_train_features.npy', vgg16_train_features)
np.save('data/train_labels.npy', train_labels)
np.save('data/vgg16_val_features.npy', vgg16_val_features)
np.save('data/val_labels.npy', val_labels)
np.save('data/vgg16_test_features.npy', vgg16_test_features)
np.save('data/test_labels.npy', test_labels)

### Part B - ResNet50 Features

In [18]:
resnet_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
resnet_feature_extractor = Model(inputs=resnet_model.input, outputs=resnet_model.output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [19]:
resnet_model.summary()

In [20]:
resnet_train_features, _ = extract_features(resnet_feature_extractor, train_generator, train_generator.samples)
resnet_val_features, _ = extract_features(resnet_feature_extractor, validation_generator, validation_generator.samples)
resnet_test_features, _ = extract_features(resnet_feature_extractor, test_generator, test_generator.samples)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29

In [21]:
np.save('data/resnet_train_features.npy', resnet_train_features)
np.save('data/resnet_val_features.npy', resnet_val_features)
np.save('data/resnet_test_features.npy', resnet_test_features)

### Part C - InceptionV3 Features

In [22]:
inception_model = InceptionV3(include_top=False, weights='imagenet', input_shape=(299, 299, 3))
inception_feature_extractor = Model(inputs=inception_model.input, outputs=inception_model.output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [23]:
inception_model.summary()

In [24]:
inception_train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    preprocessing_function=tf.keras.applications.inception_v3.preprocess_input)

In [25]:
inception_train_generator = inception_train_datagen.flow_from_directory(
    'output/train',
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical')

inception_test_generator = test_val_datagen.flow_from_directory(
    'output/test',
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical')

inception_val_generator = test_val_datagen.flow_from_directory(
    'output/val',
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical')

Found 2949 images belonging to 4 classes.
Found 637 images belonging to 4 classes.
Found 631 images belonging to 4 classes.


In [26]:
inception_train_features, _ = extract_features(inception_feature_extractor, inception_train_generator, inception_train_generator.samples)
inception_val_features, _ = extract_features(inception_feature_extractor, inception_val_generator, inception_val_generator.samples)
inception_test_features, _ = extract_features(inception_feature_extractor, inception_test_generator, inception_test_generator.samples)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 23s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55

In [27]:
np.save('data/inception_train_features.npy', inception_train_features)
np.save('data/inception_val_features.npy', inception_val_features)
np.save('data/inception_test_features.npy', inception_test_features)

## Part 2 - Flattening And Concatenating Features

### Part A - Train Features

In [28]:
vgg16_train = np.load('data/vgg16_train_features.npy')
resnet_train = np.load('data/resnet_train_features.npy')
inception_train = np.load('data/inception_train_features.npy')

In [29]:
vgg16_train_flat = vgg16_train.reshape((vgg16_train.shape[0], -1))
resnet_train_flat = resnet_train.reshape((resnet_train.shape[0], -1))
inception_train_flat = inception_train.reshape((inception_train.shape[0], -1))

In [30]:
combined_train_features = np.concatenate([vgg16_train_flat, resnet_train_flat, inception_train_flat], axis=1)

In [31]:
np.save('data/combined_train_features.npy', combined_train_features)

### Part B - Validation Features

In [32]:
vgg16_val = np.load('data/vgg16_val_features.npy')
resnet_val = np.load('data/resnet_val_features.npy')
inception_val = np.load('data/inception_val_features.npy')

In [33]:
vgg16_val_flat = vgg16_val.reshape((vgg16_val.shape[0], -1))
resnet_val_flat = resnet_val.reshape((resnet_val.shape[0], -1))
inception_val_flat = inception_val.reshape((inception_val.shape[0], -1))

In [34]:
combined_val_features = np.concatenate([vgg16_val_flat, resnet_val_flat, inception_val_flat], axis=1)

In [35]:
np.save('data/combined_val_features.npy', combined_val_features)

### Part C - Test Features

In [36]:
vgg16_test = np.load('data/vgg16_test_features.npy')
resnet_test = np.load('data/resnet_test_features.npy')
inception_test = np.load('data/inception_test_features.npy')

In [37]:
vgg16_test_flat = vgg16_test.reshape((vgg16_test.shape[0], -1))
resnet_test_flat = resnet_test.reshape((resnet_test.shape[0], -1))
inception_test_flat = inception_test.reshape((inception_test.shape[0], -1))

In [38]:
combined_test_features = np.concatenate([vgg16_test_flat, resnet_test_flat, inception_test_flat], axis=1)

In [39]:
np.save('data/combined_test_features.npy', combined_test_features)

# Part 3 - Training And Predicting With ANN, SVM, RF And NB

In [8]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [9]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [4]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Model
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

2024-03-17 18:42:50.104028: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-17 18:42:50.104143: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-17 18:42:50.263079: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


### Preparing The Data

In [5]:
combined_train_features = np.load('data/combined_train_features.npy')
combined_val_features = np.load('data/combined_val_features.npy')
combined_test_features = np.load('data/combined_test_features.npy')

In [6]:
train_labels = np.load('data/train_labels.npy')
val_labels = np.load('data/val_labels.npy')
test_labels = np.load('data/test_labels.npy')

In [7]:
train_labels_flat = np.argmax(train_labels, axis=1)
val_labels_flat = np.argmax(val_labels, axis=1)
test_labels_flat = np.argmax(test_labels, axis=1)

### ANN

In [8]:
def create_ann_classifier(input_shape, output_shape):
    model = Sequential([
        Dense(512, activation='relu', input_shape=(input_shape,)),
        Dropout(0.5),
        Dense(output_shape, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [9]:
ann_classifier = create_ann_classifier(combined_train_features.shape[1], train_labels.shape[1])
ann_history = ann_classifier.fit(combined_train_features, train_labels,
                                 epochs=30, batch_size=32,
                                 validation_data=(combined_val_features, val_labels),
                                 verbose=2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30


I0000 00:00:1710701028.656837    3840 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1710701028.674466    3840 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1710701034.676278    3843 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1710701036.845144    3841 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


93/93 - 14s - 146ms/step - accuracy: 0.2811 - loss: 7.2497 - val_accuracy: 0.2726 - val_loss: 1.3811
Epoch 2/30


W0000 00:00:1710701037.834177    3842 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


93/93 - 5s - 49ms/step - accuracy: 0.2543 - loss: 1.4328 - val_accuracy: 0.3043 - val_loss: 1.9817
Epoch 3/30
93/93 - 4s - 48ms/step - accuracy: 0.2530 - loss: 1.3853 - val_accuracy: 0.2979 - val_loss: 1.3761
Epoch 4/30
93/93 - 5s - 49ms/step - accuracy: 0.2557 - loss: 1.3860 - val_accuracy: 0.3090 - val_loss: 1.3736
Epoch 5/30
93/93 - 5s - 49ms/step - accuracy: 0.2604 - loss: 1.3860 - val_accuracy: 0.3074 - val_loss: 1.3735
Epoch 6/30
93/93 - 5s - 49ms/step - accuracy: 0.2604 - loss: 1.3855 - val_accuracy: 0.2995 - val_loss: 1.4272
Epoch 7/30
93/93 - 4s - 48ms/step - accuracy: 0.2601 - loss: 1.3864 - val_accuracy: 0.2884 - val_loss: 1.3803
Epoch 8/30
93/93 - 5s - 49ms/step - accuracy: 0.2604 - loss: 1.3856 - val_accuracy: 0.2837 - val_loss: 1.3802
Epoch 9/30
93/93 - 5s - 52ms/step - accuracy: 0.2604 - loss: 1.3855 - val_accuracy: 0.2868 - val_loss: 1.3805
Epoch 10/30
93/93 - 5s - 48ms/step - accuracy: 0.2604 - loss: 1.3858 - val_accuracy: 0.2837 - val_loss: 1.3804
Epoch 11/30
93/93 - 

### SVM

In [13]:
svm_classifier = SVC(kernel='rbf', gamma=0.1)

In [14]:
svm_classifier.fit(combined_train_features, train_labels_flat)

### Random Forest

In [17]:
rf_classifier = RandomForestClassifier(n_estimators=100)

In [18]:
rf_classifier.fit(combined_train_features, train_labels_flat)

### Naive Bayes

In [21]:
nb_classifier = GaussianNB()

In [22]:
nb_classifier.fit(combined_train_features, train_labels_flat)

### Evaluating Models

In [10]:
def evaluate_model(model, features, true_labels):
    predictions = model.predict(features)
    return accuracy_score(true_labels, predictions)

In [11]:
ann_val_accuracy = ann_classifier.evaluate(combined_val_features, val_labels, verbose=0)[1]
ann_test_accuracy = ann_classifier.evaluate(combined_test_features, test_labels, verbose=0)[1]

W0000 00:00:1710701210.301387    3840 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


In [15]:
svm_val_accuracy = evaluate_model(svm_classifier, combined_val_features, val_labels_flat)
svm_test_accuracy = evaluate_model(svm_classifier, combined_test_features, test_labels_flat)

In [19]:
rf_val_accuracy = evaluate_model(rf_classifier, combined_val_features, val_labels_flat)
rf_test_accuracy = evaluate_model(rf_classifier, combined_test_features, test_labels_flat)

In [23]:
nb_val_accuracy = evaluate_model(nb_classifier, combined_val_features, val_labels_flat)
nb_test_accuracy = evaluate_model(nb_classifier, combined_test_features, test_labels_flat)

In [28]:
results = {
    "Model": ["ANN", "SVM", "Random Forest", "Naive Bayes"],
    "Validation Accuracy": [ann_val_accuracy, svm_val_accuracy, rf_val_accuracy, nb_val_accuracy],
    "Test Accuracy": [ann_test_accuracy, svm_test_accuracy, rf_test_accuracy, nb_test_accuracy]
}

In [29]:
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Model,Validation Accuracy,Test Accuracy
0,ANN,0.886922,0.879725
1,SVM,0.945658,0.93873
2,Random Forest,0.916564,0.907936
3,Naive Bayes,0.853548,0.847188
