# Import libraries

In [None]:
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
from collections import Counter
 
#configure
# Sets matplotlib to inline mode, which displays graphs below the corresponding cell.
%matplotlib inline  
style.use('fivethirtyeight')
sns.set(style = 'whitegrid',color_codes = True)

#model selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score,precision_score,recall_score
from sklearn.metrics import confusion_matrix,roc_curve
from sklearn.metrics import roc_auc_score,precision_recall_curve,auc,f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

#preprocess.
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array, array_to_img, load_img
from sklearn.utils import resample
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE

#dl libraries
from keras import backend as K
from tensorflow.keras import layers, Model
from keras.models import Sequential
from keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam,SGD,Adagrad,Adadelta,RMSprop
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras import regularizers

# specifically for cnn
from keras.layers import Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPool2D, BatchNormalization
 
import tensorflow as tf
import random as rn

# specifically for manipulating zipped images and getting numpy arrays of pixel values of images.
import cv2                  
import numpy as np  
from tqdm import tqdm
import os                   
from random import shuffle  
from zipfile import ZipFile
from PIL import Image

from sklearn.metrics import classification_report
import itertools
from tensorflow import keras
import re

# Data Preprocessing

In [None]:
# Target image size (pixels) used for the network input and the data generators.
img_height = 299
img_width = 299
# Number of images per generator batch.
batch_size = 32
# Maximum number of training epochs passed to model.fit.
epochs = 50

In [None]:
# Directory holding the training images; every file in it becomes one row.
p1 = '/kaggle/input/diabetic-retinopathy-classification-for-superai-3/Train/Train'
path1 = pd.Series(os.listdir(p1))
df = path1.to_frame(name = 'id_code')

# Full path to each image, then reduce id_code to the file name minus its
# last four characters (i.e. a '.xxx' extension is assumed).
df['path'] = df['id_code'].map(lambda file_name: p1 + '/' + file_name)
df['id_code'] = df['id_code'].str.slice(stop = -4)

# Attach the labels from Train.csv by id_code (inner join), and store the
# diagnosis as a string column.
label = pd.read_csv('/kaggle/input/diabetic-retinopathy-classification-for-superai-3/Train.csv')
Train = pd.merge(df, label, on = 'id_code')
Train['diagnosis'] = Train['diagnosis'].astype(str)

In [None]:
# Preview the first rows of the merged table (id_code, path and the columns
# merged in from Train.csv).
Train.head()

In [None]:
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.nasnet import NASNetLarge
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.densenet import DenseNet121, DenseNet201
from tensorflow.keras.applications.mobilenet import MobileNet
#from tensorflow.keras.applications.convnext import ConvNeXtXLarge
from tensorflow.keras.applications.efficientnet import EfficientNetB0, EfficientNetB3, EfficientNetB4
from collections import Counter
from sklearn.model_selection import StratifiedKFold
import sklearn
from tensorflow.keras.applications.resnet import ResNet152

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize = False,
                          title = 'Confusion matrix',
                          cmap = plt.cm.Blues):
    """
    Draw a confusion matrix as a colour-coded grid with a value in each cell.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are labelled 'True label' and columns
        'Predicted label' on the plot.
    classes : sequence
        Tick labels, one per row/column of `cm`.
    normalize : bool, default False
        If True, every row is divided by its sum before plotting, so that
        both the colours and the printed numbers are per-row fractions.
        Rows whose sum is zero are shown as all zeros.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap passed to `plt.imshow`.

    Returns
    -------
    None. The figure is displayed with `plt.show()`.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the heat map and the cell text agree.
    if normalize:
        row_totals = cm.sum(axis = 1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out = np.zeros(cm.shape, dtype = 'float'),
                       where = row_totals != 0)

    plt.imshow(cm, interpolation = 'nearest', cmap = cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation = 45)
    plt.yticks(tick_marks, classes)
    # grid(None) toggles the grid; False switches it off unconditionally.
    plt.grid(False)

    # Integer counts are printed as-is, fractional values with two decimals.
    value_format = 'd' if cm.dtype.kind in 'iu' else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max()/2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], value_format),
                 horizontalalignment = "center",
                 color = "white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

def plot_roc_curve(fpr, tpr, label = None):
    """
    Plot a ROC curve together with the chance diagonal.

    Modified from
    Hands-On Machine learning with Scikit-Learn and TensorFlow; p.91

    Parameters
    ----------
    fpr : array-like
        False positive rates (x-axis values).
    tpr : array-like
        True positive rates (y-axis values), same length as `fpr`.
    label : str, optional
        Name of the curve. When given, it is shown in a legend.

    Returns
    -------
    None. The figure is displayed with `plt.show()`.
    """
    plt.figure(figsize = (10, 10))
    plt.title('ROC Curve')
    plt.plot(fpr, tpr, linewidth = 2, label = label)
    # Dashed diagonal from (0, 0) to (1, 1) as a reference line.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.axis([-0.005, 1, 0, 1.005])
    plt.xticks(np.arange(0,1, 0.05), rotation = 90)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate (Recall)")
    # Without a legend the label passed to plot() is never displayed.
    if label is not None:
        plt.legend(loc = 'lower right')
    plt.show()

In [None]:
# Learning-rate schedule: watches val_accuracy and applies factor 0.1 with a
# patience of 5 epochs, logging each change (verbose = 1).
red_lr = ReduceLROnPlateau(
    monitor = 'val_accuracy',
    factor = 0.1,
    patience = 5,
    verbose = 1,
)

# Early stopping: watches val_accuracy (higher is better) with a patience of
# 10 epochs and asks Keras to restore the best weights.
early_stop = EarlyStopping(
    monitor = 'val_accuracy',
    mode = 'max',
    patience = 10,
    restore_best_weights = True,
)

In [None]:
model_name = 'InceptionV3'

# ImageNet-pretrained InceptionV3 without its classification head.
model = InceptionV3(include_top = False, weights = 'imagenet', input_shape = (img_height, img_width, 3))

# Attach an L2 penalty to every backbone layer that has a kernel.
# NOTE(review): as far as I know, Keras collects regularisation losses when a
# layer is built, so assigning the attribute afterwards may have no effect
# unless the model is rebuilt from its config — confirm via `model.losses`.
regularizer = regularizers.l2(0.0001)
for layer in model.layers:
    if hasattr(layer, 'kernel_regularizer'):
        layer.kernel_regularizer = regularizer

# Fine-tune the whole backbone rather than freezing it.
model.trainable = True

# Classification head: pooled features -> 1024-unit dense layer -> 5-way softmax.
x = GlobalAveragePooling2D()(model.output)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
# NOTE(review): the pooled output should already be flat, so this Flatten
# looks redundant; kept so the layer list of saved models stays unchanged.
x = Flatten()(x)
x = Dense(1024, activation ='relu', kernel_regularizer = regularizers.l2(0.0001))(x)
x = Dropout(0.5)(x)
predictions = Dense(5, activation = 'softmax')(x)

model = Model(inputs = model.input, outputs = predictions)

# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` is only honoured by older/legacy optimizers; newer
# Keras releases ignore or reject it — confirm against the installed version.
model.compile(optimizer = Adam(learning_rate = 0.0001, decay = 1e-5), loss = "sparse_categorical_crossentropy", metrics = ['accuracy'])

In [None]:
# Training with K-fold cross validation
# Five stratified, shuffled folds with a fixed seed so the split is repeatable.
kf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)
X = np.array(Train['id_code'])
Y = np.array(Train['diagnosis'])

# Materialise all (train_index, test_index) pairs once, report them, then
# unpack them into the parallel lists used by the per-fold training cells.
splits = list(kf.split(X, Y))
for fold_no, (train_index, test_index) in enumerate(splits):
    print(f"Fold {fold_no+1}:")
    print(f"  Train: index = {train_index}")
    print(f"  Test:  index = {test_index}")

fold = list(range(1, len(splits) + 1))      # 1-based fold numbers
train = [train_part for train_part, _ in splits]   # row positions used for training
test = [test_part for _, test_part in splits]      # row positions held out

In [None]:
# Both generators apply the InceptionV3 preprocessing function to each image.
inception_preprocess = keras.applications.inception_v3.preprocess_input

# Augmentation settings applied to training images only.
augmentation_options = {
    'horizontal_flip': True,
    'vertical_flip': False,
    'height_shift_range': 0.05,
    'width_shift_range': 0.02,
    'rotation_range': 5,
    'shear_range': 0.01,
    'fill_mode': 'nearest',
    'zoom_range': 0.05,
}
train_image_generator = ImageDataGenerator(preprocessing_function = inception_preprocess,
                                           **augmentation_options)

# Validation images are only preprocessed, never augmented.
val_image_generator = ImageDataGenerator(preprocessing_function = inception_preprocess)

In [None]:
#Fold 2
def _plot_history(history, metric, title, ylabel):
    """Plot the training and validation curves of one metric across epochs."""
    plt.figure(figsize = (10, 10))
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epochs')
    plt.legend(['train', 'test'])
    plt.show()


i = 1  # zero-based position in the fold/train/test lists (fold[i] == i + 1)
class_names = ['0', '1', '2', '3', '4']

trainData = X[train[i]]
testData = X[test[i]]
## create train, valid dataframe and thus train_gen , valid_gen for this fold
train_df = Train.loc[Train['id_code'].isin(list(trainData))]
valid_df = Train.loc[Train['id_code'].isin(list(testData))]

# Settings shared by the training and validation iterators.
flow_options = dict(directory = None,
                    x_col = 'path',
                    y_col = 'diagnosis',
                    class_mode = 'sparse',
                    classes = class_names,
                    target_size = (img_height, img_width),
                    color_mode = 'rgb',
                    batch_size = batch_size,
                    seed = 42)

train_gen = train_image_generator.flow_from_dataframe(dataframe = train_df,
                                                      shuffle = True,
                                                      **flow_options)

# Validation data is not shuffled, so its order is the same on every pass.
valid_gen = val_image_generator.flow_from_dataframe(dataframe = valid_df,
                                                    shuffle = False,
                                                    **flow_options)

print(f'Training for fold {fold[i]} ...')
# Use a separate name for the label counts: reusing `train` here would
# overwrite the list of fold indices and break re-running this cell with
# another value of i.
train_counts = Counter(train_gen.labels)
val = Counter(valid_gen.labels)
print('Train:', train_counts)
print('Val:', val)

# Balanced class weights computed from this fold's training split only, so
# the held-out fold does not influence the weighting.
train_class_weights = sklearn.utils.class_weight.compute_class_weight(
                                            class_weight = 'balanced',
                                            classes = np.array(class_names),
                                            y = train_df['diagnosis'].values.tolist())
# Keys 0..4 follow the order of class_names, matching the sparse labels.
class_weight = dict(enumerate(train_class_weights))
print(class_weight)

# len(valid_gen) counts the final partial batch as well; n//batch_size would
# leave the tail of the unshuffled validation set out of every validation pass.
validation_steps = len(valid_gen)

History = model.fit(
            train_gen,
            steps_per_epoch = train_gen.n//batch_size,
            epochs = epochs,
            validation_data = valid_gen,
            validation_steps = validation_steps,
            class_weight = class_weight,
            callbacks = [red_lr, early_stop])

model.save(f'/kaggle/working/{model_name}_fold_{i+1}.h5')

# Validation metrics of the LAST epoch run; these can differ from the
# evaluate() result below when early stopping restored earlier weights.
l = History.history['val_loss'][-1]
ac = History.history['val_accuracy'][-1]

# Re-evaluate the final model on the complete validation fold.
valid_gen.reset()
lo, accu = model.evaluate(valid_gen, steps = validation_steps)

print('model name: ', model_name) 
print(f'Score for fold {fold[i]}: Loss = {l}; Accuracy = {ac}')
print(lo,accu)

_plot_history(History, 'loss', 'Model Loss', 'Loss')
_plot_history(History, 'accuracy', 'Model Accuracy', 'Accuracy')