# **Deep learning - Pràctica**
## Creació d'un model per al diagnòstic de glaucoma
### Jordi Salleras Alabau

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import cv2
from tensorflow.keras.applications import EfficientNetB0
import numpy as np
from tensorflow import keras
from keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from tensorflow.keras import layers
from keras.layers import Dense, Conv2D, Flatten, Activation, Dropout, MaxPooling2D, BatchNormalization
from tensorflow.keras.optimizers import Adam, SGD, Adadelta, Adagrad
import matplotlib.pyplot as plt
import time
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import keras_tuner as kt

### **Secció 1. Anàlisi Exploratòria de les dades**

### **Secció 2. Entrenament d’una xarxa neuronal sobre una única partició**

In [None]:
# Fold 0
train_dataset_path = os.listdir('../input/glaucoma/archive/practica_DL_UOC_2022/Fold0/train')
test_dataset_path = os.listdir('../input/glaucoma/archive/practica_DL_UOC_2022/Fold0/test')
valid_dataset_path = os.listdir('../input/glaucoma/archive/practica_DL_UOC_2022/Fold0/valid')
print(train_dataset_path)
print('Types of classes labels found: ' + str(len(train_dataset_path)))

In [None]:
train_path = '../input/glaucoma/archive/practica_DL_UOC_2022/Fold0/train'
class_labels = []
for item in train_dataset_path:
    all_classes = os.listdir(train_path + '/' + item)    
    for image in all_classes:
        class_labels.append((item, str(train_path + '/' + item) + '/' + image))
        
df = pd.DataFrame(data=class_labels, columns = ['Label', 'Image'])

In [None]:
df.head()

In [None]:
print('Total number of images in the train dataset: ', len(df))

label_count = df['Label'].value_counts()
print(label_count)

In [None]:
def process_images_folder(folder_path):
    folder_path_dir = os.listdir(folder_path)
    img_size = 224

    images = []
    labels = []

    for i in folder_path_dir:
        data_path = folder_path + '/' + str(i)
        filenames = [i for i in os.listdir(data_path)]

        for f in filenames:
            img = cv2.imread(data_path + '/' + f)
            img = cv2.resize(img, (img_size, img_size))
            images.append(img)
            labels.append(i)
    
    images = np.array(images)
    # images = images.astype('float32') / 255.0
    y = labels 
    ''' 
    y_labelEncoder = LabelEncoder()
    y = y_labelEncoder.fit_transform(y)
    #y = y.reshape(-1,1)
    y = to_categorical(y)
       
    ct = ColumnTransformer([('my_ohe', OneHotEncoder(), [0])], remainder='passthrough')
    Y = ct.fit_transform(y)
    '''
    encoder = LabelEncoder()
    encoder.fit(y)
    encoded_y = encoder.transform(y)
    return images,encoded_y

In [None]:
X_train, y_train = process_images_folder('../input/glaucoma/archive/practica_DL_UOC_2022/Fold0/train')
X_test, y_test = process_images_folder('../input/glaucoma/archive/practica_DL_UOC_2022/Fold0/test')
X_val, y_val = process_images_folder('../input/glaucoma/archive/practica_DL_UOC_2022/Fold0/valid')

In [None]:
def plot_hist(hist):
    plt.plot(hist.history["accuracy"])
    plt.plot(hist.history["val_accuracy"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

# **EfficientNet implementation**

## Model 1

In [None]:
def model_builder(hp):
    base_model = tf.keras.applications.efficientnet.EfficientNetB0(include_top=False, weights='imagenet')
    base_model.trainable = False

    inputs = layers.Input(shape = (224,224,3), name='inputLayer')
    x = base_model(inputs, training = False)
    x = layers.GlobalAveragePooling2D(name='poolingLayer')(x)
    x = layers.BatchNormalization(name='batchLayer')(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(1, activation="sigmoid", name='outputLayer')(x)

    model = tf.keras.Model(inputs, outputs, name = "Model1")

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=150,
                     factor=3)

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [None]:
tuner.search(X_train, y_train, 
             epochs=150, 
             validation_data = (X_val, y_val), 
             callbacks=[callback])

In [None]:
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.get('learning_rate'))

In [None]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
model1 = tuner.hypermodel.build(best_hps)
model1.summary()

In [None]:
model2 = keras.models.clone_model(model1)
model3 = keras.models.clone_model(model1)

In [None]:
history = model1.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[callback])

In [None]:
model_eval = model1.evaluate(X_test, y_test, return_dict=True)

In [49]:
preds = model1.predict(X_test)
preds1 = []
for pred in preds:
    if pred > 0.5:
        preds1.append(1)
    else:
        preds1.append(0)

print('**MODEL 1**')
cm1 = confusion_matrix(y_test, preds1)
print(f'Matriu de confusió: \n', cm1)
print('********'*7)
cr1 = classification_report(y_test, preds1)
print('Informe de la classificació: \n', cr1)

**MODEL 1**
Matriu de confusió: 
 [[72 20]
 [11 71]]
********************************************************
Informe de la classificació: 
               precision    recall  f1-score   support

           0       0.87      0.78      0.82        92
           1       0.78      0.87      0.82        82

    accuracy                           0.82       174
   macro avg       0.82      0.82      0.82       174
weighted avg       0.83      0.82      0.82       174



## Model 2

In [None]:
def unfreeze_model(model):
    for layer in model.layers[-20:]:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    model.compile(
        optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"]
    )
    


model2.set_weights(model1.get_weights()) 
unfreeze_model(model2)
model2.summary()

In [None]:
hist = model2.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[callback])
plot_hist(hist)

In [None]:
model_eval = model2.evaluate(X_test, y_test, return_dict=True)
model_eval

In [None]:
preds = model2.predict(X_test)
preds2 = []
for pred in preds:
    if pred > 0.5:
        preds2.append(1)
    else:
        preds2.append(0)

print('**MODEL 2**')
cm2 = confusion_matrix(y_test, preds2)
print(f'Matriu de confusió: \n', cm2)
print('********'*7)
cr2 = classification_report(y_test, preds2)
print('Informe de la classificació: \n', cr2)

## Model 3

In [None]:
model3.set_weights(model2.get_weights()) 
for layer in model3.layers:
    layer.trainable = True
model3.summary()

In [None]:
preds = model3.predict(X_test)
preds3 = []
for pred in preds:
    if pred > 0.5:
        preds3.append(1)
    else:
        preds3.append(0)

print('**MODEL 2**')
cm3 = confusion_matrix(y_test, preds3)
print(f'Matriu de confusió: \n', cm3)
print('********'*7)
cr3 = classification_report(y_test, preds3)
print('Informe de la classificació: \n', cr3)

## Model 4

In [None]:

model = keras.models.Sequential([
    keras.layers.Conv2D(64, (3, 3), activation='relu',input_shape=(224, 224, 3)),
    keras.layers.Conv2D(32, (3, 3), activation='relu', padding = 'same'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.2),
    
    keras.layers.Conv2D(64, (3, 3), activation='relu', padding = 'same'),
    keras.layers.Conv2D(64, (3, 3), activation='relu', padding = 'same'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.2),
    
    keras.layers.Conv2D(128, (3, 3), activation='relu', padding = 'same'),
    keras.layers.Conv2D(128, (3, 3), activation='relu', padding = 'same'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.2),
    
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    
    keras.layers.Flatten(),
    keras.layers.Dense(1, activation="sigmoid"),
])

print(model.summary())

In [None]:
# Compile
# TODO
model.compile(keras.optimizers.Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Entrenament del model
# TODO
history = model.fit(X_train, y_train,
          validation_data = (X_val, y_val),
          epochs=50,
          batch_size=16,
          verbose = 1)

In [None]:
model_eval = model.evaluate(X_test, y_test, return_dict=True)
model_eval

In [None]:
plot_hist(history)