# OCT + iPhone + Samsung con validación cruzada

In [3]:
import os
import cv2
import seaborn as sn 
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import random
import numpy as np
import glob as glob
from sklearn.model_selection import train_test_split
import csv
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report  
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.utils import class_weight
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from keras.applications.vgg19 import VGG19
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score

**PREPARACIÓN DE LOS DATOS**

In [4]:
color = 'rgb' #'grayscale' o 'rgb'

escala = 150

In [5]:
def cargar_datos(path1, path2, escala = escala, color = color):
    
    EMD = os.listdir(path1)
    NO_EMD = os.listdir(path2)
    
    data = []
    labels = []

    for i in EMD:   
        image=tf.keras.preprocessing.image.load_img(path1+'/'+i, color_mode= color, 
        target_size= (escala, escala))
        image=np.array(image)
        data.append(image)
        labels.append(1)
    for i in NO_EMD:   
        image=tf.keras.preprocessing.image.load_img(path2+'/'+i, color_mode= color, 
        target_size= (escala, escala))
        image=np.array(image)
        data.append(image)
        labels.append(0)
        
    data = np.array(data)
    labels = np.array(labels)
    
    return data, labels

In [6]:
dataset_iphone = cargar_datos('Datos preprocesados INP EMD/iPhone/EMD', 'Datos preprocesados INP EMD/iPhone/NO EMD')

In [7]:
dataset_samsung = cargar_datos('Datos preprocesados INP EMD/Samsung/EMD', 'Datos preprocesados INP EMD/Samsung/NO EMD')

In [8]:
dataset = cargar_datos('Datos preprocesados INP EMD/OCT/EMD', 'Datos preprocesados INP EMD/OCT/NO EMD')

In [9]:
X = np.concatenate((dataset[0],dataset_samsung[0]))

X = np.concatenate((X,dataset_iphone[0]))

y = np.concatenate((dataset[1],dataset_samsung[1]))

y = np.concatenate((y,dataset_iphone[1]))

**DEFINICIÓN DEL MODELO TRANSFER LEARNING CON FIT() / SCORE() / PREDICT()**

In [10]:
class KerasModel(BaseEstimator, ClassifierMixin):
    
    dataset_iphone = cargar_datos('Datos preprocesados INP EMD/iPhone/EMD', 'Datos preprocesados INP EMD/iPhone/NO EMD')
    dataset_samsung = cargar_datos('Datos preprocesados INP EMD/Samsung/EMD', 'Datos preprocesados INP EMD/Samsung/NO EMD')
    dataset = cargar_datos('Datos preprocesados INP EMD/OCT/EMD', 'Datos preprocesados INP EMD/OCT/NO EMD')
    
    X = np.concatenate((dataset[0],dataset_samsung[0]))

    X = np.concatenate((X,dataset_iphone[0]))

    y = np.concatenate((dataset[1],dataset_samsung[1]))

    y = np.concatenate((y,dataset_iphone[1]))
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    
    def fit(self, X_train, y_train):
        
        train_labels_categorical = to_categorical(y_train, num_classes=2)
        
        X_train = preprocess_input(X_train) 
        y_train = preprocess_input(y_train)
        
        ## Loading VGG16 model
        base_model = VGG19(weights="imagenet", include_top=False, input_shape=X_train[0].shape)
        base_model.trainable = False ## Not trainable weights
        
        flatten_layer = layers.Flatten()
        dense_layer_1 = layers.Dense(50, activation='relu')
        dense_layer_2 = layers.Dense(20, activation='relu')
        prediction_layer = layers.Dense(2, activation='softmax')

        model = models.Sequential([
            base_model,
            flatten_layer,
            dense_layer_1,
            dense_layer_2,
            prediction_layer
        ])
        
        classes = np.unique(y_train)
        class_weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_train)
        dic_class_weights = {0:class_weights[0], 1:class_weights[1]}
        
        model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        )

        es = EarlyStopping(monitor='val_accuracy', mode='max', patience=20,  restore_best_weights=True)

        model.fit(x=X_train,y=train_labels_categorical, epochs=200, validation_split=0.2, batch_size=32, callbacks=[es], class_weight=dic_class_weights)

        self.model = model
        
        return self
    
    def score(self, X_test, y_test):
        
        predictions = self.model.predict(X_test)
        y_pred = []
        
        for e in predictions:
            if e[0]>e[1]:
                y_pred.append(0)
            else:
                y_pred.append(1)
                
        return accuracy_score(y_test, y_pred)
    
    def predict(self, X_test):
                
        return self.model.predict(X_test)

**VALIDACIÓN CRUZADA**

Evaluar una puntuación mediante validación cruzada.

Devuelve una matriz de puntuaciones del estimador para cada ejecución de la validación cruzada (en este caso hacemos 5).

In [11]:
def val_score(X, y):
    
    clf = KerasModel()
    scores = cross_val_score(clf, X, y)
    
    print("Con cross_val_score() hemos obtenido ")
    print(scores)
    
    print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

In [29]:
def val_predict(X, y):
    
    clf = KerasModel()
    y_pred = cross_val_predict(clf, X, y)
    
    y_pred_bin = []

    for e in y_pred:
        if e[0]>e[1]:
            y_pred_bin.append(0)
        else:
            y_pred_bin.append(1)
    
    roc_score = roc_auc_score(y, y_pred_bin, multi_class='ovr')
    print("El valor AUC ROC para esta predicción es ")
    print(roc_score)
    
    f1 = f1_score(y, y_pred_bin, average='weighted')
    print("El F1 score para esta predicción es ")
    print(f1)
    
    matriz = confusion_matrix(y, y_pred_bin)
    print("La matriz de confución de esta predicción es ")
    print(matriz)

## VGG19

In [12]:
val_score(X, y)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200


Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200


Con cross_val_score() hemos obtenido 
[0.73770492 0.73770492 0.70491803 0.59016393 0.73770492]
0.70 accuracy with a standard deviation of 0.06


In [30]:
val_predict(X, y)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200


Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
El valor AUC ROC para esta predicción es 
0.5013888888888889
El F1 score para esta predicción es 
0.6326908325920417
La matriz de confución de esta predicción es 
[[220   5]
 [ 78   2]]
