# Experimento 0 : Revisión del artículo "A Preliminary Study on Deep Transfer Learning Applied to Image Classification for Small Datasets".

En este experimento usaremos la arquitectura y la estructura de entrenamiento planteadas en "A Preliminary Study on Deep Transfer Learning Applied to Image Classification for Small Datasets" para comprobar si sus resultados se validan en un problema multiclase.

## Librerías usadas.

In [None]:
# System configuration so TensorFlow runs on the GPU. CUDA and cuDNN must be installed.
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
# Enable memory growth on every detected GPU. On a CPU-only machine the list is
# empty, so nothing is configured instead of failing with an IndexError on gpus[0].
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
import numpy as np  #Librería básica de manejo de vectores y tensores
import pandas as pd #Librería básica para la gestión y análisis de datos

from glob import glob 
from matplotlib import pyplot as plt
import os
from tqdm import tqdm
import cv2

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils import resample

## Rutas y variables globales

In [None]:
# Data locations: data_dir is the resolved folder that holds the metadata CSV,
# csv_path is the resolved path of the CSV file itself.
data_dir = os.path.dirname(
    os.path.realpath("../TFG/Datos/HAM10000_metadata.csv")
)
csv_path = os.path.realpath(data_dir + "/HAM10000_metadata.csv")

# Global settings: altura/longitud are used as the input height/width of the
# network and as the image resize target; clases is the size of the output layer.
altura, longitud = 50, 50
clases = 7

print(data_dir, csv_path, sep="\n")

## Creación del marco de datos.

In [None]:
# Load the metadata CSV.
dataFrame = pd.read_csv(csv_path)

# Index every file found exactly one folder below data_dir by its base name
# without extension, so image ids can be mapped to file paths.
all_image_path = glob(os.path.join(data_dir, '*', '*'))
imageid_path_dict = {
    os.path.splitext(os.path.basename(image_path))[0]: image_path
    for image_path in all_image_path
}

# Diagnosis code -> human-readable lesion name.
lesion_type_dict = dict(
    nv='Melanocytic nevi',
    mel='Melanoma',
    bkl='Benign keratosis ',
    bcc='Basal cell carcinoma',
    akiec='Actinic keratoses',
    vasc='Vascular lesions',
    df='Dermatofibroma',
)

# Extra columns that make the DataFrame easier to read:
#   path          -> file path of the image (None when no file matches the id)
#   cell_type     -> readable lesion name for the 'dx' code
#   cell_type_idx -> integer category code of cell_type
dataFrame['path'] = dataFrame['image_id'].map(imageid_path_dict.get)
dataFrame['cell_type'] = dataFrame['dx'].map(lesion_type_dict.get)
cell_type_categories = pd.Categorical(dataFrame['cell_type'])
dataFrame['cell_type_idx'] = cell_type_categories.codes
dataFrame.head()


## Select_network

Este método permite seleccionar la parte superior de una red.

In [None]:
def select_network(nn_base_arch):
    """Return the convolutional base identified by ``nn_base_arch``.

    Args:
        nn_base_arch: Architecture name. ``'CNN_SOCO'`` builds the base with
            ``cnn_soco()``; ``'HELP'`` only prints the available names.

    Returns:
        The model built for the requested architecture, or ``None`` when
        ``'HELP'`` was requested.

    Raises:
        ValueError: If ``nn_base_arch`` is not a known architecture name.
    """
    if nn_base_arch == 'HELP':
        print("Se encuentra disponible:\n"+
              "- CNN_SOCO")
        # Nothing is built for the help request.
        return None

    if nn_base_arch == 'CNN_SOCO':
        return cnn_soco()

    # Fail with a clear message instead of an UnboundLocalError on `nn`.
    raise ValueError(
        "Unknown network architecture: {!r} (available: 'CNN_SOCO')".format(nn_base_arch)
    )

## Los métodos que se describen a continuación permiten crear los diferentes tipos de capas superiores que se usarán en el método select_network. Los métodos son:

 - cnn_soco = Es una réplica de la red usada en el estudio "A Preliminary Study on Deep Transfer Learning Applied to Image Classification for Small Datasets".


In [None]:
def cnn_soco():
    """Build the convolutional base: two 32-filter 3x3 convolutions, a 2x2
    max-pooling, one 64-filter 3x3 convolution and a final 2x2 max-pooling.

    The input shape is ``(altura, longitud, 3)``, taken from the module-level
    settings.

    Returns:
        A ``tf.keras.Sequential`` model holding the layers above.
    """
    conv = tf.keras.layers.Conv2D
    pool = tf.keras.layers.MaxPooling2D

    return tf.keras.Sequential([
        # First convolutional stage.
        conv(32, (3, 3), (1, 1), activation='relu',
             input_shape=(altura, longitud, 3)),
        conv(32, (3, 3), (1, 1), activation='relu'),
        pool((2, 2)),
        # Second convolutional stage.
        conv(64, (3, 3), activation='relu'),
        pool((2, 2)),
    ])

Los métodos que se describen a continuación generarán la capa de salida de nuestra red : 
 - build : Modificación de la red_SOCO, añadimos una capa de Softmax

In [None]:
def build(nn):
    """Stack a classification head on top of the base network ``nn``.

    The head is: Flatten -> Dense(128) -> ReLU activation -> Dense(clases)
    with softmax, where ``clases`` is the module-level class count.

    Args:
        nn: Base model used as the first layer of the new model.

    Returns:
        A ``tf.keras.Sequential`` model made of ``nn`` followed by the head.
    """
    layers = tf.keras.layers
    stack = [
        nn,
        layers.Flatten(),
        # Dense and its activation are kept as two separate layers.
        layers.Dense(128),
        layers.Activation('relu'),
        layers.Dense(clases, activation='softmax'),
    ]
    return tf.keras.Sequential(stack)

## Se procede a crear un método que permita balancear la carga de imágenes

In [None]:
def balanced_dataset(df, n_samples=2500, random_state=123):
    """Return a copy of ``df`` in which every class has ``n_samples`` rows.

    Each class (distinct value of ``cell_type_idx``) is resampled with
    replacement to exactly ``n_samples`` rows, so classes with fewer rows are
    up-sampled and classes with more rows are down-sampled.

    Args:
        df: DataFrame with a ``cell_type_idx`` column.
        n_samples: Number of rows drawn for every class.
        random_state: Seed passed to ``resample`` for reproducible results.

    Returns:
        A new DataFrame with the resampled rows of all classes concatenated
        (original index values are kept, so they may repeat).
    """
    per_class = [
        resample(df[df['cell_type_idx'] == cat],
                 replace=True,               # sample with replacement
                 n_samples=n_samples,        # same size for every class
                 random_state=random_state)  # reproducible results
        for cat in df['cell_type_idx'].unique()
    ]

    if not per_class:
        # No classes at all: return an empty frame with the same columns.
        return df.iloc[0:0].copy()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(per_class)

def load_img_data(size, df, balanced=False):
    """Load the images listed in ``df['path']`` into one float32 array.

    Every image is read with OpenCV, resized to ``size`` x ``size``, converted
    to float32 and divided by 255.

    Args:
        size: Side length, in pixels, of the resized square images.
        df: DataFrame with a ``path`` column (image file paths) and a
            ``cell_type_idx`` column (labels).
        balanced: When True, ``df`` is first passed through
            ``balanced_dataset`` and the balanced rows are loaded instead.

    Returns:
        Tuple ``(imgs, labels)``: ``imgs`` is the stack of loaded images along
        a new first axis and ``labels`` is ``df['cell_type_idx'].values`` in
        the same row order.

    Raises:
        FileNotFoundError: If a row has no path or its image cannot be read.
    """
    if balanced:
        df = balanced_dataset(df)

    imgs = []
    for image_path in tqdm(list(df['path'])):
        if pd.isna(image_path):
            raise FileNotFoundError("A row of the DataFrame has no image path")

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None; fail
            # here with the offending path instead of inside cv2.resize.
            raise FileNotFoundError(
                "Could not read image: {}".format(image_path)
            )

        img = cv2.resize(img, (size, size))
        imgs.append(img.astype(np.float32) / 255.)

    imgs = np.stack(imgs, axis=0)
    print(imgs.shape)

    return imgs, df['cell_type_idx'].values

In [None]:
# Free the large arrays left over from a previous run before the data is loaded
# again. Names that do not exist yet (for example on a first top-to-bottom run)
# are skipped instead of raising NameError.
# dataFrame is deliberately kept: load_general_data() reads it.
for _name in ('imgs', 'target', 'x_train', 'x_test',
              'y_train', 'y_test', 'x_val', 'y_val'):
    globals().pop(_name, None)
del _name

## Cargamos los datos y generamos los conjuntos de datos de origen, entrenamiento y prueba para cada experimento.

In [None]:
def load_general_data():
    """Load the balanced image set and split it into the three data sets
    shared by all experiments.

    The images are loaded from the module-level ``dataFrame`` at size
    ``altura``. 60% of the samples become the source set; the remaining 40%
    are split again, 70% for testing and 30% for training.

    Returns:
        Tuple ``(source_data, train_data, test_data)``; each element is a
        two-item list ``[x, y]`` with the images and their labels.
    """
    images, labels = load_img_data(altura, dataFrame, balanced=True)

    # First split: the 60% "test" side is the source (transfer-learning) set.
    x_target, x_source, y_target, y_source = train_test_split(
        images, labels, test_size=0.60
    )

    # Second split, on the remaining target samples only.
    x_fit, x_eval, y_fit, y_eval = train_test_split(
        x_target, y_target, test_size=0.70
    )

    return [x_source, y_source], [x_fit, y_fit], [x_eval, y_eval]

#############################################################################################
# Creamos varios métodos que nos permiten simular los procesos de entrenamiento del estudio #
#############################################################################################

def get_data_for_ex_1(source_data,train_data,test_data):
    """Select the data for experiment 1: fit on the train set, evaluate on
    the test set (``source_data`` is not used).

    Each argument is an ``[x, y]`` pair.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.
    """
    fit_x, fit_y = train_data[0], train_data[1]
    eval_x, eval_y = test_data[0], test_data[1]
    return fit_x, eval_x, fit_y, eval_y


def get_data_for_ex_2(source_data,train_data,test_data):
    """Select the data for experiment 2: fit on the source set, evaluate on
    the test set (``train_data`` is not used).

    Each argument is an ``[x, y]`` pair.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.
    """
    fit_x, fit_y = source_data[0], source_data[1]
    eval_x, eval_y = test_data[0], test_data[1]
    return fit_x, eval_x, fit_y, eval_y


def get_data_for_ex_3(source_data,train_data,test_data):
    """Select the data for experiment 3: fit on the source set plus the train
    set, evaluate on the test set.

    Each argument is an ``[x, y]`` pair of arrays whose first axis indexes the
    samples.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)`` where ``x_train`` and
        ``y_train`` are new arrays holding the source samples followed by the
        train samples; the inputs are not modified.
    """
    # np.append returns a new array and flattens when no axis is given, so the
    # two sets are joined with a single concatenate along the sample axis.
    x_train = np.concatenate([source_data[0], train_data[0]], axis=0)
    y_train = np.concatenate([source_data[1], train_data[1]], axis=0)

    x_test = test_data[0]
    y_test = test_data[1]

    return x_train,x_test,y_train,y_test


def get_data_for_ex_4(source_data,train_data,test_data):
    """Select the data for experiment 4: first fit on the source set, then
    re-train on the train set, and evaluate on the test set.

    Each argument is an ``[x, y]`` pair.

    Returns:
        Tuple ``(x_train, x_retrain, x_test, y_train, y_retrain, y_test)``.
    """
    first_x, first_y = source_data[0], source_data[1]
    second_x, second_y = train_data[0], train_data[1]
    eval_x, eval_y = test_data[0], test_data[1]
    return first_x, second_x, eval_x, first_y, second_y, eval_y


In [None]:
# Experiment constants: number of training epochs and mini-batch size.
EPOCHS = 5
BATCH_SIZE = 128

# RMSprop optimizer with the learning rate used for every training run.
RMSpropEstudio = tf.keras.optimizers.RMSprop(learning_rate=1e-4)

## Corremos los experimentos

In [None]:
# Load the images once and split them into the source / train / test sets
# that all experiments share.
(source_data, train_data, test_data) = load_general_data()

In [None]:

with tf.device('GPU:0'): # Run everything below inside a tf.device context that requests device GPU:0
    # NOTE: nn_base_arch and the run_experiment_* functions are defined in
    # cells further down; those cells must be executed before this one.
    # The x_*/y_* names assigned here are module-level variables: the training
    # helpers read them as globals, so each assignment must precede its run.

    # Experiment 1: fit on the train set.
    x_train,x_test,y_train,y_test = get_data_for_ex_1(source_data,train_data,test_data)
    res1,evaluations1 = run_experiment_1_2_and_3(nn_base_arch,EPOCHS)

    # Experiment 2: fit on the source set.
    x_train,x_test,y_train,y_test = get_data_for_ex_2(source_data,train_data,test_data)
    res2,evaluations2 = run_experiment_1_2_and_3(nn_base_arch,EPOCHS)

    # Experiment 3: data returned by get_data_for_ex_3 (source and train sets).
    x_train,x_test,y_train,y_test = get_data_for_ex_3(source_data,train_data,test_data)
    res3,evaluations3 = run_experiment_1_2_and_3(nn_base_arch,EPOCHS)

    # Experiment 4: res4/evaluations4 come from the first training,
    # res5/evaluations5 from the re-training step.
    x_train,x_retrain,x_test,y_train,y_retrain,y_test = get_data_for_ex_4(source_data,train_data,test_data)
    res4,res5,evaluations4,evaluations5 = run_experiment_4(nn_base_arch,EPOCHS)

In [None]:
# One figure per experiment: training accuracy curves plus test accuracy markers.
plot_acc_for_ex1_w_test_score(res1,evaluations1,EPOCHS,"Experiment 1")
plot_acc_for_ex1_w_test_score(res2,evaluations2,EPOCHS,"Experiment 2")
plot_acc_for_ex1_w_test_score(res3,evaluations3,EPOCHS,"Experiment 3")
# Experiment 4 is reported with the re-training results (res5 / evaluations5),
# the same ones used for "exp_4" in the combined figure.
plot_acc_for_ex1_w_test_score(res5,evaluations5,EPOCHS,"Experiment 4")
# Combined figure with the four experiments.
plot_acc_all_experiments(res1,res2,res3,res5,evaluations1,evaluations2,evaluations3,evaluations5,EPOCHS)

## Métodos para correr los entrenamientos

In [None]:
def run_train(nn_base_arch,epochs,checkpoint,dense = False):
    """Build, train and evaluate one model.

    The training data is read from the module-level ``x_train`` / ``y_train``
    and the evaluation data from ``x_test`` / ``y_test``; ``BATCH_SIZE`` and
    the optimizer settings of ``RMSpropEstudio`` are module-level as well.

    Args:
        nn_base_arch: Architecture name passed to ``select_network``.
        epochs: Number of training epochs.
        checkpoint: File path where the model with the lowest training loss
            is saved.
        dense: When true the head is built with ``build_dense``, otherwise
            with ``build``.

    Returns:
        Tuple ``(history, evaluation)``: the return values of ``model.fit``
        and ``model.evaluate``.
    """
    nn = select_network(nn_base_arch)

    # NOTE(review): build_dense is not defined in the visible part of this
    # notebook - confirm it exists before calling with dense=True.
    model = build_dense(nn) if dense else build(nn)

    cpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint, monitor="loss", mode="min", save_best_only=True, verbose=0)

    # Every model gets its own optimizer, created from the shared
    # configuration: Keras optimizers keep per-variable state, so one instance
    # should not be reused across independently built models.
    optimizer = type(RMSpropEstudio).from_config(RMSpropEstudio.get_config())
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy','mse'])

    # Honour the epochs argument rather than the EPOCHS global.
    history = model.fit(x_train, y_train,epochs=epochs,callbacks=[cpoint],batch_size = BATCH_SIZE,verbose=0)

    evaluation = model.evaluate(x_test, y_test)

    return history,evaluation

def run_train_w_model(nn_base_arch,epochs,checkpoint,dense = False):
    """Build, train and evaluate one model, and also return the trained model.

    The training data is read from the module-level ``x_train`` / ``y_train``
    and the evaluation data from ``x_test`` / ``y_test``; ``BATCH_SIZE`` and
    the optimizer settings of ``RMSpropEstudio`` are module-level as well.

    Args:
        nn_base_arch: Architecture name passed to ``select_network``.
        epochs: Number of training epochs.
        checkpoint: File path where the model with the lowest training loss
            is saved.
        dense: When true the head is built with ``build_dense``, otherwise
            with ``build``.

    Returns:
        Tuple ``(history, evaluation, model)``: the return values of
        ``model.fit`` and ``model.evaluate`` followed by the model itself.
    """
    nn = select_network(nn_base_arch)

    # NOTE(review): build_dense is not defined in the visible part of this
    # notebook - confirm it exists before calling with dense=True.
    model = build_dense(nn) if dense else build(nn)

    cpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint, monitor="loss", mode="min", save_best_only=True, verbose=0)

    # Every model gets its own optimizer, created from the shared
    # configuration: Keras optimizers keep per-variable state, so one instance
    # should not be reused across independently built models.
    optimizer = type(RMSpropEstudio).from_config(RMSpropEstudio.get_config())
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy','mse'])

    # Honour the epochs argument rather than the EPOCHS global.
    history = model.fit(x_train, y_train,epochs=epochs,callbacks=[cpoint],batch_size = BATCH_SIZE,verbose=0)

    evaluation = model.evaluate(x_test, y_test)

    return history,evaluation,model

def re_train(model,epocas):
    """Re-train an already trained model on the re-training set.

    The re-training data is read from the module-level ``x_retrain`` /
    ``y_retrain`` and the evaluation data from ``x_test`` / ``y_test``.

    Args:
        model: Trained Keras model, typically with some layers already marked
            as non-trainable by the caller.
        epocas: Number of re-training epochs.

    Returns:
        Tuple ``(history, evaluation)``: the return values of ``model.fit``
        and ``model.evaluate``.
    """
    # Changes to layer.trainable only take effect once the model is compiled
    # again, so recompile (with a fresh optimizer built from the shared
    # configuration) before fitting.
    optimizer = type(RMSpropEstudio).from_config(RMSpropEstudio.get_config())
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy','mse'])

    # Fit on the re-training set, not on the set used for the first training.
    history = model.fit(x_retrain, y_retrain,epochs=epocas,batch_size = BATCH_SIZE,verbose=0)
    evaluation = model.evaluate(x_test, y_test)
    return history,evaluation

In [None]:
def run_train(nn_base_arch,epochs,checkpoint,dense = False):
    """Build, train and evaluate one model.

    NOTE: this re-defines ``run_train`` from an earlier cell; whichever cell
    is executed last provides the definition in use.

    The training data is read from the module-level ``x_train`` / ``y_train``
    and the evaluation data from ``x_test`` / ``y_test``; ``BATCH_SIZE`` and
    the optimizer settings of ``RMSpropEstudio`` are module-level as well.

    Args:
        nn_base_arch: Architecture name passed to ``select_network``.
        epochs: Number of training epochs.
        checkpoint: File path where the model with the lowest training loss
            is saved.
        dense: When true the head is built with ``build_dense``, otherwise
            with ``build``.

    Returns:
        Tuple ``(history, evaluation)``: the return values of ``model.fit``
        and ``model.evaluate``.
    """
    nn = select_network(nn_base_arch)

    # NOTE(review): build_dense is not defined in the visible part of this
    # notebook - confirm it exists before calling with dense=True.
    model = build_dense(nn) if dense else build(nn)

    # Separate name for the callback so the path argument is not shadowed.
    cpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint, monitor="loss", mode="min", save_best_only=True, verbose=0)

    # Every model gets its own optimizer, created from the shared
    # configuration: Keras optimizers keep per-variable state, so one instance
    # should not be reused across independently built models.
    optimizer = type(RMSpropEstudio).from_config(RMSpropEstudio.get_config())
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy','mse'])

    # Honour the epochs argument rather than the EPOCHS global.
    history = model.fit(x_train, y_train,epochs=epochs,callbacks=[cpoint],batch_size = BATCH_SIZE,verbose=0)

    evaluation = model.evaluate(x_test, y_test)

    return history,evaluation

# Name of the base architecture used by every experiment (see select_network).
nn_base_arch = 'CNN_SOCO'


def run_experiment_1_2_and_3(nn_base_arch,epochs,dense = False,n_runs = 10):
    """Train and evaluate a fresh model ``n_runs`` times with ``run_train``.

    Args:
        nn_base_arch: Architecture name passed on to ``run_train``.
        epochs: Number of epochs passed on to ``run_train``.
        dense: Head selection flag passed on to ``run_train``.
        n_runs: Number of independent repetitions.

    Returns:
        Tuple ``(result, evaluations)``: two lists with one training history
        and one evaluation per repetition.

    NOTE: the checkpoint file name always contains "exp1", so experiments run
    through this function share the same files and later runs overwrite
    earlier ones.
    """
    result = []
    evaluations = []
    for i in range(n_runs):
        checkpoint ="../TFG/Modelos/balanced_model_"+nn_base_arch+"_exp1_v_"+str(i)+"_EXP0.h5"
        # Forward the caller's dense flag instead of always passing False.
        h,e = run_train(nn_base_arch,epochs,checkpoint,dense = dense)
        result.append(h)
        evaluations.append(e)
        print("########################################################")
        print("Iteración "+str(i+1) +" de "+str(n_runs))
        print("########################################################")

    return result,evaluations


def run_experiment_4(nn_base_arch,epochs,dense = False,n_runs = 10):
    """Run the two-step experiment ``n_runs`` times.

    Each repetition trains a fresh model with ``run_train_w_model``, marks
    every layer except the last one as non-trainable and then calls
    ``re_train`` on the same model.

    Args:
        nn_base_arch: Architecture name passed on to ``run_train_w_model``.
        epochs: Number of epochs used for both training steps.
        dense: Head selection flag passed on to ``run_train_w_model``.
        n_runs: Number of independent repetitions.

    Returns:
        Tuple ``(result, result_post_tf, evaluations, evaluations_post_tf)``:
        histories and evaluations of the first training followed by those of
        the re-training, one entry per repetition.
    """
    result = []
    result_post_tf = []
    evaluations = []
    evaluations_post_tf = []
    for i in range(n_runs):
        checkpoint ="../TFG/Modelos/balanced_model_"+nn_base_arch+"_exp4_v_"+str(i)+"_EXP0.h5"
        # Forward the caller's dense flag instead of always passing False.
        h,e,tf_model = run_train_w_model(nn_base_arch,epochs,checkpoint,dense = dense)
        result.append(h)
        evaluations.append(e)

        print("--------------------------------------------------------------------------")

        # Freeze everything except the output layer before re-training.
        for layer in tf_model.layers[0:-1]:
            layer.trainable = False

        h_retrain,e_retrain = re_train(tf_model,epochs)

        result_post_tf.append(h_retrain)
        evaluations_post_tf.append(e_retrain)

        print("########################################################")
        print("Iteración "+str(i+1) +" de "+str(n_runs))
        print("########################################################")

    return result,result_post_tf,evaluations,evaluations_post_tf
    

## Métodos para hacer las gráficas

In [None]:
def plot_acc_for_ex1_w_test_score(res,evaluations,epochs,name):
    """Plot the training accuracy curve of every run plus its test accuracy.

    Args:
        res: List of training histories; each must expose
            ``history["accuracy"]``.
        evaluations: List of evaluation results, aligned with ``res``; index 1
            of each entry is plotted as the test score (the accuracy when the
            model was compiled with ``metrics=['accuracy','mse']``).
        epochs: Number of training epochs; curves span epochs 0..``epochs``.
        name: Experiment name appended to the figure title.
    """
    plt.figure(figsize=(12,8))

    epoch_axis = np.arange(0, epochs+1)
    for i, (run, evaluation) in enumerate(zip(res, evaluations)):
        # Every curve starts at accuracy 0.0 for epoch 0.
        precisiones = [0.0, *run.history["accuracy"]]

        # Only the first run is labelled so the legend has a single entry per
        # kind of artist instead of one per run.
        first = i == 0
        plt.plot(epoch_axis, precisiones[0:epochs+1],
                 label="train_acc" if first else None)
        # The test marker sits at the last epoch, whatever its number is.
        plt.plot(epochs, evaluation[1], 'bo',
                 label="test_acc" if first else None)

    plt.title("Training Loss and Accuracy - {}".format(name))
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()

In [None]:
def plot_acc_for_ex1(res,epochs,name):
    """Plot the training accuracy curve of every run in ``res``.

    Args:
        res: List of training histories; each must expose
            ``history["accuracy"]``.
        epochs: Number of training epochs; curves span epochs 0..``epochs``.
        name: Experiment name appended to the figure title.
    """
    plt.figure(figsize=(12,8))

    for run in res:
        # Prepend 0.0 so that every curve starts at epoch 0.
        curve = [0.0] + list(run.history["accuracy"])
        plt.plot(np.arange(0, epochs+1), curve[:epochs+1], label="train_acc")

    plt.title("Training Loss and Accuracy - {}".format(name))
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()

In [None]:
def plot_acc_for_experiments(res,res1,evaluations,evaluations1,epochs,name):
    """Plot two sets of runs in one figure: training accuracy curves and
    test accuracy markers, the first set in green and the second in blue.

    Args:
        res: Training histories of the first set; each must expose
            ``history["accuracy"]``.
        res1: Training histories of the second set, aligned with ``res``.
        evaluations: Evaluation results of the first set; index 1 of each
            entry is plotted as the test score.
        evaluations1: Evaluation results of the second set.
        epochs: Number of training epochs; curves span epochs 0..``epochs``.
        name: Experiment name appended to the figure title.
    """
    plt.figure(figsize=(12,8))

    epoch_axis = np.arange(0, epochs+1)
    # (histories, evaluations, colour, legend label) for each set.
    series = (
        (res, evaluations, 'green', "train_acc (res)"),
        (res1, evaluations1, 'blue', "train_acc (res1)"),
    )

    for i in range(len(res)):
        for runs, evals, color, label in series:
            # Every curve starts at accuracy 0.0 for epoch 0.
            precisiones = [0.0, *runs[i].history["accuracy"]]

            # Only the first run of each set is labelled, and the two sets
            # get distinct labels so the legend can tell them apart.
            plt.plot(epoch_axis, precisiones[0:epochs+1],
                     label=label if i == 0 else None, color=color)
            # Test marker for every run (including the first), placed at the
            # last epoch; 'o' avoids a colour clash with the color keyword.
            plt.plot(epochs, evals[i][1], 'o', color=color)

    plt.title("Training Loss and Accuracy - {}".format(name))
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()

In [None]:
def plot_acc_all_experiments(res,res1,res2,res3,evaluations,evaluations1,evaluations2,evaluations3,epochs):
    """Plot the training accuracy curves of four experiments in one figure
    and save it as "Exp_0_Results.jpg".

    Args:
        res, res1, res2, res3: Training histories of experiments 1 to 4; each
            entry must expose ``history["accuracy"]``. The number of runs is
            taken from ``res``.
        evaluations, evaluations1, evaluations2, evaluations3: Accepted for
            interface compatibility; not used by this function.
        epochs: Number of training epochs; curves span epochs 0..``epochs``.
    """
    plt.figure(figsize=(16,10))

    # (histories, legend label, colour) for each experiment, in plot order.
    experiments = (
        (res, "exp_1", 'green'),
        (res1, "exp_2", 'blue'),
        (res2, "exp_3", 'brown'),
        (res3, "exp_4", 'salmon'),
    )

    for run_idx in range(len(res)):
        # Build the four curves of this run first; each starts at 0.0.
        curves = [
            [0.0] + list(runs[run_idx].history["accuracy"])
            for runs, _label, _color in experiments
        ]

        for (_runs, label, color), curve in zip(experiments, curves):
            # Only the first run carries the legend label.
            legend = {"label": label} if run_idx == 0 else {}
            plt.plot(np.arange(0, epochs+1), curve[0:epochs+1], color=color, **legend)

    plt.title("Training Accuracy and Test Results")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.savefig("Exp_0_Results.jpg")
    plt.show()
    

## Análisis de resultados con Pandas

In [None]:
# Test scores per experiment: index 1 of every evaluation result.
# Experiment 4 uses the evaluations obtained after re-training (evaluations5).
precision1 = [e[1] for e in evaluations1]
precision2 = [e[1] for e in evaluations2]
precision3 = [e[1] for e in evaluations3]
precision4 = [e[1] for e in evaluations5]

# The iteration column follows the actual number of runs instead of assuming 10.
d = {'Iteración': list(range(1, len(precision1) + 1)),
     'Precision 1': precision1,
     'Precision 2': precision2,
     'Precision 3': precision3,
     'Precision 4': precision4}
df = pd.DataFrame(data=d)

print(df.mean())