# FINE TUNING

# I. TRANSFER LEARNING

## LECTURA DE DATA

In [None]:
#se importan dependencias
import random 
import matplotlib.pyplot as plt
import cv2
import numpy as np

In [None]:
# Install the Kaggle CLI and create the ~/.kaggle directory
!pip install kaggle
!mkdir -p ~/.kaggle

In [None]:
# Upload kaggle.json through the Colab file picker so the Kaggle API can be used
from google.colab import files
files.upload()

In [None]:
# Create the ~/.kaggle directory (if missing) and copy the uploaded kaggle.json into it
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Restrict kaggle.json to owner read/write only (mode 600)
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the ryeltsin/bdfvusm-qa dataset with the Kaggle CLI
!kaggle datasets download -d ryeltsin/bdfvusm-qa

In [None]:
# Extract the dataset from the downloaded zip archive
import os
import zipfile

# The context manager closes the archive even when extraction raises.
with zipfile.ZipFile('bdfvusm-qa.zip', 'r') as zip_train_test:
  zip_train_test.extractall()

In [None]:
# Names of the class folders inside the extracted dataset.
# - The train/test/val folders that a later cell creates in the same directory are
#   excluded, so they are never mistaken for classes when the notebook is re-run.
# - Plain files are skipped, because every entry is later listed as a folder.
# - The names are sorted, because os.listdir returns them in arbitrary order.
dataset_root = 'BD-FV-USM_QA'
split_folders = {'train', 'test', 'val'}
clases_dir = sorted(
    name for name in os.listdir(dataset_root)
    if name not in split_folders and os.path.isdir(os.path.join(dataset_root, name))
)

In [None]:
# Move into the extracted dataset folder and create the train, test and val
# folders when they are not there yet
os.chdir('/content/BD-FV-USM_QA')
for split_name in ('train', 'test', 'val'):
  if not os.path.isdir(split_name):
    os.mkdir(split_name)

In [None]:
# One list of directory entries per class folder, in the same order as clases_dir
all_elements = [os.listdir(class_folder) for class_folder in clases_dir]

In [None]:
# Importando función que define la cantidad de data para train test y validation
from sklearn.model_selection import train_test_split

def split_data(data, train_ratio=0.6, test_ratio=0.2, val_ratio=0.2 ):
  """Split ``data`` into train, test and validation subsets.

  Two chained ``train_test_split`` calls are used, both with ``random_state=100``.

  Args:
    data: Collection of samples to split (the callers pass the entries of one class folder).
    train_ratio: Fraction of ``data`` kept for training.
    test_ratio: Weight of the test subset within the held-out part.
    val_ratio: Weight of the validation subset within the held-out part.

  Returns:
    Tuple ``(train, test, val)``.
  """
  # Everything that is not training data is held out for test + validation.
  train, held_out = train_test_split(data, test_size=1 - train_ratio, random_state=100)
  # The second output of train_test_split receives the ``test_size`` fraction and is
  # bound to ``val`` here, so the fraction has to be validation's share of the
  # held-out part (the previous expression used the test share, which only gave the
  # right sizes when test_ratio == val_ratio).
  val_share = val_ratio / (test_ratio + val_ratio)
  test, val = train_test_split(held_out, test_size=val_share, random_state=100)
  return train, test, val

In [None]:
# Split the entries of every class into a (train, test, val) triple
l_dist = [tuple(split_data(class_files)) for class_files in all_elements]

In [None]:
import shutil
def move_data( path_orig, path_dest, clase, data_path ):
  """Copy the files of one class into a split folder.

  Every name in ``data_path`` is copied from ``path_orig/clase`` to
  ``path_orig/path_dest/clase``. Despite the function name, the source files
  are copied (``shutil.copy``), not moved.

  Args:
    path_orig: Directory that contains the class folders.
    path_dest: Name of the split folder, relative to ``path_orig``.
    clase: Name of the class folder.
    data_path: Iterable of file names located in the class folder.

  Returns:
    None. A confirmation line is printed once all files are copied.
  """
  source_dir = os.path.join(path_orig, clase)
  target_dir = os.path.join(path_orig, path_dest, clase)
  # makedirs also creates the split folder itself when it is missing and does not
  # fail when the class folder already exists.
  os.makedirs(target_dir, exist_ok=True)

  for file_name in data_path:
    shutil.copy(os.path.join(source_dir, file_name), os.path.join(target_dir, file_name))
  print("success copy {} , {} , {}".format(path_orig, path_dest, clase))

## DIVISIÓN DE DATA

In [None]:
# Copy the files of every class into the train, test and val folders
lsplit = ['train', 'test', 'val']
all_splits = l_dist
lclases = clases_dir
orig = os.getcwd()
nclas = len(clases_dir)
# Outer loop: one pass per split; inner loop: every class of that split
for split_index, dest in enumerate(lsplit):
  for class_index in range(nclas):
    class_files = all_splits[class_index][split_index]
    move_data(orig, dest, lclases[class_index], class_files)

In [None]:
# Paths of the split folders (relative to the current working directory),
# image size and batch size
pth_train, pth_test, pth_val = [os.getcwd() + '/' + split_name
                                for split_name in ('train', 'test', 'val')]
image_height, image_width = 100, 300
batch_size = 64

## DATA AUGMENTATION

In [None]:
# Process image
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# NOTE: to train without data augmentation, keep only rescale=1./255
# Augmentation options for the training images
train_augmentation = dict(
    rescale=1./255,
    horizontal_flip=True,    # horizontal mirror
    rotation_range=10,       # image rotation
    brightness_range=None,   # brightness (left as None)
    shear_range=0.2,         # shear intensity
    zoom_range=0.2,          # zoom
)
train_datagen = ImageDataGenerator(**train_augmentation)

# Validation images are only rescaled
val_datagen = ImageDataGenerator(rescale=1./255)

# Test images are only rescaled
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Options shared by the three directory readers.
# NOTE(review): Keras documents target_size as (height, width); here it is given as
# (image_width, image_height). The model's input_shape uses the same order, so the
# shapes agree, but the images may be resized transposed — confirm against the data.
flow_options = dict(target_size=(image_width, image_height),
                    batch_size=batch_size,
                    class_mode="categorical")

# Training data
training_set = train_datagen.flow_from_directory(pth_train, **flow_options)

# Validation data
val_set = val_datagen.flow_from_directory(pth_val, **flow_options)

# Test data: shuffling is disabled so predictions line up with test_set.classes
test_set = test_datagen.flow_from_directory(pth_test, shuffle=False, **flow_options)

## IMPORTAR MODELO DE CNN

In [None]:
#libreria de deep learning que permite el uso de redes convolucionales
from keras.models import Sequential
from keras.layers import Conv2D,Dense,Flatten,Dropout,MaxPooling2D, BatchNormalization, Activation
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import ModelCheckpoint, EarlyStopping
import keras.optimizers as optimizers
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
## architecture CNN
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import optimizers

In [None]:
from tensorflow.keras.applications.MODELO import MODELO

In [None]:
# Load the ImageNet-pretrained base network without its top (classification) layers
base_input_shape = (image_width, image_height, 3)
base_model = MODELO(include_top=False,
                    weights="imagenet",
                    input_shape=base_input_shape)

# Print the architecture
base_model.summary()

## REEMPLAZAR CAPA CLASIFICATORIA

In [None]:
# Build the final model: pretrained base + new classifier head
modelMODELO = Sequential()

# Freeze every layer of the base except its last one
for base_layer in base_model.layers[:-1]:
  base_layer.trainable = False

modelMODELO.add(base_model)

# Flatten the feature maps before the dense layers
modelMODELO.add(Flatten())

# Two dense blocks: Dense (no bias) -> BatchNormalization -> ReLU -> Dropout
for units in (512, 256):
  modelMODELO.add(Dense(units, use_bias=False))
  modelMODELO.add(BatchNormalization())
  modelMODELO.add(Activation("relu"))
  modelMODELO.add(Dropout(0.25))

# Output layer: one softmax unit per class
modelMODELO.add(Dense(nclas, activation='softmax'))
modelMODELO.summary()

In [None]:
# Adam optimizer; `learning_rate` replaces the deprecated `lr` alias.
opt = optimizers.Adam(learning_rate = 1.0e-4)
# 'mse' is tracked next to accuracy because the history, plotting and evaluation
# cells of this notebook read an 'mse' entry and a third evaluation score.
modelMODELO.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy', 'mse'])

In [None]:
# Learning-rate schedule driven by the training loss.
# NOTE(review): this callback is created but not included in `callbacks` below,
# so it has no effect on training — confirm whether that is intended.
reduce_learning_rate = ReduceLROnPlateau(monitor='loss',
                                          factor=0.1,
                                          patience=2,
                                          cooldown=2,
                                         verbose=1)

# Save the best model to best_model.h5. The accuracy metric is logged under the name
# 'accuracy' (this notebook reads 'val_accuracy' from the history), so the validation
# key to monitor is 'val_accuracy'; 'val_acc' is not logged and the checkpoint would
# be skipped.
callbacks = [ModelCheckpoint(filepath='best_model.h5', monitor='val_accuracy', save_best_only=True)]

# NOTE(review): 1718 and 614 are presumably the number of training and validation
# images — confirm against the generators' sample counts.
my_history = modelMODELO.fit_generator(training_set,
                    steps_per_epoch=1718//batch_size,
                    epochs=100,
                    validation_data=val_set,
                    validation_steps=614//batch_size,
                    callbacks = callbacks,
                    shuffle= True
                   )

## GRÁFICOS

In [None]:
# Per-epoch curves of the first training stage. list(...) makes independent copies,
# so extending these variables later with `+=` does not alter my_history.history.
acc = list(my_history.history['accuracy'])
val_acc = list(my_history.history['val_accuracy'])

loss = list(my_history.history['loss'])
val_loss = list(my_history.history['val_loss'])

# The 'mse' entries exist only when the model was compiled with the 'mse' metric.
mse = list(my_history.history['mse'])
val_mse = list(my_history.history['val_mse'])

In [None]:
# One figure per metric: training curve and validation curve against the epoch
for figure_id, metric, x_label in ((1, 'accuracy', 'epochs'), (2, 'loss', 'epoch')):
  plt.figure(figure_id)
  plt.plot(my_history.history[metric])
  plt.plot(my_history.history['val_' + metric])
  plt.title('model ' + metric)
  plt.ylabel(metric)
  plt.xlabel(x_label)
  plt.legend(['train', 'val'], loc='upper right')
  plt.show()

In [None]:
# Training accuracy and training loss on the same axes
plt.figure(4)
plt.plot(my_history.history['accuracy'])
plt.plot(my_history.history['loss'])
plt.title('model accuracy')
plt.ylabel('acc/loss')
plt.xlabel('epochs')
# One legend entry per plotted curve; both curves come from the training history.
plt.legend(['train accuracy', 'train loss'], loc='upper right')
plt.show()

## MÉTRICAS Y RESULTADOS

### ACCURACY

In [None]:
# Evaluate on the validation set. The prints below read the result as
# [loss, accuracy, mean squared error].
scoreVal = modelMODELO.evaluate_generator(val_set)
for label, position in (("Accuracy = ", 1), ("Loss = ", 0), ("mean_squared_error = ", 2)):
  print(label, scoreVal[position])

### LOSS

In [None]:
# Evaluate on the test set. The prints below read the result as
# [loss, accuracy, mean squared error].
scoreTest = modelMODELO.evaluate_generator(test_set)
for label, position in (("Accuracy = ", 1), ("Loss = ", 0), ("mean_squared_error = ", 2)):
  print(label, scoreTest[position])

## REPORTE DE CLASIFICACIÓN

In [None]:
from sklearn.metrics import classification_report, precision_score, confusion_matrix, accuracy_score, mean_squared_error

In [None]:
# Predicted class index of every test image: argmax over the model outputs
predict = np.argmax(modelMODELO.predict_generator(test_set), axis=1)

In [None]:
print('Confusion Matrix')
print(confusion_matrix(test_set.classes, predict))
print('Classification Report')
# Class names ordered by the integer index the generator assigned to each folder, so
# every report row is labelled with its real class and the list always has the right
# length (a hard-coded "1".."123" list fits only 123 classes and only matches when
# the folder order happens to coincide).
target_names = sorted(test_set.class_indices, key=test_set.class_indices.get)
# Passing `labels` keeps the report rows aligned with target_names even when some
# class is absent from both the true and the predicted labels.
print(classification_report(test_set.classes, predict,
                            labels=list(range(len(target_names))),
                            target_names=target_names))

## DIAGRAMA DEL MODELO

In [None]:
# Draw the model diagram, with layer names and shapes, into modelin.png
from keras.utils.vis_utils import plot_model
plot_model(modelMODELO, to_file='modelin.png', show_shapes=True, show_layer_names=True)

## SAVE THE MODEL

In [None]:
# Mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Save the transfer-learning model to modelMODELO.h5 in the working directory
modelMODELO.save("modelMODELO.h5")

In [None]:
# Move the saved model file into the mounted Google Drive
!mv modelMODELO.h5 /content/drive/My\ Drive/modelMODELO.h5

# II. FINE TUNING

In [None]:
# Make the base model trainable again and show the full model.
# The model built in this notebook is named modelMODELO; `modeloMODELO` was undefined.
base_model.trainable = True
modelMODELO.summary()

In [None]:
# Report how many layers the base model has
base_layer_count = len(base_model.layers)
print("Number of layers in the base model: ", base_layer_count)

In [None]:
# Index of the base-model layer from which fine-tuning is applied.
# NOTE(review): `n` is not defined anywhere in the visible notebook; it looks like a
# placeholder that must be replaced with a layer index before running — confirm.
fine_tune_at = n

# Freeze the layers located before the fine-tuning start index
for layer in base_model.layers[:fine_tune_at]:
  layer.trainable =  False

In [None]:
# Recompile for fine-tuning with a learning rate ten times smaller than in the first
# stage; `learning_rate` replaces the deprecated `lr` alias.
opt2 = optimizers.Adam(learning_rate = 1.0e-4/10)
# The model built in this notebook is named modelMODELO (`modeloMODELO` was undefined).
# 'mse' is tracked next to accuracy because the later history, plotting and evaluation
# cells read an 'mse' entry and a third evaluation score.
modelMODELO.compile(loss='categorical_crossentropy', optimizer=opt2, metrics=['accuracy', 'mse'])
modelMODELO.summary()

In [None]:
# Number of trainable variables of the model (modelMODELO; `modeloMODELO` was undefined)
len(modelMODELO.trainable_variables)

In [None]:
# Continue training from the point where the first stage stopped.
fine_tune_epochs = 100
# Number of epochs completed in the first stage. `initial_epoch` was used below but
# never defined; deriving it from the recorded history keeps it in sync with the
# first fit call.
initial_epoch = len(my_history.epoch)
total_epochs =  initial_epoch + fine_tune_epochs

# The model built in this notebook is named modelMODELO (`modeloMODELO` was undefined).
# Starting at `initial_epoch` makes this stage run exactly `fine_tune_epochs` epochs.
history_fine = modelMODELO.fit_generator(training_set,
                    steps_per_epoch=1718//batch_size,
                    epochs=total_epochs,
                    initial_epoch=initial_epoch,
                    validation_steps=614//batch_size,
                    validation_data=val_set,
                    callbacks = callbacks,
                    shuffle= True
                   )

## GRÁFICOS

In [None]:
# Curves of both training stages joined end to end. New lists are built with `+`
# instead of extending in place, so my_history.history is left untouched and
# re-running this cell does not append the fine-tuning curves a second time.
acc = my_history.history['accuracy'] + history_fine.history['accuracy']
val_acc = my_history.history['val_accuracy'] + history_fine.history['val_accuracy']

loss = my_history.history['loss'] + history_fine.history['loss']
val_loss = my_history.history['val_loss'] + history_fine.history['val_loss']

# The 'mse' entries exist only when the model was compiled with the 'mse' metric.
mse = my_history.history['mse'] + history_fine.history['mse']
val_mse = my_history.history['val_mse'] + history_fine.history['val_mse']

In [None]:
# Number of epochs of the first training stage; the vertical line drawn at
# initial_epoch-1 marks where fine-tuning started. Computed here because the name
# was otherwise never assigned.
initial_epoch = len(my_history.epoch)

# Accuracy over both stages
plt.figure(5)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.ylim([0.8, 1])
plt.plot([initial_epoch-1,initial_epoch-1],
          plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.xlabel('epoch')
plt.show()

# Loss over both stages
plt.figure(6)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.ylim([0, 1.0])
plt.plot([initial_epoch-1,initial_epoch-1],
         plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

# Mean squared error over both stages; the two curves get distinct legend labels
plt.figure(7)
plt.plot(mse, label='Training MSE')
plt.plot(val_mse, label='Validation MSE')
plt.ylim([0, 1.0])
plt.plot([initial_epoch-1,initial_epoch-1],
         plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='upper right')
plt.title('Training and Validation MSE')
plt.xlabel('epoch')
plt.show()

## MÉTRICAS Y RESULTADOS

### ACCURACY

In [None]:
# Evaluate the fine-tuned model on the validation set. The model built in this
# notebook is named modelMODELO (`modeloMODELO` was undefined). The prints read the
# result as [loss, accuracy, mean squared error].
scoreVal2 = modelMODELO.evaluate_generator(val_set)
print("Accuracy = ",scoreVal2[1])
print("Loss = ",scoreVal2[0])
print("mean_squared_error = ",scoreVal2[2])

### LOSS

In [None]:
# Evaluate the fine-tuned model on the test set. The model built in this notebook is
# named modelMODELO (`modeloMODELO` was undefined). The prints read the result as
# [loss, accuracy, mean squared error].
scoreTest2 = modelMODELO.evaluate_generator(test_set)
print("Accuracy = ",scoreTest2[1])
print("Loss = ",scoreTest2[0])
print("mean_squared_error = ",scoreTest2[2])

## REPORTE DE CLASIFICACIÓN

In [None]:
from sklearn.metrics import classification_report, precision_score, confusion_matrix, accuracy_score, mean_squared_error

In [None]:
# Predicted class index of every test image after fine-tuning. The model built in
# this notebook is named modelMODELO (`modeloMODELO` was undefined).
predict = modelMODELO.predict_generator(test_set)
predict = np.argmax(predict, axis=1)

In [None]:
print('Confusion Matrix')
print(confusion_matrix(test_set.classes, predict))
print('Classification Report')
# Class names ordered by the integer index the generator assigned to each folder, so
# every report row is labelled with its real class and the list always has the right
# length (a hard-coded "1".."123" list fits only 123 classes and only matches when
# the folder order happens to coincide).
target_names = sorted(test_set.class_indices, key=test_set.class_indices.get)
# Passing `labels` keeps the report rows aligned with target_names even when some
# class is absent from both the true and the predicted labels.
print(classification_report(test_set.classes, predict,
                            labels=list(range(len(target_names))),
                            target_names=target_names))

## SAVE THE MODEL

In [None]:
# Mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Save the model after fine-tuning to modelMODELO_FT.h5 in the working directory
modelMODELO.save("modelMODELO_FT.h5")

In [None]:
# Move the saved fine-tuned model file into the mounted Google Drive
!mv modelMODELO_FT.h5 /content/drive/My\ Drive/modelMODELO_FT.h5