
 
#  **Introducción**

<font size = 4>
El presente notebook recoge el procedimiento seguido para la creación del modelo y sus posteriores fases de entrenamiento, validación y test.  
<br> <br>
 
La base de datos utilizada para la creación del clasificador de imágenes se denomina **Dogs vs Cats**. 

</font>

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers, losses
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import BatchNormalization, Dropout, Conv2D, Flatten, Dense, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from zipfile import ZipFile
from keras.preprocessing.image import ImageDataGenerator, load_img
import os, re
import cv2
from glob import glob
import matplotlib.pyplot as plt
import random


<font size = 4>**Creación de los dataframes y clasificación de las imágenes** </font>


In [2]:
# Unpack both Kaggle archives into the working directory; the code below
# expects them to produce ./train and ./test1.
for archive_path in ('../input/dogs-vs-cats/train.zip', '../input/dogs-vs-cats/test1.zip'):
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall()

train_dir = "./train/"

# Collapse "<label>.<id>.<ext>" into "<label><id>.png" (e.g. "dog.1.jpg" -> "dog1.png").
# Only the name changes; the image bytes are not re-encoded.
# os.replace overwrites a target left behind by a previous run, so re-running this
# cell after a fresh extraction does not leave both spellings of an image on disk.
for original_name in os.listdir(train_dir):
    parts = original_name.split(".")
    # Guard against names without a dot, which have no second component to append.
    if len(parts) > 1 and len(parts[0]) == 3:
        renamed = parts[0] + parts[1] + ".png"
        os.replace(os.path.join(train_dir, original_name), os.path.join(train_dir, renamed))

# List the directory only after renaming so every file appears exactly once.
filenamesTrain = os.listdir(train_dir)

# Labels are stored as strings: DOG = '0', anything else (CAT) = '1'.
categoriesTrain = ['0' if name.startswith('dog') else '1' for name in filenamesTrain]

train_df = pd.DataFrame({
    'filename': filenamesTrain,
    'category': categoriesTrain
})

# The test set is unlabeled, so its dataframe only carries the file names.
filenamesTest = os.listdir("./test1")

test_df = pd.DataFrame({
    'filename': filenamesTest
})

<font size = 4>**Visualización de las imágenes del dataset** </font>

In [None]:
def plot_three_samples(base_path='./train', n_samples=3):
    """Show randomly chosen images from ``base_path`` side by side.

    Args:
        base_path: Directory whose entries are the candidate image files.
        n_samples: Number of images to draw; capped at the number of
            entries found so an almost-empty directory does not raise.

    Returns:
        None. The images are drawn on a new matplotlib figure.
    """
    path_contents = glob(base_path + '/**')
    imgs = random.sample(path_contents, min(n_samples, len(path_contents)))

    plt.figure(figsize=(16, 16))
    for position, img_file in enumerate(imgs, start=1):
        image = cv2.imread(img_file)
        if image is None:
            # cv2.imread signals an unreadable file by returning None, not by raising.
            continue
        plt.subplot(1, len(imgs), position)
        # OpenCV decodes to BGR channel order while matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()


plot_three_samples()

In [None]:
# Preview the first rows of the labeled training dataframe.
train_df.head(n=5)

In [None]:
# Preview the first rows of the unlabeled test dataframe.
test_df.head(n=5)

<font size = 4>**Creación de los conjuntos de entrenamiento, validación y test** </font>

In [4]:
# Image geometry and batching shared by the generators, the model and the
# prediction cell.
imageWidht = 150
imageHeight = 150
imageChannels = 3
numClass = 2
batch_size = 100

# Hold out 20% of the labeled images for validation. The fixed seed makes the
# split reproducible and stratifying keeps the dog/cat ratio equal in both parts.
X_train2, X_val2 = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=train_df['category'],
)

# Random augmentation is applied to the training images only.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
)

# NOTE(review): Keras documents target_size as (height, width). The order below
# is kept as in the rest of the notebook, which is harmless while both are 150.
train_generator = train_datagen.flow_from_dataframe(
    X_train2,
    directory="./train/",
    x_col='filename',
    y_col='category',
    target_size=(imageWidht,imageHeight),
    class_mode="binary",
    batch_size=batch_size
)

# Validation images are only rescaled: random distortions here would make the
# validation loss (which drives early stopping) noisy and pessimistic.
val_datagen = ImageDataGenerator(rescale=1./255)

val_generator = val_datagen.flow_from_dataframe(
    X_val2,
    directory="./train/",
    x_col='filename',
    y_col='category',
    target_size=(imageWidht,imageHeight),
    class_mode="binary",
    batch_size=batch_size
)

# Test images are only rescaled as well, so predictions are deterministic.
test_datagen = ImageDataGenerator(rescale=1./255)

# shuffle=False keeps the prediction order aligned with the rows of test_df.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory="./test1/",
    x_col='filename',
    y_col=None,
    target_size=(imageWidht,imageHeight),
    class_mode=None,
    batch_size=batch_size,
    shuffle=False
)

<font size = 4>**Creación del modelo** </font>

In [5]:
# NOTE(review): Keras image tensors are (height, width, channels). The order
# below matches the target_size used by the generators and is harmless while
# width and height are equal.
input_shape = (imageWidht, imageHeight, imageChannels)

# Convolutional stack: every convolution is followed by batch normalization,
# and each strided (downsampling) convolution is followed by dropout.
myModel = keras.Sequential(
    [
        layers.Conv2D(32, kernel_size = 4, strides = 2, activation="relu", input_shape = input_shape),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Conv2D(64, kernel_size = 4, activation="relu"),
        layers.BatchNormalization(),
        layers.Conv2D(64, kernel_size = 4, strides = 2, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Conv2D(128, kernel_size = 4, activation="relu"),
        layers.BatchNormalization(),
        layers.Conv2D(128, kernel_size = 4, padding='same', activation="relu"),
        layers.BatchNormalization(),
        layers.Conv2D(128, kernel_size = 4, strides = 2, activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Conv2D(256, kernel_size = 4, strides = 2, padding='same', activation="relu"),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        layers.Flatten(),
        layers.Dense(512, activation="relu"),
        layers.Dropout(0.3),
        # Two-way softmax: one probability per class; decoded later with argmax.
        layers.Dense(2, activation="softmax"),
    ]
   )

# The generators use class_mode="binary" and therefore yield one scalar label
# (0 or 1) per image. A two-unit softmax paired with binary_crossentropy would
# compare that single label against both output units; sparse categorical
# cross-entropy is the loss that matches integer class ids with a softmax output.
myModel.compile(optimizer='rmsprop', loss="sparse_categorical_crossentropy", metrics=["accuracy"])

myModel.summary()

<font size = 4>**Entrenamiento** </font>

In [None]:
# Stop when the validation loss has not improved by at least min_delta for
# `patience` consecutive epochs, then roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001, # minimum amount of change to count as an improvement
    patience=5, # how many epochs to wait before stopping
    restore_best_weights=True,
)

batch_size = 100
epoch = 50
lb = "category"

# The generators already produce batches, so `batch_size` is not passed to
# fit(); it is only used here to derive the number of steps per epoch.
history = myModel.fit(
    train_generator,
    validation_data=val_generator,
    validation_steps=X_val2.shape[0]//batch_size,
    steps_per_epoch=X_train2.shape[0]//batch_size,
    epochs=epoch,
    callbacks=[early_stopping],
    verbose=0, # suppress output since we'll plot the curves
)

# Learning curves: training vs. validation loss per epoch.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))

# Best accuracy reached on the training and validation sets.
print(max(history.history['accuracy']))
print(max(history.history['val_accuracy']))

<font size = 4>**Almacenamiento del modelo entrenado** </font>

In [None]:
# Persist the trained network twice: the complete model in one HDF5 file and
# the weights alone in a second file.
myModel.save(filepath='myModel.h5')
myModel.save_weights(filepath='myModel.hdf5')

<font size = 4>**Carga del modelo entrenado** </font>

In [None]:
# Reload through the same tensorflow.keras package that built and saved the
# model (imported at the top of the notebook) instead of the standalone keras
# package. load_model already restores the weights stored in the file, so no
# additional load_weights call on the same file is needed.
myModel = keras.models.load_model('myModel.h5')

<font size = 4>**Predicción** </font>

In [None]:
# Run over every batch the generator can produce. len(test_generator) is an
# integer that includes a final partial batch, so the number of predictions
# matches the number of test images even when it is not a multiple of the
# batch size.
predict = myModel.predict(test_generator, steps=len(test_generator))

# The index of the largest softmax output is the predicted class (0 = dog, 1 = cat).
test_df['category'] = np.argmax(predict, axis=-1)

# Show the first 18 test images in a 6x3 grid with their predicted class.
sample_test = test_df.head(18)
plt.figure(figsize=(12, 24))
# Number the panels by position rather than by DataFrame index label, so the
# grid stays valid even if test_df does not have a default 0..n-1 index.
for position, (_, row) in enumerate(sample_test.iterrows(), start=1):
    img = load_img("./test1/" + row['filename'], target_size=(imageWidht,imageHeight))
    plt.subplot(6, 3, position)
    plt.imshow(img)
    plt.xlabel("Prediccion: " + "{}".format(row['category']))
plt.tight_layout()
plt.show()