<h1><center> Trabajo Práctico 3: Fashion detector </center></h1>

In [None]:
# lib para trabajar con arrays
import numpy as np
# lib para trabajar con dataframe
import pandas as pd 

# lib que usamos para mostrar las imágenes
import matplotlib.pyplot as plt

# libs que usamos para construir y entrenar redes neuronales, y que además tiene utilidades para leer sets de 
# imágenes
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout, Convolution2D, MaxPooling2D, Flatten
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator

# libs que usamos para tareas generales de machine learning. En este caso, métricas
from sklearn.metrics import accuracy_score, confusion_matrix

# configuración para que las imágenes se vean dentro del notebook
%matplotlib inline
import tensorflow as tf
from tensorflow import keras
# graficar
import plotly.express as px

In [None]:
# Fix the NumPy and TensorFlow RNG seeds so repeated runs give the same model results.
np.random.seed(2)
tf.random.set_seed(2)

# load_data() is unpacked into two (images, labels) pairs.
# Naming used throughout this notebook: x_train / x_lab are the training images
# and labels; y_validation / y_lab are the validation images and labels.
fashion_mnist = keras.datasets.fashion_mnist
(x_train, x_lab), (y_validation, y_lab) = fashion_mnist.load_data()

<h1><center>1. Análisis exploratorio</center></h1>

Presentación → 1.

In [None]:
# Presentación → 1.1 Volumetría de los datos
#Exploración de datos:
#x_train.shape
#Tiene 60000 imagenes de 28x28 pixeles.

In [None]:
# Give every image an explicit channel axis (28x28 -> 28x28x1) and cast to float32.
x_train = x_train.reshape(len(x_train), 28, 28, 1).astype('float32')
y_validation = y_validation.reshape(len(y_validation), 28, 28, 1).astype('float32')
print(f"x_train shape {x_train.shape}")
print(f"y_validation shape {y_validation.shape}")

El data set de train cuenta con 60000 imágenes de 28x28 pixeles.

El data set de validation cuenta con 10000 imágenes de 28x28 pixeles.

El target está compuesto por 10 clases, con valor entre 0 y 9.
A continuación, detallaremos que significa cada valor:

| Label | Clase |
| -- | --- |
| 0 | T-shirt/top |
| 1 | Trouser |
| 2 | Pullover|
| 3 | Dress |
| 4 | Coat |
| 5 | Sandal |
| 6 | Shirt |
| 7 | Sneaker |
| 8 | Bag |
| 9 | Ankle boot |


In [None]:
# Spanish display names of the ten classes, in label order (position 0 to 9).
clases = (
    "camiseta/top",
    "Pantalón",
    "Pulover",
    "Vestido",
    "Saco",
    "Sandalia",
    "Camisa",
    "Zapatilla",
    "Bolso",
    "Botas",
)

In [None]:
# Lookup table from numeric label (0-9) to its Spanish display name.
d_clases_labels = dict(enumerate([
    "camiseta/top",
    "Pantalón",
    "Pulover",
    "Vestido",
    "Saco",
    "Sandalia",
    "Camisa",
    "Zapatilla",
    "Bolso",
    "Botas",
]))

### Mostramos algunas imágenes del data set de train sin normalizar

In [None]:
def sample_images(images, labels, rows=5, cols=5):
    """Show the first ``rows * cols`` images in a grid, captioned with their class name.

    Later cells call ``sample_images(images, labels)`` on the normalized arrays,
    so the plotting loop lives in this helper instead of being written inline.

    Args:
        images: Indexable collection of images accepted by ``plt.imshow``.
        labels: Indexable collection of integer class labels aligned with
            ``images``; each one is looked up in ``d_clases_labels``.
        rows: Number of grid rows (default 5).
        cols: Number of grid columns (default 5).
    """
    plt.figure(figsize=(10, 10))
    # Never index past the end when fewer than rows * cols images are given.
    for i in range(min(rows * cols, len(images))):
        plt.subplot(rows, cols, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i], cmap=plt.cm.binary)
        plt.xlabel(d_clases_labels[labels[i]])
    plt.show()


# Raw (not yet normalized) training images.
sample_images(x_train, x_lab)

### Mostramos algunas imágenes del data set de validation sin normalizar

In [None]:
# 5x5 grid with the first 25 raw validation images, each captioned with its class name.
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for ax, picture, label in zip(axes.flat, y_validation, y_lab):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(picture, cmap=plt.cm.binary)
    ax.set_xlabel(d_clases_labels[label])
plt.show()

<h3><center>Normalizamos las imágenes</center></h3>

In [None]:
# Image values lie between 0 and 255, so the training images are normalized here.
# NOTE(review): keras.utils.normalize is documented as an L2 normalization along
# `axis`, not a division by 255 — confirm this is the intended scaling.
x_train_m = keras.utils.normalize(x_train, axis = 1)

In [None]:
# Image values lie between 0 and 255, so the validation images are normalized here.
# NOTE(review): keras.utils.normalize is documented as an L2 normalization along
# `axis`, not a division by 255 — confirm this is the intended scaling.
y_validation_m = keras.utils.normalize(y_validation, axis = 1)

### Mostramos algunas imágenes del data set de train normalizadas

In [None]:
#Andi - A mi gusto, mostrar 10 veces las mismas imagenes no va, medio que es ejecutar al pedo

In [None]:
# Normalized training images.
# Relies on a `sample_images(images, labels)` helper being available from an earlier cell.
sample_images(x_train_m,x_lab)

### Mostramos algunas imágenes del data set de validation normalizadas

In [None]:
# Normalized validation images.
# Relies on a `sample_images(images, labels)` helper being available from an earlier cell.
sample_images(y_validation_m, y_lab)

### Distribución de la variable a predecir

In [None]:
# Presentación → 1.3 Distribución de la variable a predecir

In [None]:
# Single-column DataFrame ('x_lab') holding the training labels, with each
# numeric class id replaced by its display name.
df_train = (
    pd.Series(x_lab, name='x_lab')
    .map(d_clases_labels, na_action=None)
    .to_frame()
)

In [None]:
# Presentation -> 1.3 Distribution of the target variable (training set).
# category_orders takes, per column, a list of category values. The class names
# are passed in label order (0-9) so the bars follow that order; iterating the
# dict itself would yield the numeric keys, which never match the mapped names.
fig = px.histogram(
    df_train,
    x="x_lab",
    color="x_lab",
    title='Distribución de la variable en train',
    labels={'x_lab': 'Target'},
    category_orders={'x_lab': list(d_clases_labels.values())},
    text_auto=True,
)
fig.update_layout(bargap=0.2)
fig.show()

In [None]:
# Single-column DataFrame ('y_lab') holding the validation labels, with each
# numeric class id replaced by its display name.
df_validation = (
    pd.Series(y_lab, name='y_lab')
    .map(d_clases_labels, na_action=None)
    .to_frame()
)

In [None]:
# Presentation -> 1.3 Distribution of the target variable (validation set).
# category_orders takes, per column, a list of category values. The class names
# are passed in label order (0-9) so the bars follow that order; iterating the
# dict itself would yield the numeric keys, which never match the mapped names.
fig = px.histogram(
    df_validation,
    x="y_lab",
    color="y_lab",
    title='Distribución de la variable en validation',
    labels={'y_lab': 'Target'},
    category_orders={'y_lab': list(d_clases_labels.values())},
    text_auto=True,
)
fig.update_layout(bargap=0.2)
fig.show()

<h1><center>2. Machine Learning</center></h1>

<h3>2. I) MLP simple</h3>

In [None]:
# Baseline MLP: flatten each 28x28x1 image, one hidden layer of 200 ReLU units,
# then a 10-unit softmax output (one unit per class).
model = Sequential([
    Flatten(input_shape=(28, 28, 1)),
    Dense(200, activation="relu"),
    Dense(10, activation="softmax"),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for 10 epochs, with validation metrics computed on (y_validation, y_lab).
# Note: this model is fed x_train / y_validation, not the normalized *_m arrays.
history = model.fit(
    x_train,
    x_lab,
    epochs=10,
    validation_data=(y_validation, y_lab),
)

In [None]:
# Evaluate the MLP on the validation images and labels; the result is shown as the cell output.
model.evaluate(y_validation,y_lab)

In [None]:
# Per-epoch metrics recorded by model.fit for the MLP.
history_dict = history.history

# List the recorded metric names; these keys are what the plotting cell below indexes.
print(history_dict.keys())

In [None]:
def plot_train_instrumentation(epochs, data, train_param, val_param):
    """Plot a training metric and its validation counterpart against the epoch axis.

    Args:
        epochs: X-axis values, one per entry of each metric series.
        data: Mapping from metric name to its sequence of per-epoch values.
        train_param: Key in ``data`` for the training curve (drawn in green);
            also used as the y-axis label.
        val_param: Key in ``data`` for the validation curve (drawn in red).
    """
    fig, ax = plt.subplots(figsize=(10, 7))

    curves = (
        (train_param, 'g', f'Training ({train_param})'),
        (val_param, 'red', f'Validation ({val_param})'),
    )
    for key, line_format, legend_text in curves:
        ax.plot(epochs, data[key], line_format, label=legend_text)

    ax.set_title("Training performance")
    ax.set_xlabel('Epochs')
    ax.set_ylabel(train_param)

    ax.legend()
    plt.show()

In [None]:
# One x value per recorded epoch, starting at 1.
epochs = range(1, 1 + len(history_dict['accuracy']))

# Accuracy first, then loss; each training metric is paired with its 'val_' counterpart.
for train_metric in ('accuracy', 'loss'):
    plot_train_instrumentation(epochs, history_dict, train_metric, f'val_{train_metric}')

### Visualizamos la predicción para cada clase en particular

In [None]:
def plot_image(i, predictions_array, true_label, img):
    """Draw image ``i`` with a caption comparing the predicted and the true class.

    Args:
        i: Index of the sample inside ``true_label`` and ``img``.
        predictions_array: Per-class scores for this one sample (used as given,
            not indexed by ``i``).
        true_label: Sequence of true labels; entry ``i`` is used.
        img: Sequence of images; entry ``i`` is drawn.

    The caption reads "<predicted> <score>% (<true>)" and is blue when the
    predicted class equals the true one, red otherwise.
    """
    actual = true_label[i]
    picture = img[i]

    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture, cmap=plt.cm.binary)

    predicted_label = np.argmax(predictions_array)
    color = 'blue' if predicted_label == actual else 'red'
    caption = "{} {:2.0f}% ({})".format(
        d_clases_labels[predicted_label],
        100 * np.max(predictions_array),
        d_clases_labels[actual],
    )
    plt.xlabel(caption, color=color)

In [None]:
def plot_value_array(i, predictions_array, true_label):
    """Draw a 10-bar chart of the class scores for one sample.

    Args:
        i: Index of the sample inside ``true_label``.
        predictions_array: Per-class scores for this one sample (used as given,
            not indexed by ``i``).
        true_label: Sequence of true labels; entry ``i`` is used.
    """
    actual = true_label[i]

    plt.grid(False)
    plt.xticks(range(10))
    plt.yticks([])
    bars = plt.bar(range(10), predictions_array, color="#777777")
    plt.ylim([0, 1])

    # The predicted bar is coloured red first and the true bar blue afterwards,
    # so a correct prediction ends up blue.
    bars[np.argmax(predictions_array)].set_color('red')
    bars[actual].set_color('blue')

In [None]:
# MLP outputs for every validation image; row i is later passed to the plotting
# helpers as the per-class scores of sample i.
predictions = model.predict(y_validation)

In [None]:
# Show the first num_rows * num_cols validation images next to their score bars.
# Correct predictions are drawn in blue, incorrect ones in red.
num_rows, num_cols = 5, 3
num_images = num_rows * num_cols

plt.figure(figsize=(2 * 2 * num_cols, 2 * num_rows))
for i in range(num_images):
    # Each sample takes two adjacent grid cells: image on the left, bars on the right.
    left_cell = 2 * i + 1
    plt.subplot(num_rows, 2 * num_cols, left_cell)
    plot_image(i, predictions[i], y_lab, y_validation)
    plt.subplot(num_rows, 2 * num_cols, left_cell + 1)
    plot_value_array(i, predictions[i], y_lab)
plt.tight_layout()
plt.show()

<h3>2. II) Red Convolucional</h3>

In [None]:
# First CNN: two 3x3 convolutions with dropout, a 2x2 max-pool, then two small
# tanh dense layers and a 10-unit softmax classifier.
model_rc = Sequential()

# Input shape is image_height x image_width x channels = 28x28x1.
# 3x3x1 kernels, 'same' padding keeps the spatial size: output 28x28x10.
model_rc.add(Convolution2D(input_shape=(28, 28, 1), filters=10, kernel_size=(3, 3),
                           strides=1, activation='relu', padding='same'))
model_rc.add(Dropout(0.2))

# 3x3x10 kernels with no padding argument (Keras default is 'valid'): output 26x26x8.
model_rc.add(Convolution2D(filters=8, kernel_size=(3, 3), strides=1, activation='relu'))
model_rc.add(Dropout(0.2))

# 2x2 pooling halves each spatial dimension: output 13x13x8.
model_rc.add(MaxPooling2D(pool_size=(2, 2)))

# Flattened output: 13 * 13 * 8 = 1352 values.
model_rc.add(Flatten())

# Two dense layers of 10 tanh units, each followed by dropout.
model_rc.add(Dense(10, activation='tanh'))
model_rc.add(Dropout(0.2))
model_rc.add(Dense(10, activation='tanh'))
model_rc.add(Dropout(0.2))

# Output layer: one softmax unit per class.
model_rc.add(Dense(10, activation='softmax'))

model_rc.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the first CNN.
model_rc.summary()

In [None]:
# Inspect what a single element of the normalized training set looks like.
x_train_m[0]

In [None]:
# Inspect what a single element of the training labels looks like.
x_lab[0]

In [None]:
# Reseed NumPy and TensorFlow so the model results are the same on every run.
# NOTE(review): model_rc is constructed in an earlier cell, so this reseed
# presumably does not affect its initial weights — confirm.
np.random.seed(3)
tf.random.set_seed(3)

In [None]:
# Reference: https://programmerclick.com/article/19151150055/
# Turn the in-memory arrays into a tf.data.Dataset.
# Earlier attempt, kept for reference:
#train_dataset = tf.data.Dataset.from_tensors((x_train_m,x_lab))
# Pairs the normalized training images with their labels, in batches of 64.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train_m,x_lab)).batch(64)

In [None]:
# Turn the in-memory validation arrays into a tf.data.Dataset, in batches of 64.
# The normalized images (y_validation_m) are used so that validation inputs are
# on the same scale as train_dataset, which is built from x_train_m; feeding the
# raw 0-255 images here would make the validation metrics meaningless.
test_dataset = tf.data.Dataset.from_tensor_slices((y_validation_m, y_lab)).batch(64)

In [None]:
# Train the first CNN for 20 epochs, validating on test_dataset after each epoch.
# batch_size is intentionally not passed: train_dataset is already batched
# (.batch(64)), and the Keras Model.fit documentation says not to specify
# batch_size when the input is a dataset.
history_rc = model_rc.fit(train_dataset, epochs=20, validation_data=test_dataset)

In [None]:
# no hace falta hacerlo
#model_rc.evaluate(test_dataset)

In [None]:
# Per-epoch metrics recorded by model_rc.fit.
history_rc_dict = history_rc.history

# List the recorded metric names; these keys are what the plotting cell below indexes.
print(history_rc_dict.keys())

In [None]:
# One x value per recorded epoch, starting at 1.
epochs = range(1, 1 + len(history_rc_dict['accuracy']))

# Accuracy first, then loss; each training metric is paired with its 'val_' counterpart.
for train_metric in ('accuracy', 'loss'):
    plot_train_instrumentation(epochs, history_rc_dict, train_metric, f'val_{train_metric}')

In [None]:
# función para plotear los resultados 
#def plt_history(history_fit):
#    plt.plot(history_fit.history['accuracy'], label='train')
#    plt.plot(history_fit.history['val_accuracy'], label='validation')
#    plt.title('Accuracy sobre las épocas de train')
#    plt.ylabel('Accuracy')
#    plt.xlabel('épocas')
#    plt.legend(loc='upper left')
#    plt.show()

In [None]:
#plt_history(history_rc)

In [None]:
# anda 
#history_rc_2 = model_rc.fit(
#    train_dataset,
#    epochs=20,
    #batch_size=50
    #validation_data=test_dataset,
#)

In [None]:
#este esta mal
#model_rc.evaluate(test_dataset)

In [None]:
# plt.plot(history_rc_2.history['accuracy'], label='train')
# plt.plot(history_rc_2.history['val_accuracy'], label='validation')
# plt.title('Accuracy sobre las épocas de train')
# plt.ylabel('Accuracy')
# plt.xlabel('épocas')
# plt.legend(loc='upper left')
# plt.show()

<h3>2. III) Red Convolucional 2</h3>

#### Probamos otra red convolucional para ver que no sobreentrene

In [None]:
# Second, smaller CNN: same layout as the first one but with fewer filters and
# narrower dense layers.
model_rc_2 = Sequential()

# Input shape is image_height x image_width x channels = 28x28x1.
# 3x3x1 kernels, 'same' padding keeps the spatial size: output 28x28x7.
model_rc_2.add(Convolution2D(input_shape=(28, 28, 1), filters=7, kernel_size=(3, 3),
                             strides=1, activation='relu', padding='same'))
model_rc_2.add(Dropout(0.2))

# 3x3x7 kernels with no padding argument (Keras default is 'valid'): output 26x26x6.
model_rc_2.add(Convolution2D(filters=6, kernel_size=(3, 3), strides=1, activation='relu'))
model_rc_2.add(Dropout(0.2))

# 2x2 pooling halves each spatial dimension: output 13x13x6.
model_rc_2.add(MaxPooling2D(pool_size=(2, 2)))

# Flattened output: 13 * 13 * 6 = 1014 values.
model_rc_2.add(Flatten())

# Two dense layers of 8 tanh units, each followed by dropout.
model_rc_2.add(Dense(8, activation='tanh'))
model_rc_2.add(Dropout(0.2))
model_rc_2.add(Dense(8, activation='tanh'))
model_rc_2.add(Dropout(0.2))

# Output layer: one softmax unit per class.
model_rc_2.add(Dense(10, activation='softmax'))

model_rc_2.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the second CNN.
model_rc_2.summary()

In [None]:
# Reseed NumPy and TensorFlow so the model results are the same on every run.
# NOTE(review): model_rc_2 is constructed in an earlier cell, so this reseed
# presumably does not affect its initial weights — confirm.
np.random.seed(6)
tf.random.set_seed(6)

In [None]:
# Train the second CNN for 10 epochs, validating on test_dataset after each epoch.
# batch_size is intentionally not passed: train_dataset is already batched
# (.batch(64)), and the Keras Model.fit documentation says not to specify
# batch_size when the input is a dataset.
history_rc_2 = model_rc_2.fit(
    train_dataset,
    epochs=10,
    validation_data=test_dataset,
)

In [None]:
# Plot the training curves of the second CNN with plot_train_instrumentation,
# the plotting helper defined earlier in this notebook. The previous call
# targeted plt_history, which only exists as commented-out code and therefore
# raised a NameError.
history_rc_2_dict = history_rc_2.history
# Separate name so the `epochs` range used by other cells is left untouched.
epochs_rc_2 = range(1, len(history_rc_2_dict['accuracy']) + 1)

plot_train_instrumentation(epochs_rc_2, history_rc_2_dict, 'accuracy', 'val_accuracy')
plot_train_instrumentation(epochs_rc_2, history_rc_2_dict, 'loss', 'val_loss')

Técnica de aumentación de datos

In [None]:
# Constructs an ImageDataGenerator with every option written out explicitly.
# The result is not assigned to a name, so this cell only displays the object
# as its output; the augmentor actually used is created in the next cell.
tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=False, samplewise_center=False,
    featurewise_std_normalization=False, samplewise_std_normalization=False,
    zca_whitening=False, zca_epsilon=1e-06, rotation_range=0, width_shift_range=0.0,
    height_shift_range=0.0, brightness_range=None, shear_range=0.0, zoom_range=0.0,
    channel_shift_range=0.0, fill_mode='nearest', cval=0.0, horizontal_flip=False,
    vertical_flip=False, rescale=None, preprocessing_function=None,
    data_format=None, validation_split=0.0, dtype=None
)

In [None]:
# Augmentor configured with rotation, small width/height shifts, a brightness
# range and a 1/255 rescale; flips are disabled.
data_augmentor = ImageDataGenerator(
    rotation_range=50,
    width_shift_range=0.01,
    height_shift_range=0.01,
    rescale=1/255,
    brightness_range=(0.5, 1.5),
    horizontal_flip=False,
    vertical_flip=False,
)
# No data_augmentor.fit(x_train) call: per the Keras documentation, fit() is only
# required when featurewise_center, featurewise_std_normalization or
# zca_whitening are enabled, and none of them is set here.
augment = data_augmentor.flow(x_train[0:9], batch_size=1)

# Preview five augmented samples drawn from the first nine training images.
# The built-in next() is used instead of the iterator's .next() method, which
# is not available in every Keras version.
for i in range(1, 6):
    plt.subplot(1, 5, i)
    plt.imshow(next(augment).squeeze())
    plt.axis('off')
plt.show()

In [None]:
# Fit `model` for 20 more epochs on (x_train, x_lab), validating on (y_validation, y_lab).
# NOTE(review): this call passes x_train directly and never references
# data_augmentor, so no augmented samples reach the model. `model` is also the
# MLP instance that was already fitted earlier in the notebook, so this
# continues its training. Confirm whether data_augmentor.flow(...) on a fresh
# model was intended (the augmentor rescales by 1/255, so validation inputs
# would need the same scaling).
history_data_aumentada = model.fit(x_train, x_lab, epochs = 20, validation_data=(y_validation, y_lab))

In [None]:
# Evaluate `model` on the validation images and labels after the additional training run.
model.evaluate(y_validation,y_lab)

In [None]:
# Per-epoch metrics recorded by the fit call stored in history_data_aumentada.
history_au = history_data_aumentada.history

# List the recorded metric names; these keys are what the plotting cell below indexes.
print(history_au.keys())

In [None]:
# Build the epoch axis from this run's own history instead of reusing the global
# `epochs` left over from an earlier section, which only matches when both runs
# happen to have the same number of epochs.
epochs_au = range(1, len(history_au['accuracy']) + 1)

plot_train_instrumentation(epochs_au, history_au, 'accuracy', 'val_accuracy')
plot_train_instrumentation(epochs_au, history_au, 'loss', 'val_loss')