In [1]:
import numpy
import matplotlib.pyplot as plt 
%matplotlib inline
plt.rcParams["figure.figsize"]=[6,6]

In [2]:
#Cargamos los datos 
#En deep learning no se suele hacer validacion cruzada (a menos que el tamaño del dataset y el tiempo de entrenamiento lo permita). En lugar de eso se hacen simples separaciones entre datos de entrenamiento y de validacion 

In [3]:
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [9]:
from ipywidgets import interact, IntSlider

In [10]:
@interact(i=IntSlider(min=0, max=50, sstep=1,value=1))
def dibujar_numero(i):
    plt.imshow(x_train[i], cmap="gray")
    plt.title("Numero {}".format(y_train[i]))

interactive(children=(IntSlider(value=1, description='i', max=50), Output()), _dom_classes=('widget-interact',…

In [11]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

In [12]:
numpy.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [13]:
x_train.shape

(60000, 28, 28)

In [14]:
x_train_plano=x_train.reshape(x_train.shape[0], 28*28)
x_test_plano=x_test.reshape(x_test.shape[0], 28*28)

In [15]:
x_train_plano.shape

(60000, 784)

In [16]:
from keras.utils.np_utils import to_categorical

In [18]:
y_train_one_hot=to_categorical(y_train)
y_test_one_hot=to_categorical(y_test)

In [20]:
modelo=Sequential()
modelo.add(Dense(50,activation="relu", input_shape=(784,)))
modelo.add(Dense(250, activation="relu"))
modelo.add(Dense(numpy.unique(y_train).shape[0], activation="softmax"))

modelo.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])

modelo.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 50)                39250     
_________________________________________________________________
dense_3 (Dense)              (None, 250)               12750     
_________________________________________________________________
dense_4 (Dense)              (None, 10)                2510      
Total params: 54,510
Trainable params: 54,510
Non-trainable params: 0
_________________________________________________________________


In [21]:
modelo.fit(x_train_plano, y_train_one_hot, epochs=30, batch_size=500, verbose=0);

In [22]:
resultados={}

In [23]:
modelo.metrics_names

['loss', 'accuracy']

In [24]:
evaluacion_train=modelo.evaluate(x_train_plano, y_train_one_hot)
evaluacion_train



[1.2923966646194458, 0.4696333408355713]

In [25]:
evaluacion_test=modelo.evaluate(x_test_plano, y_test_one_hot)
evaluacion_test



[1.3457087278366089, 0.46709999442100525]

In [26]:
resultados["sin_reguñarizacion"]=[evaluacion_train[1], evaluacion_test[1]]

In [27]:
#Regularizacion l1 o l2
#Keras permite regularizar los pesos, los sesgos (bias) y las activaciones de forma independiente, pasando el parametro kernel_regularizer, bias_regularizer y activity_regularizer respectivamente

#Keras tiene los penalizadores l1, l2, l1_l2 (elasticnet)

In [29]:
from keras import regularizers

modelo_l2=Sequential()
modelo_l2.add(Dense(50,activation="relu", input_shape=(784,)))
modelo_l2.add(Dense(250, activation="relu", kernel_regularizer=regularizers.l2(0.05)))
modelo_l2.add(Dense(numpy.unique(y_train).shape[0], activation="softmax"))

modelo_l2.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])

modelo_l2.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 50)                39250     
_________________________________________________________________
dense_9 (Dense)              (None, 250)               12750     
_________________________________________________________________
dense_10 (Dense)             (None, 10)                2510      
Total params: 54,510
Trainable params: 54,510
Non-trainable params: 0
_________________________________________________________________


In [30]:
modelo_l2.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=30, batch_size=500)

<tensorflow.python.keras.callbacks.History at 0x22c25e932c8>

In [31]:
acc_train=modelo_l2.evaluate(x_train_plano, y_train_one_hot)[1]
acc_train



0.682033360004425

In [32]:
acc_test=modelo_l2.evaluate(x_test_plano, y_test_one_hot)[1]
acc_test



0.6876000165939331

In [33]:
resultados["regularizacion_l2"]=[acc_train, acc_test]

In [34]:
#Hacemos lo mismo pero con regularizacion l1

In [36]:
modelo_l1=Sequential()
modelo_l1.add(Dense(50,activation="relu", input_shape=(784,)))
modelo_l1.add(Dense(250, activation="relu", kernel_regularizer=regularizers.l1(0.05)))
modelo_l1.add(Dense(numpy.unique(y_train).shape[0], activation="softmax"))

modelo_l1.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])

modelo_l1.summary()

modelo_l1.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=30, batch_size=500)

acc_train=modelo_l1.evaluate(x_train_plano, y_train_one_hot)[1]
acc_train

acc_test=modelo_l1.evaluate(x_test_plano, y_test_one_hot)[1]
acc_test

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 50)                39250     
_________________________________________________________________
dense_15 (Dense)             (None, 250)               12750     
_________________________________________________________________
dense_16 (Dense)             (None, 10)                2510      
Total params: 54,510
Trainable params: 54,510
Non-trainable params: 0
_________________________________________________________________


0.23559999465942383

In [37]:
resultados["regularizacion_l1"]=[acc_train, acc_test]

In [38]:
resultados

{'sin_reguñarizacion': [0.4696333408355713, 0.46709999442100525],
 'regularizacion_l2': [0.682033360004425, 0.6876000165939331],
 'regularizacion_l1': [0.22808332741260529, 0.23559999465942383]}

In [39]:
#Dropout
#Ahora vamos a añadir Dropout a la misma red. Dropout simplemente ignora un porcentaje p de las unidades (neuronas) en cada iteracion del entrenamiento (forward prop y backprop)

In [40]:
from keras.layers import Dropout

In [43]:
modelo_dropout=Sequential()
modelo_dropout.add(Dense(50,activation="relu", input_shape=(784,)))
modelo_dropout.add(Dense(250, activation="relu"))
modelo_dropout.add(Dropout(0.2))
modelo_dropout.add(Dense(numpy.unique(y_train).shape[0], activation="softmax"))

modelo_dropout.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])

modelo_dropout.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 50)                39250     
_________________________________________________________________
dense_18 (Dense)             (None, 250)               12750     
_________________________________________________________________
dropout (Dropout)            (None, 250)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 10)                2510      
Total params: 54,510
Trainable params: 54,510
Non-trainable params: 0
_________________________________________________________________


In [44]:
#Vemos que el numero de parametros a entrenar es el mismo, Dropout no añade pesos a la red

In [45]:
modelo_dropout.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=30, batch_size=500)

acc_train=modelo_dropout.evaluate(x_train_plano, y_train_one_hot)[1]
acc_train

acc_test=modelo_dropout.evaluate(x_test_plano, y_test_one_hot)[1]
acc_test



0.6751999855041504

In [47]:
resultados["regularizacion_dropout"]=[acc_train, acc_test]

In [48]:
#Normalizacion en bloques (batch normalization)

In [50]:
from keras.layers import BatchNormalization

In [51]:
modelo_bnorm=Sequential()
modelo_bnorm.add(Dense(50,activation="relu", input_shape=(784,)))
modelo_bnorm.add(Dense(250, activation="relu"))
modelo_bnorm.add(BatchNormalization())
modelo_bnorm.add(Dense(numpy.unique(y_train).shape[0], activation="softmax"))

modelo_bnorm.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])

modelo_bnorm.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 50)                39250     
_________________________________________________________________
dense_21 (Dense)             (None, 250)               12750     
_________________________________________________________________
batch_normalization (BatchNo (None, 250)               1000      
_________________________________________________________________
dense_22 (Dense)             (None, 10)                2510      
Total params: 55,510
Trainable params: 55,010
Non-trainable params: 500
_________________________________________________________________


In [52]:
modelo_bnorm.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=30, batch_size=500)

acc_train=modelo_bnorm.evaluate(x_train_plano, y_train_one_hot)[1]
acc_train

acc_test=modelo_bnorm.evaluate(x_test_plano, y_test_one_hot)[1]
acc_test



0.9624000191688538

In [53]:
resultados["batchh_normalization"]=[acc_train, acc_test]

In [54]:
#Batch Normalization + Dropout
#Una practiva comun es poner normalizacion batch y dropout juntos en una capa

In [55]:
modelo_bnorm_dropout=Sequential()
modelo_bnorm_dropout.add(Dense(50,activation="relu", input_shape=(784,)))
modelo_bnorm_dropout.add(Dense(250, activation="relu"))
modelo_bnorm_dropout.add(BatchNormalization())
modelo_bnorm_dropout.add(Dropout(0.2))
modelo_bnorm_dropout.add(Dense(numpy.unique(y_train).shape[0], activation="softmax"))

modelo_bnorm_dropout.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])

modelo_bnorm_dropout.summary()

modelo_bnorm_dropout.fit(x_train_plano, y_train_one_hot, verbose=0, epochs=30, batch_size=500)

acc_train=modelo_bnorm_dropout.evaluate(x_train_plano, y_train_one_hot)[1]
acc_train

acc_test=modelo_bnorm_dropout.evaluate(x_test_plano, y_test_one_hot)[1]
acc_test

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_23 (Dense)             (None, 50)                39250     
_________________________________________________________________
dense_24 (Dense)             (None, 250)               12750     
_________________________________________________________________
batch_normalization_1 (Batch (None, 250)               1000      
_________________________________________________________________
dropout_1 (Dropout)          (None, 250)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 10)                2510      
Total params: 55,510
Trainable params: 55,010
Non-trainable params: 500
_________________________________________________________________


0.9585999846458435

In [56]:
resultados["batch_normalization + dropout"]=[acc_train, acc_test]

In [57]:
#Ahora metemos los resultados en un dataframe para inspeccionarlos

In [59]:
import pandas

resultados=pandas.DataFrame(resultados).T
resultados.columns=["acc_train", "acc_test"]
resultados["pct_diff"] = 1 - (resultados.acc_test/resultados.acc_train)

In [60]:
resultados.sort_values(by="pct_diff")

Unnamed: 0,acc_train,acc_test,pct_diff
regularizacion_l1,0.228083,0.2356,-0.032956
regularizacion_dropout,0.667667,0.6752,-0.011283
regularizacion_l2,0.682033,0.6876,-0.008162
sin_reguñarizacion,0.469633,0.4671,0.005394
batch_normalization + dropout,0.9658,0.9586,0.007455
batchh_normalization,0.9702,0.9624,0.00804
