In [1]:
# Fuentes:
#
# - https://machinelearningmastery.com/weight-regularization-to-reduce-overfitting-of-deep-learning-models/

- Regularization: 
 - Weight regularization
 - Dropout
 - Batch normalization
 - Data Augmentation (más adelante)


### Weight regularization

- El modelo más sencillo (el que tenga menos parámetros) que funcione bien
- Solucionar los problemas de overfitting en deep learning no es una tarea sencilla
- Generalmente se prefieren los modelos con parámetros pequeños, por lo que se penalizan los valores grandes
 - Coste proporcional al valor absoluto de los pesos (L1) o proporcional al cuadrado de su valor (L2)
 
 
### Dropout
 
- Desactivar neuronas aleatoriamente durante el entrenamiento (i.e. poner a 0 algunas de las output features de una capa)
- El dropout rate determina la proporción de neuronas a desactivar
- Funciona al eliminar patrones espurios (evita overfitting)

### Batch normalization

- Resta la media y divide entre la desviación típica de los datos del batch -> Estandarización de features

![image-2.png](attachment:image-2.png)

- Se suele situar en la salida de la capa (tras la función de activación)
- Mejora el entrenamiento de la red -> Mayor eficiencia computacional
- Permite el uso de valores de learning rate más altos
- Se puede considerar como un método de regularización aunque no es su principal ventaja. Dropout y L1/L2 aportan mayor regularización
- Existen variantes como Layer normalization, Instance normalization o Group normalization (CNNs)
![image.png](attachment:image.png)

# Práctica con REUTERS

### Carga de los datos

In [None]:
import numpy as np
import tensorflow as tf
# Size cap handed to the loader as `num_words`; later cells also use it as the
# width of the encoded input vectors.
WORD_LIMIT = 10000

# Handle to the Reuters dataset module shipped with Keras.
reuters = tf.keras.datasets.reuters

(training_data, training_labels), (testing_data, testing_labels) = reuters.load_data(
    num_words=WORD_LIMIT
)

# Sanity check: shapes of the four arrays, printed on a single line.
print(*(
    split.shape
    for split in (training_data, training_labels, testing_data, testing_labels)
))

### Inspección del conjunto de datos

In [None]:
# The samples are sequences of integers. reuters.get_word_index() returns the
# word -> index mapping, which is inverted here so a sample can be read as text.
word_index = reuters.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}

# Each stored index is shifted down by 3 before the lookup (presumably to skip
# indices reserved by the loader -- confirm against the Keras docs); indices
# without a matching word are rendered as '?'.
decoded = ' '.join(
    reverse_word_index.get(token - 3, '?') for token in training_data[5248]
)
decoded

### One hot encoding

In [None]:
# Función auxiliar para representar las palabras en números (one hot encoding)
import numpy as np
# one hot encoding del input, vector con cada indice indicando si una palabra esta presente
def one_hot_encode(data, num_words=None):
    """Encode sequences of word indices as fixed-width multi-hot vectors.

    Args:
        data: Sized iterable of index sequences (one sequence per sample).
            Every index must be smaller than ``num_words``.
        num_words: Width of each encoded vector. When omitted, the
            module-level ``WORD_LIMIT`` constant is used, which matches the
            previous hard-coded behavior.

    Returns:
        A float NumPy array of shape ``(len(data), num_words)`` in which
        ``result[i, j]`` is 1 if index ``j`` occurs in ``data[i]`` and 0
        otherwise. Repeated indices are not counted, only marked.

    Raises:
        IndexError: If a sequence contains an index >= ``num_words``.
    """
    if num_words is None:
        # Looked up at call time so the function still follows the notebook's
        # global vocabulary limit by default.
        num_words = WORD_LIMIT
    encoded = np.zeros((len(data), num_words))
    for row, word_indices in enumerate(data):
        # Fancy indexing: set every column listed in this sample to 1 at once.
        encoded[row, word_indices] = 1
    return encoded

In [None]:
# Convertimos palabras en números
# Encode both splits with the helper defined above, then report their shapes
# (one per line) as a sanity check.
x_train, x_test = one_hot_encode(training_data), one_hot_encode(testing_data)
print(x_train.shape, x_test.shape, sep="\n")

In [None]:
# Display the encoded vector of the test sample at index 3.
print(x_test[3])

In [None]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the integer class labels.
# The class count is computed once from both splits and passed explicitly:
# without it, to_categorical infers the width from each array separately, so
# the two matrices could end up with different numbers of columns if one split
# does not contain the highest class id.
num_classes = int(max(training_labels.max(), testing_labels.max())) + 1
y_train = to_categorical(training_labels, num_classes=num_classes)
y_test = to_categorical(testing_labels, num_classes=num_classes)
print(y_train.shape)
print(y_test.shape)

### Red Neuronal

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Baseline MLP topology: two ReLU hidden layers (128 and 64 units) on top of a
# WORD_LIMIT-wide input, followed by a 46-unit softmax layer that spreads one
# unit of probability across the output classes.
model = Sequential([
    Dense(128, activation='relu', input_shape=(WORD_LIMIT,)),
    Dense(64, activation='relu'),
    Dense(46, activation='softmax'),
])

In [None]:
# Compile the baseline model: Adam optimizer, categorical cross-entropy loss
# (suited to multi-class classification) and accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the baseline model; 20% of the training data is held out for
# validation. The returned History object is kept in H for plotting.
H = model.fit(
    x_train,
    y_train,
    epochs=15,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# Análisis del proceso de entrenamiento

import matplotlib.pyplot as plt
# Plot loss and accuracy curves for the training and validation data.
plt.style.use("ggplot")
plt.figure()
# The x axis is derived from the recorded history rather than a hard-coded
# np.arange(0, 20): training runs for 15 epochs, so a 20-point axis would not
# match the 15 recorded values and plt.plot would raise a ValueError.
epochs = np.arange(len(H.history["loss"]))
for key, label in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    plt.plot(epochs, H.history[key], label=label)
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()

In [None]:
# Evaluate the baseline model on the test split.
# No separate predict() call is made: its result was never used, and
# evaluate() computes the loss and the compiled metric on x_test directly.
print("[INFO]: Evaluando red neuronal...")
loss, accuracy = model.evaluate(x_test, y_test)
print('Loss {}, accuracy {}'.format(loss,accuracy))

## Regularization

#### Weight regularization L1/L2

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers


# Same topology as the baseline MLP, but both hidden layers add an L2 penalty
# (factor 0.01) on their kernel weights. The output layer is left unregularized.
model_reg = Sequential([
    Dense(
        128,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.01),
        input_shape=(WORD_LIMIT,),
    ),
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    Dense(46, activation='softmax'),
])

In [None]:
# Compile the L2-regularized model with the same settings as the baseline:
# Adam, categorical cross-entropy (multi-class classification), accuracy.
model_reg.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the L2-regularized model; H is overwritten with its training history.
H = model_reg.fit(
    x_train,
    y_train,
    epochs=15,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
import matplotlib.pyplot as plt

# Plot loss and accuracy curves for the L2-regularized model.
plt.style.use("ggplot")
plt.figure()
# The x axis is derived from the recorded history rather than a hard-coded
# np.arange(0, 20): training runs for 15 epochs, so a 20-point axis would not
# match the 15 recorded values and plt.plot would raise a ValueError.
epochs = np.arange(len(H.history["loss"]))
for key, label in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    plt.plot(epochs, H.history[key], label=label)
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()

In [None]:
# Evaluate the L2-regularized model on the test split.
# No separate predict() call is made: its result was never used, and
# evaluate() computes the loss and the compiled metric on x_test directly.
print("[INFO]: Evaluando red neuronal...")
loss, accuracy = model_reg.evaluate(x_test, y_test)
print('Loss {}, accuracy {}'.format(loss,accuracy))

#### Dropout

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Baseline topology with a Dropout layer (rate 0.75) after each hidden layer.
model_drop = Sequential([
    Dense(128, activation='relu', input_shape=(WORD_LIMIT,)),
    Dropout(0.75),
    Dense(64, activation='relu'),
    Dropout(0.75),
    Dense(46, activation='softmax'),
])

In [None]:
# Compile the dropout model with the same settings as the baseline:
# Adam, categorical cross-entropy (multi-class classification), accuracy.
model_drop.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the dropout model; H is overwritten with its training history.
H = model_drop.fit(
    x_train,
    y_train,
    epochs=15,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
import matplotlib.pyplot as plt

# Plot loss and accuracy curves for the dropout model.
plt.style.use("ggplot")
plt.figure()
# The x axis is derived from the recorded history rather than a hard-coded
# np.arange(0, 20): training runs for 15 epochs, so a 20-point axis would not
# match the 15 recorded values and plt.plot would raise a ValueError.
epochs = np.arange(len(H.history["loss"]))
for key, label in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    plt.plot(epochs, H.history[key], label=label)
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()

In [None]:
# Evaluate the dropout model on the test split.
# No separate predict() call is made: its result was never used, and
# evaluate() computes the loss and the compiled metric on x_test directly.
print("[INFO]: Evaluando red neuronal...")
loss, accuracy = model_drop.evaluate(x_test, y_test)
print('Loss {}, accuracy {}'.format(loss,accuracy))

#### Batch Normalization

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization

# Each hidden Dense layer is followed by BatchNormalization (applied to the
# activated output) and then Dropout at rate 0.75.
# Dropout is not imported in this cell; it relies on the import made in the
# earlier dropout cell.
model_bn = Sequential([
    Dense(128, activation='relu', input_shape=(WORD_LIMIT,)),
    BatchNormalization(),
    Dropout(0.75),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.75),
    Dense(46, activation='softmax'),
])

In [None]:
# Compile the batch-normalized model with the same settings as the baseline:
# Adam, categorical cross-entropy (multi-class classification), accuracy.
model_bn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the batch-normalized model; H is overwritten with its training history.
H = model_bn.fit(
    x_train,
    y_train,
    epochs=15,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
import matplotlib.pyplot as plt

# Plot loss and accuracy curves for the batch-normalized model.
plt.style.use("ggplot")
plt.figure()
# The x axis is derived from the recorded history rather than a hard-coded
# np.arange(0, 20): training runs for 15 epochs, so a 20-point axis would not
# match the 15 recorded values and plt.plot would raise a ValueError.
epochs = np.arange(len(H.history["loss"]))
for key, label in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    plt.plot(epochs, H.history[key], label=label)
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()

In [None]:
# Evaluate the batch-normalized model on the test split.
# No separate predict() call is made: its result was never used, and
# evaluate() computes the loss and the compiled metric on x_test directly.
print("[INFO]: Evaluando red neuronal...")
loss, accuracy = model_bn.evaluate(x_test, y_test)
print('Loss {}, accuracy {}'.format(loss,accuracy))