# Exámenes A1 y A3 de APR en el grupo 4CO21, turno 2

# Examen A1: ejercicio con Fashion-MNIST

Los ejercicios de las sesiones de laboratorio del bloque 1 se han basado en el corpus Fashion-MNIST. Hemos utilizado el código siguiente para leer Fashion-MNIST con su partición train-test estándar, normalizando las imágenes a $\,[0,1]\,$, y estableciendo una partición train-val-test mediante partición del train estándar en train-val.

In [None]:
import numpy as np; import matplotlib.pyplot as plt
import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'; import keras
input_dim = 784; num_classes = 10
(x_train_val, y_train_val), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train_val = x_train_val.reshape(-1, input_dim).astype("float32") / 255.0
x_test = x_test.reshape(-1, input_dim).astype("float32") / 255.0
y_train_val = keras.utils.to_categorical(y_train_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
x_train = x_train_val[:-10000]; x_val = x_train_val[-10000:]
y_train = y_train_val[:-10000]; y_val = y_train_val[-10000:]
print(f'train {x_train.shape} {y_train.shape} val {x_val.shape} {y_val.shape} test {x_test.shape} {y_test.shape}')

train (50000, 784) (50000, 10) val (10000, 784) (10000, 10) test (10000, 784) (10000, 10)



<p style="page-break-after:always;"></p>


En este ejercicio vamos a utilizar la arquitectura de MLP con una capa oculta de $\,800\,$ RELUs;  optimizador Adam por defecto, batch size $\,256;\;$ planificador ReduceLROnPlateau con factor $\,0.32\,$ y paciencia $\,5;\;$ y regularización mediante early stopping con paciencia $\,10$:

In [None]:
def run_exp(optimizer="adam"):
  M = keras.Sequential()
  M.add(keras.Input(shape=(784,)))
  M.add(keras.layers.Dense(units=800, activation='relu'))
  M.add(keras.layers.Dense(10, activation='softmax'))
  M.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
  reduce_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.32, patience=5, restore_best_weights=True)
  early_cb = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, min_delta=1e-5)
  M.fit(x_train, y_train, batch_size=256, epochs=100, verbose=0, validation_data=(x_val, y_val),
    callbacks=[reduce_cb, early_cb])
  _, acc = M.evaluate(x_test, y_test, verbose=0)
  return acc


<p style="page-break-after:always;"></p>


El optimizador Adam que venimos utilizando proporciona una precisión de $\,89.90\%,\,$ cuando repetimos el experimento 5 veces, dado que cada ejecución inicializada con pesos aleatorios proporciona resultados diferentes y necesitamos promediar. Sin embargo, existen [optimizadores alternativos](https://keras.io/api/optimizers/) como el [optimizador AdamW](https://keras.io/api/optimizers/adamw/) que podemos evaluar de la misma manera:



In [None]:
import time; start = time.time()
keras.utils.set_random_seed(seed=23);

num_exp = 5; acc = np.zeros(num_exp)
for exp in range(num_exp):
    acc[exp] = run_exp(optimizer="adamw")
print(f'Precisión media: {acc.mean():.2%}  Desviación estándar: {acc.std():.2%}')
print('Tiempo (hh:mm:ss):', time.strftime('%H:%M:%S', time.gmtime(time.time() - start)))

Precisión media: 89.96%  Desviación estándar: 0.12%
Tiempo (hh:mm:ss): 00:01:19


<p style="page-break-after:always;"></p>

**Ejercicio (2 puntos):** $\;$ escoge otro [optimizador](https://keras.io/api/optimizers/) repitiendo el experimento 5 veces para realizar la estimación de la precisión en el conjunto de test como en el experimento anterior.

In [13]:
import numpy as np; import matplotlib.pyplot as plt
import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'; import keras
input_dim = 784; num_classes = 10
(x_train_val, y_train_val), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train_val = x_train_val.reshape(-1, input_dim).astype("float32") / 255.0
x_test = x_test.reshape(-1, input_dim).astype("float32") / 255.0
y_train_val = keras.utils.to_categorical(y_train_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
x_train = x_train_val[:-10000]; x_val = x_train_val[-10000:]
y_train = y_train_val[:-10000]; y_val = y_train_val[-10000:]
print(f'train {x_train.shape} {y_train.shape} val {x_val.shape} {y_val.shape} test {x_test.shape} {y_test.shape}')

train (50000, 784) (50000, 10) val (10000, 784) (10000, 10) test (10000, 784) (10000, 10)


In [12]:
# We select Adamax
def run_exp(optimizer="adamax"):
  M = keras.Sequential()
  M.add(keras.Input(shape=(784,)))
  M.add(keras.layers.Dense(units=800, activation='relu'))
  M.add(keras.layers.Dense(10, activation='softmax'))
  M.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
  reduce_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.32, patience=5, restore_best_weights=True)
  early_cb = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, min_delta=1e-5)
  M.fit(x_train, y_train, batch_size=256, epochs=100, verbose=0, validation_data=(x_val, y_val),
    callbacks=[reduce_cb, early_cb])
  _, acc = M.evaluate(x_test, y_test, verbose=1)
  return acc

In [14]:
import time; start = time.time()
keras.utils.set_random_seed(seed=23);

num_exp = 5; acc = np.zeros(num_exp)
for exp in range(num_exp):
    # We select adamax
    acc[exp] = run_exp(optimizer="adamax")
print(f'Precisión media: {acc.mean():.2%}  Desviación estándar: {acc.std():.2%}')
print('Tiempo (hh:mm:ss):', time.strftime('%H:%M:%S', time.gmtime(time.time() - start)))

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 752us/step - accuracy: 0.8961 - loss: 0.3179
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 665us/step - accuracy: 0.8995 - loss: 0.3076
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 680us/step - accuracy: 0.8972 - loss: 0.3115
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534us/step - accuracy: 0.8959 - loss: 0.3189
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 734us/step - accuracy: 0.8953 - loss: 0.3208
Precisión media: 89.65%  Desviación estándar: 0.06%
Tiempo (hh:mm:ss): 00:01:49


<p style="page-break-after:always;"></p>

# Examen A3: ejercicio con CIFAR-10

En las dos primeras sesiones de laboratorio del bloque 2 seguimos con MNIST y Fashion-MNIST; vimos que con CNNs sencillas convenientemente regularizadas se obtenían precisiones en test muy buenas, del $\,99.5\%\,$ en MNIST y $\,92.0\%\,$ en Fashion-MNIST. A partir de la tercera sesión de laboratorio del bloque 2 utilizamos el corpus de imágenes a color CIFAR-10. El siguiente código lee CIFAR-10 con su partición train-test estándar y establece una partición train-val-test mediante partición del train estándar en train-val.

In [None]:
import numpy as np; import matplotlib.pyplot as plt
import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import keras; from keras import layers
keras.utils.set_random_seed(23)
(x_train_val, y_train_val), (x_test, y_test) = keras.datasets.cifar10.load_data()
x_train_val = x_train_val.astype("float32")
x_test = x_test.astype("float32")
y_train_val = keras.utils.to_categorical(y_train_val, 10)
y_test = keras.utils.to_categorical(y_test, 10)
x_train = x_train_val[:-10000]; x_val = x_train_val[-10000:]
y_train = y_train_val[:-10000]; y_val = y_train_val[-10000:]
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape)

(40000, 32, 32, 3) (40000, 10) (10000, 32, 32, 3) (10000, 10) (10000, 32, 32, 3) (10000, 10)



<p style="page-break-after:always;"></p>


En la tercera sesión del bloque 2 (sesión 8) vimos que una CNN sencilla obtenía precisiones alrededor del $73\%$. A continuación se define una función para realizar un experimento con una CNN incluso más sencilla. La normalización de imágenes, a $\,[-1, 1],\,$ se integra como primera capa tras la de entrada. Tras la normalización, la red aplica dos capas convolucionales de 32 y 64 filtros de $\,5\times 5$ y $\,3\times 3$, respectivamente. Cada capa convolucional viene seguida por una capa de agrupación AveragePooling2D con ventana $2\times 2.$ Tras los dos pares Conv2D-AveragePooling2D, la salida se aplana y se procesa mediante un MLP con 500 unidades seguida de una capa de regularización Dropout con probabilidad $\,0.5\,$.

In [None]:
def run_exp2():
  M = keras.Sequential()
  M.add(keras.Input(shape=(32, 32, 3)))
  M.add(layers.Rescaling(scale=1 / 127.5, offset=-1))
  M.add(layers.Conv2D(64, kernel_size=(5, 5), activation="relu"))
  M.add(layers.AveragePooling2D(pool_size=(2, 2)))
  M.add(layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
  M.add(layers.AveragePooling2D(pool_size=(2, 2)))
  M.add(layers.Flatten())
  M.add(layers.Dense(units=500, activation='relu'))
  M.add(layers.Dropout(0.5))
  M.add(layers.Dense(10, activation='softmax'))
  opt = keras.optimizers.Adam(learning_rate=0.00015)
  M.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
  reduce_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.32, patience=5, restore_best_weights=True)
  early_cb = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, min_delta=1e-5)
  M.fit(x_train, y_train, batch_size=256, epochs=100, verbose=0, validation_data=(x_val, y_val),
    callbacks=[reduce_cb, early_cb])
  _, acc = M.evaluate(x_test, y_test, verbose=0)
  return acc


<p style="page-break-after:always;"></p>


Repetimos `run_exp2` para comprobar que la CNN definida arriba obtiene una precisión próxima al $71\%$:

In [None]:
import time; start = time.time()
keras.utils.set_random_seed(seed=23); 
num_exp = 5; acc = np.zeros(num_exp)
for exp in range(num_exp):
    acc[exp] = run_exp2()
print(f'Precisión media: {acc.mean():.2%} Desviación estándar: {acc.std():.2%}')
print('Tiempo (hh:mm:ss):', time.strftime('%H:%M:%S', time.gmtime(time.time() - start)))

Precisión media: 71.17% Desviación estándar: 0.81%
Tiempo (hh:mm:ss): 00:06:20


<p style="page-break-after:always;"></p>

**Ejercicio (2 puntos):** $\;$ Define una nueva función `run_exp3` para experimentar con la CNN definida arriba añadiendo una o más capas de aumento de datos de las vistas en las sesiones de laboratorio ([RandomFlip](https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_flip/), [RandomTranslation](https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_translation/), [RandomRotation](https://keras.io/api/layers/preprocessing_layers/image_augmentation/random_rotation/), etc.). Escoge las [capas de aumento de imágenes](https://keras.io/api/layers/preprocessing_layers/image_augmentation/]) que consideres prometedoras y aplícalas adecuadamente. Repite `run_exp3` al menos dos veces para estimar la precisión; deberías obtener un $73\%$ al menos. 

In [1]:
import numpy as np; import matplotlib.pyplot as plt
import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import keras; from keras import layers
keras.utils.set_random_seed(23)
(x_train_val, y_train_val), (x_test, y_test) = keras.datasets.cifar10.load_data()
x_train_val = x_train_val.astype("float32")
x_test = x_test.astype("float32")
y_train_val = keras.utils.to_categorical(y_train_val, 10)
y_test = keras.utils.to_categorical(y_test, 10)
x_train = x_train_val[:-10000]; x_val = x_train_val[-10000:]
y_train = y_train_val[:-10000]; y_val = y_train_val[-10000:]
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape)

2024-12-11 20:05:12.823629: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733943912.842555  139956 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733943912.845997  139956 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


(40000, 32, 32, 3) (40000, 10) (10000, 32, 32, 3) (10000, 10) (10000, 32, 32, 3) (10000, 10)


In [2]:
def run_exp3():
  M = keras.Sequential()
  M.add(keras.Input(shape=(32, 32, 3)))
  M.add(layers.Rescaling(scale=1 / 127.5, offset=-1))
  # We add image aumentation layers. This performs our data augmentation
  M.add(layers.RandomFlip("horizontal"))
  M.add(layers.RandomRotation(0.1))
  M.add(layers.Conv2D(64, kernel_size=(5, 5), activation="relu"))
  M.add(layers.AveragePooling2D(pool_size=(2, 2)))
  M.add(layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
  M.add(layers.AveragePooling2D(pool_size=(2, 2)))
  M.add(layers.Flatten())
  M.add(layers.Dense(units=500, activation='relu'))
  M.add(layers.Dropout(0.5))
  M.add(layers.Dense(10, activation='softmax'))
  opt = keras.optimizers.Adam(learning_rate=0.00015)
  M.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
  reduce_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.32, patience=5, restore_best_weights=True)
  early_cb = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, min_delta=1e-5)
  M.fit(x_train, y_train, batch_size=256, epochs=100, verbose=0, validation_data=(x_val, y_val),
    callbacks=[reduce_cb, early_cb])
  _, acc = M.evaluate(x_test, y_test, verbose=1)
  return acc

In [3]:
import time; start = time.time()
keras.utils.set_random_seed(seed=23); 
num_exp = 2; acc = np.zeros(num_exp)
for exp in range(num_exp):
    acc[exp] = run_exp3()
print(f'Precisión media: {acc.mean():.2%} Desviación estándar: {acc.std():.2%}')
print('Tiempo (hh:mm:ss):', time.strftime('%H:%M:%S', time.gmtime(time.time() - start)))

I0000 00:00:1733943919.765359  139956 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2616 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5
I0000 00:00:1733943922.492902  141742 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7202 - loss: 0.8050
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6956 - loss: 0.8766
Precisión media: 70.56% Desviación estándar: 1.32%
Tiempo (hh:mm:ss): 00:07:06


### Intentemos obtener mejores resultados (no se consigue)

In [6]:
def run_exp3():
  M = keras.Sequential()
  M.add(keras.Input(shape=(32, 32, 3)))
  M.add(layers.Rescaling(scale=1 / 127.5, offset=-1))
  M.add(layers.RandomFlip("horizontal"))
  # Añadimos RandomTranslation
  M.add(layers.RandomTranslation(0.2, 0.2, fill_mode="nearest"))
  M.add(layers.Conv2D(64, kernel_size=(5, 5), activation="relu"))
  M.add(layers.AveragePooling2D(pool_size=(2, 2)))
  M.add(layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
  M.add(layers.AveragePooling2D(pool_size=(2, 2)))
  M.add(layers.Flatten())
  M.add(layers.Dense(units=500, activation='relu'))
  M.add(layers.Dropout(0.5))
  M.add(layers.Dense(10, activation='softmax'))
  opt = keras.optimizers.Adam(learning_rate=0.00015)
  M.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
  reduce_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.32, patience=5, restore_best_weights=True)
  early_cb = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, min_delta=1e-5)
  M.fit(x_train, y_train, batch_size=256, epochs=100, verbose=0, validation_data=(x_val, y_val),
    callbacks=[reduce_cb, early_cb])
  _, acc = M.evaluate(x_test, y_test, verbose=1)
  return acc

In [7]:
import time; start = time.time()
keras.utils.set_random_seed(seed=23); 
num_exp = 2; acc = np.zeros(num_exp)
for exp in range(num_exp):
    acc[exp] = run_exp3()
print(f'Precisión media: {acc.mean():.2%} Desviación estándar: {acc.std():.2%}')
print('Tiempo (hh:mm:ss):', time.strftime('%H:%M:%S', time.gmtime(time.time() - start)))

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7019 - loss: 0.8566
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7047 - loss: 0.8563
Precisión media: 70.14% Desviación estándar: 0.01%
Tiempo (hh:mm:ss): 00:07:05
