# Redes de Neuronas con conexiones residuales y entrenados según _transfer learning_
## Práctica 3

#### Hugo Fole Avellás y José Romero Conde

 ### $\text{Ejercicio } 1$  
 ### (_2 puntos_) Define la capa ResidualBlock (ver Figura 2), usando como base la plantilla proporcionada en la Figura 3.
 - Ten en cuenta que el número de convoluciones depende de los valores de input_channels y out-put_channels.
 - Esta red no tiene capas de Pooling. La reducción del tamaño se realiza con el parámetro strides
de las convoluciones, pero se modifica únicamente en 1 (o 2) de las convoluciones del modelo.

!['arquitectura'](ResidualBlock.png)

In [None]:
#plantilla

#class ResidualBlock(Model):
#   def __init__(self, input_channels, output_channels, strides=(1, 1)):
#       ...
#    def call(self, x):
#

In [1]:
# ejemplo https://keras.io/guides/making_new_layers_and_models_via_subclassing/
#
#class ResNet(keras.Model):
#
#    def __init__(self, num_classes=1000):
#        super().__init__()
#        self.block_1 = ResNetBlock()
#        self.block_2 = ResNetBlock()
#        self.global_pool = layers.GlobalAveragePooling2D()
#        self.classifier = Dense(num_classes)
#
#    def call(self, inputs):
#        x = self.block_1(inputs)
#        x = self.block_2(x)
#        x = self.global_pool(x)
#        return self.classifier(x)

In [101]:
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras import activations

class ResidualBlock(Model):
    def __init__(self, input_channels, output_channels, strides=(1, 1)):
        
        super().__init__()
        
        self.BN1 = layers.BatchNormalization()
        self.Conv1 = layers.Conv2D(filters = output_channels, 
                                   kernel_size = (1, 1),
                                   strides = strides,)
        self.BN2 = layers.BatchNormalization()
        self.Conv2 = layers.Conv2D(filters = output_channels, 
                                   kernel_size = (1, 1),
                                   strides = (1, 1))
        
        if input_channels != output_channels:
            self.salidaDistinta = True
            self.ConvFuera = layers.Conv2D(filters = output_channels, 
                                   kernel_size = (1, 1),
                                   strides = strides)
        else: self.salidaDistinta = False
            
    def call(self, x):
        x = self.BN1(x)
        y = activations.silu(x)
        x = self.Conv1(y)
        x = self.BN2(x)
        x = activations.silu(x)
        x = self.Conv2(x)
        if self.salidaDistinta:
            y = self.ConvFuera(y)
        x = x + y
        return x

 ### $\text{Ejercicio } 2$  
### (_2 puntos_) Define la red ResidualNetwork (ver Figura 1). Para comprobar su correcto funcionamiento haz lo siguiente:
 - Descarga los pesos del modelo preentrenado (los podrás encontrar en el canal de Teams de la asignatura).
 - Carga los pesos en tu modelo, haciendo uso de la función proporcionada en la Figura 4.
 - Comprueba que la precisión del modelo en CIFAR-100 es superior al 69 %.

!['arquitectura'](ResidualNetwork.png)

In [102]:
from tensorflow.keras import Sequential

ResidualNetwork = Sequential([
    layers.Conv2D(filters=16, kernel_size=(3,3),strides=(1,1),padding="same"), 
    # la configuración de la capa convolucional es para asegurarse que no se reduce tamaño
    ResidualBlock(16,64),
    ResidualBlock(64,64),
    ResidualBlock(64,64),
    ResidualBlock(64,128),
    ResidualBlock(128,128),
    ResidualBlock(128,128),
    ResidualBlock(128,256),
    ResidualBlock(256,256),
    ResidualBlock(256,256),
    layers.BatchNormalization(),
    layers.Activation(activations.silu),
    layers.GlobalAveragePooling2D(),
    layers.Dense(100)
    
])

In [103]:
import pickle

def load_weights(model, weight_file):
    with open(weight_file, 'rb') as f:
        weights = pickle.load(f)

    all_vars = model.trainable_weights + model.non_trainable_weights
    weight_list = [(x, weights[x]) for x in sorted(weights.keys())]
    weights = {}
    for i, var in enumerate(all_vars):
        aux = var.path.split('/')[-2:]
        classname = '_'.join(aux[0].split('_')[:-1])
        name = aux[1]
        assigned = False
        for j, (key, value) in enumerate(weight_list):
            if classname in key and name in key:
                try:
                    all_vars[i].assign(value)
                except:
                    continue
                print('assinging', key, 'to', var.path)
                del weight_list[j]
                assigned = True
                break
        if not assigned:
            raise Exception(var.path + ' cannot be loaded')

In [104]:
#load_weights(ResidualNetwork, "p2_model_weights.pkl")

In [133]:
## carga y procesado de datos

from tensorflow.keras.datasets import cifar100
import numpy as np

(x_train, Y_train), (x_test, Y_test) = cifar100.load_data()

x_train = (2*x_train.astype(float)-255)/(255)
x_test = (2*x_test.astype(float)-255)/(255)

# como la salida de la red son 100 nodos se sobreentiende 
# que tengo que one-hot-ear Y

y_train = np.zeros(shape=(Y_train.shape[0],max(Y_train)[0]+1))
y_train[np.arange(Y_train.size),Y_train] = 1

(np.max(x_train),np.min(x_train),np.max(x_test),np.min(x_test))

(1.0, -1.0, 1.0, -1.0)

In [139]:
np.arange(Y_train.size)

array([    0,     1,     2, ..., 49997, 49998, 49999])

In [106]:
from tensorflow.keras.losses import CategoricalCrossentropy as CCE

ResidualNetwork.compile(optimizer='adam',
                        loss=CCE(),
                        metrics=['accuracy'])

In [107]:
ResidualNetwork.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

Epoch 1/5


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 1), output.shape=(None, 100)

In [116]:
ResidualNetwork(x_train)

2024-11-27 16:29:53.377421: W tensorflow/core/framework/op_kernel.cc:1840] OP_REQUIRES failed at conv_ops_impl.h:668 : INVALID_ARGUMENT: convolution filter must be 4-dimensional: [0]


InvalidArgumentError: Exception encountered when calling Conv2D.call().

[1m{{function_node __wrapped__Conv2D_device_/job:localhost/replica:0/task:0/device:CPU:0}} convolution filter must be 4-dimensional: [0] [Op:Conv2D][0m

Arguments received by Conv2D.call():
  • inputs=tf.Tensor(shape=(50000, 32, 32, 3), dtype=float32)

In [121]:
min(y_test)

array([0])