<a href="https://colab.research.google.com/github/DaveLoay/AP_Tarea_2/blob/main/AP_T2_EX3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implementar una arquitectura RNN bidireccional con una capa GRU.

In [None]:
# Colab
# https://github.com/TylerYep/torchinfo
!pip install torchinfo
# https://zarr.readthedocs.io/en/stable/
!pip install zarr



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# sistema de archivos
import os
# funciones aleatorias
import random
# descomprimir
import tarfile
# sistema de archivos
from os.path import join

# arreglos multidimensionales
import numpy as np
# redes neuronales
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets.utils as tvu
# almacenamiento de arreglos multidimensionales
import zarr
#redes
from torch.utils.data import DataLoader, random_split
# inspección de arquitectura
from torchinfo import summary

# reproducibility: seed every random number generator used by the notebook
SEED = 0
torch_gen = torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# mini-batch size
BATCH_SIZE = 32
# size of the feature vector
FEAT_SIZE = 1024

# data directory
DATA_DIR = '../data'

# Datos

In [None]:
class UCF11:
    """Map-style dataset backed by the `ucf11.zarr` group found under `root`.

    Every array in the group is one sample; its class label is read from the
    array attribute `'y'`.

    Args:
        root: directory that contains (or will contain) `ucf11.zarr`.
        download: when true, fetch and unpack the archive if the Zarr
            directory is not already present.
    """

    def __init__(self, root, download=False):
        self.root = root
        self.zarr_dir = join(root, 'ucf11.zarr')
        if download:
            self.download()
        # `mode` is passed by keyword: newer zarr releases no longer accept
        # it positionally, and the keyword form works on older ones as well
        self.z = zarr.open(self.zarr_dir, mode='r')
        # array names inside the group; position in this list is the index
        self.paths = list(self.z.array_keys())

    def __getitem__(self, i):
        """Return `(x, y)`: the i-th array as ndarray and its int64 label."""
        arr = self.z[self.paths[i]]
        x = np.array(arr)
        y = np.array(arr.attrs['y'], dtype=np.int64)
        return x, y

    def __len__(self):
        """Number of arrays (samples) in the group."""
        return len(self.paths)

    def _check_integrity(self):
        """True when the Zarr directory already exists on disk."""
        return os.path.isdir(self.zarr_dir)

    def _extract(self, root, filename):
        """Unpack the gzip-compressed tar `filename` (inside `root`) into `root`."""
        # the context manager closes the archive even if extraction raises
        # NOTE(review): extractall() trusts member paths; the archive is
        # md5-checked on download, but consider an extraction filter if the
        # source ever becomes untrusted.
        with tarfile.open(join(root, filename), "r:gz") as tar:
            tar.extractall(root)

    def download(self):
        """Download and unpack the dataset unless it is already present."""
        if self._check_integrity():
            print('Files already downloaded and verified')
            return
        tvu.download_url(
            url='https://cloud.xibalba.com.mx/s/apYrNA4iM4K65o7/download',
            root=self.root,
            filename='ucf11.zarr.tar.gz',
            md5='c8a82454f9ec092d00bcd99c849e03fd'
        )
        self._extract(self.root, 'ucf11.zarr.tar.gz')


In [None]:
# open the dataset (downloading it when missing) and inspect the first sample
ds = UCF11(root=join(DATA_DIR, 'ucf11'), download=True)
x, y = ds[0]
# features: shape, dtype and the first values of the first row
print(f'x shape={x.shape} dtype={x.dtype}')
print(f'x [0][:5]={x[0][:5]}')
# label: shape, dtype and value
print(f'y shape={y.shape} dtype={y.dtype} {y}')
print(f'y {y}')

Files already downloaded and verified
x shape=(10, 1024) dtype=float32
x [0][:5]=[0.00022111 0.00368518 0.00314753 0.00201778 0.09296297]
y shape=() dtype=int64 0
y 0


In [None]:
# 80/20 random partition into training and test subsets
n_total = len(ds)
trn_size = int(0.8 * n_total)
tst_size = n_total - trn_size
trn_ds, tst_ds = random_split(ds, [trn_size, tst_size])
# show both subset sizes as the cell output
len(trn_ds), len(tst_ds)

(1279, 320)

In [None]:
trn_dl = DataLoader(
    # dataset
    trn_ds,
    # batch size
    batch_size=BATCH_SIZE,
    # reshuffle the training samples on every epoch
    shuffle=True,
    # parallel worker processes
    num_workers=2
)
tst_dl = DataLoader(
    # dataset
    tst_ds,
    # batch size
    batch_size=BATCH_SIZE,
    # evaluation metrics do not depend on sample order, so the test loader
    # keeps a fixed order instead of drawing a new permutation each epoch
    shuffle=False,
    # parallel worker processes
    num_workers=2
)


In [None]:
# pull one mini-batch and report the shape/dtype of features and labels
x, y = next(iter(trn_dl))
for name, tensor in (('x', x), ('y', y)):
    print(f'{name} shape={tensor.shape} dtype={tensor.dtype}')

x shape=torch.Size([32, 10, 1024]) dtype=torch.float32
y shape=torch.Size([32]) dtype=torch.int64


# Modelo

In [None]:
class RNN(nn.Module):
    """Sequence classifier: BatchNorm over features -> single-layer GRU -> linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: GRU hidden state size (per direction).
        num_classes: number of output logits.
        bidirectional: when true the GRU also reads the sequence backwards and
            the classifier receives the final states of both directions. The
            default (False) reproduces the unidirectional model; pass True
            to obtain the bidirectional architecture named in the notebook
            heading.

    Input is `[B, S, F]` (batch, sequence, features); output is
    `[B, num_classes]` logits.
    """

    def __init__(self, input_size=1024, hidden_size=128, num_classes=11,
                 bidirectional=False):
        super().__init__()
        self.bn = nn.BatchNorm1d(input_size)
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=1, batch_first=True,
                          bidirectional=bidirectional)
        # the head sees one hidden vector per direction
        num_directions = 2 if bidirectional else 1
        self.cls = nn.Linear(hidden_size * num_directions, num_classes)

    def forward(self, x):
        # Batch, Seq, Feats, Hidden, Directions
        # BatchNorm1d expects channels second: [B, S, F] => [B, F, S]
        x = x.movedim(1, 2)
        # [B, F, S]
        x = self.bn(x)
        # [B, F, S] => [B, S, F]
        x = x.movedim(1, 2)
        # final hidden state of each direction: [D, B, H]
        # For D == 1 this is exactly the output at the last time step. For
        # D == 2 it is the correct summary of both passes (the backward pass
        # finishes at step 0, so slicing the last output step would only see
        # one step of it).
        _, h_n = self.rnn(x)
        # [D, B, H] => [B, D, H] => [B, D * H]
        x = h_n.movedim(0, 1).flatten(1)
        # [B, D * H] => [B, num_classes]
        x = self.cls(x)
        return x

In [None]:
# build the recurrent model, switch it to inference mode and push a dummy
# batch through it to confirm the output shape
model = RNN()
model.eval()
model(torch.zeros(1, 10, 1024)).shape

torch.Size([1, 11])

# Inspección de arquitectura

In [None]:
# layer-by-layer report for a single dummy input of shape [1, 10, 1024]
summary(model, input_size=(1, 10, 1024), device='cpu', verbose=0)

Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [1, 11]                   --
├─BatchNorm1d: 1-1                       [1, 1024, 10]             2,048
├─GRU: 1-2                               [1, 10, 128]              443,136
├─Linear: 1-3                            [1, 11]                   1,419
Total params: 446,603
Trainable params: 446,603
Non-trainable params: 0
Total mult-adds (M): 4.43
Input size (MB): 0.04
Forward/backward pass size (MB): 0.09
Params size (MB): 1.79
Estimated Total Size (MB): 1.92

In [None]:
# optimizer
opt = optim.Adam(model.parameters(), lr=1e-3)

# training loop
EPOCHS = 10
for epoch in range(EPOCHS):

    # training mode (batch-norm / dropout layers behave stochastically)
    model.train()

    # one pass over the training set
    for x, y_true in trn_dl:
        # forward pass to obtain the logits
        y_lgts = model(x)
        # loss
        loss = F.cross_entropy(y_lgts, y_true)
        # clear old gradients, backpropagate, update the parameters
        opt.zero_grad()
        loss.backward()
        opt.step()

    # evaluation mode, with the computation graph disabled
    model.eval()
    with torch.no_grad():

        # running totals weighted by batch size, so a smaller final batch
        # does not distort the epoch averages
        loss_sum, hits, seen = 0.0, 0, 0
        for x, y_true in tst_dl:
            y_lgts = model(x)
            # softmax is monotonic, so argmax over the logits is the
            # predicted class
            y_pred = torch.argmax(y_lgts, 1)

            n_batch = y_true.shape[0]
            loss_sum += F.cross_entropy(y_lgts, y_true).item() * n_batch
            hits += (y_true == y_pred).sum().item()
            seen += n_batch

        # both metrics are reported multiplied by 100; an empty loader
        # yields NaN instead of dividing by zero
        loss = loss_sum / seen * 100 if seen else float('nan')
        acc = hits / seen * 100 if seen else float('nan')
        print(f'E{epoch:2} loss={loss:6.2f} acc={acc:.2f}')

E 0 loss=202.30 acc=32.19
E 1 loss=178.21 acc=42.81
E 2 loss=165.81 acc=46.56
E 3 loss=158.54 acc=48.75
E 4 loss=147.89 acc=51.25
E 5 loss=147.74 acc=53.75
E 6 loss=149.61 acc=51.88
E 7 loss=144.33 acc=53.12
E 8 loss=148.34 acc=55.31
E 9 loss=145.79 acc=53.12


# 3.- Implementar una arquitectura CNN con una capa Conv1d.

# Modelo

In [None]:
from torch.nn.modules.activation import ReLU
class CNN(nn.Module):
    """Sequence classifier built on a single Conv1d layer.

    Pipeline: Conv1d(kernel 2) -> Dropout(0.4) -> ReLU -> MaxPool1d(2, 2)
    -> Flatten -> Linear.

    Args:
        input_size: number of features per time step (Conv1d input channels).
        hidden_size: number of Conv1d output channels.
        num_classes: number of output logits.
        seq_len: number of time steps of the inputs; needed to size the
            linear head.

    Input is `[B, S, F]` (batch, sequence, features); output is
    `[B, num_classes]` logits.
    """

    def __init__(self, input_size=1024, hidden_size=128, num_classes=11,
                 seq_len=10):
        super().__init__()
        kernel_size, pool_size, pool_stride = 2, 2, 2
        self.cnn = nn.Sequential(
            nn.Conv1d(input_size, hidden_size, kernel_size=kernel_size),
            nn.Dropout(p=0.4),
            nn.ReLU(),
            # pool along time only; a 2-D pool here would treat the channel
            # axis as a spatial one and also merge neighbouring channels
            nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride),
        )
        self.flatten = nn.Flatten()
        # time steps left after the (unpadded) convolution and the pooling
        conv_len = seq_len - kernel_size + 1
        pool_len = (conv_len - pool_size) // pool_stride + 1
        if pool_len < 1:
            raise ValueError(f'seq_len={seq_len} is too short for this model')
        self.cls = nn.Linear(hidden_size * pool_len, num_classes)

    def forward(self, x):
        # Batch, Seq, Feats, Hidden
        # Conv1d expects channels second: [B, S, F] => [B, F, S]
        x = x.movedim(1, 2)
        # [B, F, S] => [B, H, S']
        x = self.cnn(x)
        # [B, H, S'] => [B, H * S']
        x = self.flatten(x)
        # [B, H * S'] => [B, num_classes]
        x = self.cls(x)
        return x

In [None]:
# build the convolutional model, switch it to inference mode and push a dummy
# batch through it to confirm the output shape
model = CNN()
model.eval()
model(torch.zeros(1, 10, 1024)).shape

torch.Size([1, 11])

In [None]:
# layer-by-layer report for a single dummy input of shape [1, 10, 1024]
summary(model, input_size=(1, 10, 1024), device='cpu', verbose=0)

Layer (type:depth-idx)                   Output Shape              Param #
CNN                                      [1, 11]                   --
├─Sequential: 1-1                        [1, 64, 4]                --
│    └─Conv1d: 2-1                       [1, 128, 9]               262,272
│    └─Dropout: 2-2                      [1, 128, 9]               --
│    └─ReLU: 2-3                         [1, 128, 9]               --
│    └─MaxPool2d: 2-4                    [1, 64, 4]                --
├─Flatten: 1-2                           [1, 256]                  --
├─Linear: 1-3                            [1, 11]                   2,827
Total params: 265,099
Trainable params: 265,099
Non-trainable params: 0
Total mult-adds (M): 2.36
Input size (MB): 0.04
Forward/backward pass size (MB): 0.01
Params size (MB): 1.06
Estimated Total Size (MB): 1.11

In [None]:
# optimizer
opt = optim.Adam(model.parameters(), lr=1e-3)

# training loop
EPOCHS = 10
for epoch in range(EPOCHS):

    # training mode (batch-norm / dropout layers behave stochastically)
    model.train()

    # one pass over the training set
    for x, y_true in trn_dl:
        # forward pass to obtain the logits
        y_lgts = model(x)
        # loss
        loss = F.cross_entropy(y_lgts, y_true)
        # clear old gradients, backpropagate, update the parameters
        opt.zero_grad()
        loss.backward()
        opt.step()

    # evaluation mode, with the computation graph disabled
    model.eval()
    with torch.no_grad():

        # running totals weighted by batch size, so a smaller final batch
        # does not distort the epoch averages
        loss_sum, hits, seen = 0.0, 0, 0
        for x, y_true in tst_dl:
            y_lgts = model(x)
            # softmax is monotonic, so argmax over the logits is the
            # predicted class
            y_pred = torch.argmax(y_lgts, 1)

            n_batch = y_true.shape[0]
            loss_sum += F.cross_entropy(y_lgts, y_true).item() * n_batch
            hits += (y_true == y_pred).sum().item()
            seen += n_batch

        # both metrics are reported multiplied by 100; an empty loader
        # yields NaN instead of dividing by zero
        loss = loss_sum / seen * 100 if seen else float('nan')
        acc = hits / seen * 100 if seen else float('nan')
        print(f'E{epoch:2} loss={loss:6.2f} acc={acc:.2f}')

E 0 loss=223.30 acc=27.81
E 1 loss=217.02 acc=26.56
E 2 loss=205.83 acc=29.38
E 3 loss=201.44 acc=34.69
E 4 loss=195.29 acc=35.94
E 5 loss=191.18 acc=33.44
E 6 loss=186.33 acc=39.06
E 7 loss=184.82 acc=39.38
E 8 loss=179.29 acc=42.81
E 9 loss=178.52 acc=39.38


# Modificar el tamaño de las capas para que ambos modelos tengan un número similar de parámetros.

# Modelo

In [None]:
from torch.nn.modules.activation import ReLU
class CNN2(nn.Module):
    """Wider-kernel variant of the single-Conv1d sequence classifier.

    Pipeline: Conv1d(kernel 3) -> Dropout(0.4) -> ReLU -> MaxPool1d(3, 2)
    -> Flatten -> Linear.

    Args:
        input_size: number of features per time step (Conv1d input channels).
        hidden_size: number of Conv1d output channels.
        num_classes: number of output logits.
        seq_len: number of time steps of the inputs; needed to size the
            linear head.

    Input is `[B, S, F]` (batch, sequence, features); output is
    `[B, num_classes]` logits.
    """

    def __init__(self, input_size=1024, hidden_size=128, num_classes=11,
                 seq_len=10):
        super().__init__()
        kernel_size, pool_size, pool_stride = 3, 3, 2
        self.cnn = nn.Sequential(
            nn.Conv1d(input_size, hidden_size, kernel_size=kernel_size),
            nn.Dropout(p=0.4),
            nn.ReLU(),
            # pool along time only; a 2-D pool here would treat the channel
            # axis as a spatial one and also merge neighbouring channels
            nn.MaxPool1d(kernel_size=pool_size, stride=pool_stride),
        )
        self.flatten = nn.Flatten()
        # time steps left after the (unpadded) convolution and the pooling
        conv_len = seq_len - kernel_size + 1
        pool_len = (conv_len - pool_size) // pool_stride + 1
        if pool_len < 1:
            raise ValueError(f'seq_len={seq_len} is too short for this model')
        self.cls = nn.Linear(hidden_size * pool_len, num_classes)

    def forward(self, x):
        # Batch, Seq, Feats, Hidden
        # Conv1d expects channels second: [B, S, F] => [B, F, S]
        x = x.movedim(1, 2)
        # [B, F, S] => [B, H, S']
        x = self.cnn(x)
        # [B, H, S'] => [B, H * S']
        x = self.flatten(x)
        # [B, H * S'] => [B, num_classes]
        x = self.cls(x)
        return x

In [None]:
# build the second convolutional model, switch it to inference mode and push
# a dummy batch through it to confirm the output shape
model = CNN2()
model.eval()
model(torch.zeros(1, 10, 1024)).shape

torch.Size([1, 11])

In [None]:
# layer-by-layer report for a single dummy input of shape [1, 10, 1024]
summary(model, input_size=(1, 10, 1024), device='cpu', verbose=0)

Layer (type:depth-idx)                   Output Shape              Param #
CNN2                                     [1, 11]                   --
├─Sequential: 1-1                        [1, 63, 3]                --
│    └─Conv1d: 2-1                       [1, 128, 8]               393,344
│    └─Dropout: 2-2                      [1, 128, 8]               --
│    └─ReLU: 2-3                         [1, 128, 8]               --
│    └─MaxPool2d: 2-4                    [1, 63, 3]                --
├─Flatten: 1-2                           [1, 189]                  --
├─Linear: 1-3                            [1, 11]                   2,090
Total params: 395,434
Trainable params: 395,434
Non-trainable params: 0
Total mult-adds (M): 3.15
Input size (MB): 0.04
Forward/backward pass size (MB): 0.01
Params size (MB): 1.58
Estimated Total Size (MB): 1.63

In [None]:
# optimizer
opt = optim.Adam(model.parameters(), lr=1e-3)

# training loop
EPOCHS = 10
for epoch in range(EPOCHS):

    # training mode (batch-norm / dropout layers behave stochastically)
    model.train()

    # one pass over the training set
    for x, y_true in trn_dl:
        # forward pass to obtain the logits
        y_lgts = model(x)
        # loss
        loss = F.cross_entropy(y_lgts, y_true)
        # clear old gradients, backpropagate, update the parameters
        opt.zero_grad()
        loss.backward()
        opt.step()

    # evaluation mode, with the computation graph disabled
    model.eval()
    with torch.no_grad():

        # running totals weighted by batch size, so a smaller final batch
        # does not distort the epoch averages
        loss_sum, hits, seen = 0.0, 0, 0
        for x, y_true in tst_dl:
            y_lgts = model(x)
            # softmax is monotonic, so argmax over the logits is the
            # predicted class
            y_pred = torch.argmax(y_lgts, 1)

            n_batch = y_true.shape[0]
            loss_sum += F.cross_entropy(y_lgts, y_true).item() * n_batch
            hits += (y_true == y_pred).sum().item()
            seen += n_batch

        # both metrics are reported multiplied by 100; an empty loader
        # yields NaN instead of dividing by zero
        loss = loss_sum / seen * 100 if seen else float('nan')
        acc = hits / seen * 100 if seen else float('nan')
        print(f'E{epoch:2} loss={loss:6.2f} acc={acc:.2f}')

E 0 loss=223.79 acc=24.06
E 1 loss=216.21 acc=28.12
E 2 loss=208.91 acc=27.50
E 3 loss=201.43 acc=33.44
E 4 loss=198.06 acc=36.88
E 5 loss=190.10 acc=38.12
E 6 loss=190.42 acc=38.75
E 7 loss=181.84 acc=42.81
E 8 loss=177.44 acc=44.38
E 9 loss=172.77 acc=45.00


# Discusión

Se observó que la red con el bloque GRU, que tiene conexiones recurrentes, tuvo un mejor comportamiento en términos de eficiencia que el modelo convolucional no recurrente; sin embargo, eso se reflejó en el tiempo de entrenamiento (a pesar de ser modelos pequeños). Una vez que se modificaron las redes para tener un número similar de parámetros, el desempeño de ambas redes también fue similar: el modelo con el bloque GRU continuó teniendo un mejor desempeño, pero la diferencia entre ambos modelos fue menor al $10\%$.