<a href="https://colab.research.google.com/github/RabaDaba1/pooling-layer-analysis/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [72]:
import os
import math
from pathlib import Path



import torch

import torch.nn as nn

import torch.optim as optim

import torch.nn.functional as F



import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter



import numpy as np

# Constants

In [73]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device

device(type='cuda')

In [74]:
BATCH_SIZE = 64

NUM_WORKERS = 0

In [75]:
ROOT_DIR = Path('.')

DATA_DIR = ROOT_DIR / 'data'

REPORTS_DIR = ROOT_DIR / 'reports'

MODELS_DIR = REPORTS_DIR / 'models'

RESULTS_DIR = REPORTS_DIR / 'results'

RUNS_DIR = REPORTS_DIR / 'runs'

# Data loading

In [76]:
train_transform = transforms.Compose(

    [transforms.ToTensor(),

     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),

     transforms.RandomHorizontalFlip(p=0.5),

     transforms.RandomVerticalFlip(p=0.5)]

)

val_transform = transforms.Compose(

    [transforms.ToTensor(),

     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),

     transforms.RandomHorizontalFlip(p=0.5),

     transforms.RandomVerticalFlip(p=0.5)]

)

test_transform = transforms.Compose(

    [transforms.ToTensor(),

     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]

)

In [77]:
train_proportion = 0.9

num_train = 50000



indices = list(range(num_train))

split = int(np.floor(train_proportion * num_train))

np.random.shuffle(indices)



train_idx, val_idx = indices[:split], indices[split:]

train_sampler = SubsetRandomSampler(train_idx)

val_sampler = SubsetRandomSampler(val_idx)

In [78]:


train_dataset = datasets.CIFAR10(root=DATA_DIR, train=True,

                                 download=True, transform=train_transform)



val_dataset = datasets.CIFAR10(root=DATA_DIR, train=True,

                               download=True, transform=val_transform)



test_dataset = datasets.CIFAR10(root=DATA_DIR, train=False,

                                download=True, transform=test_transform)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [79]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          sampler=train_sampler, num_workers=NUM_WORKERS, pin_memory=True)

val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE,
                        sampler=val_sampler, num_workers=NUM_WORKERS, pin_memory=True)

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False,
                         num_workers=NUM_WORKERS, pin_memory=True)

# Aggregating functions

In [80]:
def arithmetic_mean(X, dim, keepdim):

    return torch.mean(X, dim, keepdim)

In [81]:
def minimum(X, dim, keepdim):

    return torch.min(X, dim, keepdim).values

In [82]:
def product(X, dim, keepdim):

    return torch.prod(X, dim=dim, keepdim=keepdim)

In [83]:
def t_norm_lukasiewicz(X, dim, keepdim):

    sum_X = torch.sum(X, dim=dim, keepdim=keepdim) - 1

    return torch.max(sum_X, torch.tensor(0, device=X.device))

In [84]:
def t_norm_hamacher(tensor, dim, keepdim=False): 
    tensor_shape = list(tensor.shape)
    tensor_shape.pop(dim)
    out_tensor = tensor.new_zeros(tensor_shape)
    # Indexar la útlima dimensión facilita la legibilidad del código (tendríamos que usar torch.index_select en caso contrario)
    if (dim == -1) or (dim == len(tensor.shape)-1):
        out_tensor = tensor[..., 0] # Tensor auxiliar donde acumularemos la salida (hará las veces de x)
        for i in range(1, tensor.shape[dim]):  # La t-conorma es asociativa: Trataremos los elementos de 2 en 2
            # Dado que la t-norma es asociativa, trataremos los elementos de 2 en 2. En cada iteración:
            x = out_tensor
            y = tensor[..., i]
            diff_indices = torch.logical_or(x != 0, y != 0)
            # if x == y == 0 -> 0 (we already have it)
            # otherwise -> T(a, b) = (ab) / (a+b-ab)
            out_tensor[diff_indices] = (
                torch.mul(x[diff_indices], y[diff_indices]) / (x[diff_indices] + y[diff_indices] - torch.mul(x[diff_indices], y[diff_indices])))
            
    else:
        raise Exception('Use dim=-1')
    # If keepdims is True, expand the reduced dimension to size 1
    if keepdim:
        out_tensor = out_tensor.unsqueeze(dim)
    
    return out_tensor

In [85]:
def maximum(X, dim, keepdim):
    return torch.max(X, dim, keepdim).values

In [86]:
def t_conorm_lukasiewicz(X, dim, keepdim):
    sum_X = torch.sum(X, dim=dim, keepdim=keepdim)
    return torch.min(sum_X, torch.tensor(1.0, device=X.device))

In [87]:
def t_conorm_hamacher(tensor, dim = -1, keepdim = False):
    tensor_shape = list(tensor.shape)
    tensor_shape.pop(dim)
    out_tensor = tensor.new_zeros(tensor_shape)
    ones = tensor.new_ones(tensor_shape)
    # Indexar la Ãºtlima dimensiÃ³n facilita la legibilidad del cÃ³digo (tendrÃ­amos que usar torch.index_select en caso contrario)
    if (dim == -1) or (dim == len(tensor.shape)-1):
        out_tensor = tensor[..., 0] # Tensor auxiliar donde acumularemos la salida (harÃ¡ las veces de x)
        for i in range(1, tensor.shape[dim]):  # La t-conorma es asociativa: Trataremos los elementos de 2 en 2
            # Dado que la t-conorma es asociativa, trataremos los elementos de 2 en 2. En cada iteraciÃ³n:
            x = out_tensor
            y = tensor[..., i]
            diff_indices = torch.where(torch.abs(torch.mul(x, y)-1) > 1e-9) # Devuelve los Ã­ndices de los elementos para los cuÃ¡les x*y-1 > 0 (condiciÃ³n de la funciÃ³n por partes)
            # Asignamos los valores en funciÃ³n de las condiciones
            # if ab == 1 -> T(a, b) = 1
            out_tensor = ones  # Por defecto, asumimos que todos los valores caen en el caso x*y-1=0
            # otherwise -> T(a, b) = (2ab - a - b) / (ab - 1)
            out_tensor[diff_indices] = (
                2 * torch.mul(x[diff_indices], y[diff_indices]) - x[diff_indices] - y[diff_indices]) / (
                torch.mul(x[diff_indices], y[diff_indices]) - 1)  # Corregimos los valores para los cuÃ¡les x*y-1>0 (los que corresponden a los Ã­ndices de diff_indices)
    else:
        # El cÃ³digo serÃ­a idÃ©ntico, sustituyendo tensor[..., 0] por torch.index_select(tensor, dim, tensor.new_tensor([0], dtype=torch.int)).squeeze(dim)
        # torch.index_select(tensor, dim, tensor.new_tensor([0], dtype=torch.int)).squeeze(dim) indexa todos los elementos de la dimensiÃ³n dim
        # NO HACE FALTA IMPLEMENTARLO
        raise Exception('Utilizar la versiÃ³n con dim=-1')
    if keepdim:
        torch.unsqueeze(out_tensor, dim=dim)
    return out_tensor

In [88]:
def u_min_max(tensor, dim = -1, keepdim = False):
    tensor_shape = list(tensor.shape)
    tensor_shape.pop(dim)
    out_tensor = tensor.new_zeros(tensor_shape)
    ones = tensor.new_ones(tensor_shape)
    # Indexar la Ãºtlima dimensiÃ³n facilita la legibilidad del cÃ³digo (tendrÃ­amos que usar torch.index_select en caso contrario)
    if (dim == -1) or (dim == len(tensor.shape)-1):
        out_tensor = tensor[..., 0] # Tensor auxiliar donde acumularemos la salida (harÃ¡ las veces de x)
        for i in range(1, tensor.shape[dim]):  # La t-conorma es asociativa: Trataremos los elementos de 2 en 2
            # Dado que la t-conorma es asociativa, trataremos los elementos de 2 en 2. En cada iteraciÃ³n:
            x = out_tensor
            y = tensor[..., i]
            cond1 = torch.logical_or(x < 0, y > 0.5)
            cond2 = torch.logical_or(x < 0, y > 0.5)
            diff_indices = torch.logical_or(cond1, cond2)
            # Asignamos los valores en funciÃ³n de las condiciones
            # if a, b in [0,0'5]^2 -> U(a, b) = min(a,b)
            #out_tensor = torch.min(x,y).values  
            # otherwise -> U(a, b) = max(a,b)
            #out_tensor[diff_indices] = (torch.max(x,y).values) 
            out_tensor = torch.where(diff_indices, torch.max(x, y), torch.min(x, y))
    else:
        raise Exception('Utilizar la versiÃ³n con dim=-1')
    if keepdim:
        torch.unsqueeze(out_tensor, dim=dim)
    return out_tensor

In [89]:
def u_l_l(tensor, dim = -1, keepdim = False):
    tensor_shape = list(tensor.shape)
    tensor_shape.pop(dim)
    out_tensor = tensor.new_zeros(tensor_shape)
    ones = tensor.new_ones(tensor_shape)
    # Indexar la Ãºtlima dimensiÃ³n facilita la legibilidad del cÃ³digo (tendrÃ­amos que usar torch.index_select en caso contrario)
    if (dim == -1) or (dim == len(tensor.shape)-1):
        out_tensor = tensor[..., 0] # Tensor auxiliar donde acumularemos la salida (harÃ¡ las veces de x)
        for i in range(1, tensor.shape[dim]):  # La t-conorma es asociativa: Trataremos los elementos de 2 en 2
            # Dado que la t-conorma es asociativa, trataremos los elementos de 2 en 2. En cada iteraciÃ³n:
            x = out_tensor
            y = tensor[..., i]
            
            # Condiciones
            cond1 = torch.logical_and(x <= 0.5, y <= 0.5)  # Caso 1: (x, y) en [0, 0.5]^2
            cond2 = torch.logical_and(x >= 0.5, y >= 0.5)  # Caso 2: (x, y) en [0.5, 1]^2
            cond3 = ~(cond1 | cond2)  # Caso 3: En cualquier otro caso

            # Caso 1: max(x + y - 1, 0)
            if cond1.any():
                out_tensor[cond1] = torch.maximum(
                    x[cond1] + y[cond1] - 1, 
                    torch.tensor(0.0, device=tensor.device)
                )

            # Caso 2: min(x + y, 1)
            if cond2.any():
                out_tensor[cond2] = torch.minimum(
                    x[cond2] + y[cond2], 
                    torch.tensor(1.0, device=tensor.device)
                )

            # Caso 3: max(x, y)
            if cond3.any():
                out_tensor[cond3] = torch.maximum(x[cond3], y[cond3])
                
    else:
        raise Exception('Utilizar la versiÃ³n con dim=-1')
    if keepdim:
        torch.unsqueeze(out_tensor, dim=dim)
    return out_tensor

In [90]:
class ChoquetLayer(nn.Module):

    def __init__(self):
        super(ChoquetLayer, self).__init__()
        self.w = None

    def forward(self, x, dim, keepdim):
        input_size = x.size(-1)
        batch_size = x.size(0)
        if self.w is None or self.w.size(0) != batch_size:
            self.w = nn.Parameter(torch.randn(input_size)).to(device)

        x_sorted, indices = torch.sort(x, descending=True, dim=-1)  # (batch_size, input_size)
        v_Ai = self.compute_v_Ai(indices)  # (batch_size, n+1)
        v_delta = v_Ai[..., :-1] - v_Ai[..., 1:]  # (batch_size, input_size)
        x = torch.sum(x_sorted * v_delta, dim=-1)  # (batch_size)

        if keepdim:
          x.unsqueeze(dim)

        return x

    def compute_v_Ai(self, indices):
        w_sorted = self.w[indices]
        v_Ai = torch.cumsum(w_sorted, dim=-1)
        zeros = torch.zeros(*v_Ai.shape[:-1], 1).to(v_Ai.device)
        v_Ai = torch.cat((zeros, v_Ai), dim=-1)

        return v_Ai

# Layers

In [91]:
class AggPoolingLayer(nn.Module):
    def __init__(self, function, kernel_size, stride, padding= [0,0,0,0], dim = -1, keepdim = False):
        super().__init__()

        # Una tupla de 2 elementos con los tamaños [𝑘1,𝑘2] de cada ventana a tratar
        self.kernel_size = kernel_size
        # Tupla de 2 elementos que indican el número de elementos (en filas y columnas) que
        # deben saltarse tras reducir cada ventana, hasta encontrar la siguiente a tratar.
        self.stride = stride

        # Tupla de 4 elementos de la forma [𝑝𝑎𝑑_𝑙𝑒𝑓𝑡,𝑝𝑎𝑑_𝑟𝑖𝑔ℎ𝑡,𝑝𝑎𝑑_𝑢𝑝,𝑝𝑎𝑑_𝑑𝑜𝑤𝑛] que indica el
        # número de nuevas filas o columnas a añadir a la entrada, previo a aplicar la agregación.
        self.padding = padding

        # Define function and characteristics
        self.function = function
        self.dim = dim
        self.keepdim = keepdim

    def forward(self, X):
        # Normalize
        maximum = torch.max(X)
        minimum = torch.min(X)
        X = (X-minimum)/(maximum-minimum)
        # Añadir columnas/filas según padding
        X_pad = F.pad(X, pad=self.padding, mode='constant', value=0)
        # Vamos extrayendo las ventanas a agregar y colocándolas en filas
        X_aux = X_pad.unfold(2, size=self.kernel_size[0], step=self.stride[0]).unfold(3, size=self.kernel_size[1], step=self.stride[1])
        # Ponemos el formato correcto
        X_aux = X_aux.reshape([X_aux.shape[0], X_aux.shape[1], X_aux.shape[2], X_aux.shape[3], X_aux.shape[4] * X_aux.shape[5]])
        # Agg Func
        Y_temp = self.function(X_aux, dim = self.dim, keepdim = self.keepdim)
        # Denormalize
        Y = minimum + (maximum-minimum) * Y_temp

        return Y

In [92]:
class OWAPoolingLayer(nn.Module):



    def __init__(self, kernel_size, stride, padding= [0,0,0,0], dim = -1, keepdim = False):

        super().__init__()



        # Una tupla de 2 elementos con los tamaños [𝑘1,𝑘2] de cada ventana a tratar

        self.kernel_size = kernel_size



        # Tupla de 2 elementos que indican el número de elementos (en filas y columnas) que

        # deben saltarse tras reducir cada ventana, hasta encontrar la siguiente a tratar.

        self.stride = stride



        # Tupla de 4 elementos de la forma [𝑝𝑎𝑑_𝑙𝑒𝑓𝑡,𝑝𝑎𝑑_𝑟𝑖𝑔ℎ𝑡,𝑝𝑎𝑑_𝑢𝑝,𝑝𝑎𝑑_𝑑𝑜𝑤𝑛] que indica el

        # número de nuevas filas o columnas a añadir a la entrada, previo a aplicar la agregación.

        self.padding = padding



        # Define characteristics

        self.dim = dim

        self.keepdim = keepdim



        # Weights

        self.weight = nn.Parameter(torch.ones(1, self.kernel_size[0] * self.kernel_size[1]))





    def funcionOWA(self, X):



        tensor_ordered = torch.sort(X, descending = True)

        weight_norm = torch.nn.functional.softmax(self.weight, dim = self.dim)

        output = torch.sum(tensor_ordered[0] * weight_norm, dim = self.dim, keepdim = self.keepdim)



        return output



    def forward(self, X):



        # Normalize

        maximum = torch.max(X)

        minimum = torch.min(X)

        X = (X-minimum)/(maximum-minimum)



        # Añadir columnas/filas según padding

        X_pad = F.pad(X, pad=self.padding, mode='constant', value=0)



        # Vamos extrayendo las ventanas a agregar y colocándolas en filas

        X_aux = X_pad.unfold(2, size=self.kernel_size[0], step=self.stride[0]).unfold(3, size=self.kernel_size[1], step=self.stride[1])



        # Ponemos el formato correcto

        X_aux = X_aux.reshape([X_aux.shape[0], X_aux.shape[1], X_aux.shape[2], X_aux.shape[3], X_aux.shape[4] * X_aux.shape[5]])



        # Agg Func

        Y_temp = self.funcionOWA(X_aux)



        # Denormalize

        Y = minimum + (maximum-minimum) * Y_temp



        return Y

# Model

In [93]:
class LeNetModel(nn.Module):
    def __init__(self, function, conv_filters=[64, 64], linear_sizes=[384, 192], num_classes=10):
        super(LeNetModel, self).__init__()
        self.conv1 = nn.Conv2d(3, conv_filters[0], [3,3], [1,1])
        self.pool1 = AggPoolingLayer(ChoquetLayer(), [2,2], [2,2])
        self.conv2 = nn.Conv2d(conv_filters[0], conv_filters[1], [3,3], [1,1])
        self.pool2 = AggPoolingLayer(ChoquetLayer(), [2,2], [2,2])
        self.fc1 = nn.Linear(conv_filters[1]*6*6, linear_sizes[0])
        self.fc2 = nn.Linear(linear_sizes[0], linear_sizes[1])
        self.fc3 = nn.Linear(linear_sizes[1], num_classes)

    def forward(self, x: torch.Tensor):
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# Training

In [94]:
writer = SummaryWriter(log_dir=RUNS_DIR)

In [95]:
def train(model, train_loader, criterion, optimizer, val_loader=None, num_epochs=20, device=device):
    train_acc = []
    train_loss = []

    # if val_loader is not None:
    val_acc = []
    val_loss = []

    for epoch in range(num_epochs):
        running_loss = 0.0
        count_evaluated = 0
        count_correct = 0

        for batch_idx, data in enumerate(train_loader, 0):

            model.train()

            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels)

            loss.backward()

            optimizer.step()

            running_loss += loss.item()

            count_evaluated += inputs.shape[0]

            count_correct += torch.sum(labels == torch.max(outputs, dim=1)[1])



        print('Training: [%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / (batch_idx+1)))



        train_loss.append(running_loss / (batch_idx+1))

        train_acc.append(float(count_correct) / count_evaluated)



        if val_loader is not None:

            running_loss_val = 0.0

            count_evaluated = 0

            count_correct = 0

            model.eval()



            with torch.no_grad():

                for val_batch_idx, data_val in enumerate(val_loader, 0):

                    inputs_val, labels_val = data_val[0].to(device), data_val[1].to(device)

                    outputs_val = model(inputs_val)

                    loss = criterion(outputs_val, labels_val)

                    running_loss_val += loss.item()

                    count_evaluated += inputs_val.shape[0]

                    count_correct += torch.sum(labels_val == torch.max(outputs_val, dim=1)[1])



                val_loss.append(running_loss_val / (val_batch_idx + 1))

                acc_val = float(count_correct) / count_evaluated



                print('Validation: epoch %d - acc: %.3f' %

                            (epoch + 1, acc_val))

                val_acc.append(acc_val)



        # Tensorboard

        writer.add_scalar('Loss/Validation', val_loss[-1], global_step=epoch)

        writer.add_scalar('Accuracy/Validation', val_acc[-1], global_step=epoch)

        writer.add_scalar('Loss/Train', train_loss[-1], global_step=epoch)

        writer.add_scalar('Accuracy/Train', train_acc[-1], global_step=epoch)

        for name, param in model.named_parameters():

            writer.add_histogram(f"Parameters/{name}", param, epoch)

            if param.grad is not None:

                writer.add_histogram(f"Gradients/{name}", param.grad, epoch)



    return model

# Testing

In [96]:
def test(model, test_loader, criterion, device='cuda'):

    with torch.no_grad():

        number_samples = 0

        number_correct = 0

        running_loss_test = 0.0

        for test_batch_idx, data_test in enumerate(test_loader, 0):

            inputs_test, labels_test = data_test[0].to(device), data_test[1].long().to(device)

            outputs_test = model(inputs_test)

            loss = criterion(outputs_test, labels_test)

            running_loss_test += loss.cpu().numpy()



            _, outputs_class = torch.max(outputs_test, dim=1)

            number_correct += torch.sum(outputs_class == labels_test).cpu().numpy()

            number_samples += len(labels_test)



        acc_test = number_correct / number_samples



        print('Test - Accuracy: %.3f' % acc_test)

        print('Test - CrossEntropy: %.3f' % (running_loss_test / (test_batch_idx+1)))

In [None]:
model = LeNetModel(ChoquetLayer).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

trained_model = train(model, train_loader, criterion, optimizer, val_loader=val_loader)

Training: [1,   704] loss: 2.290
Validation: epoch 1 - acc: 0.191
