## Autoencoder y clasificador convolucional sobre Fashion-MNIST

In [10]:
import matplotlib.pyplot as plt

import numpy as np

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torchvision import datasets
from torchvision.transforms import ToTensor, transforms

seed = 27
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Dispositivo: {device}')  

Dispositivo: cpu


## Red neuronal autoencoder convolucional de varias capas

El encoder tienen que tener al menos dos capas convolucionales 2D y una lineal. 
El decoder tiene que realizar una transformaci ́on aproximadamente inversa, por ejemplo, utilizando primero una capa lineal y luego dos convolucionales traspuestas

In [30]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])

# Download and load the training data
train_set_orig = datasets.FashionMNIST('MNIST_data/', download = True, train = True,  transform = transform)
valid_set_orig = datasets.FashionMNIST('MNIST_data/', download = True, train = False, transform = transform)

In [31]:
class CustomDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        # Originalmente, retorna imagen y etiqueta
        image, label = self.dataset[index] 

        # Modificamos el dataset para que retorne solo la imagen
        input = image
        output = image
        return input, output 

In [32]:
train_set = CustomDataset(train_set_orig)
valid_set = CustomDataset(valid_set_orig)

In [36]:
print(f'Entrenamiento:\t{len(train_set)}') 
print(f'Validación:\t{len(valid_set)}')

Entrenamiento:	60000
Validación:	10000


In [None]:
class AutoEncoder(nn.Module):
    def __init__(self, p=0.2):
        super(AutoEncoder, self).__init__()
        self.flatten = nn.Flatten()

        # Encoder
        self.conv2d = nn.Sequential(
            
            nn.Conv2d(in_channels=1, out_channels=5, kernel_size=5, padding=0), # De 1 canal de 28x28 a 5 canales de 24x24 (kernel y padding)
            nn.ReLU(),
            nn.Dropout(p),
            nn.MaxPool2d(kernel_size=2, stride=2), # Se reduce a 12x12

            nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=0), # De 5 canales de 12x12 a 10 canales de 10x10
            nn.ReLU(),
            nn.Dropout(p), 
            nn.MaxPool2d(kernel_size=2, stride=2), # Se reduce 5x5
        )

        # Linear
        self.linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(10*5*5, 10*5*5),
            nn.ReLU(),
            nn.Dropout(p),
        )
        
        # Decoder
        self.convt2d = nn.Sequential(
            nn.Unflatten(1, (10, 5, 5)),
            nn.ConvTranspose2d(10, 5, kernel_size=4, stride=2, padding=1), # De 5x5 a 12x12
            nn.ReLU(),
            nn.ConvTranspose2d(5, 1, kernel_size=4, stride=2, padding=1, output_padding=1), # De 12x12 a 28x28
            nn.Sigmoid(),
        )


    def forward(self, x):
        x = self.conv2d(x) # Encoder
        x = self.linear(x)
        x = self.convt2d(x) # Decoder 
        return x

In [47]:
p = 0.2
model = AutoEncoder(p=p)

image = torch.randn(1,1,28,28) # Batch, canal, size
output = model(image)
print(f'Input size: {image.shape}')
print(f'Output size: {output.shape}')

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 10)