## Create some Convolutional Auto Encoders with PyTorch

In [42]:
# imports
import os, glob

from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
from torch.optim import SGD
import torch.nn.functional as F

import cv2

### 1. Create variables

In [9]:
data_dir = r'C:\kaggle\plant-pathology-2021-fgvc8\train_images'
device = 'cpu'

### 2. Create datasets

In [10]:
from torch.utils.data import Dataset
from torchvision import transforms


In [26]:
class CAEDataset(Dataset):
    def __init__(self, im_paths, data_transforms=None) -> None:
        super().__init__()
        self.im_paths = im_paths
        self.data_transforms = data_transforms

    def  __len__(self):
        return len(self.im_paths)

    def __getitem__(self, index):
        im = cv2.imread(self.im_paths[index])
        if self.data_transforms:
            im = self.data_transforms(im)

        return im

#### Create the dataloaders

In [27]:
ds_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [28]:
im_paths = glob.glob(os.path.join(data_dir, '*.jpg'))

In [29]:
len(im_paths)

18632

In [33]:
train_ds = CAEDataset(im_paths=im_paths, data_transforms=ds_transforms)

In [34]:
train_loader = DataLoader(train_ds, batch_size=4, num_workers=0)

In [35]:
for x in train_loader:
    print(x.shape)
    break

torch.Size([4, 3, 224, 224])


### 3. Create Model

In [37]:
# define the NN architecture
class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        # encoder layers #
        # conv layer (depth from 3 --> 16), 3x3 kernels
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        # conv layer (depth from 16 --> 4), 3x3 kernels
        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)
        # pooling layer to reduce x-y dims by two; kernel and stride of 2
        self.pool = nn.MaxPool2d(2, 2)

        # decoder layers #
        # a kernel of 2 and a stride of 2 will increase the spatial dims by 2
        self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(16, 3, 2, stride=2)

    def forward(self, x):
        # encode #
        # add hidden layers with relu activation function
        # and max pooling after
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        # add second hidden layer
        x = F.relu(self.conv2(x))
        x = self.pool(x)  # compressed representation

        # decode #
        # add transpose conv layers, with relu activation function
        x = F.relu(self.t_conv1(x))
        # output layer (with sigmoid for scaling from 0 to 1)
        x = F.sigmoid(self.t_conv2(x))

        return x

In [39]:
network = ConvAutoencoder()
loss = nn.MSELoss()
opt = SGD(network.parameters(), lr=0.001)

In [43]:
i = 0

for _ in range(20):
    for data in train_loader:
        im = data
        opt.zero_grad()
        prediction = network(im)
        loss_value = loss(prediction, im)
        loss_value.backward()
        opt.step()
        print(loss_value)

        i += 1
        if i > 20:
            break



tensor(0.0343, grad_fn=<MseLossBackward0>)
tensor(0.0469, grad_fn=<MseLossBackward0>)
tensor(0.0371, grad_fn=<MseLossBackward0>)
tensor(0.0596, grad_fn=<MseLossBackward0>)
tensor(0.0494, grad_fn=<MseLossBackward0>)
tensor(0.0553, grad_fn=<MseLossBackward0>)
tensor(0.0725, grad_fn=<MseLossBackward0>)
tensor(0.0403, grad_fn=<MseLossBackward0>)
tensor(0.0530, grad_fn=<MseLossBackward0>)
tensor(0.0568, grad_fn=<MseLossBackward0>)
tensor(0.0356, grad_fn=<MseLossBackward0>)
tensor(0.0517, grad_fn=<MseLossBackward0>)
tensor(0.0346, grad_fn=<MseLossBackward0>)
tensor(0.0474, grad_fn=<MseLossBackward0>)
tensor(0.0430, grad_fn=<MseLossBackward0>)
tensor(0.0475, grad_fn=<MseLossBackward0>)
tensor(0.0452, grad_fn=<MseLossBackward0>)
tensor(0.0438, grad_fn=<MseLossBackward0>)
tensor(0.0424, grad_fn=<MseLossBackward0>)
tensor(0.0358, grad_fn=<MseLossBackward0>)
tensor(0.0430, grad_fn=<MseLossBackward0>)
tensor(0.0342, grad_fn=<MseLossBackward0>)
tensor(0.0342, grad_fn=<MseLossBackward0>)
tensor(0.03