In [1]:
%matplotlib inline
#%config InlineBackend.figure_format = ‘retina’
import matplotlib.pyplot as plt
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

#import helper

In [2]:
from PIL import Image
from pathlib import Path

class MangoDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, root_dir, limit = None, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.imgs = [x for x in Path(root_dir).rglob("*.*") if not str(x).endswith(".json")]
        if limit:
            self.imgs = self.imgs[:limit]

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = img = Image.open(self.imgs[idx]).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image

In [3]:
train_transforms = transforms.Compose([
                                transforms.RandomRotation(30),
                                transforms.RandomResizedCrop(224),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor()])
test_transforms = transforms.Compose([transforms.Resize(255),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor()])

ds = MangoDataset(r"F:\workspace\fascrapper\scrap_results\mango", transform=test_transforms, limit=50)
dataloader = torch.utils.data.DataLoader(ds, batch_size=32, shuffle=True)

In [4]:
images = next(iter(dataloader))
print("Shape", images[0].shape)
print("Min", torch.min(images[0]), "Max", torch.max(images[1]))


Shape torch.Size([3, 224, 224])
Min tensor(0.0549) Max tensor(0.9686)


In [15]:
import torch.nn as nn
from abc import abstractmethod

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = self.build_encoder()
        self.decoder = self.build_decoder()

        
    @abstractmethod
    def build_encoder(self):
        pass
    
    @abstractmethod
    def build_decoder(self):
        pass
    
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
class Linear_AutoEncoder(Autoencoder):
    def __init__(self, shapes):
        self.shapes = shapes
        super().__init__()
        
    @abstractmethod
    def build_encoder(self):
        encoder_layers = []

        for idx in (range(len(self.shapes) - 1)):
            encoder_layers.append(nn.Linear(self.shapes[idx], self.shapes[idx+1]))
            encoder_layers.append(nn.ReLU())

        encoder = nn.Sequential(*encoder_layers[:-1])
        return encoder # #-> :-1 -> remove last Relu
    
    @abstractmethod
    def build_decoder(self):
        decoder_layers = []

        for idx in reversed(range(len(self.shapes) - 1)):
            decoder_layers.append(nn.Linear(self.shapes[idx+1], self.shapes[idx]))
            decoder_layers.append(nn.ReLU())

        decoder_layers[-1] = nn.Sigmoid() #-> change depending on output value-range
        decoder = nn.Sequential(*decoder_layers)
        
        return decoder
    
class Conv_AutoEncoder(Autoencoder):
    def __init__(self):        
        super().__init__()
    
    @abstractmethod
    def build_encoder(self):
        pass
    
    @abstractmethod
    def build_decoder(self):
        pass

In [20]:
encoder = [nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(64, 192, kernel_size=5, padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(192, 384, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(384, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2)]

In [27]:
decoder = [x for x in reversed(encoder)]
decoder = decoder[2:]
decoder

[Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
 ReLU(inplace=True),
 Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
 MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
 ReLU(inplace=True),
 Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))]

In [16]:
model = Linear_AutoEncoder([3*224*224, 112, 48])
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

In [17]:
from tqdm.auto import tqdm

In [18]:
num_epochs = 10

for epoch in range(num_epochs):
    for img in tqdm(dataloader):
        img = img.reshape(-1, 3*224*224)
        prediction = model(img)
        loss = criterion(prediction, img)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f"Epoch:{epoch+1}, Loss: {loss.item():.4f}")
    

  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:1, Loss: 0.3182


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:2, Loss: 0.2511


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:3, Loss: 0.2350


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:4, Loss: 0.1525


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:5, Loss: 0.1302


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:6, Loss: 0.1056


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:7, Loss: 0.1087


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:8, Loss: 0.1024


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:9, Loss: 0.0837


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:10, Loss: 0.0844
