# This notebook contains code for training a convolutional autoencoder and computing feature vectors with it. Vectors produced by the recurrent autoencoder showed better results, so these vectors were not used in the submission.

In [1]:
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, dataloader
from statistics import mean
import visdom

In [4]:
class ImageDataset(Dataset):
    r'''
    In-memory dataset of (origin image, mask) pairs.

    Case names come from the 'Case' column of the metrics CSV (the text before
    the first '.' of each entry). For every case the origin image and its four
    masks (expert, sample 1, sample 2, sample 3) are loaded, resized to `size`
    and stacked pairwise, so each stored array is
    np.stack([origin, mask]) / 255.

    Index layout, with n = len(self.filenames):
        [0, n)    -> expert mask
        [n, 2n)   -> sample_1 mask
        [2n, 3n)  -> sample_2 mask
        [3n, 4n)  -> sample_3 mask

    Parameters
    ----------
    root : str
        Directory holding the Origin / Expert / sample_1..3 sub-directories.
    metrics_path : str
        CSV file with a 'Case' column listing the image file names.
    size : tuple
        Target size passed to PIL's `resize` (width, height).
    '''

    # (sub-directory, file-name suffix) of every mask variant, in the order the
    # variants are stored per case and addressed by __getitem__.
    _VARIANTS = (
        ('Expert', '_expert'),
        ('sample_1', '_s1'),
        ('sample_2', '_s2'),
        ('sample_3', '_s3'),
    )

    def __init__(self, root='./Dataset', metrics_path='./metrics.csv', size=(16, 16)):
        super().__init__()
        self.filenames = [s.split('.')[0] for s in pd.read_csv(metrics_path)['Case'].tolist()]
        self.images = []

        for filename in self.filenames:
            # The origin image is decoded once and shared by all four pairs.
            origin_img = self._load_plane(f'{root}/Origin/{filename}.png', size)
            self.images.append(
                tuple(
                    # Division by 255 assumes 8-bit pixel values -- TODO confirm for the masks.
                    np.stack([origin_img,
                              self._load_plane(f'{root}/{folder}/{filename}{suffix}.png', size)]) / 255
                    for folder, suffix in self._VARIANTS
                )
            )

    @staticmethod
    def _load_plane(path, size):
        r'''
        Read one image, resize it to `size` and return a 2-D array.
        '''
        # The context manager closes the file handle as soon as the pixels are read.
        with Image.open(path) as img:
            plane = np.array(img.resize(size))
        # Multi-channel images keep only their first channel. Applying this to
        # masks as well as origins keeps np.stack from failing on mixed shapes.
        if plane.ndim == 3:
            plane = plane[:, :, 0]
        return plane

    def __len__(self):
        return len(self.filenames) * len(self._VARIANTS)

    def __getitem__(self, idx):
        r'''
        Return (case name, stacked origin/mask array) for flat index `idx`.

        Negative indices count from the end; anything outside
        [-len(self), len(self)) raises IndexError (also for an empty dataset).
        '''
        total = len(self)
        if not -total <= idx < total:
            raise IndexError(f'index {idx} is out of range for dataset of size {total}')
        if idx < 0:
            idx += total
        variant, case = divmod(idx, len(self.filenames))
        return (self.filenames[case], self.images[case][variant])

In [5]:
class CNNAutoencoder(nn.Module):
    r'''
    Small convolutional autoencoder for two-channel 16x16 inputs.

    The encoder maps [bs, 2, 16, 16] to a [bs, 4, 4, 4] code; the decoder maps
    the code back to [bs, 2, 16, 16]. The decoder ends with a transposed
    convolution and applies no final activation.
    '''
    def __init__(self):
        super().__init__()
        # Layer order is kept exactly as before so state_dict keys
        # (encoder.0, encoder.3, decoder.0, decoder.1) stay stable.
        encoder_layers = [
            nn.Conv2d(2, 4, kernel_size=3, padding=1),          # [bs, 4, 16, 16]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                                 # [bs, 4, 8, 8]
            nn.Conv2d(4, 4, kernel_size=3, padding=1),          # [bs, 4, 8, 8]
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                                 # [bs, 4, 4, 4]
        ]
        decoder_layers = [
            nn.ConvTranspose2d(4, 4, kernel_size=2, stride=2),  # [bs, 4, 8, 8]
            nn.ConvTranspose2d(4, 2, kernel_size=2, stride=2),  # [bs, 2, 16, 16]
        ]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def encode(self, x):
        r'''
        x.size() == [bs, 2, 16, 16]
        returns the code of size [bs, 4, 4, 4]
        '''
        return self.encoder(x)

    def decode(self, h):
        r'''
        h.size() == [bs, 4, 4, 4]
        returns the reconstruction of size [bs, 2, 16, 16]
        '''
        return self.decoder(h)

    def forward(self, x):
        r'''
        Encode `x` and decode the resulting code.
        '''
        return self.decode(self.encode(x))

# Training

In [6]:
# Build the dataset once, then serve it in shuffled mini-batches of four.
train_dataset = ImageDataset()
train_loader = dataloader.DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
)

In [8]:
# The autoencoder is moved to the GPU; Adam is used with its default hyper-parameters.
model_ae = CNNAutoencoder()
model_ae = model_ae.cuda()
model_opt = torch.optim.Adam(params=model_ae.parameters())

In [9]:
# Optimisation-step counter; the training loop uses it as the x coordinate of the loss plot.
step = 0
# Visdom client that receives the loss curve.
vis = visdom.Visdom(env='Cnn autoencoder train (1)')

Setting up a new session...


In [None]:
# Optional resume step: restore model and optimizer state from a previous run.
# Both files are read before anything is applied, so a missing file never
# leaves the model restored while the optimizer is still fresh.
try:
    model_state = torch.load('./cnn_ae_model.pth')
    opt_state = torch.load('./cnn_ae_model_opt.pth')
except FileNotFoundError as err:
    # A first run has no checkpoint yet; report it and train from scratch
    # instead of stopping Restart & Run All.
    print(f'No checkpoint restored ({err}); training starts from scratch.')
else:
    model_ae.load_state_dict(model_state)
    model_opt.load_state_dict(opt_state)

In [10]:
# Train for 300 passes over the loader. The target of the reconstruction loss
# is the input itself; the model output is treated as logits.
losses = []
for epoch in range(300):
    for _, images in train_loader:
        images = images.float().cuda()

        model_opt.zero_grad()
        reconstruction = model_ae(images)
        loss = nn.functional.binary_cross_entropy_with_logits(
            reconstruction, images, reduction='sum'
        )
        loss.backward()
        model_opt.step()

        # The loss is summed over the batch, so divide by the batch size.
        losses.append(loss.item() / len(images))

        # Every 20th step: plot the mean of the losses gathered since the last plot.
        if step % 20 == 0:
            vis.line(X=[step], Y=[mean(losses)], update='append', name='total loss', win='losses')
            losses = []

        step += 1

In [None]:
# Write the checkpoint: model weights first, then the optimizer state.
torch.save(obj=model_ae.state_dict(), f='./cnn_ae_model.pth')
torch.save(obj=model_opt.state_dict(), f='./cnn_ae_model_opt.pth')

# Inference

In [19]:
# `ds` is not created by any earlier cell, so on a fresh kernel this cell used
# to fail with NameError. Bind it to the dataset built for training; the name
# `ds` is kept because the inference cells refer to it.
ds = train_dataset
# Case name -> position of that case in ds.filenames.
rev_idx = {fn: i for i, fn in enumerate(ds.filenames)}

In [20]:
# Table with one row per case; the preview shows its first five rows.
marks = pd.read_csv(filepath_or_buffer='./Dataset/OpenPart.csv')
marks.head(n=5)

Unnamed: 0,Case,Sample 1,Sample 2,Sample 3
0,00000072_000.png,1,5,1
1,00000150_002.png,5,5,3
2,00000181_061.png,4,4,3
3,00000211_019.png,4,4,2
4,00000211_041.png,3,5,2


In [22]:
# Encode the (origin, mask) pair of every mask variant for each case listed in
# `marks`. Result: {case name: [expert, sample 1, sample 2, sample 3]}, where
# each entry is the flattened encoder output as a (1, -1) numpy array.
vectors = dict()

# The dataset built for training is reused here. The lookup is built locally so
# that this cell only relies on names created by the cells above it.
n_cases = len(train_dataset.filenames)
case_to_idx = {fn: i for i, fn in enumerate(train_dataset.filenames)}

with torch.no_grad():
    for case in marks['Case']:
        fname = case.split('.')[0]
        base = case_to_idx[fname]

        features = []
        # Variant k of a case is stored at flat index base + k * n_cases.
        for variant in range(4):
            pair = train_dataset[base + variant * n_cases][1]
            batch = torch.tensor(pair).unsqueeze(0).float().cuda()
            features.append(model_ae.encode(batch).cpu().view(1, -1).numpy())

        vectors[fname] = features

In [23]:
# Serialise the {case name: feature vectors} mapping to disk.
with open('./cnn_vectors.pickle', mode='wb') as sink:
    pickle.dump(vectors, sink)