In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import cv2
import torch
import torch.functional as F
import torch.nn as nn
from time import time, sleep

from utils import *
from custom_pca import custom_pca
from video_loader import VideoLoader

### Example of video transformation

In [None]:
model = custom_pca()

video = VideoLoader('data/sample20s.mp4', grayscale=True)
t1 = time()
frames_rand = video.get_random_frames(0.6)
model.fit(frames_rand)
t2 = time()
reconstructed = []
for j, frames in enumerate(video):
    reconstructed.append(model.inverse_transform(model.transform(frames), shape=(video.height, video.width)))
reconstructed = np.vstack(reconstructed)
t3 = time()
print(reconstruction_error(video.get_all_frames(), reconstructed))
t4 = time()

print('Fitting time:', t2-t1)
print('Transform:', t3-t2)
print('Error calculation:', t4-t3)

### Dimensionality reduction using autoencoders

In [5]:
class BasicAutoEncoder(nn.Module):
    def __init__(self, inchannels, ncomp):
        super().__init__()
        
        # After that: B x 16 x 13 x 13
        self.transform_convs = nn.Sequential(nn.Conv2d(inchannels, 32, kernel_size=4), nn.ReLU(), # 253
                                            nn.Conv2d(32, 32, kernel_size=5, stride=2), nn.ReLU(), # 125
                                            nn.Conv2d(32, 32, kernel_size=5, stride=2), nn.ReLU(), # 61
                                            nn.Conv2d(32, 32, kernel_size=5), nn.ReLU(), # 57
                                            nn.Conv2d(32, 16, kernel_size=3, stride=2), nn.ReLU(), # 28
                                            nn.Conv2d(16, 16, kernel_size=4, stride=2), nn.ReLU()) # 13
        
        """
            to_lower_dim: map each sample from 16 x 12 x 12 to ncomp dimensions
            from_lower_dim: does the inverse mapping
        """
        self.ncomp = ncomp
        if ncomp == 200:
            self.to_lower_dim = nn.Sequential(nn.Conv2d(16, 8, kernel_size=5, stride=2), nn.ReLU())
            self.from_lower_dim = nn.Sequential(nn.ConvTranspose2d(8, 16, kernel_size=2, stride=2), nn.ReLU())
        elif ncomp == 150:
            self.to_lower_dim = nn.Sequential(nn.Conv2d(16, 6, kernel_size=5, stride=2), nn.ReLU())
            self.from_lower_dim = nn.Sequential(nn.ConvTranspose2d(6, 16, kernel_size=5, stride=2), nn.ReLU())
        elif ncomp == 100:
            self.to_lower_dim = nn.Sequential(nn.Conv2d(16, 4, kernel_size=5, stride=2), nn.ReLU())
            self.from_lower_dim = nn.Sequential(nn.ConvTranspose2d(4, 16, kernel_size=5, stride=2), nn.ReLU())
        elif ncomp == 50:
            self.to_lower_dim = nn.Sequential(nn.Conv2d(16, 2, kernel_size=5, stride=2), nn.ReLU())
            self.from_lower_dim = nn.Sequential(nn.ConvTranspose2d(2, 16, kernel_size=5, stride=2), nn.ReLU())
        elif ncomp == 25:
            self.to_lower_dim = nn.Sequential(nn.Conv2d(16, 1, kernel_size=5, stride=2), nn.ReLU())
            self.from_lower_dim = nn.Sequential(nn.ConvTranspose2d(1, 16, kernel_size=5, stride=2), nn.ReLU())
        elif ncomp == 16:
            self.to_lower_dim = nn.Sequential(nn.Conv2d(16, 1, kernel_size=7, stride=2), nn.ReLU())
            self.from_lower_dim = nn.Sequential(nn.ConvTranspose2d(1, 16, kernel_size=7, stride=2), nn.ReLU())
        else:
            print('The lower dimension must be one of:', self.ncomps())
            return
            
        # Inverse of the transform's convolutions
        self.inv_transform_convs = nn.Sequential(nn.ConvTranspose2d(16, 16, kernel_size=4, stride=2), nn.ReLU(),
                                                 nn.ConvTranspose2d(16, 32, kernel_size=3, stride=2), nn.ReLU(),
                                                 nn.ConvTranspose2d(32, 32, kernel_size=5), nn.ReLU(),
                                                 nn.ConvTranspose2d(32, 32, kernel_size=5, stride=2), nn.ReLU(),
                                                 nn.ConvTranspose2d(32, 32, kernel_size=5, stride=2), nn.ReLU(),
                                                 nn.ConvTranspose2d(32, 32, kernel_size=4), nn.ReLU())
        
    def transform(self, x):
        x = self.transform_convs(x)
        x = self.to_lower_dim(x)
        
        return x.view(x.shape[0], -1), x.shape
    
    def inverse_transform(self, x, shape):
        x = x.view(shape)
        x = self.from_lower_dim(x)
        x = self.inv_transform_convs(x)
        
        return x
    
    @staticmethod
    def ncomps():
        return [16, 25, 50, 100, 150, 200]
    
x = torch.normal(0, 5, (64, 3, 256, 256))
for ncomp in BasicAutoEncoder.ncomps():
    network = BasicAutoEncoder(3, ncomp)
    x_low, x_shape = network.transform(x)
    print(x_low.shape)
    x_reconstructed = network.inverse_transform(x_low, x_shape)
    print(x_reconstructed.shape)
print(f'Number of parameters for {network.ncomp}:', sum([p.numel() for p in network.parameters()]))

torch.Size([64, 16])
torch.Size([64, 32, 256, 256])
torch.Size([64, 25])
torch.Size([64, 32, 256, 256])
torch.Size([64, 50])
torch.Size([64, 32, 256, 256])
torch.Size([64, 100])
torch.Size([64, 32, 256, 256])
torch.Size([64, 150])
torch.Size([64, 32, 256, 256])
torch.Size([64, 200])
torch.Size([64, 32, 208, 208])
Number of parameters for 200: 193000
