## Creating an image decoder (Classifying)

In [1]:
import numpy as np
import torch
import os
import cv2

import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Pick the first CUDA device when one is available, otherwise fall back to the CPU,
# and report which of the two was selected.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using CUDA." if device.type == "cuda" else "Using CPU.")

Using CUDA.


In [3]:
# Import data
folder_path = 'video_frames/'

# os.listdir returns entries in arbitrary order. Sorting makes the mapping from
# frame index to image file reproducible across runs and machines.
# NOTE(review): the sort is lexicographic, so it matches playback order only if
# the frame numbers in the file names are zero-padded — TODO confirm.
image_paths = [
    os.path.join(folder_path, img_path)
    for img_path in sorted(os.listdir(folder_path))
]

In [4]:
# Prepare data
if not image_paths:
    raise ValueError("No images found: image_paths is empty")

frames = []
for img in image_paths:
    frame = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports an unreadable file by returning None instead of raising,
    # so fail here with the offending path rather than later with an obscure error.
    if frame is None:
        raise ValueError(f"Could not read image: {img}")
    frames.append(frame)

# Targets: all frames stacked into one float tensor of shape (num_frames, height, width).
y = torch.tensor(np.array(frames)).float()
y = y/255 # scales 8-bit pixel values into the range 0 to 1.

# Inputs: the frame index itself. The dtype is pinned to int64 because
# nn.functional.one_hot (applied inside Conv_Decoder.forward) requires a LongTensor,
# while NumPy's default integer type is platform dependent.
X = torch.arange(y.shape[0], dtype=torch.long)

In [5]:
# One class per frame; the output size is read from the first frame.
num_classes, output_dimensions = len(y), y[0].shape
print(f"Num classes: {num_classes}")
print(f"Output dimensions: {output_dimensions}")

Num classes: 6572
Output dimensions: torch.Size([180, 240])


### Decoder network: fully connected layer followed by transposed convolutions

In [6]:
"""
class Conv_Decoder(nn.Module):
    def __init__(self, latent_dim=1, num_classes=num_classes):
        super().__init__()
        self.latent_dim = latent_dim
        self.num_classes = num_classes

        # FC layer for feature mapping
        self.fc = nn.Linear(num_classes, latent_dim * 3 * 4)

        # Deconvolution layers
        self.conv_1 = nn.ConvTranspose2d(latent_dim, 10,
                           kernel_size=5, stride=5, padding=0)
        self.conv_2 = nn.ConvTranspose2d(10, 10, 
                           kernel_size=3, stride=3, padding=0)
        self.conv_3 = nn.ConvTranspose2d(10, 10,
                           kernel_size=2, stride=2, padding=0)
        self.conv_4 = nn.ConvTranspose2d(10, num_classes,
                           kernel_size=2, stride=2, padding=0)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = nn.functional.one_hot(x, num_classes=self.num_classes).float()
        x = self.fc(x)
        x = x.view(self.latent_dim, 3, 4)
        print(f"Shape after conv0: {x.shape}")
        x = self.conv_1(x)
        x = self.relu(x)
        print(f"Shape after conv1: {x.shape}")
        
        x = self.conv_2(x)
        x = self.relu(x)
        print(f"Shape after conv2: {x.shape}")
        
        x = self.conv_3(x)
        x = self.relu(x)
        print(f"Shape after conv3: {x.shape}")
        
        x = self.conv_4(x)
        x = self.sigmoid(x)
        print(f"Shape after conv4: {x.shape}")
        
        return x
""";

In [7]:
class Conv_Decoder(nn.Module):
    """Decode an integer frame index into a single-channel image.

    The index is one-hot encoded, projected by a linear layer onto a small
    ``latent_dim x 3 x 4`` seed feature map, and upsampled by four transposed
    convolutions with strides 5, 3, 2 and 2 (a factor of 60 per axis). A final
    sigmoid keeps every output value in (0, 1).

    Args:
        latent_dim: Number of channels of the seed feature map.
        num_classes: Number of distinct indices, i.e. the width of the one-hot
            input. The default is the module-level ``num_classes`` as it was
            when this class definition was executed.
    """

    # Spatial size of the seed feature map produced by the linear layer.
    SEED_HEIGHT = 3
    SEED_WIDTH = 4

    def __init__(self, latent_dim=1, num_classes=num_classes):
        super().__init__()
        self.latent_dim = latent_dim
        self.num_classes = num_classes

        # FC layer for feature mapping: one-hot index -> flattened seed map
        self.fc = nn.Linear(
            num_classes, latent_dim * self.SEED_HEIGHT * self.SEED_WIDTH
        )

        # Deconvolution layers. kernel_size == stride with no padding, so each
        # layer multiplies the spatial size by exactly its stride.
        self.deconv_block = nn.Sequential(
            nn.ConvTranspose2d(latent_dim, 100,
                               kernel_size=5, stride=5, padding=0),
            nn.ReLU(),
            nn.ConvTranspose2d(100, 100,
                               kernel_size=3, stride=3, padding=0),
            nn.ReLU(),
            nn.ConvTranspose2d(100, 100,
                               kernel_size=2, stride=2, padding=0),
            nn.ReLU(),
            nn.ConvTranspose2d(100, 1,
                               kernel_size=2, stride=2, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Generate one image per index in ``x``.

        Args:
            x: Integer (int64) tensor of class indices in
                ``[0, num_classes)``; a scalar tensor is treated as a batch
                of one.

        Returns:
            Float tensor of shape ``(N, H, W)`` with values in (0, 1), where
            ``N`` is the number of indices and ``H x W`` is 60 times the seed
            size (180 x 240 for the 3 x 4 seed).
        """
        x = nn.functional.one_hot(x, num_classes=self.num_classes).float()
        x = self.fc(x)
        x = x.view(-1, self.latent_dim, self.SEED_HEIGHT, self.SEED_WIDTH)
        x = self.deconv_block(x)
        # The last transposed convolution emits exactly one channel; dropping
        # that axis yields (N, H, W) without hard-coding the image size.
        return x.squeeze(1)

In [8]:
"""
X1 = nn.functional.one_hot(X[5], num_classes=num_classes).float()
linear_ = nn.Linear(num_classes, latent_dim * 3 * 4)
X2 = linear_(X1)
X3 = X2.view(-1, latent_dim, 3, 4)
X3.shape
""";

In [9]:
# Build the decoder with its default arguments (latent_dim=1 and the num_classes
# value bound when the class was defined) and print its module structure.
model = Conv_Decoder()
print(model)

Conv_Decoder(
  (fc): Linear(in_features=6572, out_features=12, bias=True)
  (deconv_block): Sequential(
    (0): ConvTranspose2d(1, 100, kernel_size=(5, 5), stride=(5, 5))
    (1): ReLU()
    (2): ConvTranspose2d(100, 100, kernel_size=(3, 3), stride=(3, 3))
    (3): ReLU()
    (4): ConvTranspose2d(100, 100, kernel_size=(2, 2), stride=(2, 2))
    (5): ReLU()
    (6): ConvTranspose2d(100, 1, kernel_size=(2, 2), stride=(2, 2))
    (7): Sigmoid()
  )
)


In [35]:
# Hyperparameters

# AdamW betas: decay of the moving average of the gradient (decay_1)
# and of the squared gradient (decay_2).
decay_1, decay_2 = 0.9, 0.99

# Initial learning rate and weight-decay coefficient passed to the optimizer.
lr = 1e-5
weight_decay = 4e-10

# Multiplicative factor (gamma) for the exponential learning-rate schedule.
lr_decay_rate = 0.98

In [34]:
# Loss function, Optimizer and Scheduler
# Mean-squared error between generated and target frames.
criterion = nn.MSELoss()

# AdamW over every model parameter, configured from the hyperparameter cell.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=lr,
    betas=(decay_1, decay_2),
    weight_decay=weight_decay,
)

# Multiplies the learning rate by lr_decay_rate on every scheduler.step() call.
scheduler = torch.optim.lr_scheduler.ExponentialLR(
    optimizer,
    gamma=lr_decay_rate,
)

In [12]:
# Place the model and the complete dataset (indices and target frames) on `device`.
model = model.to(device)
X, y = X.to(device), y.to(device)

In [13]:
# Report where the model's first parameter, the inputs and the targets live,
# one device per line.
model_device = next(model.parameters()).device
inputs_device, labels_device = X.device, y.device

print(model_device, inputs_device, labels_device, sep="\n")

cuda:0
cuda:0
cuda:0


In [14]:
# Spot-check the shape of a single target frame.
# NOTE(review): index 2000 is presumably an arbitrary sample; it requires at least 2001 frames.
y[2000].shape

torch.Size([180, 240])

In [15]:
# Smoke test: decode index 0 and show the shape of the generated frame.
# The module is called directly (not via .forward()) so nn.Module.__call__ runs
# any registered hooks, and the index tensor is created on `device` up front
# instead of being built on the CPU and copied.
model(torch.tensor(0, dtype=torch.long, device=device))[0].shape

torch.Size([180, 240])

In [16]:
# Training-run settings.
num_epochs = 100
verbose = True

# both train and test (deliberate overfit)
dataloader = DataLoader(
    TensorDataset(X, y),
    batch_size=200,
    shuffle=True,
)

In [36]:
# Training loop
loss_values = []  # per-batch loss of every optimisation step, across all epochs
for epoch in range(num_epochs):
    model.train() # Set to train mode
    running_loss = 0.0  # sum of per-sample losses seen in this epoch
    samples_seen = 0
    for inputs, labels in dataloader:

        # Zero the gradients
        optimizer.zero_grad()

        # Feed-forward
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backprop
        loss.backward()
        optimizer.step()

        # Add loss. `criterion` is MSELoss with its default mean reduction, so the
        # batch mean is weighted by the batch size; otherwise the smaller final
        # batch would count as much as a full one in the epoch average.
        batch_loss = loss.item()
        samples_in_batch = inputs.size(0)
        running_loss += batch_loss * samples_in_batch
        samples_seen += samples_in_batch
        loss_values.append(batch_loss)

    # Average per-sample loss for the epoch
    avg_loss = running_loss / samples_seen

    # Update the learning rate at the end of each epoch
    scheduler.step()

    if verbose:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.6f}")

Epoch 1/100, Loss: 0.044592
Epoch 2/100, Loss: 0.044397
Epoch 3/100, Loss: 0.044275
Epoch 4/100, Loss: 0.044185
Epoch 5/100, Loss: 0.044108
Epoch 6/100, Loss: 0.044031
Epoch 7/100, Loss: 0.043989
Epoch 8/100, Loss: 0.043951
Epoch 9/100, Loss: 0.043918
Epoch 10/100, Loss: 0.043883
Epoch 11/100, Loss: 0.043871
Epoch 12/100, Loss: 0.043850
Epoch 13/100, Loss: 0.043835
Epoch 14/100, Loss: 0.043802
Epoch 15/100, Loss: 0.043793
Epoch 16/100, Loss: 0.043801
Epoch 17/100, Loss: 0.043773
Epoch 18/100, Loss: 0.043769
Epoch 19/100, Loss: 0.043768
Epoch 20/100, Loss: 0.043768
Epoch 21/100, Loss: 0.043759
Epoch 22/100, Loss: 0.043739
Epoch 23/100, Loss: 0.043745
Epoch 24/100, Loss: 0.043758
Epoch 25/100, Loss: 0.043730
Epoch 26/100, Loss: 0.043734
Epoch 27/100, Loss: 0.043718
Epoch 28/100, Loss: 0.043729
Epoch 29/100, Loss: 0.043704
Epoch 30/100, Loss: 0.043714
Epoch 31/100, Loss: 0.043722
Epoch 32/100, Loss: 0.043718
Epoch 33/100, Loss: 0.043708
Epoch 34/100, Loss: 0.043716
Epoch 35/100, Loss: 0.0

### Model is too small to fully learn the images. I will freeze current weights and add another layer.

In [37]:
# Display the model's module structure.
model

Conv_Decoder(
  (fc): Linear(in_features=6572, out_features=12, bias=True)
  (deconv_block): Sequential(
    (0): ConvTranspose2d(1, 100, kernel_size=(5, 5), stride=(5, 5))
    (1): ReLU()
    (2): ConvTranspose2d(100, 100, kernel_size=(3, 3), stride=(3, 3))
    (3): ReLU()
    (4): ConvTranspose2d(100, 100, kernel_size=(2, 2), stride=(2, 2))
    (5): ReLU()
    (6): ConvTranspose2d(100, 1, kernel_size=(2, 2), stride=(2, 2))
    (7): Sigmoid()
  )
)

In [41]:
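# Freezing params — TODO: not implemented; this cell currently contains no code (e.g. call requires_grad_(False) on each parameter in model.parameters()).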

<generator object Module.parameters at 0x00000237829F5A80>

In [None]:
# NOTE(review): `total` is not defined in any visible cell, so this raises
# NameError on a fresh kernel — TODO define it or remove this cell.
total

In [None]:
# Print `inputs`, i.e. the last batch of indices bound by the training loop.
print(inputs)

In [None]:
# NOTE(review): the result of .to(device) is not assigned, so `inputs` itself is
# left unchanged; the notebook only displays the returned tensor.
inputs.to(device)

In [None]:
# Rebind `inputs_device` (set earlier from X.device) to the device of the last training batch.
inputs_device = inputs.device

In [None]:
# Show the device recorded in `inputs_device`.
print(inputs_device)

In [None]:
# Total number of scalar parameters in the model.
num_params = sum(p.numel() for p in model.parameters())

# Memory taken by the parameters, in megabytes (1 MB = 1e6 bytes). Each tensor's
# own element size is used instead of assuming 4 bytes (float32) per value, so the
# figure stays correct if the model is cast to another dtype.
model_size_mb = sum(p.numel() * p.element_size() for p in model.parameters()) / 1e6

print(f"Number of parameters: {num_params}")
print(f"Model size: {model_size_mb:.2f} MB")

In [None]:
# Drop the notebook's references to the model and the last training batch, then
# ask PyTorch's CUDA caching allocator to release memory it no longer uses.
# NOTE(review): `optimizer` was built from model.parameters() and X / y were moved
# to the device earlier, so those tensors presumably stay allocated — TODO confirm.
del model, inputs, labels
torch.cuda.empty_cache()