## Creating an image decoder (Classifying)

In [1]:
import numpy as np
import torch
import os
import cv2

import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Using CUDA.")
else:
    device = torch.device("cpu")
    print("Using CPU.")

Using CUDA.


In [3]:
# Import data
folder_path = 'video_frames/'

image_paths = os.listdir(folder_path)
image_paths = [folder_path + img_path for img_path in image_paths]

In [4]:
# Prepare data
y = []
for img in image_paths:
    y.append( cv2.imread(img, cv2.IMREAD_GRAYSCALE) )

y = np.array(y)
X = np.arange(y.shape[0])

X = torch.tensor(X)
#X = nn.functional.one_hot(X, num_classes=y.shape[0]).float()
y = torch.tensor(y)

In [5]:
num_classes = y.shape[0]
output_dimensions = y[0].shape
print(f"Num classes: {num_classes}")
print(f"Output dimensions: {output_dimensions}")

Num classes: 6572
Output dimensions: torch.Size([180, 240])


### Fully connected NN: 

In [56]:

class Conv_Decoder(nn.Module):
    def __init__(self, latent_dim=1, num_classes=num_classes):
        super().__init__()
        self.latent_dim = latent_dim
        self.num_classes = num_classes

        # FC layer for feature mapping
        self.fc = nn.Linear(num_classes, latent_dim * 3 * 4)

        # Deconvolution layers
        self.conv_1 = nn.ConvTranspose2d(latent_dim, 10,
                           kernel_size=5, stride=5, padding=0)
        self.conv_2 = nn.ConvTranspose2d(10, 10, 
                           kernel_size=3, stride=3, padding=0)
        self.conv_3 = nn.ConvTranspose2d(10, 10,
                           kernel_size=2, stride=2, padding=0)
        self.conv_4 = nn.ConvTranspose2d(10, num_classes,
                           kernel_size=2, stride=2, padding=0)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = nn.functional.one_hot(x, num_classes=self.num_classes).float()
        x = self.fc(x)
        x = x.view(self.latent_dim, 3, 4)
        print(f"Shape after conv0: {x.shape}")
        x = self.conv_1(x)
        x = self.relu(x)
        print(f"Shape after conv1: {x.shape}")
        
        x = self.conv_2(x)
        x = self.relu(x)
        print(f"Shape after conv2: {x.shape}")
        
        x = self.conv_3(x)
        x = self.relu(x)
        print(f"Shape after conv3: {x.shape}")
        
        x = self.conv_4(x)
        x = self.sigmoid(x)
        print(f"Shape after conv4: {x.shape}")
        
        return x


In [57]:
"""
class Conv_Decoder(nn.Module):
    def __init__(self, latent_dim=1, num_classes=num_classes):
        super().__init__()
        self.latent_dim = latent_dim
        self.num_classes = num_classes

        # FC layer for feature mapping
        self.fc = nn.Linear(num_classes, latent_dim * 3 * 4)

        # Deconvolution layers
        self.deconv_block = nn.Sequential(
            nn.ConvTranspose2d(latent_dim, 10, 
                               kernel_size=5, stride=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(10, 10, 
                               kernel_size=5, stride=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(10, 10,
                               kernel_size=5, stride=3, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(10, 1,
                               kernel_size=1, stride=2, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = nn.functional.one_hot(x, num_classes=self.num_classes).float()
        x = self.fc(x)
        x = x.view(-1, self.latent_dim, 3, 4)
        x = self.deconv_block(x)
        return x
"""

'\nclass Conv_Decoder(nn.Module):\n    def __init__(self, latent_dim=1, num_classes=num_classes):\n        super().__init__()\n        self.latent_dim = latent_dim\n        self.num_classes = num_classes\n\n        # FC layer for feature mapping\n        self.fc = nn.Linear(num_classes, latent_dim * 3 * 4)\n\n        # Deconvolution layers\n        self.deconv_block = nn.Sequential(\n            nn.ConvTranspose2d(latent_dim, 10, \n                               kernel_size=5, stride=3, padding=1),\n            nn.ReLU(),\n            nn.ConvTranspose2d(10, 10, \n                               kernel_size=5, stride=3, padding=1),\n            nn.ReLU(),\n            nn.ConvTranspose2d(10, 10,\n                               kernel_size=5, stride=3, padding=1),\n            nn.ReLU(),\n            nn.ConvTranspose2d(10, 1,\n                               kernel_size=1, stride=2, padding=0),\n            nn.Sigmoid()\n        )\n\n    def forward(self, x):\n        x = nn.functional.one_

In [58]:
"""
X1 = nn.functional.one_hot(X[5], num_classes=num_classes).float()
linear_ = nn.Linear(num_classes, latent_dim * 3 * 4)
X2 = linear_(X1)
X3 = X2.view(-1, latent_dim, 3, 4)
X3.shape
"""

'\nX1 = nn.functional.one_hot(X[5], num_classes=num_classes).float()\nlinear_ = nn.Linear(num_classes, latent_dim * 3 * 4)\nX2 = linear_(X1)\nX3 = X2.view(-1, latent_dim, 3, 4)\nX3.shape\n'

In [59]:
model = Conv_Decoder()
print(model)

Conv_Decoder(
  (fc): Linear(in_features=6572, out_features=12, bias=True)
  (conv_1): ConvTranspose2d(1, 10, kernel_size=(5, 5), stride=(5, 5))
  (conv_2): ConvTranspose2d(10, 10, kernel_size=(3, 3), stride=(3, 3))
  (conv_3): ConvTranspose2d(10, 10, kernel_size=(2, 2), stride=(2, 2))
  (conv_4): ConvTranspose2d(10, 6572, kernel_size=(2, 2), stride=(2, 2))
  (relu): ReLU()
  (sigmoid): Sigmoid()
)


In [60]:
model.forward(torch.tensor(0, dtype=torch.long))

Shape after conv0: torch.Size([1, 3, 4])
Shape after conv1: torch.Size([10, 15, 20])
Shape after conv2: torch.Size([10, 45, 60])
Shape after conv3: torch.Size([10, 90, 120])
Shape after conv4: torch.Size([6572, 180, 240])


tensor([[[0.4986, 0.4988, 0.4985,  ..., 0.4988, 0.4985, 0.4988],
         [0.4984, 0.4984, 0.4984,  ..., 0.4984, 0.4984, 0.4984],
         [0.4985, 0.4988, 0.4985,  ..., 0.4988, 0.4985, 0.4988],
         ...,
         [0.4984, 0.4984, 0.4984,  ..., 0.4984, 0.4984, 0.4984],
         [0.4985, 0.4988, 0.4985,  ..., 0.4988, 0.4985, 0.4988],
         [0.4984, 0.4984, 0.4984,  ..., 0.4984, 0.4984, 0.4984]],

        [[0.5006, 0.5005, 0.5006,  ..., 0.5005, 0.5006, 0.5005],
         [0.5004, 0.5006, 0.5004,  ..., 0.5006, 0.5004, 0.5006],
         [0.5006, 0.5005, 0.5006,  ..., 0.5005, 0.5006, 0.5005],
         ...,
         [0.5004, 0.5006, 0.5004,  ..., 0.5006, 0.5004, 0.5006],
         [0.5006, 0.5005, 0.5006,  ..., 0.5005, 0.5006, 0.5005],
         [0.5004, 0.5006, 0.5004,  ..., 0.5006, 0.5004, 0.5006]],

        [[0.4990, 0.4988, 0.4990,  ..., 0.4988, 0.4990, 0.4988],
         [0.4990, 0.4987, 0.4990,  ..., 0.4987, 0.4990, 0.4987],
         [0.4990, 0.4988, 0.4990,  ..., 0.4988, 0.4990, 0.

In [11]:
"""
# Hyperparameters
decay_1 = 0.9 # Decay of moving average of gradient
decay_2 = 0.99 # Decay of moving average of squared gradient

lr = 0.0001
weight_decay = 0.00004

lr_decay_rate = 0.98
"""

'\n# Hyperparameters\ndecay_1 = 0.9 # Decay of moving average of gradient\ndecay_2 = 0.99 # Decay of moving average of squared gradient\n\nlr = 0.0001\nweight_decay = 0.00004\n\nlr_decay_rate = 0.98\n'

In [12]:
"""
# Loss function, Optimizer and Scheduler
criterion = torch.nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), 
                              betas = (decay_1, decay_2),
                              lr=lr, 
                              weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=lr_decay_rate)
"""

'\n# Loss function, Optimizer and Scheduler\ncriterion = torch.nn.MSELoss()\noptimizer = torch.optim.AdamW(model.parameters(), \n                              betas = (decay_1, decay_2),\n                              lr=lr, \n                              weight_decay=weight_decay)\nscheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=lr_decay_rate)\n'

In [13]:
# Loss function, Optimizer and Scheduler
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

In [14]:
model.to(device)
X = X.to(device)
y = y.to(device)

In [15]:
model_device = next(model.parameters()).device
inputs_device = X.device
labels_device = y.device

print(model_device)
print(inputs_device)
print(labels_device)

cuda:0
cuda:0
cuda:0


In [16]:
dataloader = DataLoader(TensorDataset(X, y), batch_size=1, shuffle=True) # both train and test (deliberate overfit)

num_epochs = 10
verbose = True

In [17]:
# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    model.train() # Set to train mode
    for inputs, labels in dataloader:
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Feed-forward
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backprop
        loss.backward()
        optimizer.step()
        
        # Add loss
        batch_loss = loss.item()
        running_loss += batch_loss
        loss_values.append(batch_loss)
    
    # Average loss for the epoch
    avg_loss = running_loss / len(dataloader)

    # Training accuracy
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Get predicted class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    train_acc = correct / total

    # Validation accuracy
    model.eval()  # Eval mode
    correct = 0
    total = 0
    with torch.no_grad(): #No gradient
        for inputs, labels in dataloader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = correct / total
    
    # Update the learning rate at the end of each epoch
    scheduler.step()
    
    if verbose == True:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.6f}")
        print(f"Train accuracy: {train_acc}, Validation accuracy: {val_acc}")

    if early_stopping==True and val_acc > 0.999:
        print("Achieved acceptable accuracy. Stopping early.")
        break

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (215) must match the size of tensor b (240) at non-singleton dimension 3

In [None]:
print(inputs)

In [None]:
inputs.to(device)

In [None]:
inputs_device = inputs.device

In [None]:
print(inputs_device)

In [None]:
num_params = sum(p.numel() for p in model.parameters())
model_size_mb = num_params * 4 / 1e6

print(f"Number of parameters: {num_params}")
print(f"Model size: {model_size_mb:.2f} MB")

In [61]:
del model
del inputs
del labels

torch.cuda.empty_cache()