In [4]:
import os
import shutil
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, models, transforms
from torchvision.utils import save_image
from torch.utils.data import Dataset, DataLoader

print(torch.cuda.is_available())

True


In [5]:
from torch.utils.tensorboard import SummaryWriter

2024-06-06 22:08:55.064984: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Loading Datasets

loading from `/home/nfs/inf6/data/datasets/kth_actions`

In [6]:
import os
from glob import glob

# Define the split for training and eval sets
train_persons = {'person11', 'person12', 'person13', 'person14', 'person15', 'person16', 'person17', 'person02', 'person03', 'person05', 'person06', 'person07', 'person08', 'person09', 'person10', 'person18'}
val_persons = {'person19', 'person20', 'person21', 'person23', 'person24', 'person25', 'person01', 'person04'}

def get_sequences_with_labels(base_dir, persons):
    sequences = []
    labels = []
    classes = os.listdir(base_dir)
    class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}

    for cls in classes:
        for person in persons:
            person_sequences = glob(os.path.join(base_dir, cls, f'{person}*'))
            sequences.extend(person_sequences)
            labels.extend([class_to_idx[cls]] * len(person_sequences))
    
    return sequences, labels

base_dir = '/home/nfs/inf6/data/datasets/kth_actions/processed'
train_sequences, train_labels = get_sequences_with_labels(base_dir, train_persons)
val_sequences, val_labels = get_sequences_with_labels(base_dir, val_persons)


In [7]:
import cv2
import numpy as np

def load_frames(sequence_path):
    frame_files = sorted(glob(os.path.join(sequence_path, '*.png')))
    frames = [cv2.imread(frame_file) for frame_file in frame_files]
    frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames]  # Convert to RGB
    frames = [frame / 255.0 for frame in frames]  # Normalize
    return frames

def create_subsequences(frames, subsequence_length=10):
    subsequences = []
    for i in range(len(frames) - subsequence_length + 1):
        subsequences.append(frames[i:i + subsequence_length])
    return subsequences

sequence_path = train_sequences[0]
frames = load_frames(sequence_path)
subsequences = create_subsequences(frames)


In [9]:
class VideoDataset(Dataset):
    def __init__(self, sequences, labels, transform=None, subsequence_length=13):
        self.sequences = sequences
        self.labels = labels
        self.transform = transform
        self.subsequence_length = subsequence_length
        self.data = self.load_data()

    def load_data(self):
        data = []
        for sequence, label in zip(self.sequences, self.labels):
            frames = load_frames(sequence)
            subsequences = create_subsequences(frames, self.subsequence_length)
            for subsequence in subsequences:
                data.append((subsequence, label))
        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        subsequence, label = self.data[idx]
        if self.transform:
            subsequence = [self.transform(frame) for frame in subsequence]
        subsequence = torch.stack([frame for frame in subsequence])  # Convert to CxHxW
        label = torch.tensor(label)
        return subsequence, label

# Define transforms if needed
data_transforms = {
    'train': transforms.Compose([
        transforms.ToPILImage(),
        # transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.ToPILImage(),
        # transforms.Resize(256),
        # transforms.CenterCrop(224),
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Create dataset
train_dataset = VideoDataset(train_sequences, train_labels, transform=data_transforms['train'])
val_dataset = VideoDataset(val_sequences, val_labels, transform=data_transforms['val'])



Setting up dataloader

In [11]:
BATCH_SIZE = 8

In [12]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers = 4, pin_memory=True)
test_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers = 4, pin_memory=True)

In [103]:
train_dataset[0][0].size()
first_batch = next(iter(train_loader))

# Extract the first item in the batch
first_item = first_batch[0] # first_batch[0] is the data, first_batch[1] is the labels

# Print the shape of the first item
print("Shape of the first item in the first batch:", first_item.shape)

Shape of the first item in the first batch: torch.Size([8, 13, 3, 32, 32])


## Custom LSTM  
Using referred source as reference to build the LSTM cell. The reset parameters functions is required to get the correct distribution for the tensors, which prevents vanishing/exploding gradients or unwanted outcomes in the weights and biases.

Reference: https://github.com/piEsposito/pytorch-lstm-by-hand/blob/master/LSTM.ipynb - By Piero Esposito

#### Custom LSTM Cell

In [104]:
class CustomLSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(CustomLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Forget gate weights
        self.W_f = nn.Parameter(torch.Tensor(input_size + hidden_size, hidden_size))
        self.b_f = nn.Parameter(torch.Tensor(hidden_size))

        # Input gate weights
        self.W_i = nn.Parameter(torch.Tensor(input_size + hidden_size, hidden_size))
        self.b_i = nn.Parameter(torch.Tensor(hidden_size))

        # Candidate cell state weights
        self.W_c = nn.Parameter(torch.Tensor(input_size + hidden_size, hidden_size))
        self.b_c = nn.Parameter(torch.Tensor(hidden_size))

        # Output gate weights
        self.W_o = nn.Parameter(torch.Tensor(input_size + hidden_size, hidden_size))
        self.b_o = nn.Parameter(torch.Tensor(hidden_size))

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.W_f, a=0.01)
        nn.init.kaiming_uniform_(self.W_i, a=0.01)
        nn.init.kaiming_uniform_(self.W_c, a=0.01)
        nn.init.kaiming_uniform_(self.W_o, a=0.01)
        nn.init.constant_(self.b_f, 0)
        nn.init.constant_(self.b_i, 0)
        nn.init.constant_(self.b_c, 0)
        nn.init.constant_(self.b_o, 0)

    def forward(self, input, hx):
        # if mode == "zeroes":
        #     hx = (torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device),
        #           torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device))
        # elif mode == "random":
        #     hx = (torch.rand(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device),
        #           torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device))


        h_prev, c_prev = hx
        combined = torch.cat((input, h_prev), dim=1)

        # Forget gate
        f = torch.sigmoid(torch.matmul(combined, self.W_f) + self.b_f)

        # Input gate
        i = torch.sigmoid(torch.matmul(combined, self.W_i) + self.b_i)

        # Candidate cell state
        c_hat = torch.tanh(torch.matmul(combined, self.W_c) + self.b_c)

        # Updated cell state
        c = f * c_prev + i * c_hat

        # Output gate
        o = torch.sigmoid(torch.matmul(combined, self.W_o) + self.b_o)

        # Updated hidden state
        h = o * torch.tanh(c)

        return h, c

#### Simple Model using LSTM cell

In [105]:
class SequentialClassifierWithCells(nn.Module):
    """ 
    Sequential classifier for images. Embedded image rows are fed to a RNN
    Same as above, but using LSTMCells instead of the LSTM object
    
    Args:
    -----
    input_dim: integer
        dimensionality of the rows to embed
    emb_dim: integer 
        dimensionality of the vectors fed to the LSTM
    hidden_dim: integer
        dimensionality of the states in the cell
    mode: string
        intialization of the states
    """
    
    def __init__(self, input_dim, emb_dim, hidden_dim, num_layers=1, mode="zeros"):
        """ Module initializer """
        assert mode in ["zeros", "random"]
        super().__init__()
        self.hidden_dim =  hidden_dim
        self.num_layers = num_layers
        self.mode = mode

        # for embedding rows into vector representations
        self.encoder = nn.Linear(in_features=input_dim, out_features=emb_dim)
        
        # LSTM model       
        lstms = []
        for i in range(num_layers):
            in_size = emb_dim if i == 0 else hidden_dim
            lstms.append( CustomLSTMCell(input_size=in_size, hidden_size=hidden_dim) )
        self.lstm = nn.ModuleList(lstms)
        
        # FC-classifier
        self.classifier = nn.Linear(in_features=hidden_dim, out_features=6)
        
        return
    
    
    def forward(self, x):
        """ Forward pass through model """
        
        b_size, seq_size, n_channels, n_rows, n_cols = x.shape
        h, c = self.init_state(b_size=b_size, device=x.device) 
        
        #embedding frames
        x = x.view(b_size * seq_size, n_channels * n_rows * n_cols)  # Flatten each image
        embeddings = self.encoder(x)  # Apply encoder
        embeddings = embeddings.view(b_size, seq_size, -1)  # Reshape back to (batch_size, seq_size, emb_dim)
        
        # iterating over sequence length
        lstm_out = []
        for i in range(embeddings.shape[1]):
            lstm_input = embeddings[:, i, :]
            # iterating over LSTM Cells
            for j, lstm_cell in enumerate(self.lstm):
                h[j], c[j] = lstm_cell(lstm_input, (h[j], c[j]))
                lstm_input = h[j]
            lstm_out.append(lstm_input)
        lstm_out = torch.stack(lstm_out, dim=1)
            
        # classifying
        y = self.classifier(lstm_out[:, -1, :])  # feeding only output at last layer
        
        return y
    
        
    def init_state(self, b_size, device):
        """ Initializing hidden and cell state """
        if(self.mode == "zeros"):
            h = [torch.zeros(b_size, self.hidden_dim).to(device) for _ in range(self.num_layers)]
            c = [torch.zeros(b_size, self.hidden_dim).to(device) for _ in range(self.num_layers)]
        elif(self.mode == "random"):
            h = [torch.zeros(b_size, self.hidden_dim).to(device) for _ in range(self.num_layers)]
            c = [torch.zeros(b_size, self.hidden_dim).to(device) for _ in range(self.num_layers)]
        return h, c

#### Convolutional Encoder - extracting features from the frames before being fed as a sequence.

In [27]:
class ConvEncoder(nn.Module):
    def __init__(self, input_channels=3, feature_dim=64):
        super(ConvEncoder, self).__init__()
        
        # Define convolutional layers
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=0)
        self.conv3 = nn.Conv2d(64, feature_dim, kernel_size=5, stride=1, padding=0)
        
        # Define pooling layers
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        
        # Applying convolutional layers and pooling
        self.model = nn.Sequential(
            self.conv1,
            nn.ReLU(),
            self.pool,
            self.conv2,
            nn.ReLU(),
            self.pool,
            # self.conv3,
            # nn.ReLU(),
            # self.pool
        )
        
    def forward(self, x):        
        return self.model(x)



#### Convolutional LSTM cell. Using convolutional operations to replace the units of the LSTM

In [14]:
class ConvLSTMCell(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size):
        super(ConvLSTMCell, self).__init__()
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.padding = kernel_size // 2
        
        # Define convolutional operations for input, forget, output gates, and cell state
        self.conv_i = nn.Conv2d(in_channels=input_channels + hidden_channels,
                                out_channels=hidden_channels,
                                kernel_size=kernel_size,
                                padding=self.padding)
        self.conv_f = nn.Conv2d(in_channels=input_channels + hidden_channels,
                                out_channels=hidden_channels,
                                kernel_size=kernel_size,
                                padding=self.padding)
        self.conv_c = nn.Conv2d(in_channels=input_channels + hidden_channels,
                                out_channels=hidden_channels,
                                kernel_size=kernel_size,
                                padding=self.padding)
        self.conv_o = nn.Conv2d(in_channels=input_channels + hidden_channels,
                                out_channels=hidden_channels,
                                kernel_size=kernel_size,
                                padding=self.padding)
        
    def forward(self, x, hidden_state):
        h_prev, c_prev = hidden_state
        
        combined_input = torch.cat((x, h_prev), dim=1)
        
        input_gate = torch.sigmoid(self.conv_i(combined_input))
        forget_gate = torch.sigmoid(self.conv_f(combined_input))
        output_gate = torch.sigmoid(self.conv_o(combined_input))
        
        cell_state_candidate = torch.tanh(self.conv_c(combined_input))
        
        cell_state = forget_gate * c_prev + input_gate * cell_state_candidate
        hidden_state = output_gate * torch.tanh(cell_state)
        
        return hidden_state, cell_state
    
    def init_hidden(self, batch_size, c, h, w):
        return (torch.zeros(batch_size, c, h, w, device=self.conv_lstm.conv_i.weight.device),
                torch.zeros(batch_size, c, h, w, device=self.conv_lstm.conv_i.weight.device))


#### We set up a module that uses the ConvLSTM cell to keep track of sequential information

In [15]:
class RecurrentModule(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size):
        super(RecurrentModule, self).__init__()
        self.conv_lstm = ConvLSTMCell(input_channels, hidden_channels, kernel_size)
        
    def forward(self, x):
        batch_size, seq_len, c, h, w = x.size()
        h_t, c_t = self.init_hidden(batch_size, c, h, w)
        
        outputs = []
        for t in range(seq_len):
            h_t, c_t = self.conv_lstm(x[:, t, :, :, :], (h_t, c_t))
            outputs.append(h_t)
            
        outputs = torch.stack(outputs, dim=1)
        return outputs[:, -1, :, :, :]
    
    def init_hidden(self, batch_size, c, h, w):
        return (torch.zeros(batch_size, c, h, w, device=self.conv_lstm.conv_i.weight.device),
                torch.zeros(batch_size, c, h, w, device=self.conv_lstm.conv_i.weight.device))


#### Creating a model that encapsulates (encoder, ConvLSTM, classifier)

In [16]:
class ActionRecognitionModel(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size, feature_dim, output_dim):
        super(ActionRecognitionModel, self).__init__()
        
        self.encoder = ConvEncoder(input_channels=input_channels, feature_dim=feature_dim)
        self.conv_lstm = ConvLSTMCell(input_channels=feature_dim, hidden_channels=hidden_channels, kernel_size=kernel_size)
        self.classifier = nn.Sequential(
            nn.Conv2d(hidden_channels, hidden_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(hidden_channels, output_dim)
        )
        
    def init_hidden(self, batch_size, hidden_channels, spatial_size):
        height, width = spatial_size
        h = torch.zeros(batch_size, hidden_channels, height, width, device=next(self.parameters()).device)
        c = torch.zeros(batch_size, hidden_channels, height, width, device=next(self.parameters()).device)
        return h, c
    
    def forward(self, x):
        batch_size, seq_len, channels, height, width = x.size()
        c_in = x.view(batch_size * seq_len, channels, height, width)
        
        encoded = self.encoder(c_in)
        encoded = encoded.view(batch_size, seq_len, encoded.size(1), encoded.size(2), encoded.size(3))
        
        h, c = self.init_hidden(batch_size, self.conv_lstm.hidden_channels, (encoded.size(3), encoded.size(4)))
        
        outputs = []
        for t in range(seq_len):
            h, c = self.conv_lstm(encoded[:, t, :, :, :], (h, c))
            outputs.append(h)
        
        outputs = torch.stack(outputs, dim=1)
        outputs = outputs[:, -1, :, :, :]
        
        output = self.classifier(outputs)
        return output

model = ActionRecognitionModel(input_channels=3, hidden_channels=128, kernel_size=3, feature_dim=64, output_dim=6)
input_tensor = torch.randn(8, 15, 3, 64, 64)  # Batch of 8, sequence length of 15, 3 channels, 64x64 image
output = model(input_tensor)
print(output.shape)

torch.Size([8, 6])


## Training Template

In [33]:
from torch.cuda.amp import GradScaler, autocast

scaler = GradScaler()

In [18]:
def train_epoch(model, train_loader, optimizer, criterion, epoch, device):
    """ Training a model for one epoch """
    
    loss_list = []
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()
        with autocast():
            # Forward pass to get output/logits
            outputs = model(images)
            
            # Calculate Loss: softmax --> cross entropy loss
            loss = criterion(outputs, labels)
        loss_list.append(loss.item())
         
        # Getting gradients w.r.t. parameters
        scaler.scale(loss).backward()
         
        # Updating parameters
        scaler.step(optimizer)
        scaler.update()
        
    mean_loss = np.mean(loss_list)
    return mean_loss, loss_list


@torch.no_grad()
def eval_model(model, eval_loader, criterion, device):
    """ Evaluating the model for either validation or test """
    model.eval()
    correct = 0
    total = 0
    loss_list = []
    with autocast():
        for images, labels in eval_loader:
            images = images.to(device)
            labels = labels.to(device)
            
            # Forward pass only to get logits/output
            outputs = model(images)
                    
            loss = criterion(outputs, labels)
            loss_list.append(loss.item())
                
            # Get predictions from the maximum value
            preds = torch.argmax(outputs, dim=1)
            correct += len( torch.where(preds==labels)[0] )
            total += len(labels)
                 
    # Total correct predictions and loss
    accuracy = correct / total * 100
    loss = np.mean(loss_list)
    
    return accuracy, loss


def train_model(model, optimizer, scheduler, criterion, train_loader, valid_loader, num_epochs, tboard=None, start_epoch=0):
    """ Training a model for a given number of epochs"""
    model.train()
    train_loss = []
    val_loss =  []
    loss_iters = []
    valid_acc = []
    assert tboard is not None, f"Tensorboard must be provided!"
    
    for epoch in tqdm(range(num_epochs)):
           
        # validation epoch
        model.eval()  # important for dropout and batch norms
        accuracy, loss = eval_model(
                    model=model, eval_loader=valid_loader,
                    criterion=criterion, device=device
            )
        valid_acc.append(accuracy)
        val_loss.append(loss)
        tboard.add_scalar(f'Accuracy/Valid', accuracy, global_step=epoch+start_epoch)
        tboard.add_scalar(f'Loss/Valid', loss, global_step=epoch+start_epoch)
        
        # training epoch
        model.train()  # important for dropout and batch norms
        mean_loss, cur_loss_iters = train_epoch(
                model=model, train_loader=train_loader, optimizer=optimizer,
                criterion=criterion, epoch=epoch, device=device
            )
        scheduler.step()
        train_loss.append(mean_loss)
        tboard.add_scalar(f'Loss/Train', mean_loss, global_step=epoch+start_epoch)

        loss_iters = loss_iters + cur_loss_iters
        
        if(epoch % 5 == 0 or epoch==num_epochs-1):
            print(f"Epoch {epoch+1}/{num_epochs}")
            print(f"    Train loss: {round(mean_loss, 5)}")
            print(f"    Valid loss: {round(loss, 5)}")
            print(f"    Accuracy: {accuracy}%")
            print("\n")
    
    print(f"Training completed")
    return train_loss, val_loss, loss_iters, valid_acc

In [19]:
def save_model(model, optimizer, epoch, stats, path="", name=""):
    """ Saving model checkpoint """
    
    if(not os.path.exists(path+"models")):
        os.makedirs(path+"models")
    savepath = f"{path}models/{name}_checkpoint_epoch_{epoch}.pth"

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'stats': stats
    }, savepath)
    return


def load_model(model, optimizer, savepath):
    """ Loading pretrained checkpoint """
    
    checkpoint = torch.load(savepath)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint["epoch"]
    stats = checkpoint["stats"]
    
    return model, optimizer, epoch, stats


def smooth(f, K=5):
    """ Smoothing a function using a low-pass filter (mean) of size K """
    kernel = np.ones(K) / K
    f = np.concatenate([f[:int(K//2)], f, f[int(-K//2):]])  # to account for boundaries
    smooth_f = np.convolve(f, kernel, mode="same")
    smooth_f = smooth_f[K//2: -K//2]  # removing boundary-fixes
    return smooth_f

def set_random_seed(random_seed=None):
    """
    Using random seed for numpy and torch
    """
    if(random_seed is None):
        random_seed = 13
    os.environ['PYTHONHASHSEED'] = str(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)
    return


def count_model_params(model):
    """ Counting the number of learnable parameters in a nn.Module """
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return num_params

## Training

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
model = SequentialClassifierWithCells(input_dim=3*32*32, emb_dim=6000, hidden_dim=128, num_layers=2, mode="zeros")
count_model_params(model)

1189766

In [37]:
model

SequentialClassifierWithCells(
  (encoder): Linear(in_features=3072, out_features=6000, bias=True)
  (lstm): ModuleList(
    (0-1): 2 x CustomLSTMCell()
  )
  (classifier): Linear(in_features=128, out_features=6, bias=True)
)

In [38]:
model = model.to(device)

In [39]:
# classification loss function
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

# Decay LR by a factor of 0.1 every 5 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)

In [40]:
TBOARD_LOGS = os.path.join(os.getcwd(),"Lab Work", "CUDA_Assignment_4", "tboard_logs", "RNNs", "CustomLSTMcells")
if not os.path.exists(TBOARD_LOGS):
    os.makedirs(TBOARD_LOGS)

shutil.rmtree(TBOARD_LOGS)
writer = SummaryWriter(TBOARD_LOGS)

In [121]:
train_loss, val_loss, loss_iters, valid_acc = train_model(
        model=model, optimizer=optimizer, scheduler=scheduler, criterion=criterion,
        train_loader=train_loader, valid_loader=test_loader, num_epochs=5, tboard=writer
    )

 20%|██████████████████████▍                                                                                         | 1/5 [09:58<39:54, 598.72s/it]

Epoch 1/5
    Train loss: 1.45161
    Valid loss: 1.81185
    Accuracy: 13.313790987797397%




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [50:10<00:00, 602.13s/it]

Epoch 5/5
    Train loss: 1.23768
    Valid loss: 1.3696
    Accuracy: 41.16049719362829%


Training completed





In [126]:
stats = [train_loss, val_loss, loss_iters, valid_acc]
save_model(model, optimizer, 5, stats, path="Lab Work/CUDA_Assignment_4/", name="simpleCustomRNN")

### Full model

In [28]:
model = ActionRecognitionModel(input_channels=3, hidden_channels=128, kernel_size=3, feature_dim=64, output_dim=6)
count_model_params(model)

1189766

In [29]:
model

ActionRecognitionModel(
  (encoder): ConvEncoder(
    (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
    (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (conv3): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (model): Sequential(
      (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
      (4): ReLU()
      (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (conv_lstm): ConvLSTMCell(
    (conv_i): Conv2d(192, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv_f): Conv2d(192, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv_c): Conv2d(192, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv_o): Conv2d(192, 128, kerne

In [30]:
model = model.to(device)
# classification loss function
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

# Decay LR by a factor of 0.2 every 5 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)


In [31]:
TBOARD_LOGS = os.path.join(os.getcwd(),"Lab Work", "CUDA_Assignment_4", "tboard_logs", "RNNs", "FullConvLSTM_32spatial")
if not os.path.exists(TBOARD_LOGS):
    os.makedirs(TBOARD_LOGS)

shutil.rmtree(TBOARD_LOGS)
writer = SummaryWriter(TBOARD_LOGS)


In [32]:
train_loss, val_loss, loss_iters, valid_acc = train_model(
        model=model, optimizer=optimizer, scheduler=scheduler, criterion=criterion,
        train_loader=train_loader, valid_loader=test_loader, num_epochs=5, tboard=writer
    )


 20%|█████████████████████████████▌                                                                                                                      | 1/5 [07:06<28:26, 426.51s/it]

Epoch 1/5
    Train loss: 0.55114
    Valid loss: 1.79468
    Accuracy: 14.688572272592998%




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [35:31<00:00, 426.24s/it]

Epoch 5/5
    Train loss: 0.07614
    Valid loss: 1.36215
    Accuracy: 72.95089417592656%


Training completed





In [None]:
stats = [train_loss, val_loss, loss_iters, valid_acc]
save_model(model, optimizer, 5, path="Lab Work/CUDA_Assignment_4/", name="FullConvLSTM")