# Model 1

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torchsummary import summary

## Constants

In [2]:
# Multiplied with RES_NEXT_OUT to obtain the classifier's input width.
NUM_SEGMENTS = 4
# Feature width contributed per segment; presumably the ResNeXt backbone's
# output size — TODO confirm against the feature-extraction code.
RES_NEXT_OUT = 2048
# Not referenced in the cells visible here; presumably the number of
# training epochs — TODO confirm at the training loop.
NUM_EPOCHS = 20

In [3]:
LABELS_PATH = "jester-v1-labels.csv"

# Read the label file, one entry per line.
# NOTE(review): readlines() keeps each line's trailing "\n", so every key of
# labels_encode_dict and every value of labels_decode_dict carries it as well.
# Confirm downstream lookups expect that before stripping.
with open(LABELS_PATH) as labels_file:
    labels = labels_file.readlines()

# Two-way mapping between a label line and its position in the file.
labels_encode_dict = {label: index for index, label in enumerate(labels)}
labels_decode_dict = {index: label for index, label in enumerate(labels)}

## Architecture

In [4]:
class GestureClassifier(nn.Module):
    """Two-layer fully connected classification head.

    The input is passed through a ReLU, projected to ``bottleneck_size``
    features, passed through a second ReLU and finally projected to
    ``num_classes`` raw scores (no softmax is applied).

    Args:
        input_size: Number of features of each input vector.
        num_classes: Number of output scores.
        bottleneck_size: Width of the hidden layer. Defaults to 512.
    """

    def __init__(self, input_size, num_classes, bottleneck_size=512):
        super().__init__()

        self.bottleneck_size = bottleneck_size

        # Layers
        self.ln1 = nn.Linear(input_size, self.bottleneck_size)
        self.ln2 = nn.Linear(self.bottleneck_size, num_classes)

        # Initialization (same order as the layers are applied)
        for layer in (self.ln1, self.ln2):
            self.initialize_layer(layer)

    def forward(self, x):
        """Return the raw class scores for ``x``.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor whose last dimension equals ``num_classes``.
        """
        # The incoming features are rectified first, as we didn't have a ReLU
        # on the output of the ResNeXt.
        x = torch.relu(x)
        x = torch.relu(self.ln1(x))
        return self.ln2(x)

    @staticmethod
    def initialize_layer(layer):
        """Zero the bias and Kaiming-normal initialize the weight of ``layer``.

        A parameter that is missing or ``None`` (for example the bias of
        ``nn.Linear(..., bias=False)``) is skipped instead of raising.
        """
        # hasattr() is not enough: a bias-free Linear still has `bias = None`.
        bias = getattr(layer, "bias", None)
        if bias is not None:
            nn.init.zeros_(bias)
        weight = getattr(layer, "weight", None)
        if weight is not None:
            nn.init.kaiming_normal_(weight)

In [5]:
# Classifier whose input width is RES_NEXT_OUT * NUM_SEGMENTS and which
# emits one score per entry of the label dictionary.
model = GestureClassifier(
    input_size=RES_NEXT_OUT * NUM_SEGMENTS,
    num_classes=len(labels_encode_dict),
)
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
summary(model, input_size=(RES_NEXT_OUT * NUM_SEGMENTS,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 512]       4,194,816
            Linear-2                   [-1, 27]          13,851
Total params: 4,208,667
Trainable params: 4,208,667
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.03
Forward/backward pass size (MB): 0.00
Params size (MB): 16.05
Estimated Total Size (MB): 16.09
----------------------------------------------------------------


# Frame selection function

In [6]:
# Frame-sampling mode flag; presumably passed as the `random` argument of
# `_select_frames` — TODO confirm at the call site.
# NOTE(review): this name would shadow the stdlib `random` module if that
# module is imported anywhere in the notebook.
random = False

In [7]:
@staticmethod
def _select_frames(list_of_frames, num_segments, random):
    n = len(list_of_frames)
    segment_boundaries = np.linspace(0, n, num_segments + 1, dtype=int)  # Define segment boundaries
    if not random:
        selected_indices = segment_boundaries[:-1]  # Take the first index of each segment
    else:
        selected_indices = [np.random.randint(segment_boundaries[i], segment_boundaries[i + 1]) 
                        for i in range(num_segments)]  # Sample 1 index per segment
    selected_frames = [list_of_frames[i] for i in selected_indices]  # Map indices to frames

    return selected_frames