The pretrained 3D convnet (C3D) and the dataset pre-processing code are adapted from the following repository: https://github.com/jfzhang95/pytorch-video-recognition

**Model Initialization**

In [0]:
import torch
import torch.nn as nn

class C3D(nn.Module):
    """C3D 3D-convolutional network with a replaceable classification head.

    Layers ``conv1`` .. ``fc7`` form the feature extractor whose weights can
    be filled from a pretrained checkpoint; ``fc8`` maps the 4096-dimensional
    features to ``num_classes`` logits and is always freshly initialised.

    Args:
        num_classes: Number of output classes (size of the ``fc8`` output).
        pretrained: If true, load the feature-extractor weights from
            ``c3d-pretrained.pth`` in the current working directory.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # --- Feature extractor (weights come from the checkpoint) ---
        self.dropout = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        # pool1 leaves the temporal axis untouched (kernel/stride 1 in time).
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1))
        self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1))
        # fc6 expects 8192 flattened features per sample; e.g. a 16-frame
        # 112x112 clip reaches pool5 as 512 x 1 x 4 x 4 = 8192.
        self.fc6 = nn.Linear(8192, 4096)
        self.fc7 = nn.Linear(4096, 4096)

        # --- Trained layer (never loaded from the checkpoint) ---
        self.fc8 = nn.Linear(4096, num_classes)
        # Xavier initialisation of the new head's weights (bias keeps the
        # nn.Linear default).
        torch.nn.init.xavier_uniform_(self.fc8.weight)

        if pretrained:
            self.__load_pretrained_weights()

    def forward(self, x):
        """Compute class logits for a batch of clips.

        Args:
            x: Input tensor for ``conv1``; nn.Conv3d takes
                (batch, 3, frames, height, width). The clip size must reduce
                to 8192 features per sample after ``pool5``.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised logits.

        Raises:
            RuntimeError: If the per-sample feature count after ``pool5``
                does not match what ``fc6`` expects.
        """
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))

        x = self.relu(self.conv3a(x))
        x = self.pool3(self.relu(self.conv3b(x)))

        x = self.relu(self.conv4a(x))
        x = self.pool4(self.relu(self.conv4b(x)))

        x = self.relu(self.conv5a(x))
        x = self.pool5(self.relu(self.conv5b(x)))

        # Flatten per sample and keep the batch dimension fixed. The previous
        # view(-1, 8192) silently merged samples whenever the clip size was
        # wrong but the batch total happened to be a multiple of 8192; now
        # fc6 raises a shape error instead.
        x = torch.flatten(x, 1)

        x = self.dropout(self.relu(self.fc6(x)))
        x = self.dropout(self.relu(self.fc7(x)))

        logits = self.fc8(x)
        return logits

    def __load_pretrained_weights(self):
        """Copy conv1..fc7 weights from ``c3d-pretrained.pth`` into this model.

        Checkpoint entries are matched through ``corresp_name``; entries not
        listed there are ignored, and layers missing from the checkpoint keep
        their current values. ``fc8`` is never overwritten.
        """
        # Checkpoint key -> attribute name used by this module.
        corresp_name = {
                        "features.0.weight": "conv1.weight",
                        "features.0.bias": "conv1.bias",
                        "features.3.weight": "conv2.weight",
                        "features.3.bias": "conv2.bias",
                        "features.6.weight": "conv3a.weight",
                        "features.6.bias": "conv3a.bias",
                        "features.8.weight": "conv3b.weight",
                        "features.8.bias": "conv3b.bias",
                        "features.11.weight": "conv4a.weight",
                        "features.11.bias": "conv4a.bias",
                        "features.13.weight": "conv4b.weight",
                        "features.13.bias": "conv4b.bias",
                        "features.16.weight": "conv5a.weight",
                        "features.16.bias": "conv5a.bias",
                        "features.18.weight": "conv5b.weight",
                        "features.18.bias": "conv5b.bias",
                        "classifier.0.weight": "fc6.weight",
                        "classifier.0.bias": "fc6.bias",
                        "classifier.3.weight": "fc7.weight",
                        "classifier.3.bias": "fc7.bias",
                        }

        # map_location='cpu' lets a checkpoint saved from GPU tensors load on
        # a CPU-only host; load_state_dict copies the values into this
        # module's own parameters, so their device is unchanged.
        # NOTE(review): torch.load unpickles the file - only use trusted
        # checkpoints.
        p_dict = torch.load('c3d-pretrained.pth', map_location='cpu')
        s_dict = self.state_dict()
        for src_name, dst_name in corresp_name.items():
            if src_name in p_dict:
                s_dict[dst_name] = p_dict[src_name]
        self.load_state_dict(s_dict)

**Training Loop**

In [0]:
import torch
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader
from torch.autograd import Variable

# Fine-tune only the final layer (fc8) of C3D on HMDB51 and checkpoint the
# model plus the loss/accuracy histories after every epoch.
# Requires a CUDA device. VideoDataset is not defined in the visible code;
# presumably it comes from the referenced repository - confirm it is in scope.

num_epochs = 35
val_every = 40  # run a full validation pass every `val_every` training batches

model = C3D(num_classes=51, pretrained=True)

# Freeze the whole network, then re-enable gradients for fc8 only. The
# existing fc8 is kept (not re-created) so the Xavier initialisation applied
# in C3D.__init__ is preserved and the class count is stated once.
for param in model.parameters():
    param.requires_grad = False
for param in model.fc8.parameters():
    param.requires_grad = True

# Move the model to the GPU before building the optimizer, as recommended by
# the torch.optim documentation.
model.cuda()

loss_function = nn.CrossEntropyLoss()
loss_function.cuda()
optimizer = torch.optim.Adam(model.fc8.parameters(), lr=5e-4, weight_decay=1e-5)
# Learning rate is multiplied by 0.1 at epochs 15 and 25.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[15,25], gamma=0.1)

# Preprocessing and dataloaders
train_dataloader = DataLoader(VideoDataset(dataset='hmdb51', split='train',clip_len=16), batch_size=32, shuffle=True, num_workers=4)
val_dataloader   = DataLoader(VideoDataset(dataset='hmdb51', split='val',  clip_len=16), batch_size=32, num_workers=4)
test_dataloader  = DataLoader(VideoDataset(dataset='hmdb51', split='test', clip_len=16), batch_size=32, num_workers=4)

training_loss = []    # loss of every training batch
training_acc = []     # fraction of correct predictions in every training batch
validation_loss = []  # mean per-batch validation loss, one entry per validation pass
validation_acc = []   # fraction of correct validation predictions, one entry per pass


def _evaluate(net, dataloader, criterion):
    """Return (mean per-batch loss, accuracy) of `net` over `dataloader`.

    Runs in eval mode with autograd disabled and restores the previous
    train/eval mode afterwards. Accuracy is correct predictions divided by
    the number of samples. Both values are NaN if the loader yields nothing.
    """
    was_training = net.training
    net.eval()
    loss_sum = 0.0
    num_correct = 0
    num_samples = 0
    num_batches = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.cuda()
            batch_labels = batch_labels.cuda()

            batch_outputs = net(batch_inputs)
            loss_sum += criterion(batch_outputs, batch_labels).item()
            # argmax over logits equals argmax over softmax probabilities.
            num_correct += (batch_outputs.argmax(dim=1) == batch_labels).sum().item()
            num_samples += batch_labels.size(0)
            num_batches += 1
    if was_training:
        net.train()
    if num_batches == 0:
        return float('nan'), float('nan')
    return loss_sum / num_batches, num_correct / num_samples


# Must come after the model exists; enables dropout for training.
model.train()

for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_dataloader):
        inputs = inputs.cuda()
        labels = labels.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, labels)

        preds = outputs.argmax(dim=1)
        batch_accuracy = (preds == labels).sum().item() / labels.size(0)

        print("Epoch:",epoch, "Loss:",loss.item())
        training_loss.append(loss.item())
        training_acc.append(batch_accuracy)

        loss.backward()
        optimizer.step()

        if i % val_every == 0:
            val_loss, val_accuracy = _evaluate(model, val_dataloader, loss_function)
            print("Validation Loss:", val_loss)
            validation_loss.append(val_loss)
            validation_acc.append(val_accuracy)

    scheduler.step()
    # Overwrite the checkpoint and metric histories at the end of each epoch.
    torch.save(model.state_dict(), 'c3d_hdm51_part2.pt')
    np.save('Training_Loss_2', np.array(training_loss))
    np.save('Training_Accuracy_2', np.array(training_acc))
    np.save('Val_Loss_2', np.array(validation_loss))
    np.save('Val_Accuracy_2', np.array(validation_acc))

**Test Set Performance**

In [0]:
# Evaluate the trained model on the test split.
# Relies on `model` and `test_dataloader` created by the training cell and on
# a CUDA device being available.
m = 0                 # number of test batches seen
total_test_loss = 0   # sum of per-batch mean losses
total_test_acc = 0    # number of correctly classified test samples
total_test_samples = 0
loss_function = nn.CrossEntropyLoss()

model.eval()  # disable dropout for evaluation
# The whole loop runs under no_grad so no autograd graph is built.
with torch.no_grad():
    for j, (inputs, labels) in enumerate(test_dataloader):
        inputs = inputs.cuda()
        labels = labels.cuda()

        outputs = model(inputs)
        test_loss = loss_function(outputs, labels)
        # argmax over logits equals argmax over softmax probabilities.
        preds = outputs.argmax(dim=1)
        print(test_loss.item())

        total_test_loss += test_loss.item()
        total_test_acc += (preds == labels).sum().item()
        total_test_samples += labels.size(0)
        m += 1

if m == 0:
    print('Test Loss:', float('nan'))
    print('Test Accuracy:', float('nan'))
else:
    # Loss is averaged over batches; accuracy is the fraction of test samples
    # classified correctly (previously it was divided by the batch count).
    print('Test Loss:', total_test_loss/m)
    print('Test Accuracy:', total_test_acc / total_test_samples)