In [51]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
import cv2
from representational_network import embedNet, posEnc


In [None]:
import torch
import torch.nn as nn

class Encoder(nn.Module):
    """Single encoder block: embedding, positional term, self-attention, linear layer.

    The input is embedded with ``embedNet``, the output of ``posEnc`` is added to
    it, and the result goes through one multi-head self-attention sub-layer and
    one linear sub-layer. Each sub-layer is followed by a residual connection
    and a ``LayerNorm`` (post-norm ordering).

    Args:
        d_model: Width of the token features used by every sub-module.
            Defaults to 512, the value that was previously hard-coded.
        num_heads: Number of attention heads; must divide ``d_model``.
            Defaults to 8, the value that was previously hard-coded.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide ``d_model``.
    """

    def __init__(self, d_model=512, num_heads=8):
        super().__init__()
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive num_heads ({num_heads})"
            )
        self.embed = embedNet(d_model=d_model)
        self.pos = posEnc(d_model=d_model)
        self.mha = nn.MultiheadAttention(embed_dim=d_model, num_heads=num_heads, batch_first=True)
        # NOTE(review): this "feed-forward" sub-layer is a single Linear with no
        # activation, so it stays a linear map. Confirm that this is intended.
        self.fc = nn.Linear(d_model, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x):
        """Encode ``x`` and return one feature vector per sequence position.

        Args:
            x: Whatever ``embedNet`` accepts. The embedded result is presumably
                (batch, seq, d_model), since the attention layer is built with
                ``batch_first=True`` -- TODO confirm against ``embedNet``.

        Returns:
            Tensor with the same shape as the embedded input.
        """
        x = self.embed(x)
        # NOTE(review): this assumes posEnc returns only the positional term.
        # If posEnc already returns `x + encoding`, the embedding is counted
        # twice here -- verify against representational_network.posEnc.
        x = x + self.pos(x)

        # Self-attention sub-layer with residual connection, then LayerNorm.
        attn_output, _ = self.mha(x, x, x)
        x = self.norm1(attn_output + x)

        # Linear sub-layer with residual connection, then LayerNorm.
        x = self.norm2(self.fc(x) + x)
        return x

class transformerNet(nn.Module):
    """Classifier head on top of ``Encoder``.

    The encoder output is averaged over dimension 1 and projected from 512
    features to ``num_classes`` logits.

    Args:
        num_classes: Number of output logits (default 101).
    """

    def __init__(self, num_classes=101):
        super(transformerNet, self).__init__()
        self.encoder = Encoder()
        self.fc = nn.Linear(in_features=512, out_features=num_classes)

    def forward(self, x):
        """Return unnormalised class scores for ``x``.

        Args:
            x: Input forwarded unchanged to ``Encoder``.

        Returns:
            Logits with ``num_classes`` entries in the last dimension.
        """
        # Mean-pool over dim 1 (presumably the sequence axis -- TODO confirm),
        # then classify the pooled vector.
        pooled = self.encoder(x).mean(dim=1)
        return self.fc(pooled)



In [53]:
def train(model, loader, criterion, optimizer, device=None, log_every=1):
    """Run one training epoch and return the sample-weighted loss and accuracy.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable of ``(videos, labels)`` batches that supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``. Its
            result is multiplied by the batch size, so it is assumed to be a
            per-batch mean -- TODO confirm for custom losses.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device each batch is moved to before the forward pass. When
            ``None`` (default), the device of the model's first parameter is used.
        log_every: Print a progress line every ``log_every`` batches. The default
            of 1 prints every batch; 0 or ``None`` disables progress lines.

    Returns:
        ``(avg_loss, acc)`` as floats; ``(0.0, 0.0)`` when ``loader`` yields no
        samples.
    """
    print('training')
    model.train()

    # Follow the model onto whatever device it lives on, so a model moved to an
    # accelerator does not receive CPU batches.
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    total_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (videos, labels) in enumerate(loader):
        if log_every and (batch_idx + 1) % log_every == 0:
            print(f"batch {batch_idx+1}/{len(loader)}")
        if device is not None:
            if torch.is_tensor(videos):
                videos = videos.to(device)
            labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(videos)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Re-weight by batch size so a smaller final batch does not skew the mean.
        total_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    avg_loss = total_loss / total if total else 0.0
    acc = correct / total if total else 0.0
    print(f"avg_loss={avg_loss:.4f}, acc={acc:.4f}")
    return avg_loss, acc

def evaluate(model, loader, criterion, device=None):
    """Score ``model`` on ``loader`` without updating any parameters.

    Args:
        model: Module to evaluate; it is switched to eval mode and left there.
        loader: Iterable of ``(videos, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``. Its
            result is multiplied by the batch size, so it is assumed to be a
            per-batch mean -- TODO confirm for custom losses.
        device: Device each batch is moved to before the forward pass. When
            ``None`` (default), the device of the model's first parameter is used.

    Returns:
        ``(avg_loss, acc)`` as floats; ``(0.0, 0.0)`` when ``loader`` yields no
        samples.
    """
    model.eval()

    # Follow the model onto whatever device it lives on, so a model moved to an
    # accelerator does not receive CPU batches.
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for videos, labels in loader:
            if device is not None:
                if torch.is_tensor(videos):
                    videos = videos.to(device)
                labels = labels.to(device)

            outputs = model(videos)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Re-weight by batch size so a smaller final batch does not skew the mean.
            total_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0, 0.0
    return total_loss / total, correct / total

In [None]:
from dataset import UCFdataset

# --- Configuration ----------------------------------------------------------
# Directory holding classInd.txt and the train/test split lists. The default is
# the original author's local checkout; set the UCF_SPLIT_DIR environment
# variable to run this cell on another machine without editing it.
SPLIT_DIR = os.environ.get(
    'UCF_SPLIT_DIR',
    '/Users/sanjitk./team-2/UTD-team-2/04-Action Recognition (UCF Dataset)/ucfTrainTestlist',
)
SEED = 0
BATCH_SIZE = 16
NUM_WORKERS = 1
NUM_EPOCHS = 5
LEARNING_RATE = 1e-3
CHECKPOINT_PATH = 'best_model.pth'

# Seed torch's global RNG so weight initialisation and the shuffled batch order
# are repeatable across Restart & Run All.
torch.manual_seed(SEED)

# Per-frame preprocessing handed to UCFdataset as `transform`: resize the
# shorter side to 128, centre-crop to 112x112, convert to a tensor, and
# normalise each channel (these are the commonly used ImageNet statistics).
frame_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(128),
    transforms.CenterCrop(112),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class_index_file = os.path.join(SPLIT_DIR, 'classInd.txt')
train_split = os.path.join(SPLIT_DIR, 'trainlist01.txt')
test_split = os.path.join(SPLIT_DIR, 'testlist01.txt')

train_dataset = UCFdataset(class_index_file, train_split, transform=frame_transform)
test_dataset = UCFdataset(class_index_file, test_split, transform=frame_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

if __name__ == "__main__":
    num_classes = 101
    model = transformerNet(num_classes=num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    best_acc = 0.0

    print('starting')

    for epoch in range(NUM_EPOCHS):
        train_loss, train_acc = train(model, train_loader, criterion, optimizer)
        test_loss, test_acc = evaluate(model, test_loader, criterion)
        print(f'on epoch {epoch}: train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, test_loss={test_loss:.4f}, test_acc={test_acc:.4f}')
        # Keep only the weights of the epoch with the best test accuracy so far.
        # NOTE(review): this selects the checkpoint on the test split; a separate
        # validation split would avoid an optimistic reported accuracy.
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), CHECKPOINT_PATH)




starting
training
batch 1/4769
batch 2/4769
batch 3/4769
batch 4/4769
batch 5/4769
batch 6/4769
batch 7/4769
batch 8/4769
batch 9/4769
batch 10/4769
batch 11/4769
batch 12/4769


KeyboardInterrupt: 