In [1]:
from torchvision.datasets import ImageFolder
import os
import csv
import pdb
import time
from tqdm import tqdm, trange
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms
import torchvision
import torch.optim.lr_scheduler as lr_scheduler
from PIL import Image

from models.spatial_transforms import *
from models.temporal_transforms import *
from models import models as TSN_model

from models.action_vst import action_vst

In [None]:
# ---- Run configuration -------------------------------------------------------
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook can still be executed on a machine without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

best_acc = 0.  # NOTE(review): not read by any cell visible in this notebook
# Optional seeding for reproducible runs (currently disabled).
# seed = 1
# torch.manual_seed(seed)
# torch.cuda.manual_seed(seed)
# torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
clip_gradient = 8  # NOTE(review): defined, but the visible training step never clips gradients
lr = 0.01  # initial learning rate handed to the SGD optimizer
lr_steps = [5,10,15]  # NOTE(review): not read by any cell visible in this notebook
step_size = 10  # NOTE(review): not read by any cell visible in this notebook
weight_decay = 5e-4  # L2 penalty handed to the SGD optimizer
batch_size = 4  # clips per batch for both data loaders

# Per-channel mean / std used by the normalisation step of the spatial pipeline.
input_mean=[.485, .456, .406]
input_std=[.229, .224, .225]
normalize = GroupNormalize(input_mean, input_std)

# Scale factors handed to GroupMultiScaleCrop in the training pipeline.
scales = [1, .875, .75, .66]

# Classification loss shared by the training and evaluation functions.
criterion = nn.CrossEntropyLoss().to(device)

display=20  # NOTE(review): not read by any cell visible in this notebook

In [None]:
class dataset_video(Dataset):
    """Two-class frame-folder dataset (label 0 = NonViolence, 1 = Violence).

    ``frame_path`` must contain the sub-folders ``NonViolence_Keyframes`` and
    ``Violence_Keyframes``. Every file in those folders becomes one entry of
    ``rgb_samples``; fetching an entry loads *all* files of the same folder
    whose name shares the entry's prefix (everything up to and including the
    last ``-`` of the file name) and passes them through ``spatial_transform``.

    NOTE(review): because every file is an entry, files sharing a prefix yield
    the same set of frames, so one clip is presumably present once per frame
    file. With a random train/test split this would put the same clip on both
    sides -- confirm against the data layout and de-duplicate if so.

    Args:
        frame_path: Root directory holding the two class folders.
        spatial_transform: Callable applied to the list of PIL images of a
            clip; its result must support ``.size()`` and ``.view()``.
        temporal_transform: Stored on the instance. NOTE(review): never applied
            in ``__getitem__``.
    """

    def __init__(self, frame_path, spatial_transform, temporal_transform):
        self.frame_path = frame_path
        self.rgb_samples, self.labels = [], []
        # The position of a folder in this tuple is its class label.
        for label, folder in enumerate(('NonViolence_Keyframes', 'Violence_Keyframes')):
            # os.listdir() returns entries in arbitrary order; sort so that the
            # index -> sample mapping is the same on every run and machine.
            for name in sorted(os.listdir(os.path.join(frame_path, folder))):
                self.rgb_samples.append(folder + '/' + name)
                self.labels.append(label)
        self.sample_num = len(self.rgb_samples)
        self.spatial_transform = spatial_transform
        self.temporal_transform = temporal_transform

    @staticmethod
    def _frame_order(filename):
        """Sort key: files whose text after the last '-' is a number come first, in numeric order."""
        stem = os.path.splitext(filename)[0]
        suffix = stem[stem.rfind('-') + 1:]
        if suffix.isdecimal():
            return (0, int(suffix), filename)
        # No numeric suffix: keep such files after the numbered ones, by name.
        return (1, 0, filename)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for entry ``idx``.

        ``frames`` is the output of ``spatial_transform`` reshaped with
        ``view(3, -1, h, w)``; ``label`` is a Python ``int``.
        """
        rgb_name = self.rgb_samples[idx]
        label = self.labels[idx]
        clip_rgb_frames = []

        # Prefix shared by all frames of this clip; the trailing '-' is kept so
        # that e.g. 'V_1-' does not also match 'V_10-'. If the file name holds
        # no '-', the prefix is empty and every file in the folder matches.
        rgb_video = rgb_name[rgb_name.find('/')+1:rgb_name.rfind('-')+1]
        rgb_folder = os.path.join(self.frame_path, rgb_name[:rgb_name.rfind('/')])
        # Sort the listing so frames are stacked in a deterministic order
        # (numeric suffix order, presumably the temporal order -- TODO confirm).
        for i in sorted(os.listdir(rgb_folder), key=self._frame_order):
            if i.startswith(rgb_video):
                # convert() returns an independent, fully loaded copy, so the
                # underlying file can be closed right away.
                with Image.open(os.path.join(rgb_folder, i)) as frame:
                    clip_rgb_frames.append(frame.convert("RGB"))
        clip_rgb_frames = self.spatial_transform(clip_rgb_frames)
        n, h, w = clip_rgb_frames.size()
        # NOTE(review): if the transform stacks frames as (T*3, H, W) with the
        # channels of each frame adjacent, view(3, -1, h, w) mixes frames and
        # channels; view(-1, 3, h, w).permute(1, 0, 2, 3) would be needed for a
        # true (C, T, H, W) layout -- confirm against Stack / ToTorchFormatTensor.
        return clip_rgb_frames.view(3, -1, h, w), int(label)

    def __len__(self):
        """Number of entries (one per file found in the two class folders)."""
        return int(self.sample_num)

In [None]:
# Temporal pipeline: a single TemporalUniformCrop_train step configured with 8.
temporal_transform_train = torchvision.transforms.Compose(
    [TemporalUniformCrop_train(8)]
)

# Spatial pipeline applied to the list of frames of one clip, in this order:
# multi-scale crop to 224 -> stack -> tensor conversion -> normalisation.
trans_train = torchvision.transforms.Compose(
    [
        GroupMultiScaleCrop(224, scales),
        Stack(roll=True),
        ToTorchFormatTensor(div=True),
        normalize,
    ]
)

# Build the dataset from the RLVS frame folders; the RWF variant is kept below
# as a commented-out alternative.
dataset = dataset_video(
    'data/RLVS',
    spatial_transform=trans_train,
    temporal_transform=temporal_transform_train,
)
# dataset = dataset_video('data/RWF', spatial_transform=trans_train, temporal_transform = temporal_transform_train)

In [None]:
# 80 / 20 train / test split.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
# Seed the split so that re-running this cell (or restarting the kernel) yields
# the same partition; an unseeded split would move samples the model was
# already trained on into the test set when the cell is executed again.
split_generator = torch.Generator().manual_seed(0)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Reshuffle the training samples every epoch so SGD does not see the same
# batch composition and order each time; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=16)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=16)

In [None]:
# Build the two-class network and place it on the configured device.
model = action_vst(num_classes=2).to(device)

In [None]:
# SGD with momentum over all model parameters.
optimizer = optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=weight_decay,
)

# Halve the learning rate when the monitored value (stepped with the validation
# accuracy, hence mode='max') has not improved for 10 consecutive epochs.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=10,
    threshold=0.01,
    verbose=True,
)

In [None]:
# import datetime
# log_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
# log_filepath = "logs/"+log_time+".txt"

In [None]:
from sklearn.metrics import precision_recall_fscore_support

def train_model(train_loader):
    """Train the global ``model`` for one pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``device``. Prints loss, accuracy, precision, recall and F1 for the pass;
    precision / recall / F1 use ``average='binary'`` (positive class = label 1)
    so they are directly comparable with the numbers printed by
    ``evaluate_model``.

    Args:
        train_loader: Iterable of batches where ``batch[0]`` holds the inputs
            and ``batch[1]`` the integer class labels.

    Returns:
        float: Mean of the per-batch losses.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    y_true = []
    y_pred = []
    for data in tqdm(train_loader, desc='Training'):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        # NOTE(review): a `clip_gradient` setting exists in the config cell but
        # no clipping is applied here -- confirm whether that is intended.
        optimizer.step()
        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        # Count the samples actually seen instead of relying on
        # len(train_loader.dataset), which is wrong when the loader drops or
        # re-samples items; plain Python ints also avoid tensor arithmetic.
        total += labels.size(0)
        correct += (preds == labels).sum().item()
        y_true.extend(labels.tolist())
        y_pred.extend(preds.tolist())
    train_loss = running_loss / len(train_loader)
    train_acc = correct / total
    train_prec, train_rec, train_f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    log = 'Training Loss: {:.4f} Acc: {:.4f} Prec: {:.4f} Rec: {:.4f} F1: {:.4f}'.format(train_loss, train_acc, train_prec, train_rec, train_f1)
    print(log)
    return train_loss


def evaluate_model(val_loader):
    """Evaluate the global ``model`` on ``val_loader`` without gradient tracking.

    Uses the module-level ``model``, ``criterion`` and ``device``. Prints the
    mean per-batch loss, accuracy and binary precision / recall / F1.

    Args:
        val_loader: Iterable of batches where ``batch[0]`` holds the inputs and
            ``batch[1]`` the integer class labels.

    Returns:
        float: Fraction of correctly classified samples.
    """
    model.eval()
    loss_sum = 0.0
    n_seen = 0
    n_hit = 0
    targets, guesses = [], []

    with torch.no_grad():
        for batch in val_loader:
            inputs = batch[0].to(device)
            labels = batch[1].to(device)
            outputs = model(inputs)

            # Accumulate the batch loss.
            loss_sum += criterion(outputs, labels).item()

            # Predicted class = index of the largest score in each row.
            predicted = torch.max(outputs.data, 1)[1]
            n_seen += labels.size(0)
            n_hit += (predicted == labels).sum().item()

            targets.extend(labels.tolist())
            guesses.extend(predicted.tolist())

    accuracy = n_hit / n_seen
    precision, recall, f1, _ = precision_recall_fscore_support(targets, guesses, average='binary')
    avg_loss = loss_sum / len(val_loader)
    print('Validation Loss: {:.4f} Acc: {:.4f} Prec: {:.4f} Rec: {:.4f} F1: {:.4f}'.format(avg_loss, accuracy, precision, recall, f1))
    return accuracy


In [None]:
# Epoch loop: one training pass, one evaluation pass, then let the plateau
# scheduler react to the validation accuracy.
num_epochs = 120
train_losses, val_accs = [], []
for epoch in range(num_epochs):
    train_loss = train_model(train_loader)
    train_losses.append(train_loss)

    val_acc = evaluate_model(test_loader)
    val_accs.append(val_acc)

    log = 'Epoch [{}/{}], Train Loss: {:.4f}, Val Acc: {:.4f}'.format(
        epoch + 1, num_epochs, train_loss, val_acc
    )
    print(log)
    # Optional file logging (disabled):
    # with open(log_filepath, "a") as f:
    #     f.write(log)
    #     f.write('\n')

    # The scheduler runs in 'max' mode, so it is stepped with the accuracy.
    scheduler.step(val_acc)

