<a href="https://colab.research.google.com/github/Linkanblomman/Fight_recognition/blob/master/train_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Connect to the separate project folder that has been uploaded to Google Drive

In [1]:
# Change the example path "/content/drive/My Drive/Colab_Notebooks/fight_recognition/" to the downloaded project folder
# -f/-n replace an already existing link, so re-running this cell does not create a nested link inside the linked folder
!ln -sfn "/content/drive/My Drive/Colab_Notebooks/fight_recognition/" /content/fight_recognition

Install decord for video slicing (https://github.com/dmlc/decord)

In [None]:
# %pip (instead of !pip) installs into the environment of the running kernel
%pip install decord

In [3]:
import math
import os
import random
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset


import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

from tqdm import tqdm

from decord import VideoReader
from decord import bridge
#from decord import cpu, gpu

from fight_recognition.SGDR import CosineAnnealingLR_with_Restart

import fight_recognition.model as ResNet
from fight_recognition.spatial_transforms import (Compose, Normalize, Resize, CenterCrop,
                                CornerCrop, MultiScaleCornerCrop,
                                RandomResizedCrop, RandomHorizontalFlip,
                                ToTensor, ScaleValue, ColorJitter,
                                PickFirstChannels)

In [None]:
# Seed the Python, NumPy and PyTorch random number generators with the same
# value so that repeated runs of the notebook start from the same random state.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)

In [None]:
# Report how many CUDA devices are visible and the name of the first one.
# The name lookup raises when no GPU is attached, so it is guarded.
print(torch.cuda.device_count())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")

In [6]:
batch_size = 16 # Number of clips per batch in both data loaders
model_architecture = 50 # ResNet variant passed to initialize_model

# Pre-training dataset of the loaded weights
# K - Kinetics-700
# KM - Kinetics-700 and Moments in Time
dataset = 'K'

# Use the first GPU when one is available, otherwise fall back to the CPU
# instead of failing later when tensors are moved to a missing device.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Computation device: {device}\n")

Computation device: cuda:0



In [7]:
# Load the dataset index (one row per video) and pull out paths and targets
df = pd.read_csv('./fight_recognition/input/data.csv')
X = df['video_path'].values # video paths
y = df['target'].values # targets

# Hold out 20% of the videos for validation; the split is seeded for repeatability
xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, random_state=seed_value, test_size=0.20)

# Sanity check: every split must have as many labels as videos
for split_name, split_videos, split_labels in (("Training", xtrain, ytrain),
                                               ("Validation", xtest, ytest)):
    print(f"{split_name} videos: {len(split_videos)}")
    print(f"{split_name} labels: {len(split_labels)}\n")

Training videos: 240
Training labels: 240

Validation videos: 60
Validation labels: 60



In [8]:
# custom dataset
class VideoDataset(Dataset):
    """Dataset that turns every video file into one fixed-length clip tensor.

    Args:
        videos: Indexable collection of video file paths.
        labels: Indexable collection of class labels aligned with ``videos``.
            When ``None`` the items are returned without a label.
        spatial_transform: Optional callable applied to every frame (a PIL
            image). It has to return tensors, because the transformed frames
            are combined with ``torch.stack``.
        clip_len: Number of frames sampled from every video (default 16).
    """

    def __init__(self, videos, labels=None, spatial_transform=None, clip_len=16):
        if clip_len <= 0:
            raise ValueError(f"clip_len must be positive, got {clip_len}")
        self.X = videos
        self.y = labels
        self.spatial_transform = spatial_transform
        self.clip_len = clip_len

    def __len__(self):
        return len(self.X)

    @staticmethod
    def _sample_frame_indices(duration, clip_len=16):
        """Return ``clip_len`` frame indices spread over a ``duration``-frame video.

        Videos with at least ``clip_len`` frames are sampled with a constant
        integer stride starting at frame 0. Shorter videos would give a stride
        of zero, so their frames are repeated instead to fill the clip.

        Raises:
            ValueError: If the video contains no frames.
        """
        if duration <= 0:
            raise ValueError("Cannot sample frames from an empty video")
        step = duration // clip_len
        if step >= 1:
            return list(range(0, step * clip_len, step))
        # Fewer frames than requested: map the clip positions evenly onto the
        # available frames, which repeats some of them.
        return [(k * duration) // clip_len for k in range(clip_len)]

    def __getitem__(self, i):
        vr = VideoReader(self.X[i]) # Read video
        bridge.set_bridge('native')
        frame_id_list = self._sample_frame_indices(len(vr), self.clip_len)

        # One batch with the sampled frames of the video
        video_snippet = vr.get_batch(frame_id_list).asnumpy()

        # Transform into images
        clip = [Image.fromarray(img) for img in video_snippet]

        if self.spatial_transform is not None:
            clip = [self.spatial_transform(img) for img in clip]

        # Stack the frames and swap the first two axes
        # (presumably (T, C, H, W) -> (C, T, H, W); depends on the transform output)
        clip = torch.stack(clip, 0).permute(1, 0, 2, 3)

        # The clip is input data, not a learnable parameter, so it is returned
        # without requires_grad; gradients are only needed for the model weights.
        if self.y is None:
            return clip

        return (clip, torch.tensor(self.y[i], dtype=torch.long))

Mean and std values used for normalization are taken from https://github.com/kenshohara/3D-ResNets-PyTorch/blob/master/main.py

In [9]:
# Per-channel normalization statistics, chosen by the pre-training dataset
if dataset != 'K':
  mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
else:
  mean, std = [0.4345, 0.4051, 0.3775], [0.2768, 0.2713, 0.2737]

sample_size = 112 # resolution of frame

# MultiScaleCornerCrop (four-corner cropping)
# Five crop scales: start at 1.0 and shrink by a constant factor each time
scale_step = 1 / (2**(1 / 4))
scales = [1.0]
while len(scales) < 5:
    scales.append(scales[-1] * scale_step)

# Training frames: multi-scale corner crop and horizontal flip, then tensor + normalize
train_steps = [
    MultiScaleCornerCrop(sample_size, scales),
    RandomHorizontalFlip(),
    ToTensor(),
    Normalize(mean, std),
]
spatial_transform_train = Compose(train_steps)

# Validation frames: resize and center crop only, then tensor + normalize
validation_steps = [
    Resize(sample_size),
    CenterCrop(sample_size),
    ToTensor(),
    Normalize(mean, std),
]
spatial_transform_validation = Compose(validation_steps)

In [10]:
# Wrap each split in a VideoDataset with its own frame transform
train_data = VideoDataset(videos=xtrain,
                          labels=ytrain,
                          spatial_transform=spatial_transform_train)
test_data = VideoDataset(videos=xtest,
                         labels=ytest,
                         spatial_transform=spatial_transform_validation)

# Training batches are reshuffled at every epoch; validation keeps a fixed order
TrainLoader = DataLoader(dataset=train_data, shuffle=True, batch_size=batch_size)
TestLoader = DataLoader(dataset=test_data, shuffle=False, batch_size=batch_size)

Download pre-trained models (https://github.com/kenshohara/3D-ResNets-PyTorch)

In [11]:
# Build the network from the configured architecture and pre-training dataset,
# with a two-class output.
model = ResNet.initialize_model(
    model_architecture=model_architecture,
    model_dataset=dataset,
    num_classes=2,
)

Model: ResNet34
Dataset: Kinetics-700

Model parameters
Learning rate: 3.0000000000000012e-09
Momentum: 0.9
Weight_decay: 0.0001


In [12]:
# Only these top-level children are fine-tuned; every other child is frozen
trainable_layers = ('layer4', 'fc')
for name, child in model.named_children():
    is_trainable = name in trainable_layers
    print(name + (' is unfrozen' if is_trainable else ' is frozen'))
    for param in child.parameters():
        param.requires_grad = is_trainable

model.to(device)

# layer4 uses the optimizer-wide learning rate, fc overrides it with a larger one
param_groups = [
    {'params': model.layer4.parameters()},
    {'params': model.fc.parameters(), 'lr': 3e-3},
]
optimizer = optim.SGD(param_groups, lr=3e-05, momentum=.9, weight_decay=.0001)

criterion = nn.CrossEntropyLoss()

# SGDR
t_mult = 1 # cycle multiplication
t_max = 25 # Maximum number of iterations/epochs
scheduler = CosineAnnealingLR_with_Restart(
    optimizer,
    T_max=t_max,
    T_mult=t_mult,
    model=model,
    out_dir='./fight_recognition/outputs/snapshots/',
    take_snapshot=True,
    eta_min=3e-09, # eta_min – Minimum learning rate
)

conv1 is frozen
bn1 is frozen
relu is frozen
maxpool is frozen
layer1 is frozen
layer2 is frozen
layer3 is frozen
layer4 is unfrozen
avgpool is frozen
fc is unfrozen


In [None]:
# Show the current learning rate of every optimizer parameter group, one per line
for group in optimizer.param_groups:
    print(group['lr'])

In [None]:
# Put the model into evaluation mode. Being the last expression of the cell,
# the call's return value is also displayed. fit() switches back with model.train().
model.eval()

In [None]:
# List the device every parameter lives on
for param_name, tensor in model.named_parameters():
  print(f"{tensor.device}   {param_name}")

In [None]:
# List which parameters will receive gradients
print("Check model requires_grad params\n")
print("Status\tParameters\n")
for param_name, tensor in model.named_parameters():
    print(f"{tensor.requires_grad}   {param_name}")

In [17]:
# Element count and trainable flag of every parameter tensor
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]

total_params = sum(size for size, _trainable in param_sizes)
print(f"{total_params:,} total parameters.")

total_trainable_params = sum(size for size, trainable in param_sizes if trainable)
print(f"{total_trainable_params:,} training parameters.")

63,514,562 total parameters.
39,067,650 training parameters.


In [18]:
# training function
def fit(model, train_dataloader):
    """Train ``model`` for one epoch and report loss and accuracy.

    Uses the notebook-level ``optimizer``, ``criterion`` and ``device``.
    ``criterion`` is expected to return the mean loss of a batch (the default
    of ``nn.CrossEntropyLoss()``), so each batch loss is weighted by its size
    before averaging over the epoch.

    Args:
        model: Network to train; it is switched to training mode.
        train_dataloader: Loader yielding ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(train_loss, train_accuracy)``: mean loss per sample and
        accuracy in percent over all samples seen in the epoch.
    """
    print('Training')
    model.train() # validate() leaves the model in eval mode, so switch back
    train_running_loss = 0.0
    train_running_correct = 0
    samples_seen = 0
    # len(dataloader) is the true number of batches, including a smaller last one
    for batch in tqdm(train_dataloader, total=len(train_dataloader)):
        data, target = batch[0].to(device), batch[1].to(device)
        optimizer.zero_grad() # Gradients accumulate otherwise
        outputs = model(data) # Forward pass on the batch

        loss = criterion(outputs, target)
        batch_len = target.size(0)
        # Undo the per-batch mean so that batches of different size are weighted correctly
        train_running_loss += loss.item() * batch_len
        _, preds = torch.max(outputs, dim=1) # Index of the highest score = predicted class
        train_running_correct += (preds == target).sum().item()
        samples_seen += batch_len
        loss.backward() # Calculate gradients
        optimizer.step() # Update the weights

    denominator = max(samples_seen, 1) # avoid dividing by zero on an empty loader
    train_loss = train_running_loss/denominator
    train_accuracy = 100. * train_running_correct/denominator

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}")

    return train_loss, train_accuracy

In [19]:
#validation function
def validate(model, test_dataloader):
    """Evaluate ``model`` on ``test_dataloader`` without updating any weights.

    Uses the notebook-level ``criterion`` and ``device``. ``criterion`` is
    expected to return the mean loss of a batch (the default of
    ``nn.CrossEntropyLoss()``), so each batch loss is weighted by its size
    before averaging.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        test_dataloader: Loader yielding ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(val_loss, val_accuracy)``: mean loss per sample and accuracy
        in percent over all samples seen.
    """
    print('Validating')
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    samples_seen = 0
    with torch.no_grad():
        # len(dataloader) is the true number of batches, including a smaller last one
        for batch in tqdm(test_dataloader, total=len(test_dataloader)):
            data, target = batch[0].to(device), batch[1].to(device)
            outputs = model(data)
            loss = criterion(outputs, target)

            batch_len = target.size(0)
            # Undo the per-batch mean so that batches of different size are weighted correctly
            val_running_loss += loss.item() * batch_len
            _, preds = torch.max(outputs, 1)
            val_running_correct += (preds == target).sum().item()
            samples_seen += batch_len

        denominator = max(samples_seen, 1) # avoid dividing by zero on an empty loader
        val_loss = val_running_loss/denominator
        val_accuracy = 100. * val_running_correct/denominator
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}')

        return val_loss, val_accuracy

In [None]:
epochs = 100

# Per-epoch history of both splits, consumed by the plotting cell
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []

start = time.time()
for epoch in range(epochs):
    # NOTE(review): the scheduler is stepped before the epoch's optimizer updates;
    # confirm this is the order CosineAnnealingLR_with_Restart expects.
    scheduler.step() # SGDR
    print(f"Epoch {epoch+1} of {epochs}")

    train_epoch_loss, train_epoch_accuracy = fit(model, TrainLoader)
    val_epoch_loss, val_epoch_accuracy = validate(model, TestLoader)

    train_loss += [train_epoch_loss]
    train_accuracy += [train_epoch_accuracy]
    val_loss += [val_epoch_loss]
    val_accuracy += [val_epoch_accuracy]
end = time.time()

# Wall-clock duration of the whole training run
print(f"{(end-start)/60:.3f} minutes")

In [None]:
def plot_history(train_values, val_values, metric, train_color, val_color, out_path):
    """Plot a per-epoch training/validation curve, save it and show it.

    Args:
        train_values: Metric value of the training split for every epoch.
        val_values: Metric value of the validation split for every epoch.
        metric: Name of the metric; used for the y-axis label, the title and
            (lower-cased) the legend entries.
        train_color: Line color of the training curve.
        val_color: Line color of the validation curve.
        out_path: File the figure is written to.
    """
    plt.figure(figsize=(10, 7))
    plt.plot(train_values, color=train_color, label=f'train {metric.lower()}')
    plt.plot(val_values, color=val_color, label=f'validation {metric.lower()}')
    plt.title(f'{metric} per epoch')
    plt.xlabel('Epochs')
    plt.ylabel(metric)
    plt.legend()
    plt.savefig(out_path)
    plt.show()

# accuracy plots
plot_history(train_accuracy, val_accuracy, 'Accuracy', 'green', 'blue',
             './fight_recognition/outputs/accuracy_3DCNN.png')

# loss plots
plot_history(train_loss, val_loss, 'Loss', 'orange', 'red',
             './fight_recognition/outputs/loss_3DCNN.png')

# serialize the model to disk (weights only, via the state dict)
print('Saving model...')
torch.save(model.state_dict(), "./fight_recognition/outputs/fight_reco_3DCNNmodel.pth")

print('TRAINING COMPLETE')