<a href="https://colab.research.google.com/github/Linkanblomman/Fight_recognition/blob/master/hyperparameter_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Get access to the project files on Google Drive (the drive must be mounted first).

Create a symbolic link so the project files are easily accessible from the "/content/fight_recognition" folder.

In [None]:
# Example, your Google drive folder:"/content/drive/My Drive/Colab_Notebooks/fight_recognition/" 
# Example, colab folder: /content/fight_recognition
!ln -s "/content/drive/My Drive/Colab_Notebooks/fight_recognition/" /content/fight_recognition
# If incorrect folder and you want to reset colab: Runtime -> Factory reset runtime

In [None]:
# Installs decord, the package that VideoReader and bridge are imported from further down.
!pip install decord

In [None]:
# Installs the tensorboard package, whose notebook extension is loaded in the last cell.
!pip install tensorboard

In [None]:
# Standard library
import copy
import datetime, os
import math
import random
import time
from collections import OrderedDict, namedtuple
# Must stay after `import datetime`: it rebinds the name to the class used by datetime.now()
from datetime import datetime
from itertools import product

# Third-party
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import pandas as pd
from PIL import Image

import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')

from sklearn.model_selection import train_test_split

from tqdm import tqdm

from decord import VideoReader
from decord import bridge

# Project-local
from fight_recognition.SGDR import CosineAnnealingLR_with_Restart

import fight_recognition.model as ResNet
from fight_recognition.spatial_transforms import (Compose, Normalize, Resize, CenterCrop,
                                CornerCrop, MultiScaleCornerCrop,
                                RandomResizedCrop, RandomHorizontalFlip,
                                ToTensor, ScaleValue, ColorJitter,
                                PickFirstChannels)

Check if the GPU is enabled.

If not, then change CPU to GPU: Runtime -> Change runtime type -> Hardware accelerator -> GPU


In [None]:
# Fail fast when the runtime has no GPU: later cells place the model and every batch on CUDA.
# PyTorch is asked directly so TensorFlow does not have to be imported (and does not get a
# chance to claim GPU memory) just to perform this check.
if not torch.cuda.is_available():
    raise SystemError('GPU device not found')
device_name = torch.cuda.get_device_name(0)
print('Found GPU at: {}'.format(device_name))

In [None]:
# Seed every random number generator the pipeline may draw from so a re-run is repeatable.
seed_value = 42
random.seed(seed_value)     # Python RNG - presumably used by the random spatial transforms; verify in spatial_transforms
np.random.seed(seed_value)  # NumPy global RNG
torch.manual_seed(seed_value)  # PyTorch RNG; kept as the last expression so the cell still displays the Generator

In [None]:
# Report how many CUDA devices PyTorch sees and the name of device 0.
print(f"Device_count: {torch.cuda.device_count()}")
print(f"Device_name: {torch.cuda.get_device_name(0)}")

In [None]:
# The model and all batches are placed on the first CUDA device.
device = torch.device('cuda', 0)
print("Computation device: {}".format(device))

In [None]:
# custom dataset
class VideoDataset(Dataset):
    """Dataset that turns every video file into one fixed-length clip tensor.

    Args:
        videos: indexable collection of video paths, each handed to decord's
            ``VideoReader``.
        labels: indexable collection of class labels aligned with ``videos``.
            ``__getitem__`` reads ``labels[i]``, so it has to be supplied
            before items are fetched even though the argument is optional.
        spatial_transform: optional callable applied to every frame (a PIL
            image). The frames are combined with ``torch.stack`` afterwards,
            so the transform has to return tensors.
        num_frames: how many frames are sampled from each video (default 16).

    Raises:
        ValueError: if ``num_frames`` is smaller than 1.
    """

    def __init__(self, videos, labels=None, spatial_transform=None, num_frames=16):
        if num_frames < 1:
            raise ValueError("num_frames must be at least 1")
        self.X = videos
        self.y = labels
        self.spatial_transform = spatial_transform
        self.num_frames = num_frames

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.X)

    @staticmethod
    def _sample_frame_indices(duration, num_frames):
        """Pick ``num_frames`` frame positions spread evenly from the start of a video.

        Args:
            duration: total number of frames in the video.
            num_frames: number of positions to return.

        Returns:
            list of ``num_frames`` ints. When the video has at least
            ``num_frames`` frames the positions are ``0, step, 2*step, ...``
            with ``step = duration // num_frames``. A shorter video
            contributes all of its frames and the last one is repeated to
            fill the clip.

        Raises:
            ValueError: if the video has no frames.
        """
        if duration < 1:
            raise ValueError("video contains no frames")
        step = duration // num_frames
        if step == 0:
            # Fewer frames than requested: a zero step would make range() raise,
            # so take every frame and pad with the final one.
            return [min(position, duration - 1) for position in range(num_frames)]
        return list(range(0, step * num_frames, step))

    def __getitem__(self, i):
        """Return ``(clip, label)`` for the video at index ``i``.

        ``clip`` is the stack of transformed frames permuted so that the frame
        axis comes second (channel, depth, height, width for channel-first
        frame tensors); ``label`` is a ``torch.long`` scalar tensor.
        """
        vr = VideoReader(self.X[i]) # Read video
        bridge.set_bridge('native') # native output: <class 'decord.ndarray.NDArray'>, e.g. (240, 426, 3) per frame
        duration = len(vr) # Number of frames in video

        frame_id_list = self._sample_frame_indices(duration, self.num_frames) # positions of frames

        video_snippet = vr.get_batch(frame_id_list).asnumpy() # One array holding the sampled frames

        # Transform into images
        clip = [Image.fromarray(img) for img in video_snippet]

        # Spatial transform on images
        if self.spatial_transform is not None:
            clip = [self.spatial_transform(img) for img in clip]

        # Stacking gives [Depth, Channel, Height, Width]; the permute swaps the first
        # two axes to [Channel, Depth, Height, Width]. The DataLoader adds the batch axis.
        clip = torch.stack(clip, 0).permute(1, 0, 2, 3)

        label = self.y[i]

        # Video (stacked frames): e.g. torch.Size([3, 16, 112, 112]) with 16 RGB frames cropped to 112x112
        # Label: scalar tensor, torch.Size([])
        # NOTE(review): requires_grad_(True) makes autograd track the input batch, which
        # presumably also builds the graph through frozen layers - confirm it is needed.
        return (clip.clone().detach().requires_grad_(True), torch.tensor(label, dtype=torch.long))

In [None]:
class RunBuilder():
    """Expands a grid of hyperparameter candidates into individual run configurations."""

    @staticmethod
    def get_runs(params):
        """Return one ``Run`` namedtuple per combination of the candidate values.

        Args:
            params: mapping of hyperparameter name -> iterable of candidate
                values. The mapping's iteration order fixes the field order of
                ``Run``; the last parameter varies fastest.

        Returns:
            list of ``Run`` namedtuples whose fields are the keys of ``params``,
            in the order produced by ``itertools.product``.
        """
        # namedtuple is provided by the notebook's import cell (collections).
        Run = namedtuple('Run', params.keys())
        return [Run(*values) for values in product(*params.values())]

In [None]:
# Build the ResNet model.
# model_architecture: network depth, 34 or 50
# dataset: 'K'  - Kinetics-700
#          'KM' - Kinetics-700 and Moments in Time
dataset = 'KM'
model_architecture = 50

# Two output classes are requested from the project's model factory.
model = ResNet.initialize_model(
    model_architecture=model_architecture,
    model_dataset=dataset,
    num_classes=2,
)

In [None]:
# Freeze every top-level child of the model except the ones named below,
# whose parameters stay trainable.
trainable_children = ('layer4', 'fc')
for name, child in model.named_children():
    keep_trainable = name in trainable_children
    print(name + (' is unfrozen' if keep_trainable else ' is frozen'))
    for param in child.parameters():
        param.requires_grad = keep_trainable

In [None]:
# Put the network on the GPU and switch it to eval mode
# (the result of the chain is the cell's displayed value).
model.to(device).eval()

In [None]:
# Count all parameters, then only those that still receive gradients.
total_params = sum(param.numel() for param in model.parameters())
print("{:,} total parameters.".format(total_params))
total_trainable_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print("{:,} training parameters.".format(total_trainable_params))

In [None]:
# training function
def fit(model, train_dataloader):
    """Train ``model`` for one pass over ``train_dataloader``.

    Uses the notebook-level names ``optimizer``, ``criterion`` and ``device``,
    which must be defined before the call.

    Args:
        model: network to train; it is switched to training mode.
        train_dataloader: DataLoader yielding ``(clips, labels)`` batches.

    Returns:
        (train_loss, train_accuracy): the sum of the per-batch loss values
        divided by the number of samples in the dataset, and the share of
        correct predictions in percent.
    """
    print('Training')
    model.train() # back to training mode; validate() leaves the model in eval mode
    train_running_loss = 0.0
    train_running_correct = 0
    # len(train_dataloader) is the real number of batches (including a final partial
    # one), so the progress bar no longer depends on a global dataset variable.
    for batch in tqdm(train_dataloader, total=len(train_dataloader)):
        data, target = batch[0].to(device), batch[1].to(device)
        optimizer.zero_grad() # Reset the gradients, otherwise they accumulate across batches
        outputs = model(data) # Forward pass on the batched clips

        # The loss tensor carries the computational graph used in the backpropagation step
        loss = criterion(outputs, target)
        train_running_loss += loss.item() # accumulate this batch's loss value
        _, preds = torch.max(outputs.detach(), dim=1) # index of the highest score per sample
        train_running_correct += (preds == target).sum().item() # count correct predictions
        loss.backward() # Calculate gradients
        optimizer.step() # Update the weights

    train_loss = train_running_loss/len(train_dataloader.dataset)
    train_accuracy = 100. * train_running_correct/len(train_dataloader.dataset)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}")

    return train_loss, train_accuracy

In [None]:
#validation function
def validate(model, test_dataloader):
    """Evaluate ``model`` on ``test_dataloader`` without computing gradients.

    Uses the notebook-level names ``criterion`` and ``device``, which must be
    defined before the call.

    Args:
        model: network to evaluate; it is switched to eval mode.
        test_dataloader: DataLoader yielding ``(clips, labels)`` batches.

    Returns:
        (val_loss, val_accuracy): the sum of the per-batch loss values divided
        by the number of samples in the dataset, and the share of correct
        predictions in percent.
    """
    print('Validating')
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    with torch.no_grad():
        # len(test_dataloader) is the real number of batches (including a final partial
        # one), so the progress bar no longer depends on a global dataset variable.
        for batch in tqdm(test_dataloader, total=len(test_dataloader)):
            data, target = batch[0].to(device), batch[1].to(device)
            outputs = model(data)
            loss = criterion(outputs, target)

            val_running_loss += loss.item()
            _, preds = torch.max(outputs.detach(), 1) # index of the highest score per sample
            val_running_correct += (preds == target).sum().item()

        val_loss = val_running_loss/len(test_dataloader.dataset)
        val_accuracy = 100. * val_running_correct/len(test_dataloader.dataset)
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}')

        return val_loss, val_accuracy

In [None]:
# Per-channel normalisation statistics, selected by the dataset code chosen for the model.
mean, std = (
    ([0.4345, 0.4051, 0.3775], [0.2768, 0.2713, 0.2737])
    if dataset == 'K'
    else ([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
)

sample_size = 112 # resolution of frame

# MultiScaleCornerCrop (four-corner cropping):
# five scales, each one the previous scale multiplied by scale_step
scale_step = 1 / (2**(1 / 4))
scales = [1.0]
for _ in range(4):
    scales.append(scales[-1] * scale_step)

# Training frames: multi-scale corner crop + horizontal flip, then tensor conversion and normalisation
spatial_transform_train = Compose([
    MultiScaleCornerCrop(sample_size, scales),
    RandomHorizontalFlip(),
    ToTensor(),
    Normalize(mean, std),
])

# Validation frames: resize + centre crop, then tensor conversion and normalisation
spatial_transform_validation = Compose([
    Resize(sample_size),
    CenterCrop(sample_size),
    ToTensor(),
    Normalize(mean, std),
])

In [None]:
# Load data.csv and pull out the video paths and their labels
df = pd.read_csv('./fight_recognition/input/data.csv')
X = df['video_path'].values # video paths
y = df['target'].values # targets

# Hold out 20% of the videos as the validation/test dataset
xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.20, random_state=seed_value
)

print(f"Training videos: {len(xtrain)}\nTraining labels: {len(ytrain)}\n")

print(f"Validation videos: {len(xtest)}\nValidation labels: {len(ytest)}\n")

In [None]:
# Test different hyperparameters: one training run per combination of the values below
params = OrderedDict(
    lr=[.001],
    batch_size=[16, 32],
    momentum=[.9],
)

# The datasets do not depend on the hyperparameters, so they are built once.
train_data = VideoDataset(xtrain, ytrain, spatial_transform_train)
test_data = VideoDataset(xtest, ytest, spatial_transform_validation)

epochs = 10

for run in RunBuilder.get_runs(params):
    # Each run trains its own copy of the initial model. Re-using `model` itself would let
    # a run continue from the weights the previous run ended with, making runs incomparable.
    run_model = copy.deepcopy(model)

    TrainLoader = DataLoader(train_data, batch_size=run.batch_size, shuffle=True) # shuffle=True reshuffles the data at every epoch
    TestLoader = DataLoader(test_data, batch_size=run.batch_size, shuffle=False)

    # fit() and validate() read `optimizer` and `criterion` from the notebook namespace.
    # layer4 uses the default learning rate (run.lr * 1e-2); fc uses run.lr.
    optimizer = optim.SGD([{'params': run_model.layer4.parameters()},
                           {'params': run_model.fc.parameters(), 'lr': run.lr}
                           ], lr=run.lr*1e-2, momentum=run.momentum, weight_decay=0.0001)

    criterion = nn.CrossEntropyLoss()

    # SGDR
    # NOTE(review): t_max (20) is larger than epochs (10) - presumably no cycle completes
    # within a run; confirm whether a snapshot is ever taken.
    t_mult = 1 # Cycle multiplication
    t_max = 20 # Maximum number of iterations/epochs
    scheduler = CosineAnnealingLR_with_Restart(optimizer, T_max=t_max, T_mult=t_mult, model=run_model,
                                               out_dir='./fight_recognition/outputs/snapshots/',
                                               take_snapshot=True,
                                               eta_min=1e-9)

    comment = f'-{run}' # Comments in Tensorboard for each run
    current_time = datetime.now().strftime("Date_%Y-%m-%d_Time_%H-%M-%S")

    log_dir = os.path.join(
        'runs',
        current_time + '_' + comment
    )

    tb = SummaryWriter(log_dir=log_dir) # In "runs" folder

    start = time.time()
    try:
        for epoch in range(epochs):
            # NOTE(review): the scheduler is stepped before the epoch's optimizer steps;
            # confirm this is the order CosineAnnealingLR_with_Restart expects.
            scheduler.step() # SGDR
            print(f"Epoch {epoch+1} of {epochs}")
            train_epoch_loss, train_epoch_accuracy = fit(run_model, TrainLoader) # Train network
            val_epoch_loss, val_epoch_accuracy = validate(run_model, TestLoader) # Validate network on test/validation dataset

            # Add to Scalar in Tensorboard.
            # fit()/validate() divide the summed per-batch losses by the number of samples;
            # multiplying by the batch size rescales that to roughly the mean loss per batch.
            tb.add_scalar("Training Loss", train_epoch_loss * run.batch_size, epoch)
            tb.add_scalar("Validation Loss", val_epoch_loss * run.batch_size, epoch)
            tb.add_scalar("Training accuracy", train_epoch_accuracy, epoch)
            tb.add_scalar("Validation accuracy", val_epoch_accuracy, epoch)

        end = time.time()
    finally:
        # Flush and close the event file even when a run is interrupted.
        tb.close()

    torch.cuda.empty_cache()

    print(f"{(end-start)/60:.3f} minutes")

In [None]:
# Load the TensorBoard notebook extension and open the dashboard on the "runs"
# directory that the training loop writes its logs to.
%load_ext tensorboard
%tensorboard --logdir runs