Baseline model reference: https://github.com/TwentyBN/something-something-v2-baseline


In [None]:
# Mount Google Drive at /content/gdrive; every path used later in this
# notebook lives under /content/gdrive/MyDrive/something-something/.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
%pwd
%cd /content/gdrive/MyDrive/something-something/
%pwd

/content/gdrive/MyDrive/something-something


'/content/gdrive/MyDrive/something-something'

In [None]:
#%ls | wc -l

In [None]:
#Unzip something-something-v2 dataset
#!cat 20bn-something-something-v2-?? | tar zx

In [None]:
!pip install av



In [None]:
import os
import cv2
import sys
import importlib
import torch
import torchvision
import numpy as np
import torch.nn as nn
import signal
import time
#import torch.utils.data
from torch.utils.data.dataset import Subset

In [None]:
# Put the project's code folder first on the import path so the local modules
# imported in the following cell (data_parser, models, utils, ...) resolve.
sys.path.insert(0, '/content/gdrive/MyDrive/something-something/code/')

In [None]:
from data_parser import WebmDataset
from data_loader_av import VideoFolder

from models.multi_column import MultiColumn
from transforms_video import *
from grad_cam_videos import GradCam
from callbacks import (PlotLearning, AverageMeter)

from utils import *
from pprint import pprint

In [None]:
import io
import base64
from IPython.display import HTML

In [None]:
# Central run configuration; the cells below read their settings from here.
config = {
    # Run name; joined with "output_dir" to form the checkpoint/plot folder.
    "model_name": "model_convlstm",
    "output_dir": "/content/gdrive/MyDrive/something-something/trained_models/",

    # Video decoding backend: "av" or "skvideo" (any other value raises ValueError).
    "input_mode": "av",

    # Root folder handed to VideoFolder as `root`.
    "data_folder": "/content/gdrive/MyDrive/something-something/something-something-dataset/20bn-something-something-v2/",

    # Annotation files for the train / validation / test splits.
    "json_data_train": "/content/gdrive/MyDrive/something-something/something-something-dataset/annotations/something-something-v2-train.json",
    "json_data_val": "/content/gdrive/MyDrive/something-something/something-something-dataset/annotations/something-something-v2-validation.json",
    "json_data_test": "/content/gdrive/MyDrive/something-something/something-something-dataset/annotations/something-something-v2-test.json",

    # Label definitions shared by all splits.
    "json_file_labels": "/content/gdrive/MyDrive/something-something/something-something-dataset/annotations/something-something-v2-labels.json",

    # DataLoader worker processes.
    "num_workers": 0,

    # Must equal the number of classes found in the dataset (asserted later).
    "num_classes": 174,
    "batch_size": 10,
    # Passed to VideoFolder as `clip_size`; also the split size along dim 2
    # when more than one clip per sample is requested.
    "clip_size": 60,
    
    # Clips per sample; values > 1 make train()/validate() split the batch
    # into several model inputs.
    "nclips_train": 1,
    "nclips_val": 1,

    # Multiplied by "input_spatial_size" to get the Scale() target size.
    "upscale_factor_train": 1.4,
    "upscale_factor_eval": 1.0,

    # Passed to VideoFolder as `step_size`.
    "step_size_train": 1,
    "step_size_val": 1,

    # SGD hyper-parameters. "last_lr" is the learning-rate floor the training
    # loop compares against to decide when to stop.
    "lr": 0.008,
    "last_lr": 0.00001,
    "momentum": 0.9,
    "weight_decay": 0.00001,
    # -1 is mapped to 999999 epochs further down.
    "num_epochs": 100,
    # Progress is printed every `print_freq` batches.
    "print_freq": 100,

    # NOTE(review): not read by any cell visible in this notebook -- the model
    # is built from the EncoderDecoderConvLSTM class defined below instead.
    "conv_model": "models.model3D_1",
    # Side length used for RandomCropVideo / CenterCrop.
    "input_spatial_size": 64,

    # Input width of the classifier layer. EncoderDecoderConvLSTM hard-codes
    # 512 output channels, so this has to stay 512 for that column.
    "column_units": 512,
    # Passed as the `get_features` flag to the model inside validate().
    "save_features": True,
    
    # Values checked by the notebook: 'train', 'resume', 'validate'.
    "mode" : 'train',
    # First epoch index of the training loop.
    "start_epoch" : 0
}

In [None]:
class MultiColumn(nn.Module):
    """Multi-column video classifier.

    - Useful when a video sample is too long and has to be split into
      multiple clips.
    - Runs the same column network on every clip, averages the resulting
      feature vectors across clips and feeds the average to the
      classification (FC) layers.

    Args:
        num_classes: number of output logits.
        conv_column: callable invoked as ``conv_column(64, 3)`` that returns
            the per-clip feature extractor (an ``nn.Module``).
        column_units: length of the feature vector produced by the column;
            used as the input width of the default classifier.
        clf_layers: optional classifier module. When ``None`` a single
            ``nn.Linear(column_units, num_classes)`` is created.
    """

    def __init__(self, num_classes, conv_column, column_units,
                 clf_layers=None):
        super(MultiColumn, self).__init__()
        self.num_classes = num_classes
        self.column_units = column_units
        # NOTE(review): the column is always built with the fixed arguments
        # (64, 3); they are not configurable through this constructor.
        self.conv_column = conv_column(64, 3)

        # Test against None explicitly: nn.Sequential defines __len__, so an
        # intentionally empty (identity) classifier would be falsy and would
        # be replaced by the default layer under a truthiness check.
        if clf_layers is None:
            clf_layers = torch.nn.Sequential(
                nn.Linear(column_units, self.num_classes)
            )
        self.clf_layers = clf_layers

    def forward(self, inputs, get_features=False):
        """Classify a sample given as a list of clips.

        Args:
            inputs: list of tensors, each of size
                (batch_size, 3, sequence_length, W, H).
            get_features: when True, also return the averaged column features.

        Returns:
            Logits of size (batch_size, num_classes), or the tuple
            ``(logits, averaged_features)`` when ``get_features`` is True.
        """
        num_cols = len(inputs)

        # Swap dims 1 and 2 of every clip before handing it to the column.
        column_outputs = [
            self.conv_column(clip.permute(0, 2, 1, 3, 4)) for clip in inputs
        ]

        # (num_cols, batch, units) -> sum over columns -> (batch, units).
        # No squeeze here: squeezing dim 1 would drop the feature axis when
        # the column emits a single unit and break the classifier input.
        summed = torch.stack(column_outputs).sum(dim=0)
        avg_output = summed / float(num_cols)

        outputs = self.clf_layers(avg_output)
        if get_features:
            return outputs, avg_output
        return outputs

In [None]:
class ConvLSTMCell(nn.Module):
    """Single convolutional LSTM cell.

    One convolution over the channel-wise concatenation of the input and the
    previous hidden state produces all four gate pre-activations at once.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, bias):
        """
        Initialize ConvLSTM cell.

        Parameters
        ----------
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        kernel_size: (int, int)
            Size of the convolutional kernel.
        bias: bool
            Whether or not to add the bias.
        """

        super(ConvLSTMCell, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.kernel_size = kernel_size
        # Half-kernel padding keeps the spatial size unchanged for odd kernels.
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias

        self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
                              out_channels=4 * self.hidden_dim,
                              kernel_size=self.kernel_size,
                              padding=self.padding,
                              bias=self.bias)

    def forward(self, input_tensor, cur_state):
        """Advance the cell by one time step.

        Parameters
        ----------
        input_tensor:
            Tensor whose dim 1 is the channel axis (``input_dim`` channels).
        cur_state:
            Pair ``(h_cur, c_cur)`` of current hidden and cell state.

        Returns
        -------
        (h_next, c_next): the updated hidden and cell state.
        """
        h_cur, c_cur = cur_state
        combined = torch.cat([input_tensor, h_cur], dim=1)  # concatenate along channel axis
        combined_conv = self.conv(combined)
        # The 4 * hidden_dim output channels hold the i, f, o, g gates in order.
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)

        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)

        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        """Return zero-filled ``(h, c)`` states.

        The states are created on the same device and with the same dtype as
        the convolution weights, so they stay compatible with the cell after
        ``.double()`` / ``.half()`` / ``.to(device)`` conversions.

        Parameters
        ----------
        batch_size: int
        image_size: (int, int)
            Height and width of the state tensors.
        """
        height, width = image_size
        weight = self.conv.weight
        shape = (batch_size, self.hidden_dim, height, width)
        return (torch.zeros(shape, device=weight.device, dtype=weight.dtype),
                torch.zeros(shape, device=weight.device, dtype=weight.dtype))

In [None]:
class EncoderDecoderConvLSTM(nn.Module):
    """ConvLSTM encoder-decoder that turns a frame sequence into a 512-d vector.

    ARCHITECTURE

    - Encoder: two stacked ConvLSTM cells stepped over the input frames.
    - Encoder vector: final hidden state of the second encoder cell.
    - Decoder: two stacked ConvLSTM cells unrolled for a number of steps;
      the first step is fed the encoder vector, later steps the previous
      decoder output.
    - Head: a 3D convolution to 512 channels whose output is averaged over
      its last three axes.
    """

    def __init__(self, nf, in_chan):
        """
        Parameters
        ----------
        nf: int
            Hidden channel count shared by all four ConvLSTM cells.
        in_chan: int
            Channel count of the input frames.
        """
        super(EncoderDecoderConvLSTM, self).__init__()

        def make_cell(channels_in):
            # All cells share hidden size, 3x3 kernel and bias setting.
            return ConvLSTMCell(input_dim=channels_in,
                                hidden_dim=nf,
                                kernel_size=(3, 3),
                                bias=True)

        self.encoder_1_convlstm = make_cell(in_chan)
        self.encoder_2_convlstm = make_cell(nf)
        self.decoder_1_convlstm = make_cell(nf)
        self.decoder_2_convlstm = make_cell(nf)

        self.decoder_CNN = nn.Conv3d(nf, 512,
                                     kernel_size=(3, 3, 3),
                                     padding=(1, 1, 1))

    def autoencoder(self, x, seq_len, future_step, h_t, c_t, h_t2, c_t2, h_t3, c_t3, h_t4, c_t4):
        """Run encoder and decoder and return the pooled head output.

        ``x`` is indexed as ``x[:, t]`` for ``t`` in ``range(seq_len)``; the
        remaining arguments are the initial (hidden, cell) states of the four
        cells. The decoder is unrolled ``future_step`` times.
        """
        # Encoder: step both stacked cells over the input frames.
        for t in range(seq_len):
            h_t, c_t = self.encoder_1_convlstm(input_tensor=x[:, t],
                                               cur_state=[h_t, c_t])
            h_t2, c_t2 = self.encoder_2_convlstm(input_tensor=h_t,
                                                 cur_state=[h_t2, c_t2])

        # Decoder: starts from the encoder vector, then feeds on its own output.
        decoder_input = h_t2
        predictions = []
        for _ in range(future_step):
            h_t3, c_t3 = self.decoder_1_convlstm(input_tensor=decoder_input,
                                                 cur_state=[h_t3, c_t3])
            h_t4, c_t4 = self.decoder_2_convlstm(input_tensor=h_t3,
                                                 cur_state=[h_t4, c_t4])
            decoder_input = h_t4
            predictions.append(h_t4)

        # Stack along a new dim 1, then swap dims 1 and 2 for the 3D conv.
        stacked = torch.stack(predictions, 1).permute(0, 2, 1, 3, 4)
        pooled = self.decoder_CNN(stacked)
        # Average away the last three axes, one at a time.
        for _ in range(3):
            pooled = pooled.mean(-1)

        return pooled

    def forward(self, x, future_seq=10, hidden_state=None):
        """
        Parameters
        ----------
        x:
            5-D Tensor of shape (b, t, c, h, w)        #   batch, time, channel, height, width
        future_seq: int
            Number of decoder steps.
        hidden_state:
            Accepted but not used by this implementation.
        """
        # find size of different input dimensions
        b, seq_len, _, h, w = x.size()

        # zero-initialised (hidden, cell) state for each of the four cells
        cells = (self.encoder_1_convlstm, self.encoder_2_convlstm,
                 self.decoder_1_convlstm, self.decoder_2_convlstm)
        (h_t, c_t), (h_t2, c_t2), (h_t3, c_t3), (h_t4, c_t4) = [
            cell.init_hidden(batch_size=b, image_size=(h, w)) for cell in cells
        ]

        return self.autoencoder(x, seq_len, future_seq,
                                h_t, c_t, h_t2, c_t2, h_t3, c_t3, h_t4, c_t4)

In [None]:
# Pick the compute device: the current CUDA device when one is available,
# otherwise the CPU (with an empty device-id list).
if torch.cuda.is_available():
    device = torch.device("cuda")
    # How many GPUs are there?
    print(torch.cuda.device_count())
    device_ids = [torch.cuda.current_device()]
else:
    device = torch.device("cpu")
    device_ids = []
print(device, device_ids)

1
cuda [0]


In [None]:
# Capture the output lines of the `nvidia-smi` shell command (IPython `!`
# syntax) and join them into one string. If that text contains 'failed',
# print instructions for enabling a GPU runtime; otherwise print the report.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Mon Apr 26 04:22:48 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    25W / 300W |      2MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from psutil import virtual_memory

# Total memory reported by psutil, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the cut-off this notebook uses for a "high-RAM" runtime.
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')

Your runtime has 27.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [None]:
# Lowest validation loss seen so far; starts at +inf so the first epoch always
# counts as an improvement. (A `global` statement at module level is a no-op,
# so a plain assignment is all that is needed.)
best_loss = float('Inf')

In [None]:
# Choose the VideoFolder implementation that matches the configured decoding
# backend; any value other than "av" or "skvideo" is rejected.
if config["input_mode"] == "av":
    from data_loader_av import VideoFolder
elif config["input_mode"] == "skvideo":
    from data_loader_skvideo import VideoFolder
else:
    raise ValueError("Please provide a valid input mode")

In [None]:
# set run output folder
model_name = config["model_name"]
output_dir = config["output_dir"]
save_dir = os.path.join(output_dir, model_name)
print(" > Output folder for this run -- {}".format(save_dir))
# Create <save_dir>/plots (and save_dir itself as a parent). exist_ok=True
# makes this safe to re-run and also repairs a run folder that already exists
# but is missing its plots/ subfolder, which the plotting callback writes to.
os.makedirs(os.path.join(save_dir, 'plots'), exist_ok=True)

 > Output folder for this run -- /content/gdrive/MyDrive/something-something/trained_models/model_convlstm


In [None]:
%cd /content/gdrive/MyDrive/something-something/
%ls

/content/gdrive/MyDrive/something-something
20bn-something-something-v2-00  20bn-something-something-v2-12
20bn-something-something-v2-01  20bn-something-something-v2-13
20bn-something-something-v2-02  20bn-something-something-v2-14
20bn-something-something-v2-03  20bn-something-something-v2-15
20bn-something-something-v2-04  20bn-something-something-v2-16
20bn-something-something-v2-05  20bn-something-something-v2-17
20bn-something-something-v2-06  20bn-something-something-v2-18
20bn-something-something-v2-07  20bn-something-something-v2-19
20bn-something-something-v2-08  [0m[01;34mcode[0m/
20bn-something-something-v2-09  [01;34msomething-something-dataset[0m/
20bn-something-something-v2-10  [01;34mtrained_models[0m/
20bn-something-something-v2-11


In [None]:
# Build the classifier: a MultiColumn wrapper around the ConvLSTM
# encoder-decoder column.
print(" > Creating model ... !")
model = MultiColumn(
    num_classes=config['num_classes'],
    conv_column=EncoderDecoderConvLSTM,
    column_units=int(config["column_units"]),
)

 > Creating model ... !


In [None]:
# Wrap the model in DataParallel over the selected device ids, then move it
# to the compute device.
model = torch.nn.DataParallel(model, device_ids=device_ids)
model = model.to(device)

In [None]:
# Location of the best checkpoint for this run: <output_dir>/<model_name>/model_best.pth.tar
checkpoint_path = os.path.join(
    config['output_dir'], config['model_name'], 'model_best.pth.tar')

In [None]:
# Optionally restore model weights, best loss and starting epoch from the
# best checkpoint of this run.
# Strings must be compared with ==; `is` tests object identity and only works
# by accident of interning.
if config['mode'] == 'resume':
    if os.path.isfile(checkpoint_path):
        print(" > Loading checkpoint '{}'".format(checkpoint_path))
        # map_location lets a checkpoint saved on GPU be loaded on the
        # currently selected device (e.g. a CPU-only runtime).
        checkpoint = torch.load(checkpoint_path, map_location=device)
        # The training cells read the first epoch from config["start_epoch"],
        # so record the restored epoch there.
        config['start_epoch'] = checkpoint['epoch']
        best_loss = checkpoint['best_loss']
        model.load_state_dict(checkpoint['state_dict'])
        print(" > Loaded checkpoint '{}' (epoch {})"
              .format(checkpoint_path, checkpoint['epoch']))
    else:
        print(" !#! No checkpoint found at '{}'".format(
            checkpoint_path))

In [None]:
# define augmentation pipeline
# Frames are first scaled to input_spatial_size * upscale factor (truncated
# to an int); the crop transforms then cut them back to input_spatial_size.
upscale_size_train, upscale_size_eval = [
    int(config['input_spatial_size'] * config[factor_key])
    for factor_key in ("upscale_factor_train", "upscale_factor_eval")
]

In [None]:
# Each ComposeMix entry pairs a transform with a tag, "img" or "vid".
# NOTE(review): presumably "img" transforms run per frame and "vid" transforms
# on the whole clip -- confirm in transforms_video.

# Random crop videos during training
# (random rotation of up to 15, scale up, then random crop to the input size)
transform_train_pre = ComposeMix([
        [RandomRotationVideo(15), "vid"],
        [Scale(upscale_size_train), "img"],
        [RandomCropVideo(config['input_spatial_size']), "vid"],
         ])

# Center crop videos during evaluation
# (scale, convert to PIL image, then deterministic center crop)
transform_eval_pre = ComposeMix([
        [Scale(upscale_size_eval), "img"],
        [torchvision.transforms.ToPILImage(), "img"],
        [torchvision.transforms.CenterCrop(config['input_spatial_size']), "img"],
         ])

# Transforms common to train and eval sets and applied after "pre" transforms
# (tensor conversion followed by per-channel normalization)
transform_post = ComposeMix([
        [torchvision.transforms.ToTensor(), "img"],
        [torchvision.transforms.Normalize(
                   mean=[0.485, 0.456, 0.406],  # default values for imagenet
                   std=[0.229, 0.224, 0.225]), "img"]
         ])

In [None]:
# Training split: built from the train annotations with the training-time
# augmentations; item ids are not returned.
train_data = VideoFolder(root=config['data_folder'],
                             json_file_input=config['json_data_train'],
                             json_file_labels=config['json_file_labels'],
                             clip_size=config['clip_size'],
                             nclips=config['nclips_train'],
                             step_size=config['step_size_train'],
                             is_val=False,
                             transform_pre=transform_train_pre,
                             transform_post=transform_post,
                             #augmentation_mappings_json=config['augmentation_mappings_json'],
                             #augmentation_types_todo=config['augmentation_types_todo'],
                             get_item_id=False,
                             )
print(len(train_data))
# Keep only the first 20000 samples. After this, train_data is a Subset and
# the underlying VideoFolder is reached through train_data.dataset.
train_data = Subset(train_data, np.arange(20000))
print(train_data.dataset.classes)
print(len(train_data))

168913
['Approaching something with your camera', 'Attaching something to something', 'Bending something so that it deforms', 'Bending something until it breaks', 'Burying something in something', 'Closing something', 'Covering something with something', 'Digging something out of something', 'Dropping something behind something', 'Dropping something in front of something', 'Dropping something into something', 'Dropping something next to something', 'Dropping something onto something', 'Failing to put something into something because something does not fit', 'Folding something', 'Hitting something with something', 'Holding something', 'Holding something behind something', 'Holding something in front of something', 'Holding something next to something', 'Holding something over something', 'Laying something on the table on its side, not upright', 'Letting something roll along a flat surface', 'Letting something roll down a slanted surface', 'Letting something roll up a slanted surface, so

In [None]:
# Report how many worker processes the data loaders will use.
num_loader_workers = config["num_workers"]
print(" > Using {} processes for data loader.".format(num_loader_workers))

 > Using 0 processes for data loader.


In [None]:
#def my_collate(batch):
#    "Puts each data field into a tensor with outer dimension batch size"
#    batch = filter (lambda x:x is not None, batch)
#    return torch.utils.data.dataloader.default_collate(list(batch))

In [None]:
# Training loader. shuffle=True reshuffles the samples every epoch; with
# shuffle=False SGD would see the same batches, in annotation order, on every
# pass. drop_last=True discards a trailing incomplete batch.
train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=config['batch_size'], shuffle=True,
        num_workers=config['num_workers'], pin_memory=True,
        drop_last=True)

In [None]:
# Validation split: evaluation-time transforms, item ids returned alongside
# each sample (validate() unpacks three values per batch).
val_data = VideoFolder(root=config['data_folder'],
                           json_file_input=config['json_data_val'],
                           json_file_labels=config['json_file_labels'],
                           clip_size=config['clip_size'],
                           nclips=config['nclips_val'],
                           step_size=config['step_size_val'],
                           is_val=True,
                           transform_pre=transform_eval_pre,
                           transform_post=transform_post,
                           get_item_id=True,
                           )
# Keep only the first 1000 validation samples.
val_data = Subset(val_data, np.arange(1000))

In [None]:
# Validation loader: fixed order, and the final partial batch is kept.
val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=config['batch_size'], shuffle=False,
        num_workers=config['num_workers'], pin_memory=True,
        drop_last=False)

In [None]:
# Test split: same settings as the validation split, built from the test
# annotations with is_test=True.
test_data = VideoFolder(root=config['data_folder'],
                            json_file_input=config['json_data_test'],
                            json_file_labels=config['json_file_labels'],
                            clip_size=config['clip_size'],
                            nclips=config['nclips_val'],
                            step_size=config['step_size_val'],
                            is_val=True,
                            transform_pre=transform_eval_pre,
                            transform_post=transform_post,
                            get_item_id=True,
                            is_test=True,
                            )
# Keep only the first 1000 test samples.
test_data = Subset(test_data, np.arange(1000))

In [None]:
# Test loader: fixed order, and the final partial batch is kept.
test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=config['batch_size'], shuffle=False,
        num_workers=config['num_workers'], pin_memory=True,
        drop_last=False)

In [None]:
# Sanity check: the label set loaded by the dataset must match the configured
# number of output classes.
num_dataset_classes = len(train_data.dataset.classes)
print(" > Number of dataset classes : {}".format(num_dataset_classes))
assert num_dataset_classes == config["num_classes"]

 > Number of dataset classes : 174


In [None]:
# define loss function (criterion): cross-entropy, moved to the compute device
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

In [None]:
# define optimizer: SGD with momentum and weight decay taken from the config.
# `lr` and `last_lr` stay available as module-level names for later cells.
lr, last_lr = config["lr"], config["last_lr"]
momentum, weight_decay = config['momentum'], config['weight_decay']
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

In [None]:
# **************************Only Validate***********************
# In "validate" mode, run a single evaluation pass over the test loader.
# NOTE(review): validate() is defined in a later cell of this notebook, so
# when the cells are run top to bottom this call raises NameError unless that
# cell has been executed first. Nothing here stops the later training cells
# from running afterwards either.
if config["mode"] == "validate":
        validate(test_loader, model, criterion, train_data.dataset.classes_dict)
        print(" > Evaluation DONE !")

In [None]:
# set callbacks
# Learning-curve plotter writing into <save_dir>/plots.
plots_dir = os.path.join(save_dir, "plots")
plotter = PlotLearning(plots_dir, config["num_classes"])

# Halve the learning rate when the monitored (minimised) metric has not
# improved for more than `patience` epochs.
lr_decayer = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=2, verbose=True)

val_loss = float('Inf')

In [None]:
# set end condition by num epochs
# A configured value of -1 means "no practical limit" and is mapped to 999999.
num_epochs = int(config["num_epochs"])
num_epochs = 999999 if num_epochs == -1 else num_epochs

In [None]:
# Announce the run and pick the first epoch index from the configuration.
print(" > Training is getting started...")
print(" > Training takes {} epochs.".format(num_epochs))
start_epoch = config["start_epoch"]  # first epoch index used by the training loop

 > Training is getting started...
 > Training takes 100 epochs.


In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: network called with a list of clip tensors.
        criterion: loss applied to ``(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only in the progress log.

    Returns:
        Tuple ``(average loss, average Prec@1, average Prec@5)`` for the epoch.
    """
    batch_time, data_time, losses, top1, top5 = [AverageMeter() for _ in range(5)]

    # switch to train mode
    model.train()

    end = time.time()
    for step, (clips, target) in enumerate(train_loader):
        # time spent waiting for the loader to deliver this batch
        data_time.update(time.time() - end)

        # With several clips per sample, split along dim 2 into
        # clip_size-long chunks; the model always receives a list.
        if config['nclips_train'] > 1:
            input_var = [chunk.to(device)
                         for chunk in clips.split(config['clip_size'], 2)]
        else:
            input_var = [clips.to(device)]

        target = target.to(device)

        model.zero_grad()
        # forward pass and loss
        output = model(input_var)
        loss = criterion(output, target)

        # running metrics, weighted by the number of samples in the batch
        n_samples = clips.size(0)
        prec1, prec5 = accuracy(output.detach().cpu(), target.detach().cpu(), topk=(1, 5))
        losses.update(loss.item(), n_samples)
        top1.update(prec1.item(), n_samples)
        top5.update(prec5.item(), n_samples)

        # backward pass and SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # wall-clock time for the whole iteration
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config["print_freq"] != 0:
            continue
        print('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  epoch, step, len(train_loader), batch_time=batch_time,
                  data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg

In [None]:
def validate(val_loader, model, criterion, class_to_idx=None):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Args:
        val_loader: iterable yielding ``(input, target, item_id)`` batches.
        model: network called with a list of clip tensors and a flag asking
            it to return features in addition to logits.
        criterion: loss applied to ``(output, target)``.
        class_to_idx: class mapping forwarded to ``save_results`` and
            ``get_submission``; only used when ``config["mode"]`` is
            ``'validate'``.

    Returns:
        Tuple ``(average loss, average Prec@1, average Prec@5)``.

    Side effects:
        When ``config["mode"] == 'validate'``, logits, features, targets and
        item ids of all batches are concatenated and passed to
        ``save_results`` and ``get_submission``.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    collect_results = config["mode"] == 'validate'
    logits_matrix = []
    features_matrix = []
    targets_list = []
    item_id_list = []

    end = time.time()
    with torch.no_grad():
        for i, (clips, target, item_id) in enumerate(val_loader):

            # With several clips per sample, split along dim 2 into
            # clip_size-long chunks; the model always receives a list.
            if config['nclips_val'] > 1:
                input_var = [chunk.to(device)
                             for chunk in clips.split(config['clip_size'], 2)]
            else:
                input_var = [clips.to(device)]

            target = target.to(device)

            # Always ask for the (logits, features) pair. Forwarding
            # config['save_features'] here made the model return a single
            # tensor whenever the flag was False, which broke this two-value
            # unpacking.
            output, features = model(input_var, True)
            loss = criterion(output, target)

            if collect_results:
                logits_matrix.append(output.cpu().numpy())
                features_matrix.append(features.cpu().numpy())
                targets_list.append(target.cpu().numpy())
                item_id_list.append(item_id)

            # measure accuracy and record loss
            n_samples = clips.size(0)
            prec1, prec5 = accuracy(output.detach().cpu(), target.detach().cpu(), topk=(1, 5))
            losses.update(loss.item(), n_samples)
            top1.update(prec1.item(), n_samples)
            top5.update(prec5.item(), n_samples)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config["print_freq"] == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    if collect_results:
        # Merge the per-batch arrays before handing them to the result writers.
        logits_matrix = np.concatenate(logits_matrix)
        features_matrix = np.concatenate(features_matrix)
        targets_list = np.concatenate(targets_list)
        item_id_list = np.concatenate(item_id_list)
        print(logits_matrix.shape, targets_list.shape, item_id_list.shape)
        save_results(logits_matrix, features_matrix, targets_list,
                     item_id_list, class_to_idx, config)
        get_submission(logits_matrix, item_id_list, class_to_idx, config)
    return losses.avg, top1.avg, top5.avg

In [None]:
# Main training loop: train, validate, adapt the learning rate, plot, and
# checkpoint once per epoch.
for epoch in range(start_epoch, num_epochs):

    # Read the learning rates the optimizer is using right now; the scheduler
    # lowers them in place, so the initial `lr` value goes stale.
    lrs = [params['lr'] for params in optimizer.param_groups]
    print(" > Current LR(s) -- {}".format(lrs))
    current_lr = float(np.max(lrs))
    # Stop once the scheduler has decayed the rate below the configured floor.
    if current_lr < last_lr and last_lr > 0:
        print(" > Training is DONE by learning rate {}".format(last_lr))
        break

    # train for one epoch
    train_loss, train_top1, train_top5 = train(
        train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    val_loss, val_top1, val_top5 = validate(val_loader, model, criterion)

    # set learning rate (the scheduler's `epoch` argument is deprecated, so
    # only the monitored metric is passed)
    lr_decayer.step(val_loss)

    # plot learning
    plotter_dict = {}
    plotter_dict['loss'] = train_loss
    plotter_dict['val_loss'] = val_loss
    plotter_dict['acc'] = train_top1 / 100
    plotter_dict['val_acc'] = val_top1 / 100
    # rate that was in effect during this epoch
    plotter_dict['learning_rate'] = current_lr
    plotter.plot(plotter_dict)

    print(" > Validation loss after epoch {} = {}".format(epoch, val_loss))

    # remember best loss and save the checkpoint
    is_best = val_loss < best_loss
    best_loss = min(val_loss, best_loss)
    save_checkpoint({
        'epoch': epoch + 1,
        'arch': "Conv4Col",
        'state_dict': model.state_dict(),
        'best_loss': best_loss,
    }, is_best, config)

 > Current LR(s) -- [0.008]


  imgs = [f.to_rgb().to_nd_array() for f in reader.decode(video=0)]


Epoch: [0][0/2000]	Time 1.780 (1.780)	Data 1.209 (1.209)	Loss 5.1696 (5.1696)	Prec@1 0.000 (0.000)	Prec@5 10.000 (10.000)
Epoch: [0][100/2000]	Time 1.749 (1.709)	Data 1.210 (1.169)	Loss 5.1640 (5.1470)	Prec@1 0.000 (0.891)	Prec@5 0.000 (5.743)
Epoch: [0][200/2000]	Time 1.686 (1.704)	Data 1.146 (1.164)	Loss 5.0639 (5.1315)	Prec@1 0.000 (1.244)	Prec@5 10.000 (7.313)
Epoch: [0][300/2000]	Time 1.756 (1.705)	Data 1.216 (1.165)	Loss 5.0399 (5.1122)	Prec@1 10.000 (1.329)	Prec@5 10.000 (7.542)
Epoch: [0][400/2000]	Time 1.629 (1.704)	Data 1.089 (1.164)	Loss 4.8714 (5.0874)	Prec@1 0.000 (1.372)	Prec@5 0.000 (7.406)
Epoch: [0][500/2000]	Time 1.713 (1.703)	Data 1.174 (1.163)	Loss 4.9290 (5.0704)	Prec@1 0.000 (1.597)	Prec@5 0.000 (7.605)
Epoch: [0][600/2000]	Time 1.690 (1.701)	Data 1.150 (1.161)	Loss 4.8096 (5.0581)	Prec@1 10.000 (1.631)	Prec@5 20.000 (7.438)
Epoch: [0][700/2000]	Time 1.669 (1.700)	Data 1.129 (1.161)	Loss 4.6283 (5.0463)	Prec@1 10.000 (1.655)	Prec@5 40.000 (7.532)
Epoch: [0][800/20



 > Validation loss after epoch 0 = 5.0240919876098635
 > Best model found at this epoch. Saving ...
 > Current LR(s) -- [0.008]
Epoch: [1][0/2000]	Time 1.738 (1.738)	Data 1.197 (1.197)	Loss 5.0068 (5.0068)	Prec@1 0.000 (0.000)	Prec@5 10.000 (10.000)
Epoch: [1][100/2000]	Time 1.718 (1.697)	Data 1.178 (1.157)	Loss 5.1090 (4.9579)	Prec@1 0.000 (1.980)	Prec@5 0.000 (9.802)
Epoch: [1][200/2000]	Time 1.695 (1.691)	Data 1.155 (1.151)	Loss 4.7872 (4.9660)	Prec@1 0.000 (1.891)	Prec@5 20.000 (9.403)
Epoch: [1][300/2000]	Time 1.707 (1.692)	Data 1.167 (1.152)	Loss 4.9953 (4.9667)	Prec@1 10.000 (1.827)	Prec@5 10.000 (9.070)
Epoch: [1][400/2000]	Time 1.683 (1.691)	Data 1.143 (1.150)	Loss 4.7932 (4.9672)	Prec@1 0.000 (1.721)	Prec@5 10.000 (8.703)
Epoch: [1][500/2000]	Time 1.696 (1.692)	Data 1.157 (1.152)	Loss 4.8989 (4.9685)	Prec@1 0.000 (1.836)	Prec@5 0.000 (8.643)
Epoch: [1][600/2000]	Time 1.669 (1.692)	Data 1.128 (1.152)	Loss 4.8304 (4.9684)	Prec@1 10.000 (1.797)	Prec@5 20.000 (8.436)
Epoch: [1][7