Baseline model reference: https://github.com/TwentyBN/something-something-v2-baseline


In [None]:
# Mount Google Drive inside the Colab VM so the project files under
# /content/gdrive become reachable from the notebook.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Show the current directory, switch to the project folder on Drive, then
# show the directory again to confirm the change.
%pwd
%cd /content/gdrive/MyDrive/something-something/
%pwd

/content/gdrive/MyDrive/something-something


'/content/gdrive/MyDrive/something-something'

In [None]:
#%ls | wc -l

In [None]:
#Unzip something-something-v2 dataset
#!cat 20bn-something-something-v2-?? | tar zx

In [None]:
!pip install av



In [None]:
import os
import cv2
import sys
import importlib
import torch
import torchvision
import numpy as np
import torch.nn as nn
import signal
import time
#import torch.utils.data

In [None]:
# Put the project's code directory first on the import path so the local
# modules imported in the next cell can be resolved.
sys.path.insert(0, '/content/gdrive/MyDrive/something-something/code/')

In [None]:
from data_parser import WebmDataset
from data_loader_av import VideoFolder

from models.multi_column import MultiColumn
from transforms_video import *
from grad_cam_videos import GradCam
from callbacks import (PlotLearning, AverageMeter)

from utils import *
from pprint import pprint

from math import factorial
from torch.nn.utils import weight_norm

In [None]:
import io
import base64
from IPython.display import HTML

In [None]:
# Tensor type handed to the SITH layers for their filters: CUDA floats when a
# GPU is visible, CPU floats otherwise.
if torch.cuda.is_available():
    ttype = torch.cuda.FloatTensor
else:
    ttype = torch.FloatTensor

In [None]:
# Location of the dataset on the mounted Drive; every data path in the config
# is derived from these two prefixes.
_DATASET_ROOT = "/content/gdrive/MyDrive/something-something/something-something-dataset"
_ANNOTATION_PREFIX = _DATASET_ROOT + "/annotations/something-something-v2-"

# Run settings read by the cells below.
config = {
    # Run identity; together these give the folder used for outputs/checkpoints.
    "model_name": "model_sith",
    "output_dir": "trained_models/",

    # Selects the VideoFolder implementation ("av" or "skvideo").
    "input_mode": "av",

    # Videos and annotation files.
    "data_folder": _DATASET_ROOT + "/20bn-something-something-v2/",

    "json_data_train": _ANNOTATION_PREFIX + "train.json",
    "json_data_val": _ANNOTATION_PREFIX + "validation.json",
    "json_data_test": _ANNOTATION_PREFIX + "test.json",

    "json_file_labels": _ANNOTATION_PREFIX + "labels.json",

    # DataLoader worker processes.
    "num_workers": 0,

    # Batch / clip geometry.
    "num_classes": 174,
    "batch_size": 10,
    "clip_size": 60,

    "nclips_train": 1,
    "nclips_val": 1,

    "upscale_factor_train": 1.4,
    "upscale_factor_eval": 1.0,

    "step_size_train": 1,
    "step_size_val": 1,

    # Optimisation. Training stops once the learning rate falls below
    # "last_lr"; "num_epochs" of -1 is turned into a very large epoch count.
    "lr": 0.008,
    "last_lr": 0.00001,
    "momentum": 0.9,
    "weight_decay": 0.00001,
    "num_epochs": 1,
    "print_freq": 100,

    "conv_model": "models.model3D_1",
    "input_spatial_size": 64,

    # Width of the feature vector fed to the classification layer.
    "column_units": 512,
    "save_features": True,

    # "train", "validate" or "resume" (the values the cells below test for).
    "mode": 'train',
    "start_epoch": 0,
}

In [None]:
# First SITH layer. Its input is the flattened per-frame encoder output:
# 123008 = 32 channels * 62 * 62, i.e. a 3x3 convolution without padding
# applied to 64x64 frames.
sith_params1 = {
    "in_features": 123008,
    "tau_min": 1,
    "tau_max": 20.0,
    "buff_max": 40,
    "k": 50,
    "ntau": 5,
    "g": 0,
    "ttype": ttype,
    "hidden_size": 10,
    "act_func": nn.ReLU(),
}

# Second SITH layer: fed by the first layer's hidden units, with a longer
# temporal range (larger tau_max / buff_max).
sith_params2 = {
    "in_features": sith_params1["hidden_size"],
    "tau_min": 1,
    "tau_max": 200.0,
    "buff_max": 240,
    "k": 50,
    "ntau": 5,
    "g": 0,
    "ttype": ttype,
    "hidden_size": 20,
    "act_func": nn.ReLU(),
}

# Layer stack and inter-layer dropout probability used to build DeepSITH.
layer_params = [sith_params1, sith_params2]
dropout = 0.0

In [None]:
class MultiColumn(nn.Module):
    """Multi-column video classifier.

    - Useful when a video sample is too long and has to be split into
      multiple clips
    - Runs the same column network on each clip and averages the resulting
      features across clips before passing them to the classification (FC)
      layer

    NOTE: this definition shadows the ``MultiColumn`` imported from
    ``models.multi_column`` earlier in the notebook.

    Args:
        num_classes: number of output classes of the default classifier.
        conv_column: class (or factory) of the column network. It is called
            as ``conv_column(layer_params, dropout)``, where ``layer_params``
            and ``dropout`` are notebook-level globals that must be defined
            before the model is created.
        column_units: size of the feature vector produced by the column.
        clf_layers: optional classification head; when falsy, a single
            ``nn.Linear(column_units, num_classes)`` is used.
    """

    def __init__(self, num_classes, conv_column, column_units,
                 clf_layers=None):
        super(MultiColumn, self).__init__()
        self.num_classes = num_classes
        self.column_units = column_units
        # `layer_params` and `dropout` are read from the notebook namespace.
        self.conv_column = conv_column(layer_params, dropout)
        self.clf_layers = clf_layers

        if not self.clf_layers:
            self.clf_layers = torch.nn.Sequential(
                                 nn.Linear(column_units, self.num_classes)
                                )

    def forward(self, inputs, get_features=False):
        """Classify a list of clips.

        Args:
            inputs: list of tensors, each of size
                (batch_size, 3, sequence_length, W, H).
            get_features: when true, also return the clip-averaged features.

        Returns:
            Logits of size (batch_size, num_classes), or the tuple
            ``(logits, averaged_features)`` when ``get_features`` is true.
        """
        num_cols = len(inputs)

        # Each clip is permuted to (batch, sequence, channels, W, H) before it
        # is handed to the column.
        column_features = [self.conv_column(clip.permute(0, 2, 1, 3, 4))
                           for clip in inputs]

        # (batch, num_cols, features) -> mean over the clip axis. No squeeze is
        # applied: squeezing axis 1 would drop the feature axis whenever the
        # column emits a single unit and break the classifier input.
        stacked = torch.stack(column_features, dim=1)
        avg_output = torch.sum(stacked, 1) / float(num_cols)

        outputs = self.clf_layers(avg_output)
        if get_features:
            return outputs, avg_output
        return outputs

In [None]:
# Impulse-based SITH class
class iSITH(torch.nn.Module):
    """A SITH module using the perfect equation for the resulting ftilde

    Parameters
    ----------

        - tau_min: float
            The center of the temporal receptive field for the first taustar produced.
        - tau_max: float
            The center of the temporal receptive field for the last taustar produced.
        - buff_max: int
            The maximum time in which the filters go into the past. NOTE: In order to
            achieve as few edge effects as possible, buff_max needs to be bigger than
            tau_max, and dependent on k, such that the filters have enough time to reach
            very close to 0.0. Plot the filters and you will see them go to 0.
            Defaults to 3*tau_max when None.
        - k: int
            Temporal Specificity of the taustars. If this number is high, then taustars
            will always be more narrow.
        - ntau: int
            Number of taustars produced, spread out logarithmically. With ntau == 1
            the single taustar sits at tau_min.
        - dt: float
            The time delta of the model. There will be int(buff_max/dt) filters per
            taustar. Essentially this is the base rate of information being presented
            to the model.
        - g: float
            Typically between 0 and 1. This parameter is the scaling factor of the output
            of the module. If set to 1, the output amplitude for a delta function will be
            identical through time. If set to 0, the amplitude will decay into the past,
            getting smaller and smaller. This value should be picked on an application to
            application basis.
        - ttype: Torch Tensor
            The type the filters are cast to once they have been computed. The
            computation itself is done with DoubleTensors. The filters are kept as a
            non-persistent buffer, so they also follow later ``.to()`` / ``.cuda()``
            calls on the module and are not written to the state dict.
    """

    def __init__(self, tau_min=.1, tau_max=100., buff_max=None, k=50, ntau=50, dt=1, g=0.0,
                 ttype=torch.FloatTensor):
        super(iSITH, self).__init__()
        self.k = k
        self.tau_min = tau_min
        self.tau_max = tau_max
        if buff_max is None:
            buff_max = 3*tau_max
        self.buff_max = buff_max
        self.ntau = ntau
        self.dt = dt
        self.g = g

        # Ratio between neighbouring taustars minus one. A single taustar has
        # no neighbour, so the ratio is irrelevant there; 0 avoids a division
        # by zero in the exponent.
        if ntau > 1:
            self.c = (tau_max/tau_min)**(1./(ntau-1))-1
        else:
            self.c = 0.0

        # Logarithmically spaced filter centers and the time axis of the buffer.
        self.tau_star = tau_min*(1+self.c)**torch.arange(ntau).type(torch.DoubleTensor)
        self.times = torch.arange(dt, buff_max+dt, dt).type(torch.DoubleTensor)

        # One filter per taustar, evaluated on `times`: shape (ntau, len(times)).
        scaled_times = self.times.unsqueeze(0)/self.tau_star.unsqueeze(1)
        A = ((1/self.tau_star)*(k**(k+1)/factorial(k))*(self.tau_star**self.g)).unsqueeze(1)
        filters = A*(scaled_times**(k+1)) * torch.exp(k*(-scaled_times))

        # Flip along time so the last column is the most recent past, then add
        # the two singleton axes conv2d expects: (ntau, 1, 1, len(times)).
        filters = torch.flip(filters, [-1]).unsqueeze(1).unsqueeze(1)
        self.register_buffer('filters', filters.type(ttype), persistent=False)

    def extra_repr(self):
        s = "ntau={ntau}, tau_min={tau_min}, tau_max={tau_max}, buff_max={buff_max}, dt={dt}, k={k}, g={g}"
        s = s.format(**self.__dict__)
        return s

    def forward(self, inp):
        """Takes in (Batch, 1, features, sequence) and returns (Batch, Taustar, features, sequence)"""
        assert(len(inp.shape) >= 4)
        seq_len = inp.shape[-1]
        # Only the most recent `seq_len` filter taps can overlap the input.
        kernel = self.filters[:, :, :, -seq_len:]
        out = torch.conv2d(inp, kernel, padding=[0, kernel.shape[-1]])
        # note we're scaling the output by both dt and the k/(k+1)
        # Off by 1 introduced by the conv2d
        return out[:, :, :, 1:seq_len+1]*self.dt*self.k/(self.k+1)

In [None]:
class _DeepSITH_core(nn.Module):
    """One DeepSITH layer: an iSITH temporal filter bank followed by a
    weight-normalised linear projection (and optional activation).

    Args:
        layer_params: dict with the iSITH keyword arguments plus
            ``in_features`` (required), ``hidden_size`` (defaults to
            ``in_features``) and ``act_func`` (optional activation module).
            The dict is not modified.
    """
    def __init__(self, layer_params):
        super(_DeepSITH_core, self).__init__()

        # Work on a copy: popping from the caller's dict would strip the keys
        # and make a second model built from the same params fail.
        sith_kwargs = dict(layer_params)
        in_features = sith_kwargs.pop('in_features')
        hidden_size = sith_kwargs.pop('hidden_size', in_features)
        act_func = sith_kwargs.pop('act_func', None)

        self.sith = iSITH(**sith_kwargs)

        # The linear layer sees all taustars of all features at one time step.
        linear_in = self.sith.ntau*in_features

        # NOTE(review): kaiming_normal_ is applied to `.weight` after
        # weight_norm has been installed; weight_norm appears to recompute
        # `.weight` from its own parameters on every forward pass, so this
        # initialisation may have no lasting effect -- confirm.
        if act_func is None:
            self.linear = weight_norm(nn.Linear(linear_in, hidden_size))
            nn.init.kaiming_normal_(self.linear.weight.data)
        else:
            self.linear = nn.Sequential(weight_norm(nn.Linear(linear_in, hidden_size)),
                                        act_func)
            nn.init.kaiming_normal_(self.linear[0].weight.data)

    def forward(self, inp):
        """Map (batch, 1, features, sequence) to (batch, sequence, hidden_size)."""
        x = self.sith(inp)                       # (batch, ntau, features, sequence)
        x = x.transpose(3,2).transpose(2,1)      # (batch, sequence, ntau, features)
        x = x.view(x.shape[0], x.shape[1], -1)   # (batch, sequence, ntau*features)
        x = self.linear(x)
        return x


class DeepSITH(nn.Module):
    """A Module built for SITH like an LSTM

    Each frame is encoded with a 3x3 convolution, the per-frame encodings are
    run through a stack of SITH layers, and the result is decoded by a linear
    layer and a 3D convolution into a 512-dimensional feature vector.

    Parameters
    ----------
    layer_params: list
        A list of dictionaries for each layer in the desired DeepSITH. All
        of the parameters needed for the SITH part of the Layers, as well as
        a hidden_size and optional act_func are required to be present.
    dropout: float
        Dropout probability applied between consecutive layers.
    layer_params keys
    -----------------
    hidden_size: int (default in_features)
        The size of the output of the hidden layer. Please note that the
        in_features parameter for the next layer's SITH representation should be
        equal to the previous layer's hidden_size. This parameter will default
        to the in_features of the current SITH layer if not specified.
    act_func: torch.nn.Module (default None)
        The torch layer of the desired activation function, or None if
        there is no desired activation function between layers.
    In addition to these keys, you must include all of the non-optional SITH
    layer keys in each dictionary. Please see the SITH docstring for
    suggestions.
    """
    def __init__(self, layer_params, dropout=.5):
        super(DeepSITH, self).__init__()

        if len(layer_params) == 0:
            raise ValueError("DeepSITH needs at least one layer in layer_params")

        # Width of the last layer's output. It is read before the layers are
        # built so it is available whatever the layer constructor does with
        # the dictionaries.
        last_params = layer_params[-1]
        decoder_in = last_params.get('hidden_size', last_params['in_features'])

        self.encoder = nn.Sequential(nn.Conv2d(in_channels=3,
                              out_channels=32, kernel_size=(3,3)),
                              nn.Flatten())

        self.layers = nn.ModuleList([_DeepSITH_core(params)
                                      for params in layer_params])
        self.dropouts = nn.ModuleList([nn.Dropout(dropout)
                                       for _ in range(len(layer_params) - 1)])

        # Each time step's hidden vector is expanded to a 32x32 map.
        self.decoder = nn.Sequential(nn.Linear(decoder_in, 1024),
                                     nn.Unflatten(2, (32, 32)))

        self.decoder_conv3d = nn.Conv3d(in_channels=1,
                                     out_channels=512,
                                     kernel_size=(3, 3, 3),
                                     padding=(0, 1, 1))

    def forward(self, inp):
        """Map (batch, sequence, 3, H, W) frames to (batch, 512) features."""
        # Encode every frame independently: list of (batch, encoded_features).
        encoded = [self.encoder(inp[:, t]) for t in range(inp.shape[1])]

        # (batch, sequence, features) -> (batch, 1, features, sequence)
        x = torch.stack(encoded, 1).unsqueeze(1).permute(0, 1, 3, 2)

        # Chain the layers: every layer consumes the previous layer's output.
        last_index = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)                                # (batch, sequence, hidden)
            if i < last_index:
                x = self.dropouts[i](x)
                x = x.unsqueeze(1).transpose(3, 2)      # (batch, 1, hidden, sequence)

        outputs = self.decoder(x).unsqueeze(1)          # (batch, 1, sequence, 32, 32)
        outputs = self.decoder_conv3d(outputs)
        outputs = torch.sigmoid(outputs)
        # Average over the remaining time and spatial axes.
        outputs = outputs.mean(-1).mean(-1).mean(-1)

        return outputs

In [None]:
# Pick the compute device. On a GPU runtime also report the number of visible
# GPUs and remember the index of the current one for DataParallel.
if torch.cuda.is_available():
    device = torch.device("cuda")
    # How many GPUs are there?
    print(torch.cuda.device_count())
    device_ids = [torch.cuda.current_device()]
else:
    device = torch.device("cpu")
    device_ids = []
print(device, device_ids)

1
cuda [0]


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Sun Apr 25 04:59:45 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    25W / 300W |      2MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Report the runtime's total memory and hint at the high-RAM runtime when it
# is below 20 GB.
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if not ram_gb < 20:
  print('You are using a high-RAM runtime!')
else:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')

Your runtime has 27.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [None]:
# Lowest validation loss seen so far. It starts at +inf so that the first
# evaluated epoch always counts as an improvement. (A `global` statement at
# module level has no effect, so none is used.)
best_loss = float('Inf')

In [None]:
# Bind VideoFolder to the loader implementation matching the configured
# decoding backend; any other value is rejected up front.
if config["input_mode"] not in ("av", "skvideo"):
    raise ValueError("Please provide a valid input mode")

if config["input_mode"] == "av":
    from data_loader_av import VideoFolder
else:
    from data_loader_skvideo import VideoFolder

In [None]:
# set run output folder
model_name = config["model_name"]
output_dir = config["output_dir"]
save_dir = os.path.join(output_dir, model_name)
print(" > Output folder for this run -- {}".format(save_dir))
# Create the run folder together with its plots/ subfolder. exist_ok makes the
# cell safe to re-run and also creates plots/ when save_dir already exists
# without it.
os.makedirs(os.path.join(save_dir, 'plots'), exist_ok=True)

 > Output folder for this run -- trained_models/model3D_1


In [None]:
# Leave the Drive project folder and list the Colab VM's /content directory.
# NOTE(review): from here on, relative paths such as config["output_dir"]
# resolve under /content rather than under the Drive folder in which save_dir
# was created above -- confirm this is intended.
%cd /content/
%ls

/content
[0m[01;34mgdrive[0m/  [01;34msample_data[0m/


In [None]:
# create model
print(" > Creating model ... !")
# One DeepSITH column followed by a linear classifier over the dataset classes.
model = MultiColumn(
    num_classes=config['num_classes'],
    conv_column=DeepSITH,
    column_units=int(config["column_units"]),
)

 > Creating model ... !


In [None]:
# multi GPU setting: wrap the model for the selected device ids, then move it
# to the compute device.
model = torch.nn.DataParallel(model, device_ids=device_ids)
model = model.to(device)

In [None]:
# optionally resume from a checkpoint: this is the file the resume cell reads
path_parts = (config['output_dir'], config['model_name'], 'model_best.pth.tar')
checkpoint_path = os.path.join(*path_parts)

In [None]:
# In "resume" mode restore the model weights, the best loss so far and the
# epoch to continue from. The string is compared with `==` (identity tests on
# strings are unreliable), and the restored epoch is written to
# config['start_epoch'], which is what the training cells read.
if config['mode'] == 'resume':
    if os.path.isfile(checkpoint_path):
        print(" > Loading checkpoint '{}'".format(checkpoint_path))
        # map_location lets a checkpoint saved on GPU be loaded on a CPU runtime.
        checkpoint = torch.load(checkpoint_path, map_location=device)
        config['start_epoch'] = checkpoint['epoch']
        best_loss = checkpoint['best_loss']
        model.load_state_dict(checkpoint['state_dict'])
        print(" > Loaded checkpoint '{}' (epoch {})"
              .format(checkpoint_path, checkpoint['epoch']))
    else:
        print(" !#! No checkpoint found at '{}'".format(
            checkpoint_path))

In [None]:
# define augmentation pipeline
# Sizes passed to Scale below: the network input size multiplied by the
# train/eval upscale factor (presumably frames are rescaled to this size and
# then cropped back to input_spatial_size -- confirm against transforms_video).
upscale_size_train = int(config['input_spatial_size'] * config["upscale_factor_train"])
upscale_size_eval = int(config['input_spatial_size'] * config["upscale_factor_eval"])

In [None]:
# Random crop videos during training
# Each entry pairs a transform with a tag, "vid" or "img" (presumably whether
# ComposeMix applies it to the whole clip or frame by frame -- confirm against
# transforms_video).
transform_train_pre = ComposeMix([
        [RandomRotationVideo(15), "vid"],
        [Scale(upscale_size_train), "img"],
        [RandomCropVideo(config['input_spatial_size']), "vid"],
         ])

# Center crop videos during evaluation
transform_eval_pre = ComposeMix([
        [Scale(upscale_size_eval), "img"],
        [torchvision.transforms.ToPILImage(), "img"],
        [torchvision.transforms.CenterCrop(config['input_spatial_size']), "img"],
         ])

# Transforms common to train and eval sets and applied after "pre" transforms
transform_post = ComposeMix([
        [torchvision.transforms.ToTensor(), "img"],
        [torchvision.transforms.Normalize(
                   mean=[0.485, 0.456, 0.406],  # default values for imagenet
                   std=[0.229, 0.224, 0.225]), "img"]
         ])

In [None]:
# Training set built from the train annotations with the training-time
# augmentations.
train_data = VideoFolder(root=config['data_folder'],
                             json_file_input=config['json_data_train'],
                             json_file_labels=config['json_file_labels'],
                             clip_size=config['clip_size'],
                             nclips=config['nclips_train'],
                             step_size=config['step_size_train'],
                             is_val=False,
                             transform_pre=transform_train_pre,
                             transform_post=transform_post,
                             #augmentation_mappings_json=config['augmentation_mappings_json'],
                             #augmentation_types_todo=config['augmentation_types_todo'],
                             get_item_id=False,
                             )

print(len(train_data))
# Train on at most the first 10000 clips. Capping by the dataset size keeps
# every index valid when fewer clips are available.
train_data = Subset(train_data, np.arange(min(10000, len(train_data))))
print(train_data.dataset.classes)
print(len(train_data))

In [None]:
# Report how many worker processes the data loaders will use.
print(" > Using {} processes for data loader.".format(
        config["num_workers"]))

 > Using 0 processes for data loader.


In [None]:
#def my_collate(batch):
#    "Puts each data field into a tensor with outer dimension batch size"
#    batch = filter (lambda x:x is not None, batch)
#    return torch.utils.data.dataloader.default_collate(list(batch))

In [None]:
# Batches for training; incomplete final batches are dropped.
# NOTE(review): shuffle is off, so every epoch visits the clips in the same
# order -- confirm this is intended for training.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    pin_memory=True,
    drop_last=True,
)

In [None]:
# Validation set built from the validation annotations with the
# evaluation-time transforms; items also carry their id (get_item_id=True).
val_data = VideoFolder(root=config['data_folder'],
                           json_file_input=config['json_data_val'],
                           json_file_labels=config['json_file_labels'],
                           clip_size=config['clip_size'],
                           nclips=config['nclips_val'],
                           step_size=config['step_size_val'],
                           is_val=True,
                           transform_pre=transform_eval_pre,
                           transform_post=transform_post,
                           get_item_id=True,
                           )

# Evaluate on at most the first 100 clips. Capping by the dataset size keeps
# every index valid when fewer clips are available.
val_data = Subset(val_data, np.arange(min(100, len(val_data))))

In [None]:
# Batches for validation: fixed order, final partial batch kept.
val_loader = torch.utils.data.DataLoader(
    dataset=val_data,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    pin_memory=True,
    drop_last=False,
)

In [None]:
# Test set built from the test annotations with the evaluation-time
# transforms; items also carry their id (get_item_id=True).
test_data = VideoFolder(root=config['data_folder'],
                            json_file_input=config['json_data_test'],
                            json_file_labels=config['json_file_labels'],
                            clip_size=config['clip_size'],
                            nclips=config['nclips_val'],
                            step_size=config['step_size_val'],
                            is_val=True,
                            transform_pre=transform_eval_pre,
                            transform_post=transform_post,
                            get_item_id=True,
                            is_test=True,
                            )

# Use at most the first 100 clips. Capping by the dataset size keeps every
# index valid when fewer clips are available.
test_data = Subset(test_data, np.arange(min(100, len(test_data))))

In [None]:
# Batches for the test set: fixed order, final partial batch kept.
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=config['num_workers'],
    pin_memory=True,
    drop_last=False,
)

In [None]:
# Sanity check: the number of classes found by the dataset must equal the
# number of classes the model was built with.
print(" > Number of dataset classes : {}".format(len(train_data.dataset.classes)))
assert len(train_data.dataset.classes) == config["num_classes"]

 > Number of dataset classes : 174


In [None]:
# define loss function (criterion)
# Cross-entropy over the class logits, placed on the compute device.
criterion = nn.CrossEntropyLoss().to(device)

In [None]:
# define optimizer: SGD with momentum and weight decay taken from the config.
# `lr` and `last_lr` are kept as notebook variables because later cells read them.
lr, last_lr = config["lr"], config["last_lr"]
momentum, weight_decay = config['momentum'], config['weight_decay']
optimizer = torch.optim.SGD(
    model.parameters(),
    lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

In [None]:
# **************************Only Validate***********************
# In "validate" mode, evaluate the model on the test loader.
# NOTE(review): validate() is defined in a later cell, so on a fresh
# top-to-bottom run this branch would raise a NameError -- confirm the
# intended cell order.
if config["mode"] == "validate":
        validate(test_loader, model, criterion, train_data.dataset.classes_dict)
        print(" > Evaluation DONE !")

In [None]:
# set callbacks
# Learning-curve plotter writing into the run's plots/ folder.
plots_dir = os.path.join(save_dir, "plots")
plotter = PlotLearning(plots_dir, config["num_classes"])

# Halve the learning rate when the monitored loss has not improved for more
# than `patience` epochs.
lr_decayer = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=2,
    verbose=True,
)

val_loss = float('Inf')

In [None]:
# set end condition by num epochs; -1 means "effectively unlimited"
num_epochs = int(config["num_epochs"])
num_epochs = 999999 if num_epochs == -1 else num_epochs

In [None]:
print(" > Training is getting started...")
print(" > Training takes {} epochs.".format(num_epochs))
# Index of the first epoch of the training loop, taken from the config.
start_epoch = config["start_epoch"] #args.start_epoch if args.resume else 0

 > Training is getting started...
 > Training takes 1 epochs.


In [None]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: network called with a list of clip tensors.
        criterion: loss applied to ``(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only in the progress line.

    Returns:
        Tuple ``(average loss, average top-1, average top-5)`` over the epoch,
        as tracked by the AverageMeter instances.

    Reads the notebook-level ``config`` and ``device``.
    """
    batch_time, data_time = AverageMeter(), AverageMeter()
    losses, top1, top5 = AverageMeter(), AverageMeter(), AverageMeter()

    # switch to train mode
    model.train()

    tick = time.time()
    for step, (clips, labels) in enumerate(train_loader):

        # time spent waiting for the batch
        data_time.update(time.time() - tick)

        # With several clips per sample, split along axis 2 into chunks of
        # clip_size; otherwise the batch is the single column.
        if config['nclips_train'] > 1:
            columns = [chunk.to(device)
                       for chunk in clips.split(config['clip_size'], 2)]
        else:
            columns = [clips.to(device)]

        labels = labels.to(device)

        model.zero_grad()

        # forward pass and loss
        logits = model(columns)
        loss = criterion(logits, labels)

        # accuracy and running averages, weighted by the batch size
        prec1, prec5 = accuracy(logits.detach().cpu(), labels.detach().cpu(), topk=(1, 5))
        batch_size = clips.size(0)
        losses.update(loss.item(), batch_size)
        top1.update(prec1.item(), batch_size)
        top5.update(prec5.item(), batch_size)

        # backward pass and SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # time for the whole iteration
        batch_time.update(time.time() - tick)
        tick = time.time()

        if step % config["print_freq"] == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, step, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg

In [None]:
def validate(val_loader, model, criterion, class_to_idx=None):
    """Evaluate the model on a loader without gradient tracking.

    Args:
        val_loader: iterable yielding ``(input, target, item_id)`` batches.
        model: network called with a list of clip tensors.
        criterion: loss applied to ``(output, target)``.
        class_to_idx: class mapping forwarded to ``save_results`` and
            ``get_submission`` in "validate" mode.

    Returns:
        Tuple ``(average loss, average top-1, average top-5)``.

    Reads the notebook-level ``config`` and ``device``. In "validate" mode the
    logits, features, targets and item ids of all batches are collected and
    handed to ``save_results`` / ``get_submission``.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    logits_matrix = []
    features_matrix = []
    targets_list = []
    item_id_list = []

    # The model returns a (logits, features) pair only when features are
    # requested; unpacking a bare logits tensor would be wrong.
    want_features = bool(config['save_features'])

    end = time.time()
    with torch.no_grad():
        for i, (clips, target, item_id) in enumerate(val_loader):

            if config['nclips_val'] > 1:
                input_var = [chunk.to(device)
                             for chunk in clips.split(config['clip_size'], 2)]
            else:
                input_var = [clips.to(device)]

            target = target.to(device)

            # compute output and loss
            if want_features:
                output, features = model(input_var, True)
            else:
                output, features = model(input_var), None
            loss = criterion(output, target)

            if config["mode"] == 'validate':
                logits_matrix.append(output.cpu().data.numpy())
                if features is not None:
                    features_matrix.append(features.cpu().data.numpy())
                targets_list.append(target.cpu().numpy())
                item_id_list.append(item_id)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.detach().cpu(), target.detach().cpu(), topk=(1, 5))
            losses.update(loss.item(), clips.size(0))
            top1.update(prec1.item(), clips.size(0))
            top5.update(prec5.item(), clips.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config["print_freq"] == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    if config["mode"] == 'validate':
        logits_matrix = np.concatenate(logits_matrix)
        # Without saved features the list stays empty and is passed on as is.
        # NOTE(review): confirm save_results accepts an empty feature list
        # when config['save_features'] is off.
        if features_matrix:
            features_matrix = np.concatenate(features_matrix)
        targets_list = np.concatenate(targets_list)
        item_id_list = np.concatenate(item_id_list)
        print(logits_matrix.shape, targets_list.shape, item_id_list.shape)
        save_results(logits_matrix, features_matrix, targets_list,
                     item_id_list, class_to_idx, config)
        get_submission(logits_matrix, item_id_list, class_to_idx, config)
    return losses.avg, top1.avg, top5.avg

In [None]:
# Main loop: train, validate, adapt the learning rate, plot and checkpoint
# once per epoch.
for epoch in range(start_epoch, num_epochs):

    # Learning rates currently held by the optimizer (they change whenever the
    # plateau scheduler fires), not the initial `lr` constant.
    lrs = [params['lr'] for params in optimizer.param_groups]
    print(" > Current LR(s) -- {}".format(lrs))
    current_lr = float(np.max(lrs))
    if current_lr < last_lr and last_lr > 0:
        print(" > Training is DONE by learning rate {}".format(last_lr))
        break

    # train for one epoch
    train_loss, train_top1, train_top5 = train(
        train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    val_loss, val_top1, val_top5 = validate(val_loader, model, criterion)

    # set learning rate (the scheduler counts epochs itself; passing the epoch
    # explicitly is deprecated)
    lr_decayer.step(val_loss)

    # plot learning; accuracies are rescaled from percent to fractions
    plotter_dict = {}
    plotter_dict['loss'] = train_loss
    plotter_dict['val_loss'] = val_loss
    plotter_dict['acc'] = train_top1 / 100
    plotter_dict['val_acc'] = val_top1 / 100
    plotter_dict['learning_rate'] = current_lr
    plotter.plot(plotter_dict)

    print(" > Validation loss after epoch {} = {}".format(epoch, val_loss))

    # remember best loss and save the checkpoint
    is_best = val_loss < best_loss
    best_loss = min(val_loss, best_loss)
    save_checkpoint({
        'epoch': epoch + 1,
        'arch': "Conv4Col",
        'state_dict': model.state_dict(),
        'best_loss': best_loss,
    }, is_best, config)

 > Current LR(s) -- [0.008]


  imgs = [f.to_rgb().to_nd_array() for f in reader.decode(video=0)]


Train loop: input var size:1
Train loop: input size:torch.Size([10, 3, 60, 64, 64])
Train loop: target size:torch.Size([10])
0. Initial Input list size:1
1. Initial Input size:torch.Size([10, 3, 60, 64, 64])
2. Number of columns:1
3. Inputs size before permute:torch.Size([10, 3, 60, 64, 64])
3. Inputs size after permute:torch.Size([10, 60, 3, 64, 64])
DeepSITH x1 inp: torch.Size([10, 60, 3, 64, 64])
DeepSITH x1 x: torch.Size([10, 60, 3, 64, 64])
DeepSITH encoded seq size: 60
DeepSITH encoded seq size stacked: torch.Size([10, 60, 123008])
DeepSITH encoded seq size unsqueezed: torch.Size([10, 1, 60, 123008])
DeepSITH encoded seq size permuted: torch.Size([10, 1, 123008, 60])
Enumerate: 0
***
DeepSITHCORE x1: torch.Size([10, 5, 123008, 60])
DeepSITHCORE x2: torch.Size([10, 60, 5, 123008])
DeepSITHCORE x3: torch.Size([10, 60, 615040])
DeepSITHCORE x4: torch.Size([10, 60, 10])
DeepSITH x2: torch.Size([10, 60, 10])
DeepSITH x3: torch.Size([10, 60, 10])
DeepSITH x4: torch.Size([10, 1, 10, 60]