<a href="https://colab.research.google.com/github/mldl2020/FirstPersonActionRecognition/blob/master/EgoRNN_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install the notebook's dependencies. torch and torchvision are pinned to
# exact versions; Pillow-SIMD, tqdm and wandb are installed unpinned.
!pip3 install 'torch==1.4.0'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'
!pip3 install 'wandb'
# os._exit terminates the kernel process immediately with status 0, so nothing
# after this line runs in the current session. Presumably this forces a
# runtime restart so the freshly installed packages get picked up.
import os 
os._exit(00)

In [0]:
# Print the status of the GPU(s) visible to this runtime.
!nvidia-smi

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader, random_split
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import resnet34

from PIL import Image
from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import wandb
from datetime import datetime

In [2]:
# Fetch the project code and the dataset, skipping anything already present.
import os 
# Clone the project repository and copy its top-level .py modules into the
# working directory so later cells can import them directly.
# Note: the copy only happens together with the clone.
if not os.path.isdir('./FirstPersonActionRecognition'):
    !git clone https://github.com/mldl2020/FirstPersonActionRecognition.git
    !cp ./FirstPersonActionRecognition/*.py ./

# Clone the GTEA61 repository (used as `gtea_root` by the dataset cells).
if not os.path.isdir('./GTEA61'):
    !git clone https://github.com/MauriVass/GTEA61

# Directory where the training functions write their best checkpoints
# (matches the "models_dir" entry of the configs below).
if not os.path.isdir("models"):
    os.mkdir("models")

Cloning into 'FirstPersonActionRecognition'...
remote: Enumerating objects: 46, done.[K
remote: Counting objects:   2% (1/46)[Kremote: Counting objects:   4% (2/46)[Kremote: Counting objects:   6% (3/46)[Kremote: Counting objects:   8% (4/46)[Kremote: Counting objects:  10% (5/46)[Kremote: Counting objects:  13% (6/46)[Kremote: Counting objects:  15% (7/46)[Kremote: Counting objects:  17% (8/46)[Kremote: Counting objects:  19% (9/46)[Kremote: Counting objects:  21% (10/46)[Kremote: Counting objects:  23% (11/46)[Kremote: Counting objects:  26% (12/46)[Kremote: Counting objects:  28% (13/46)[Kremote: Counting objects:  30% (14/46)[Kremote: Counting objects:  32% (15/46)[Kremote: Counting objects:  34% (16/46)[Kremote: Counting objects:  36% (17/46)[Kremote: Counting objects:  39% (18/46)[Kremote: Counting objects:  41% (19/46)[Kremote: Counting objects:  43% (20/46)[Kremote: Counting objects:  45% (21/46)[Kremote: Counting objects:  47% (22/46)

In [0]:
# !wandb login ***

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[32mSuccessfully logged in to Weights & Biases![0m


# RGB Training

In [0]:
# Hyperparameters for the two RGB training stages, wrapped in the project's
# `Config` class and read as attributes by later cells (e.g. `config.lr`).
# NOTE(review): later cells also read `config.device`, which is not set in
# these dicts -- presumably `Config` supplies it; confirm in utils.py.
# NOTE(review): the "optimizer" entry is not read by the training code in this
# notebook (the training cells construct Adam directly); it is only passed
# along to wandb.init as part of the config.
from utils import Config

# Stage 1: selected when config.stage == 1 in prepare_training_rgb.
config_stage1 = Config({"stage": 1,
                        "num_classes": 61,
                        "batch_size": 32,
                        "lstm_mem_size": 512,
                        "lr": 1e-3,
                        "optimizer": "adam",
                        "epochs": 200,
                        "decay_steps": [25, 75, 150],
                        "decay_factor": 0.1,
                        "weight_decay": 5e-5,
                        "val_frequency": 3,
                        "models_dir": "models",
                        "seq_len": 7,
                        "training_user_split": [1, 3, 4],
                        "val_user_split": [2]})

# Stage 2: starts from the best stage-1 checkpoint (see prepare_training_rgb);
# differs from stage 1 in lr, epochs and decay_steps.
config_stage2 = Config({"stage": 2,
                        "num_classes": 61,
                        "batch_size": 32,
                        "lstm_mem_size": 512,
                        "lr": 1e-4,
                        "optimizer": "adam",
                        "epochs": 150,
                        "decay_steps": [25, 75],
                        "decay_factor": 0.1,
                        "weight_decay": 5e-5,
                        "val_frequency": 3,
                        "models_dir": "models",
                        "seq_len": 7,
                        "training_user_split": [1, 3, 4],
                        "val_user_split": [2]})

In [0]:
def prepare_training_rgb(config):
    """Build the RGB attention model and pick the parameters to optimise.

    When ``config.stage == 1`` a fresh ``attentionModel`` is created and only
    ``model.lstm_cell`` and ``model.classifier`` are made trainable.

    For any other stage the weights saved at
    ``<config.models_dir>/best_model_rgb_state_dict.pth`` are loaded first and,
    in addition, ``conv1``/``conv2`` of ``model.resNet.layer4[0..2]`` and
    ``model.resNet.fc`` are made trainable and switched to train mode.

    Everything else is frozen (``requires_grad = False``) and the model as a
    whole is left in eval mode; ``lstm_cell`` and ``classifier`` are put in
    train mode by the training loop, not here.

    Args:
        config: object exposing ``stage``, ``num_classes``, ``lstm_mem_size``
            and (for stage != 1) ``models_dir``.

    Returns:
        ``(model, train_params)`` where ``train_params`` lists the unfrozen
        parameters in the order: layer4 convs, ``resNet.fc`` (stage != 1 only),
        then ``lstm_cell``, then ``classifier``.
    """
    fine_tune_backbone = config.stage != 1

    model = attentionModel(num_classes=config.num_classes, mem_size=config.lstm_mem_size)
    if fine_tune_backbone:
        stage1_dict = config.models_dir + '/best_model_rgb_state_dict.pth'
        # Load onto the CPU so a checkpoint written on a GPU can also be
        # restored on a CPU-only runtime; load_state_dict copies the values
        # into the model's own tensors, so the final device is unaffected.
        model.load_state_dict(torch.load(stage1_dict, map_location="cpu"))

    # Start from a fully frozen model in eval mode.
    model.train(False)
    for params in model.parameters():
        params.requires_grad = False

    # Backbone sub-modules that are fine-tuned after stage 1.
    backbone_modules = []
    if fine_tune_backbone:
        for block_idx in range(3):
            block = model.resNet.layer4[block_idx]
            backbone_modules += [block.conv1, block.conv2]
        backbone_modules.append(model.resNet.fc)
        for module in backbone_modules:
            module.train(True)

    train_params = []
    for module in backbone_modules + [model.lstm_cell, model.classifier]:
        for params in module.parameters():
            params.requires_grad = True
            train_params.append(params)

    return model, train_params

In [0]:
def training_rgb(model, config, train_loader, val_loader):
    """Train the RGB model, validating every ``config.val_frequency`` epochs.

    Besides its arguments this function reads the notebook-level globals
    ``loss_fn``, ``optimizer_fn`` and ``optim_scheduler`` and logs to the
    active wandb run, so those must be set up (and ``wandb.init`` called)
    before invoking it.

    Whenever validation accuracy improves, the model's state dict is written
    to ``<config.models_dir>/best_model_rgb_state_dict.pth``.

    Args:
        model: model exposing ``lstm_cell``, ``classifier`` and ``resNet``;
            calling it returns a tuple whose first element is the class scores.
        config: object exposing ``epochs``, ``stage``, ``device``,
            ``val_frequency`` and ``models_dir``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.

    Returns:
        ``(train, val)``: lists of ``(accuracy, loss)`` tuples, one per epoch
        for ``train`` and one per validated epoch for ``val``. Accuracies are
        fractions in [0, 1]; losses are capped at 6.
    """
    wandb.watch(model, log="all")
    # Cap applied to the losses that are logged and returned (the printed
    # values are uncapped). NOTE: the wandb key "eopch" is misspelled but is
    # kept as-is so existing dashboards keep working.
    max_loss = 6
    best_accuracy = 0
    train = []
    val = []
    for epoch in range(config.epochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # Validation calls model.eval() on the whole model, so every epoch
        # switches just the sub-modules being optimised back to train mode.
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if config.stage == 2:
            for block_idx in range(3):
                block = model.resNet.layer4[block_idx]
                block.conv1.train(True)
                block.conv2.train(True)
            model.resNet.fc.train(True)
        for inputs, labels in train_loader:
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            trainSamples += inputs.size(0)
            # Swap the first two dimensions before the forward pass --
            # presumably (batch, seq, ...) -> (seq, batch, ...) as expected by
            # the model; confirm against the dataset/model code.
            inputs = inputs.permute(1, 0, 2, 3, 4).to(config.device)
            labels = labels.to(config.device)
            output_label, _ = model(inputs)
            loss = loss_fn(output_label, labels)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.detach(), 1)
            numCorrTrain += torch.sum(predicted == labels).item()
            epoch_loss += loss.item()
        # The scheduler is stepped once per epoch.
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples)

        print('Train: Epoch = {}/{} | Loss = {} | Accuracy = {}'.format(epoch + 1, config.epochs, avg_loss, trainAccuracy))

        avg_loss_normalized = avg_loss if avg_loss < max_loss else max_loss
        train.append((trainAccuracy, avg_loss_normalized))
        wandb.log({"train_loss": avg_loss_normalized,
                   "train_accuracy": trainAccuracy,
                   "eopch": (epoch + 1)})

        if (epoch + 1) % config.val_frequency == 0:
            with torch.no_grad():
                model.eval()
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                for inputs, labels in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputs = inputs.permute(1, 0, 2, 3, 4).to(config.device)
                    labels = labels.to(config.device)
                    output_label, _ = model(inputs)
                    val_loss = loss_fn(output_label, labels)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label, 1)
                    numCorr += torch.sum(predicted == labels).item()
            val_accuracy = (numCorr / val_samples)
            avg_val_loss = val_loss_epoch / val_iter
            print('*****  Val: Epoch = {} | Loss {} | Accuracy = {} *****'.format(epoch + 1, avg_val_loss, val_accuracy))

            avg_val_loss_normalized = avg_val_loss if avg_val_loss < max_loss else max_loss
            val.append((val_accuracy, avg_val_loss_normalized))
            wandb.log({"valid_loss": avg_val_loss_normalized,
                       "valid_accuracy": val_accuracy,
                       "eopch": (epoch + 1)})

            # Keep only the checkpoint with the best validation accuracy.
            if val_accuracy > best_accuracy:
                save_path_model = (config.models_dir + '/best_model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                best_accuracy = val_accuracy
    wandb.run.summary["best_valid_accuracy"] = best_accuracy
    return train, val

Prepare Datasets

In [0]:
from gtea_dataset import gtea61
from spatial_transforms import *
from objectAttentionModelConvLSTM import *

# Training-time augmentation: rescale, random horizontal flip, multi-scale
# corner crop to 224, then tensor conversion and normalisation.
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])

gtea_root = "GTEA61"
# The loaders are built from the stage-1 config and reused for stage 2.
config = config_stage1
train_dataset = gtea61("rgb", gtea_root, split="train", user_split=config.training_user_split, seq_len_rgb=config.seq_len, transform_rgb=spatial_transform, preload=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=4, pin_memory=True)

# Validation uses a deterministic centre crop and no augmentation.
val_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])
val_dataset = gtea61("rgb", gtea_root, split="test", user_split=config.val_user_split, seq_len_rgb=config.seq_len, transform_rgb=val_transform, preload=True)
# Evaluation does not benefit from shuffling; keep a fixed order, as the
# flow validation loader does.
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=4, pin_memory=True)

print(f"Train dataset: {len(train_dataset)} videos")
print(f"Valid dataset: {len(val_dataset)} videos")

Train Stage 1

In [0]:
# Stage 1 run: build the model with only lstm_cell/classifier trainable
# (see prepare_training_rgb) and train it.
config = config_stage1

model, train_params = prepare_training_rgb(config)
model.to(config.device)

# loss_fn, optimizer_fn and optim_scheduler are read as globals inside
# training_rgb, so they must be (re)defined before each training call.
loss_fn = nn.CrossEntropyLoss()
optimizer_fn = torch.optim.Adam(train_params, lr=config.lr, weight_decay=config.weight_decay, eps=1e-4)
# The learning rate is multiplied by config.decay_factor at each epoch listed
# in config.decay_steps.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=config.decay_steps, gamma=config.decay_factor)

# Start a wandb run named after the start time, stage, sequence length and
# training user split.
training_time = datetime.now().strftime("%d-%b_%H-%M")
wandb.init(config=config, group=f"{config.seq_len}f", name=f"{training_time} Stage1, {config.seq_len}f, T{str(config.training_user_split).replace(' ', '')}", project="mldl-fpar")

train_rgb, val_rgb = training_rgb(model, config, train_loader, val_loader)

Train Stage 2

In [0]:
# Stage 2 run: reload the best stage-1 checkpoint and additionally fine-tune
# the layer4 convs and fc of the ResNet backbone (see prepare_training_rgb).
# train_loader / val_loader are the ones built in the RGB dataset cell.
config = config_stage2

model, train_params2 = prepare_training_rgb(config)
# training_rgb also sets these two to train mode at the start of every epoch.
model.lstm_cell.train(True)
model.classifier.train(True)
model.to(config.device)

# loss_fn, optimizer_fn and optim_scheduler are read as globals inside
# training_rgb; redefining them here replaces the stage-1 objects.
loss_fn = nn.CrossEntropyLoss()
optimizer_fn = torch.optim.Adam(train_params2, lr=config.lr, weight_decay=config.weight_decay, eps=1e-4)
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=config.decay_steps, gamma=config.decay_factor)


training_time = datetime.now().strftime("%d-%b_%H-%M")
wandb.init(config=config, group=f"{config.seq_len}f", name=f"{training_time} Stage2, {config.seq_len}f, T{str(config.training_user_split).replace(' ', '')}", project="mldl-fpar")

# Overwrites the train_rgb / val_rgb histories returned by the stage-1 run.
train_rgb, val_rgb = training_rgb(model, config, train_loader, val_loader)

# Flow Training

In [0]:
# Hyperparameters for training the optical-flow network.
# The flow training cell builds an SGD optimizer from lr / sgd_momentum /
# weight_decay and a MultiStepLR scheduler from decay_steps / decay_factor.
# seq_len_flow also fixes the network's input channels (2 * seq_len_flow).
# NOTE(review): the "optimizer" and "lstm_mem_size" entries are not read by
# the flow cells in this notebook.
config_flow = Config({"stage": "flow",
                      "num_classes": 61,
                      "batch_size": 32,
                      "lstm_mem_size": 512,
                      "lr": 1e-2,
                      "optimizer": "sgd",
                      "sgd_momentum": 0.9,
                      "epochs": 750,
                      "decay_steps": [150, 300, 500],
                      "decay_factor": 0.5,
                      "weight_decay": 5e-4,
                      "val_frequency": 3,
                      "models_dir": "models",
                      "seq_len_flow": 5,
                      "training_user_split": [1, 3, 4],
                      "val_user_split": [2]})

In [0]:
def training_flow(model, config, train_loader, val_loader):
    """Train the flow model, validating every ``config.val_frequency`` epochs.

    Besides its arguments this function reads the notebook-level globals
    ``loss_fn``, ``optimizer_fn`` and ``optim_scheduler`` and logs to the
    active wandb run, so those must be set up (and ``wandb.init`` called)
    before invoking it.

    Whenever validation accuracy improves, the model's state dict is written
    to ``<config.models_dir>/best_model_flow_state_dict.pth``.

    Args:
        model: network whose call returns a tuple with the class scores as
            first element.
        config: object exposing ``epochs``, ``device``, ``val_frequency`` and
            ``models_dir``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.

    Returns:
        ``(train, val)``: lists of ``(accuracy, loss)`` tuples, one per epoch
        for ``train`` and one per validated epoch for ``val``. Accuracies are
        fractions in [0, 1]; losses are capped at 6.
    """
    wandb.watch(model, log="all")
    # Cap applied to the losses that are logged and returned (the printed
    # values are uncapped). NOTE: the wandb key "eopch" is misspelled but is
    # kept as-is so existing dashboards keep working.
    max_loss = 6
    best_accuracy = 0
    train = []
    val = []
    for epoch in range(config.epochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # Validation leaves the model in eval mode; switch back every epoch.
        model.train(True)
        for inputs, labels in train_loader:
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            trainSamples += inputs.size(0)
            inputs = inputs.to(config.device)
            labels = labels.to(config.device)
            output_label, _ = model(inputs)
            loss = loss_fn(output_label, labels)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.detach(), 1)
            numCorrTrain += torch.sum(predicted == labels).item()
            epoch_loss += loss.item()
        # The scheduler is stepped once per epoch.
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples)
        print('Train: Epoch = {}/{} | Loss = {} | Accuracy = {}'.format(epoch + 1, config.epochs, avg_loss, trainAccuracy))

        avg_loss_normalized = avg_loss if avg_loss < max_loss else max_loss
        train.append((trainAccuracy, avg_loss_normalized))
        wandb.log({"train_loss": avg_loss_normalized,
                   "train_accuracy": trainAccuracy,
                   "eopch": (epoch + 1)})

        if (epoch + 1) % config.val_frequency == 0:
            model.eval()
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputs = inputs.to(config.device)
                    labels = labels.to(config.device)
                    output_label, _ = model(inputs)
                    val_loss = loss_fn(output_label, labels)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label, 1)
                    numCorr += torch.sum(predicted == labels).item()
            val_accuracy = (numCorr / val_samples)
            avg_val_loss = val_loss_epoch / val_iter
            print('*****  Validation: Epoch = {} | Loss = {} | Accuracy = {}  *****'.format(epoch + 1, avg_val_loss, val_accuracy))
            avg_val_loss_normalized = avg_val_loss if avg_val_loss < max_loss else max_loss
            val.append((val_accuracy, avg_val_loss_normalized))
            wandb.log({"valid_loss": avg_val_loss_normalized,
                       "valid_accuracy": val_accuracy,
                       "eopch": (epoch + 1)})
            # Keep only the checkpoint with the best validation accuracy.
            if val_accuracy > best_accuracy:
                save_path_model = os.path.join(config.models_dir, "best_model_flow_state_dict.pth")
                torch.save(model.state_dict(), save_path_model)
                best_accuracy = val_accuracy
    wandb.run.summary["best_valid_accuracy"] = best_accuracy
    return train, val

Prepare Dataset

In [8]:
from gtea_dataset import gtea61
from spatial_transforms import *
from objectAttentionModelConvLSTM import *

# Same augmentation pipeline as the RGB cell, here passed as transform_flow.
# NOTE(review): `normalize` uses three-channel mean/std values; whether that
# is appropriate for flow frames depends on how gtea61 applies transform_flow
# -- confirm in gtea_dataset.py / spatial_transforms.py.
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])

gtea_root = "GTEA61"
# Rebinds the global `config`, train_dataset/val_dataset and both loaders to
# their flow versions, replacing the RGB ones.
config = config_flow
train_dataset = gtea61("flow", gtea_root, split="train", user_split=config.training_user_split, seq_len_flow=config.seq_len_flow, transform_flow=spatial_transform, preload=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, sampler=None, num_workers=4, pin_memory=True)

# Validation: deterministic centre crop, fixed sample order.
val_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])
val_dataset = gtea61("flow", gtea_root, split="test", user_split=config.val_user_split, seq_len_flow=config.seq_len_flow, transform_flow=val_transform, preload=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2, pin_memory=True)

print(f"Train dataset: {len(train_dataset)} videos")
print(f"Valid dataset: {len(val_dataset)} videos")

Train dataset: 341 videos
Valid dataset: 116 videos


Flow Train

In [0]:
from flow_resnet import flow_resnet34

# Build the flow network with 2 * seq_len_flow input channels and train all of
# its parameters. The first positional argument (True) is presumably the
# `pretrained` flag -- confirm in flow_resnet.py.
model = flow_resnet34(True, channels=2 * config.seq_len_flow, num_classes=config.num_classes)
model.train(True)
train_params = list(model.parameters())
model.to(config.device)

# loss_fn, optimizer_fn and optim_scheduler are read as globals inside
# training_flow.
loss_fn = nn.CrossEntropyLoss()
optimizer_fn = torch.optim.SGD(train_params, lr=config.lr, momentum=config.sgd_momentum, weight_decay=config.weight_decay)
# The learning rate is multiplied by config.decay_factor at each epoch listed
# in config.decay_steps.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=config.decay_steps, gamma=config.decay_factor)

training_time = datetime.now().strftime("%d-%b_%H-%M")
wandb.init(config=config, group="flow", name=f"{training_time} Flow, T{str(config.training_user_split).replace(' ', '')}", project="mldl-fpar")

train_flow, val_flow = training_flow(model, config, train_loader, val_loader)

# TwoStream Training

In [0]:
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)
from twoStreamModel import *
from torch.utils.data.sampler import WeightedRandomSampler
import sys

In [0]:
# Hyperparameters for the joint (RGB + flow) training.
# The two-stream training cell uses decay_step / decay_factor with StepLR and
# lr / sgd_momentum / weight_decay with SGD.
# NOTE(review): "optimizer" says "adam", but the two-stream training cell
# constructs torch.optim.SGD; the entry is not read by the code in this
# notebook.
config_two_stream = Config({"stage": "two_stream",
                            "num_classes": 61,
                            "batch_size": 32,
                            "lstm_mem_size": 512,
                            "lr": 1e-2,
                            "optimizer": "adam",
                            "epochs": 250,
                            "decay_step": 1,
                            "decay_factor": 0.99,
                            "weight_decay": 5e-4,
                            "val_frequency": 3,
                            "sgd_momentum": 0.9,
                            "models_dir": "models",
                            "seq_len": 7,
                            "seq_len_flow": 5,
                            "training_user_split": [1, 3, 4],
                            "val_user_split": [2],
                            "two_stream_method": "average"})  # two_stream_method is either "average" or "joint"

In [0]:
def prepare_training_two_streams(config):
    """Create the two-stream model and split its trainable parameters.

    The model is built from the best RGB and flow checkpoints found in
    ``config.models_dir``, fully frozen and put in eval mode. Afterwards the
    following parameters are re-enabled for training:

    * ``train_params``: ``classifier``, ``rgb_model.lstm_cell``, ``conv1`` and
      ``conv2`` of ``rgb_model.resNet.layer4[0..2]`` and ``rgb_model.resNet.fc``
      (in that order);
    * ``base_params``: every parameter of ``flow_model.layer4``.

    Returns:
        ``(model, train_params, base_params)``
    """
    model = twoStreamAttentionModel(
        flow_model_path=os.path.join(config.models_dir, "best_model_flow_state_dict.pth"),
        rgb_model_path=os.path.join(config.models_dir, "best_model_rgb_state_dict.pth"),
        seq_len_flow=config.seq_len_flow,
        mem_size=config.lstm_mem_size,
        num_classes=config.num_classes,
        join_method=config.two_stream_method,
    )

    # Freeze everything first, then selectively unfreeze below.
    for weight in model.parameters():
        weight.requires_grad = False
    model.train(False)

    # Modules whose parameters end up in train_params, in output order.
    rgb_stream = model.rgb_model
    unfrozen_modules = [model.classifier, rgb_stream.lstm_cell]
    for block_idx in range(3):
        res_block = rgb_stream.resNet.layer4[block_idx]
        unfrozen_modules.extend((res_block.conv1, res_block.conv2))
    unfrozen_modules.append(rgb_stream.resNet.fc)

    train_params = []
    for module in unfrozen_modules:
        for weight in module.parameters():
            weight.requires_grad = True
            train_params.append(weight)

    # The flow stream's last layer gets its own parameter list.
    base_params = list(model.flow_model.layer4.parameters())
    for weight in base_params:
        weight.requires_grad = True

    return model, train_params, base_params

def training_two_streams(model, config, train_dataset, train_loader, val_dataset, val_loader):
    """Train the two-stream model, validating every ``config.val_frequency`` epochs.

    Besides its arguments this function reads the notebook-level globals
    ``loss_fn``, ``optimizer_fn`` and ``optim_scheduler`` and logs to the
    active wandb run, so those must be set up (and ``wandb.init`` called)
    before invoking it.

    Whenever validation accuracy improves, the model's state dict is written
    to ``<config.models_dir>/best_model_twoStream_state_dict.pth``.

    Unlike ``training_rgb`` / ``training_flow``, accuracies here are computed
    against the dataset sizes and reported as percentages (0-100), losses are
    logged uncapped, and nothing is returned.

    Args:
        model: two-stream model called as ``model(input_flow, input_rgb)`` and
            exposing ``classifier`` and ``flow_model.layer4``.
        config: object exposing ``epochs``, ``device``, ``val_frequency`` and
            ``models_dir``.
        train_dataset, val_dataset: only their lengths are used.
        train_loader, val_loader: iterables of
            ``(input_flow, input_rgb, labels)`` batches.
    """
    train_samples = len(train_dataset)
    val_samples = len(val_dataset)
    best_accuracy = 0
    for epoch in range(config.epochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        # Validation calls model.eval() on the whole model; only these two
        # sub-modules are switched back to train mode each epoch.
        model.classifier.train(True)
        model.flow_model.layer4.train(True)
        for input_flow, input_rgb, labels in train_loader:
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            input_flow = input_flow.to(config.device)
            # Swap the first two dimensions of the RGB batch -- presumably
            # (batch, seq, ...) -> (seq, batch, ...); confirm against the model.
            input_rgb = input_rgb.permute(1, 0, 2, 3, 4).to(config.device)
            labels = labels.to(config.device)
            output_label = model(input_flow, input_rgb)
            # nn.functional is used instead of a bare `F`, which this notebook
            # never imports explicitly.
            loss = loss_fn(nn.functional.log_softmax(output_label, dim=1), labels)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.detach(), 1)
            numCorrTrain += torch.sum(predicted == labels).item()
            epoch_loss += loss.item()
        # The scheduler is stepped once per epoch.
        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / train_samples) * 100
        print('Train: Epoch = {}/{} | Loss = {} | Accuracy = {}'.format(epoch + 1, config.epochs, avg_loss, trainAccuracy))
        # NOTE: the wandb key "eopch" is misspelled but kept as-is so existing
        # dashboards keep working.
        wandb.log({"train_loss": avg_loss,
                   "train_accuracy": trainAccuracy,
                   "eopch": (epoch + 1)})
        if (epoch + 1) % config.val_frequency == 0:
            model.eval()
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            # No gradients are needed for evaluation; skipping graph
            # construction saves memory and time.
            with torch.no_grad():
                for input_flow, input_rgb, labels in val_loader:
                    val_iter += 1
                    input_flow = input_flow.to(config.device)
                    input_rgb = input_rgb.permute(1, 0, 2, 3, 4).to(config.device)
                    labels = labels.to(config.device)
                    output_label = model(input_flow, input_rgb)
                    val_loss = loss_fn(nn.functional.log_softmax(output_label, dim=1), labels)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label, 1)
                    numCorr += torch.sum(predicted == labels).item()
            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('*****  Validation: Epoch = {} | Loss = {} | Accuracy = {}  *****'.format(epoch + 1, avg_val_loss, val_accuracy))
            wandb.log({"valid_loss": avg_val_loss,
                       "valid_accuracy": val_accuracy,
                       "eopch": (epoch + 1)})
            # Keep only the checkpoint with the best validation accuracy.
            if val_accuracy > best_accuracy:
                save_path_model = os.path.join(config.models_dir, 'best_model_twoStream_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                best_accuracy = val_accuracy
    wandb.run.summary["best_valid_accuracy"] = best_accuracy

Prepare Dataset

In [0]:
from gtea_dataset import gtea61
from spatial_transforms import *
from objectAttentionModelConvLSTM import *

# Training-time augmentation, shared by the RGB and flow inputs.
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])

gtea_root = "GTEA61"
# Rebinds the global `config`, datasets and loaders to their two-stream
# ("joint") versions.
config = config_two_stream
train_dataset = gtea61("joint", gtea_root, split="train", user_split=config.training_user_split, seq_len_rgb=config.seq_len, seq_len_flow=config.seq_len_flow, transform_rgb=spatial_transform, transform_flow=spatial_transform, preload=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=4, pin_memory=True)

# Validation uses a deterministic centre crop and no augmentation.
val_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])
val_dataset = gtea61("joint", gtea_root, split="test", user_split=config.val_user_split, seq_len_rgb=config.seq_len, seq_len_flow=config.seq_len_flow, transform_rgb=val_transform, transform_flow=val_transform, preload=True)
# Evaluation does not benefit from shuffling; keep a fixed order, as the
# flow validation loader does.
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=4, pin_memory=True)

2Stream Training

In [0]:
# Joint training of the two-stream model. Requires the best RGB and flow
# checkpoints to exist in config.models_dir (see prepare_training_two_streams).
config = config_two_stream
model, train_params, base_params = prepare_training_two_streams(config)
model.to(config.device)

# loss_fn, optimizer_fn and optim_scheduler are read as globals inside
# training_two_streams.
loss_fn = nn.CrossEntropyLoss()
# Two parameter groups: train_params use config.lr, while base_params (the
# flow model's layer4) use a fixed initial learning rate of 1e-4.
# NOTE(review): SGD is used here although config["optimizer"] says "adam".
optimizer_fn = torch.optim.SGD([{'params': train_params}, {'params': base_params, 'lr': 1e-4}],
                               lr=config.lr, momentum=config.sgd_momentum, weight_decay=config.weight_decay)
# Every config.decay_step epochs the learning rates are multiplied by
# config.decay_factor.
optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, step_size=config.decay_step, gamma=config.decay_factor)

training_time = datetime.now().strftime("%d-%b_%H-%M")
wandb.init(config=config, group=f"{config.seq_len}f", name=f"{training_time} 2Stream, {config.seq_len}f, T{str(config.training_user_split).replace(' ', '')}", project="mldl-fpar")

training_two_streams(model, config, train_dataset, train_loader, val_dataset, val_loader)

Generating Activation Maps

In [0]:
# Minimal configuration for rebuilding the RGB attention model when
# generating activation maps: only num_classes and lstm_mem_size are read
# from it by the cell below.
config_gen_map = Config({"stage": "rgb",
                            "num_classes": 61,
                            "lstm_mem_size": 512})

In [41]:
from objectAttentionModelConvLSTM import *
from attentionMapModel import attentionMap
import cv2

# Path to the weights of the pre-trained model
model_state_dict = 'Models/best_model_state_dict_rgb_split2.pth' 

# Use this section's own config for the device instead of whichever `config`
# an earlier cell happened to leave behind.
gen_map_device = config_gen_map.device

# Rebuild the RGB attention model, restore its weights and wrap its ResNet
# backbone in the attention-map model.
model = attentionModel(num_classes=config_gen_map.num_classes, mem_size=config_gen_map.lstm_mem_size)
# map_location lets a checkpoint saved on a GPU be restored on a CPU-only
# runtime; load_state_dict copies the values into the model's own tensors.
model.load_state_dict(torch.load(model_state_dict, map_location="cpu"))
model_backbone = model.resNet
attentionMapModel = attentionMap(model_backbone).to(gen_map_device)
attentionMapModel.train(False)
for params in attentionMapModel.parameters():
    params.requires_grad = False

normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
# preprocess1 produces the resized/cropped PIL image (also used as the image
# passed to the attention-map model); preprocess2 turns it into the
# normalised network input. transforms.Resize replaces the deprecated
# transforms.Scale.
preprocess1 = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])

preprocess2 = transforms.Compose([transforms.ToTensor(), normalize])

path_image = 'S1/close_chocolate/1/rgb/rgb0001.png'
fl_name_in = 'GTEA61/processed_frames2/' + path_image

# The output keeps the input's file name and goes into `output_folder`.
output_folder = 'Images'
os.makedirs(output_folder, exist_ok=True)
fl_name_out = output_folder + '/' + os.path.basename(path_image)

img_pil = Image.open(fl_name_in)
img_pil1 = preprocess1(img_pil)
img_size = img_pil1.size
size_upsample = (img_size[0], img_size[1])
img_tensor = preprocess2(img_pil1)
# Add a batch dimension; wrapping in the legacy `Variable` is unnecessary
# since tensors carry autograd state themselves.
img_variable = img_tensor.unsqueeze(0).to(gen_map_device)
img = np.asarray(img_pil1)
attentionMap_image = attentionMapModel(img_variable, img, size_upsample)
cv2.imwrite(fl_name_out, attentionMap_image)

  "please use transforms.Resize instead.")


True