In [16]:
import os
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import resnet34

from PIL import Image
from tqdm import tqdm

from gtea_dataset import GTEA61, GTEA61_flow, GTEA61_2Stream
from objectAttentionModelConvLSTM import attentionModel
from flow_resnet import flow_resnet34
from twoStreamModel import twoStreamAttentionModel
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)

In [11]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 61 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 32     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 0.001            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 4e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 300      # Total number of training epochs (iterations over dataset)
STEP_SIZE = [25, 75, 150] # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down
MEM_SIZE = 512
STACK_SIZE = 5
SEQ_LEN = 7

LOG_FREQUENCY = 20

model_folder = 'saved_models'

In [4]:
# Data loader
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])

In [5]:
DATA_DIR = '/home/utente/Scaricati/program/ML_DL/FPAR/GTEA61'

# Prepare Pytorch train/test Datasets
train_dataset = GTEA61(DATA_DIR, split='train', transform=spatial_transform, seq_len=SEQ_LEN)
test_dataset = GTEA61(DATA_DIR, split='test', transform=spatial_transform, seq_len=SEQ_LEN)

#train_indexes, val_indexes  = train_dataset.split_indices(0.6)

#val_dataset = Subset(train_dataset, val_indexes)
#train_dataset = Subset(train_dataset, train_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
#print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 341
Test Dataset: 116


In [6]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
#val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
#test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [None]:
train_iter = 0
min_accuracy = 0
epoch_loss = 0
numCorrTrain = 0
trainSamples = 0
trainSamples_2 = 0
iterPerEpoch = 0
for i, (inputs, targets) in enumerate(train_loader):
    train_iter += 1
    iterPerEpoch += 1
    trainSamples_2 += inputs.size(0)
    inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
    labelVariable = targets.to(DEVICE)
    trainSamples += inputs.size(0)
    print(trainSamples, trainSamples_2, inputs.shape, inputVariable.shape, len(targets))

# Prepare stage 1

In [7]:
validate = True

model = attentionModel(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
model.train(False)
for params in model.parameters():
    params.requires_grad = False

for params in model.lstm_cell.parameters():
    params.requires_grad = True

for params in model.classifier.parameters():
    params.requires_grad = True
model.lstm_cell.train(True)
model.classifier.train(True)
model.to(DEVICE)

trainable_params = [p for p in model.parameters() if p.requires_grad]

loss_fn = nn.CrossEntropyLoss()

optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 1

In [8]:
train_iter = 0
min_accuracy = 0
cudnn.benchmark

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    trainSamples = 0
    iterPerEpoch = 0
    model.lstm_cell.train(True)
    model.classifier.train(True)
        
    for i, (inputs, targets) in enumerate(train_loader):
        train_iter += 1
        iterPerEpoch += 1
        optimizer_fn.zero_grad()
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        trainSamples += inputs.size(0)
        output_label, _ = model(inputVariable)
        
        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()
            
        _, predicted = torch.max(output_label.data, 1)
        numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
        epoch_loss += loss.data.item()
        
    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100

    print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
    if validate:
        if (epoch+1) % 2 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputs.size(0)
                
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                
                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += torch.sum(predicted == labelVariable.data).data.item()
                    
            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                #torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                    #torch.save(model.state_dict(), save_path_model)
    
    optim_scheduler.step()

Train: Epoch = 1 | Loss = 4.116372776031494 | Accuracy = 4.375
Train: Epoch = 2 | Loss = 4.030084347724914 | Accuracy = 5.0
Val: Epoch = 2 | Loss 3.9895098209381104 | Accuracy = 4.310344827586207
Train: Epoch = 3 | Loss = 3.971208190917969 | Accuracy = 6.25
Train: Epoch = 4 | Loss = 3.905468201637268 | Accuracy = 7.5
Val: Epoch = 4 | Loss 3.9058555364608765 | Accuracy = 11.206896551724139
Train: Epoch = 5 | Loss = 3.767634916305542 | Accuracy = 8.75
Train: Epoch = 6 | Loss = 3.690426754951477 | Accuracy = 10.625
Val: Epoch = 6 | Loss 3.698900878429413 | Accuracy = 8.620689655172415
Train: Epoch = 7 | Loss = 3.659843897819519 | Accuracy = 13.125
Train: Epoch = 8 | Loss = 3.544969987869263 | Accuracy = 12.1875
Val: Epoch = 8 | Loss 3.6253132820129395 | Accuracy = 17.24137931034483
Train: Epoch = 9 | Loss = 3.5026852607727053 | Accuracy = 13.125
Train: Epoch = 10 | Loss = 3.4243183851242067 | Accuracy = 13.750000000000002
Val: Epoch = 10 | Loss 3.686962604522705 | Accuracy = 6.89655172413

KeyboardInterrupt: 

# Prepare stage 2

In [None]:
stage1_dict = "best_model_dict.pth"
validate = True

model = attentionModel(num_classes=NUM_CLASSES, mem_size=MEM_SIZE)
model.load_state_dict(torch.load(stage1_dict))
model.train(False)
for params in model.parameters():
    params.requires_grad = False
        
for params in model.resNet.layer4[0].conv1.parameters():
    params.requires_grad = True
    train_params += [params]

for params in model.resNet.layer4[0].conv2.parameters():
    params.requires_grad = True
    train_params += [params]

for params in model.resNet.layer4[1].conv1.parameters():
    params.requires_grad = True
    train_params += [params]

for params in model.resNet.layer4[1].conv2.parameters():
    params.requires_grad = True
    train_params += [params]

for params in model.resNet.layer4[2].conv1.parameters():
    params.requires_grad = True
    train_params += [params]
#
for params in model.resNet.layer4[2].conv2.parameters():
    params.requires_grad = True
    train_params += [params]
#
for params in model.resNet.fc.parameters():
    params.requires_grad = True
    train_params += [params]

model.resNet.layer4[0].conv1.train(True)
model.resNet.layer4[0].conv2.train(True)
model.resNet.layer4[1].conv1.train(True)
model.resNet.layer4[1].conv2.train(True)
model.resNet.layer4[2].conv1.train(True)
model.resNet.layer4[2].conv2.train(True)
model.resNet.fc.train(True)

for params in model.lstm_cell.parameters():
    params.requires_grad = True

for params in model.classifier.parameters():
    params.requires_grad = True


model.lstm_cell.train(True)
model.classifier.train(True)

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = torch.optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# Stage 2

In [None]:
train_iter = 0
min_accuracy = 0
cudnn.benchmark

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    trainSamples = 0
    iterPerEpoch = 0
    
    model.lstm_cell.train(True)
    model.classifier.train(True)
    model.resNet.layer4[0].conv1.train(True)
    model.resNet.layer4[0].conv2.train(True)
    model.resNet.layer4[1].conv1.train(True)
    model.resNet.layer4[1].conv2.train(True)
    model.resNet.layer4[2].conv1.train(True)
    model.resNet.layer4[2].conv2.train(True)
    model.resNet.fc.train(True)
    
    for i, (inputs, targets) in enumerate(train_loader):
        train_iter += 1
        iterPerEpoch += 1
        optimizer_fn.zero_grad()
        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        trainSamples += inputs.size(0)
        output_label, _ = model(inputVariable)
        
        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()
        
        _, predicted = torch.max(output_label.data, 1)
        numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
        epoch_loss += loss.data.item()
            
        
    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100

    print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
    if validate is not None:
        if (epoch+1) % 2 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                val_samples += inputs.size(0)
                
                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += torch.sum(predicted == labelVariable.data).data.item()
                
            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                #torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    #save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
    optim_scheduler.step()

# Train temporal network

In [4]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 61 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 32     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 0.01            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4  # Regularization, you can keep this at the default

NUM_EPOCHS = 750      # Total number of training epochs (iterations over dataset)
STEP_SIZE = [150, 300, 500] # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.5          # Multiplicative factor for learning rate step-down
STACK_SIZE = 5

LOG_FREQUENCY = 20

model_folder = 'saved_models'

In [5]:
# Data loader
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
spatial_transform_train = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])
spatial_transform_val = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

In [6]:
DATA_DIR = '/home/utente/Scaricati/program/ML_DL/FPAR/GTEA61'

# Prepare Pytorch train/test Datasets
train_dataset = GTEA61_flow(DATA_DIR, split='train', transform=spatial_transform_train, seq_len=STACK_SIZE)
test_dataset = GTEA61_flow(DATA_DIR, split='test', transform=spatial_transform_val, seq_len=STACK_SIZE)

#train_indexes, val_indexes  = train_dataset.split_indices(0.6)

#val_dataset = Subset(train_dataset, val_indexes)
#train_dataset = Subset(train_dataset, train_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
#print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 341
Test Dataset: 116


In [7]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [8]:
validate = True

model = flow_resnet34(True, channels=2*STACK_SIZE, num_classes=NUM_CLASSES)
model.train(True)
train_params = list(model.parameters())

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

optimizer_fn = torch.optim.SGD(train_params, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

In [9]:
train_iter = 0
min_accuracy = 0
cudnn.benchmark

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    trainSamples = 0
    iterPerEpoch = 0
    model.train(True)
    for i, (inputs, targets) in enumerate(train_loader):
        train_iter += 1
        iterPerEpoch += 1
        optimizer_fn.zero_grad()
        inputVariable = inputs.to(DEVICE)
        labelVariable = targets.to(DEVICE)
        
        trainSamples += inputs.size(0)
        output_label, _ = model(inputVariable)
        loss = loss_fn(output_label, labelVariable)
        loss.backward()
        optimizer_fn.step()
        
        _, predicted = torch.max(output_label.data, 1)
        numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
        epoch_loss += loss.data.item()
        
    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))
    
    if validate:
        if (epoch+1) % 2 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputs.size(0)
                inputVariable = inputs.to(DEVICE)
                labelVariable = targets.to(DEVICE)
                
                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += torch.sum(predicted == labelVariable.data).data.item()
                
            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_flow_state_dict.pth')
                #torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    #save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
    optim_scheduler.step()

tensor([[ 0.4084,  0.1433,  0.7386,  ..., -0.2264,  0.9150,  0.5325],
        [ 1.5826,  1.0154, -1.4570,  ..., -0.7125,  1.0177,  1.7363],
        [ 0.7942, -0.3806, -1.0351,  ..., -0.8075,  1.0047,  0.5475],
        ...,
        [ 1.5178, -1.5933,  0.3368,  ..., -1.9098,  1.4702, -0.5169],
        [ 0.1974,  0.8592,  0.0053,  ...,  0.0618, -0.6048,  1.0245],
        [ 1.0563, -2.2363,  0.9363,  ..., -1.7047, -0.5733, -1.5339]],
       device='cuda:0')
Train: Epoch = 1 | Loss = 0.0 | Accuracy = 0.0




tensor([[-1.0424, -1.5736,  0.6324,  ..., -0.9721, -0.7348,  0.1115],
        [ 0.3321, -0.6927, -0.3757,  ...,  0.1693, -0.2389,  0.1666],
        [ 3.6670, -1.3592,  0.0680,  ..., -1.3195,  1.4804,  1.5152],
        ...,
        [-0.5035, -0.5004,  0.7729,  ..., -0.2500,  0.8514, -0.6975],
        [ 2.0861,  0.8493, -0.8812,  ...,  0.4804, -0.7253, -0.5652],
        [ 0.5314, -1.2240, -1.2776,  ..., -1.0670,  0.3493, -0.3978]],
       device='cuda:0')
Train: Epoch = 2 | Loss = 0.0 | Accuracy = 0.0


IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/utente/Programmi/anaconda3/envs/ml_env/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/utente/Programmi/anaconda3/envs/ml_env/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/utente/Programmi/anaconda3/envs/ml_env/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/utente/Scaricati/program/ML_DL/FPAR/gtea_dataset.py", line 145, in __getitem__
    select_x_frames = frames_x[select_indices]
IndexError: arrays used as indices must be of integer (or boolean) type


# 2 Stream joint training

In [2]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 61 # 101 + 1: There is am extra Background class that should be removed 

BATCH_SIZE = 32     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 0.01            # The initial Learning Rate
LR_FLOW = 0.0001
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4  # Regularization, you can keep this at the default

NUM_EPOCHS = 250      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 1 # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.99          # Multiplicative factor for learning rate step-down
MEM_SIZE = 512
STACK_SIZE = 5
SEQ_LEN = 7

LOG_FREQUENCY = 20

model_folder = 'saved_models'

In [3]:
# Data loader
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
spatial_transform_train = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])
spatial_transform_val = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

In [4]:
DATA_DIR = '/home/utente/Scaricati/program/ML_DL/FPAR/GTEA61'

# Prepare Pytorch train/test Datasets
train_dataset = GTEA61_2Stream(DATA_DIR, split='train', transform=spatial_transform_train, seq_len=SEQ_LEN, 
                               stack_size=STACK_SIZE)
test_dataset = GTEA61_2Stream(DATA_DIR, split='test', transform=spatial_transform_val, seq_len=SEQ_LEN, 
                              stack_size=STACK_SIZE)

#train_indexes, val_indexes  = train_dataset.split_indices(0.6)

#val_dataset = Subset(train_dataset, val_indexes)
#train_dataset = Subset(train_dataset, train_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
#print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 341
Test Dataset: 116


In [5]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

# Prepare joint training

In [None]:
flowModel = "best_flow_model.pth"
rgbModel = "best_rgb_model.pth"
validate = True

model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=STACK_SIZE, memSize=MEM_SIZE,
                                    num_classes=NUM_CLASSES)

for params in model.parameters():
    params.requires_grad = False
model.train(False)

for params in model.classifier.parameters():
    params.requires_grad = True

for params in model.frameModel.lstm_cell.parameters():
    params.requires_grad = True

for params in model.frameModel.resNet.layer4[0].conv1.parameters():
    params.requires_grad = True

for params in model.frameModel.resNet.layer4[0].conv2.parameters():
    params.requires_grad = True
       
for params in model.frameModel.resNet.layer4[1].conv1.parameters():
    params.requires_grad = True     

for params in model.frameModel.resNet.layer4[1].conv2.parameters():
    params.requires_grad = True
       
for params in model.frameModel.resNet.layer4[2].conv1.parameters():
    params.requires_grad = True

for params in model.frameModel.resNet.layer4[2].conv2.parameters():
    params.requires_grad = True
            
for params in model.frameModel.resNet.fc.parameters():
    params.requires_grad = True

for params in model.flowModel.layer4.parameters():
    params.requires_grad = True


model.classifier.train(True)
model.flowModel.layer4.train(True)
model.frameModel.lstm_cell.train(True)
model.frameModel.classifier.train(True)

frame_trainable_params = [p for p in model.frameModel.parameters() if p.requires_grad]
flow_trainable_params = [p for p in model.flowModel.parameters() if p.requires_grad]

model.to(DEVICE)

loss_fn = nn.CrossEntropyLoss()

optimizer_fn = torch.optim.SGD([
        {'params': frame_trainable_params},
        {'params': flow_trainable_params, 'lr': LR_FLOW},
    ], lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

optim_scheduler = optim.lr_scheduler.StepLR(optimizer_fn, step_size=STEP_SIZE, gamma=GAMMA)

In [None]:
train_iter = 0
min_accuracy = 0
cudnn.benchmark

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    numCorrTrain = 0
    iterPerEpoch = 0
    model.classifier.train(True)
    model.flowModel.layer4.train(True)
    model.frameModel.lstm_cell.train(True)
    model.frameModel.classifier.train(True)
    for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
        train_iter += 1
        iterPerEpoch += 1
        optimizer_fn.zero_grad()
        
        inputVariableFlow = inputFlow.to(DEVICE)
        inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
        labelVariable = targets.to(DEVICE)
        
        output_label = model(inputVariableFlow, inputVariableFrame)
        loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
        loss.backward()
        optimizer_fn.step()
        
         _, predicted = torch.max(output_label.data, 1)
        numCorrTrain += torch.sum(predicted == labelVariable.data).data.item()
        epoch_loss += loss.data.item()
        
    avg_loss = epoch_loss / iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100
    print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
    print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
    
    if validate:
        if (epoch + 1) % 2 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                val_iter += 1
                
                inputVariableFlow = inputFlow.to(DEVICE)
                inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                 
                output_label = model(inputVariableFlow, inputVariableFrame)
                loss = loss_fn(F.log_softmax(output_label, dim=1), labelVariable)
                
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += torch.sum(predicted == labelVariable.data).data.item()
                    
            val_accuracy = (numCorr / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
                #torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
            else:
                if (epoch + 1) % 10 == 0:
                    save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
                    #torch.save(model.state_dict(), save_path_model)
    optim_scheduler.step()