In [None]:
import os
import argparse
import numpy as np
import random
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import torch

from trainer import Trainer  # Import the Trainer class from the file you have edited
from model import * # Import the model from the file you have edited
from utils import BatchGenerator
from utils import func_eval

# Echo the GPU index this kernel was restricted to (set right after `import os`,
# before torch is imported).
visible_gpus = os.environ["CUDA_VISIBLE_DEVICES"]
print(visible_gpus)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print("Using device:", device)
# Seed every global random number generator this notebook imports so that a
# fresh-kernel run is repeatable.
seed = 19988563
random.seed(seed)
# NumPy keeps its own global RNG, independent of `random` and torch; it was
# previously left unseeded even though numpy is imported by this notebook.
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# Request deterministic cuDNN kernels and switch off the cuDNN benchmark
# auto-tuner (see the PyTorch reproducibility notes).
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
 
# ---------------------------------------------------------------------------
# Experiment configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------
action = 'train'
dataset = 'gtea'
split = '1'

num_epochs = 150  # number of epochs to train

lr = 0.0005
num_layers = 9  # number of layers in one encoder or decoder block
num_f_maps = 64
features_dim = 2048  # dimension of the input features, 2048 for i3d features
# NOTE(review): the previous comment here said "1 for gtea", but 8 is what is
# actually used with dataset == 'gtea' -- confirm which one is intended.
bz = 8  # batch size

channel_mask_rate = 0.3

# Temporal sampling factor passed to BatchGenerator and Trainer.predict.
# NOTE(review): the default is already 2, so the 50salads branch below is
# currently a no-op; confirm whether the default was meant to be 1
# ("full temporal resolution") for the other datasets.
sample_rate = 2
# sample input features @ 15fps instead of 30 fps
# for 50salads, and up-sample the output to 30 fps
if dataset == "50salads":
    sample_rate = 2

# To prevent over-fitting for GTEA. Early stopping & large dropout rate
if dataset == "gtea":
    channel_mask_rate = 0.5

if dataset == 'breakfast':
    lr = 0.0001

# Data locations are derived from `dataset` and `split` (exactly like
# `model_dir` below) so that changing either setting can no longer train on
# one split while saving checkpoints under another. With the values above
# these evaluate to the same paths that used to be hard-coded.
# NOTE(review): assumes other datasets/splits follow the same directory
# layout -- confirm before switching `dataset`.
vid_list_file = "files_data/train.split" + split + ".bundle"
vid_list_file_tst = "files_data/test.split" + split + ".bundle"
features_path = "data_i3d/" + dataset + "/features"
gt_path = "data_i3d/" + dataset + "/groundTruth"

mapping_file = "mapping.txt"
# If you have made changes to the model, change the model_dir and results_dir paths accordingly.
model_dir = "results/" + dataset + "/split_" + split
results_dir = "results/"
 

 
# Make sure the checkpoint and results directories exist. exist_ok=True keeps
# the cell safe to re-run and avoids the race between a separate
# os.path.exists() check and the directory creation.
for output_dir in (model_dir, results_dir):
    os.makedirs(output_dir, exist_ok=True)
 
 
# Load the class mapping. Each non-blank line of `mapping_file` is parsed as
# "<integer index> <label>": the first whitespace-separated token becomes the
# class index and the second the label.
# The `with` block closes the file even if reading fails.
with open(mapping_file, 'r') as file_ptr:
    # Filter blank lines instead of unconditionally dropping the last element:
    # the old `[:-1]` slice silently lost the final class whenever the file
    # had no trailing newline, and a blank line raised IndexError.
    actions = [line for line in file_ptr.read().split('\n') if line.strip()]

# label -> index
actions_dict = dict()
for mapping_line in actions:
    fields = mapping_line.split()
    actions_dict[fields[1]] = int(fields[0])

# index -> label (inverse lookup)
index2label = {index: label for label, index in actions_dict.items()}
num_classes = len(actions_dict)

# Build the trainer (and, through it, the model) from the settings above.
# NOTE(review): the two literal 2s are positional arguments whose meaning is
# defined by Trainer's signature in trainer.py, which is not visible here.
trainer = Trainer(
    num_layers,
    2,
    2,
    num_f_maps,
    features_dim,
    num_classes,
    channel_mask_rate,
)

# Training entry point: this branch runs only while `action` is "train".
if action == "train":
    # Construct and populate one batch generator per video list, train first
    # and test second.
    loaders = []
    for bundle_file in (vid_list_file, vid_list_file_tst):
        loader = BatchGenerator(num_classes, actions_dict, gt_path, features_path, sample_rate, features_dim)
        loader.read_data(bundle_file)
        loaders.append(loader)
    batch_gen, batch_gen_tst = loaders

    trainer.train(model_dir, batch_gen, num_epochs, bz, lr, batch_gen_tst)



  from .autonotebook import tqdm as notebook_tqdm


2
Modular loss function initialized with: λ1=1.0, λ2=0.15, τ=4.0
Model initialized with 4 attention heads.
Model Size: 2,735,980
LR: 0.0005
[epoch 1]: loss = 2.4093, acc = 0.1083
[epoch 2]: loss = 2.0279, acc = 0.1331
[epoch 3]: loss = 1.8290, acc = 0.1538
[epoch 4]: loss = 1.6522, acc = 0.1733
[epoch 5]: loss = 1.5604, acc = 0.1905
---[epoch 5]---: tst acc = 0.2460
[epoch 6]: loss = 1.4667, acc = 0.2117
[epoch 7]: loss = 1.4031, acc = 0.2317
[epoch 8]: loss = 1.3389, acc = 0.2687
[epoch 9]: loss = 1.2644, acc = 0.3106
[epoch 10]: loss = 1.2230, acc = 0.3448
---[epoch 10]---: tst acc = 0.3669
[epoch 11]: loss = 1.1730, acc = 0.3680
[epoch 12]: loss = 1.1395, acc = 0.3882
[epoch 13]: loss = 1.0777, acc = 0.4178
[epoch 14]: loss = 1.0217, acc = 0.4445
[epoch 15]: loss = 0.9882, acc = 0.4708
---[epoch 15]---: tst acc = 0.4632
[epoch 16]: loss = 0.9452, acc = 0.4949
[epoch 17]: loss = 0.9285, acc = 0.5209
[epoch 18]: loss = 0.8988, acc = 0.5480
[epoch 19]: loss = 0.8575, acc = 0.5853
[epoc

In [3]:
# Inference cell: running it predicts on the test set and saves the results in
# results_dir; they can then be evaluated with the eval.ipynb script.
action = "predict"
if action == "predict":
    # Fresh generator over the test video list.
    batch_gen_tst = BatchGenerator(
        num_classes, actions_dict, gt_path, features_path, sample_rate, features_dim
    )
    batch_gen_tst.read_data(vid_list_file_tst)

    trainer.predict(
        model_dir,
        results_dir,
        features_path,
        batch_gen_tst,
        num_epochs,
        actions_dict,
        sample_rate,
    )

Prediction completed in 12.88 seconds
