In [None]:
from google.colab import drive
drive.mount('/content/drive')
# Ensure we have downloaded the files correctly
!ls drive/MyDrive

### Clone the Episodic Memory Baseline Repository

In [None]:
%%bash

# Stop at the first failing command so a broken clone/cd is reported
# instead of running the remaining commands in the wrong directory.
set -e

# Clone only when there is no checkout yet, so re-running this cell does not
# hit "destination path already exists".
if [ ! -d episodic-memory/.git ]; then
  git clone https://github.com/danielefam/episodic-memory
fi

# Bring the checkout (fresh or pre-existing) up to date.
cd episodic-memory
git pull

# Prepare Dataset

The NLQ baseline repository for VSLNet requires you to prepare the data for training and evaluation. According to the [README.md](https://github.com/EGO4D/episodic-memory/blob/main/NLQ/VSLNet/README.md#preparation), we need to run the `prepare_ego4d_dataset.py` script.

### Setup Environment Variables for NLQ

First, let's set up some environment variables and the paths that NLQ's scripts will expect.

Run the following cells if you are using **VSLNet**.

In [None]:
# Shell snippet that every %%bash cell sources: it exports the experiment
# name and the dataset / feature / checkpoint paths, then changes into the
# VSLNet directory of the cloned repository.
vars_script = """
export NAME=egovlp_fp16
export TASK_NAME=nlq_official_v1_$NAME
export BASE_DIR=data/dataset/nlq_official_v1_$NAME
export FEATURE_BASE_DIR=data/features/nlq_official_v1_$NAME
export FEATURE_DIR=$FEATURE_BASE_DIR/video_features
export MODEL_BASE_DIR=/content/nlq_official_v1/checkpoints/
export GLOVE_DICTIONARY=data/features/glove.840B.300d.txt
export ANNOTATION_PREPARED=data/dataset/nlq_official_v1_$NAME
export VIDEO_PREPARED=$FEATURE_BASE_DIR/official
export GDRIVE_PREPARED_ANNOTATION=/content/drive/MyDrive/prepared_features/$TASK_NAME/
export GDRIVE_PREPARED_VIDEO=/content/drive/MyDrive/prepared_features/official/
export DATASET_PATH=data/dataset

cd episodic-memory/NLQ/VSLNet
"""

# Write the snippet to vars.sh in the current working directory.
with open("vars.sh", "w") as vars_file:
  vars_file.write(vars_script)

In [None]:
%%bash

# Load the shared paths; sourcing vars.sh also changes into the VSLNet
# directory, so the relative paths below resolve there.
source vars.sh

echo "$FEATURE_BASE_DIR"
mkdir -p "$FEATURE_BASE_DIR"
mkdir -p "$DATASET_PATH"

# Link the Drive-hosted features, GloVe vectors and prepared data into the
# locations the NLQ scripts expect.
#   -f  replaces a link left over from a previous run instead of failing
#   -n  treats an existing link-to-directory as the link itself, so a re-run
#       does not create a nested link inside the Drive folder it points to
ln -sfn /content/drive/MyDrive/egovlp_fp16/ "$FEATURE_DIR"
ln -sfn /content/drive/MyDrive/glove.840B.300d.txt "$GLOVE_DICTIONARY"
ln -sfn "$GDRIVE_PREPARED_ANNOTATION" "$ANNOTATION_PREPARED"
# Name the destination explicitly ($FEATURE_BASE_DIR/official) rather than
# relying on ln placing the link inside the existing $FEATURE_BASE_DIR.
ln -sfn "$GDRIVE_PREPARED_VIDEO" "$VIDEO_PREPARED"

data/features/nlq_official_v1_egovlp_fp16


In [None]:
%%bash
%%capture

source vars.sh
pip install nltk submitit torch torchaudio torchvision tqdm transformers tensorboard Pillow terminaltables

# Train and Evaluation

## Train a Model

Please note:
1. These are *not* the parameters for the original baseline model in the Ego4D whitepaper.
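2. Omnivore video features, in their FP16 variant, are used (SlowFast was originally used). This is due to free Colab constraints (100GB disk space). Note: the configuration cells in this notebook reference the `egovlp_fp16` features, so confirm which features you intend to use.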

Omnivore video features do out-perform slowfast features.

In [None]:
# Load the TensorBoard notebook extension; the %tensorboard magic used in the
# cells below depends on it.
%load_ext tensorboard

You may have to re-run the following cell after you run the training script. Alternatively, you can try to reload the data.

In [None]:
# Create the log directory (no error if it already exists) so the path passed
# to TensorBoard below is present.
!mkdir -p /content/episodic-memory/NLQ/VSLNet/runs/
# Display TensorBoard inline, reading logs from the VSLNet runs directory.
%tensorboard --logdir /content/episodic-memory/NLQ/VSLNet/runs/

<IPython.core.display.Javascript object>

Run the following cells if you are using **VSLNet**.

In [None]:
# run this if you use glove
import nltk

# NLTK resources to fetch, downloaded one after another in this order.
NLTK_RESOURCES = ('punkt_tab', 'punkt', 'wordnet', 'omw-1.4')

for resource_name in NLTK_RESOURCES:
    nltk.download(resource_name)

In [None]:
%%bash

# Single training run of main.py with the glove predictor.
# vars.sh provides NAME, TASK_NAME and MODEL_BASE_DIR and changes into the
# VSLNet directory.
source vars.sh

# hyper parameters
export INIT_LR=0.0006
export MAX_POS_LEN=128
export NUM_EPOCH=30
export DIM=128
export BATCH_SIZE=16

# machine parameters
export NUM_WORKERS=2
export DATALOADER_WORKERS=1
export VAL_JSON_PATH="/content/drive/MyDrive/annotations/nlq_val.json"

# TensorBoard run name; encodes the hyper parameters chosen above.
export TB_LOG_NAME="${NAME}_bs${BATCH_SIZE}_dim${DIM}_epoch${NUM_EPOCH}_ilr${INIT_LR}"

# Command-line arguments for main.py, one option per line.
train_args=(
    --task "${TASK_NAME}"
    --predictor glove
    --dim "${DIM}"
    --mode train
    --video_feature_dim 256
    --max_pos_len "${MAX_POS_LEN}"
    --init_lr "${INIT_LR}"
    --epochs "${NUM_EPOCH}"
    --batch_size "${BATCH_SIZE}"
    --fv official
    --num_workers "${NUM_WORKERS}"
    --data_loader_workers "${DATALOADER_WORKERS}"
    --model_dir "${MODEL_BASE_DIR}/${NAME}"
    --eval_gt_json "${VAL_JSON_PATH}"
    --log_to_tensorboard "${TB_LOG_NAME}"
    --tb_log_freq 5
    --remove_empty_queries_from train
)

python main.py "${train_args[@]}"

To be reviewed:

In [None]:
%%bash

# To predict on test set.
# This is a shell command, so the cell needs %%bash; vars.sh supplies the
# task name and checkpoint location and changes into the VSLNet directory.
source vars.sh

# NOTE(review): task, predictor, feature dimension, max_pos_len and model_dir
# mirror the single glove training run in this notebook (the previous values
# were the README defaults: nlq_official_v1 / bert / 2304 / checkpoints/).
# Confirm they match the checkpoint you want to evaluate, e.g. use
# `--predictor bert` for a model trained with the bert predictor.
python main.py \
    --task "$TASK_NAME" \
    --predictor glove \
    --mode test \
    --video_feature_dim 256 \
    --max_pos_len 128 \
    --fv official \
    --model_dir "$MODEL_BASE_DIR/$NAME"

Search parameters

In [None]:
%%bash

# Grid search: trains main.py (bert predictor) once for every combination of
# batch size, hidden dimension and initial learning rate listed below.
source vars.sh

# machine parameters
export DATALOADER_WORKERS=1
export NUM_WORKERS=2
export VAL_JSON_PATH="/content/drive/MyDrive/annotations/nlq_val.json"

# settings shared by every run of the sweep (set once, outside the loops)
export NUM_EPOCH=2
export MAX_POS_LEN=128

# search grid
BATCH_SIZEs=(16 32)
DIMs=(128)
INIT_LRs=(0.001 0.0025)

# Array expansions are quoted so each element stays a single word.
for BATCH_SIZE_it in "${BATCH_SIZEs[@]}"; do
  for DIM_it in "${DIMs[@]}"; do
    for INIT_LR_it in "${INIT_LRs[@]}"; do

      export BATCH_SIZE=$BATCH_SIZE_it
      export DIM=$DIM_it
      export INIT_LR=$INIT_LR_it

      # Run name encodes the grid point; used for the TensorBoard log and
      # for the checkpoint directory of this run.
      export TB_LOG_NAME="${NAME}_bs${BATCH_SIZE}_dim${DIM}_epoch${NUM_EPOCH}_ilr${INIT_LR}"

      # NOTE(review): each grid point gets its own --model_dir so runs do not
      # write into one shared directory; confirm main.py does not already
      # separate runs by hyper parameters.
      python main.py \
          --task "$TASK_NAME" \
          --predictor bert \
          --dim "$DIM" \
          --mode train \
          --video_feature_dim 256 \
          --max_pos_len "$MAX_POS_LEN" \
          --init_lr "$INIT_LR" \
          --epochs "$NUM_EPOCH" \
          --batch_size "$BATCH_SIZE" \
          --fv official \
          --num_workers "$NUM_WORKERS" \
          --data_loader_workers "$DATALOADER_WORKERS" \
          --model_dir "$MODEL_BASE_DIR/$TB_LOG_NAME" \
          --eval_gt_json "$VAL_JSON_PATH" \
          --log_to_tensorboard "$TB_LOG_NAME" \
          --tb_log_freq 5 \
          --remove_empty_queries_from train

    done
  done
done


plots

In [None]:
!mkdir -p /content/episodic-memory/NLQ/VSLBase/runs/
%tensorboard --logdir /content/episodic-memory/NLQ/VSLNet/runs/

Reusing TensorBoard on port 6006 (pid 3580), started 0:43:22 ago. (Use '!kill 3580' to kill it.)

<IPython.core.display.Javascript object>