# ***MSTCT + Pytorch-I3D***
This Colab will contain the code for the 2nd algorithm, to do activity detection for charades. 

The feature extraction based on piergiaj/pytorch-i3d will also be implemented.

###Set up Google Drive
All files from this Colab will be stored in a Google Drive folder named MSTCT.

In [None]:
# Mount Google Drive at /content/drive so later cells can read and write the
# project files stored there.
from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Switch the working directory to the MSTCT project folder on the mounted Drive.
%cd /content/drive/MyDrive/MSTCT

## **1. Setup**
Run the cell below to import and install the dependencies needed for the project.

In [None]:
#@title
# Remove any already-installed PyTorch packages so the pinned builds below
# are the only ones present.
!pip uninstall torch -y
!pip uninstall torchvision -y
!pip uninstall torchaudio -y
# Install timm and the pinned CUDA 10.2 builds of torch / torchvision / torchaudio
# from the PyTorch wheel index.
!pip install timm==0.4.12
!pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html

Run the cell to create UI templates

In [None]:
#@title
# imports for UI widgets
from IPython.display import HTML, display, Markdown, Video, clear_output
from ipywidgets import Layout
import ipywidgets as widgets

# Shared widget layouts, reused by every UI cell further down.
btn_layout = widgets.Layout(
    height='40px',
    width='45%',
)
btn_sm_layout = widgets.Layout(
    height='40px',
    width='20%',
)
label_layout = widgets.Layout(
    height='auto',
    width='200px',
)
ddl_layout = widgets.Layout(
    height='auto',
    width='45%',
)

###Check if CUDA is enabled
* Check if your device has a CUDA-supported GPU and the cuda version is 11.2. 
* If the cuda version is mismatched, install the corresponding pytorch tools via pip.

In [None]:
#@title
# Check if NVIDIA GPU is enabled
# nvidia-smi lists the attached GPU and driver; nvcc reports the CUDA toolkit version.
!nvidia-smi
!nvcc --version

### Install Python 3.8 and pip
Upgrade the Python version in Colab from 3.7 to 3.8 and install pip.

In [None]:
# install python 3.8
!sudo apt-get update -y
!sudo apt-get install python3.8

# change alternatives
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 1
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 2

!sudo apt install python-pip

!sudo apt install python3.8-distutils

### Install pip library
This step updates pip to the version according to python version.

In [None]:
# Download the official pip bootstrap script and run it with the current
# `python` so that pip matches the interpreter version.
!curl -sSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py
! python get-pip.py

### Check that Python 3.8 is installed and pip is updated

In [None]:
#@title
#check python version
!python --version
#check pip version
!pip --version
# Register python3.8 as an alternative for the plain `python` command.
!sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1

###Install the pip dependencies

In [None]:
!pip install timm==0.4.12
!pip install pytorch==1.9.0
!pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
# !pip install pickle5 # not needed in 3.8
!pip install scikit-learn
!pip install numpy
!pip install h5py
!pip install opencv-python
!pip install tqdm

In [None]:
# Confirm which PyTorch version the kernel actually imports.
import torch
print(torch.__version__)

#### Set up Video Captioning for MSTCT

In [None]:
#@title
import pandas
import cv2
import re
import os
import json
import csv
import math
import numpy as np

class CaptioningMST:
    """Burn Charades ground-truth labels and model predictions into videos.

    All file paths are relative to the current working directory:

    * ``./annotations/json_data.json`` - predictions, keyed by video id; each
      entry has an ``'actions'`` list whose items are indexed as
      ``[label, start_seconds, end_seconds, accuracy]``.
    * ``./annotations/Charades_v1_{train,test}.csv`` - ground-truth rows; the
      video id is read from column 0 and the action string from column 9.
    * ``data/rgbVideos/<id>.mp4`` - input videos.
    * ``./videosCaptionOutput/<id>.mp4`` - annotated output videos.

    Call :meth:`getGroundTruth` to run the whole pipeline.
    """

    def __init__(self, caption_filepath = None):
        # [train CSV, test CSV]; getGroundTruth() searches them in this order.
        self.annotation_path = ['./annotations/Charades_v1_train.csv', './annotations/Charades_v1_test.csv']
        self.videopaths = []      # video ids taken from json_data.json
        self.video_type = ''      # 'train' or 'test', depending on which CSV matched
        self.time = 0             # position of the current frame in milliseconds
        self.output_path = "./videosCaptionOutput/"
        self.classes = []         # classes[0] is the list of lines of the class file
        self.dataset = {}         # video id -> full CSV row
        self.feature = {}         # video id -> list of [label, start, end] strings
        self.caption_path = caption_filepath
        self.accuracydata = {}    # parsed content of json_data.json

    def get_classes(self):
        """Append the lines of the Charades class file to ``self.classes``.

        The whole list of lines is appended as ONE element, so the class text
        for index ``i`` is ``self.classes[0][i]``.
        """
        with open('./annotations/Charades_v1_classes.txt', newline='') as classfile:
            self.classes.append(classfile.readlines())

    def get_video_names(self):
        """Load the prediction JSON and take its keys as the video ids to process."""
        with open('./annotations/json_data.json', 'r') as f:
            self.accuracydata = json.load(f)
        self.videopaths = self.accuracydata.keys()

    def get_feature(self):
        """Split each row's action string into ``[label, start, end]`` triples.

        Column 9 of a row looks like ``'c092 11.90 21.20;c147 0.00 12.60'``.
        Rows with no annotated action (empty or missing column) yield an empty
        list instead of ``[['']]``, which would make ``createVideo`` fail when
        it indexes the start/end fields.
        """
        for video_id, row in self.dataset.items():
            raw_actions = row[9] if len(row) > 9 else ''
            self.feature[video_id] = [
                segment.split()
                for segment in raw_actions.split(';')
                if segment.strip()
            ]

    def _collect_rows(self, csv_path):
        """Store rows of *csv_path* whose id is a requested video; return True if any matched."""
        matched = False
        with open(csv_path, newline='') as csvfile:
            for row in csv.reader(csvfile):
                # `row and` skips blank lines, which csv.reader yields as [].
                if row and row[0] in self.videopaths:
                    self.dataset[row[0]] = row
                    matched = True
        return matched

    def getGroundTruth(self):
        """Run the pipeline: load predictions, match ground truth, write videos.

        The train CSV is searched first; the test CSV is consulted only when
        the train CSV matched nothing. Prints a message and returns ``None``
        without writing videos when no requested video is found in either.
        """
        self.get_video_names()

        # annotation_path[0] is the TRAIN split and [1] the TEST split; the
        # labels below follow that order.
        if self._collect_rows(self.annotation_path[0]):
            self.video_type = 'train'

        if not self.dataset and self._collect_rows(self.annotation_path[1]):
            self.video_type = 'test'

        if not self.dataset:
            print('Video not found in dataset')
            return None

        self.get_classes()
        self.get_feature()
        self.createVideo()

    def _draw_overlay(self, frame, video_name):
        """Draw the caption banner for the frame at ``self.time`` onto *frame* in place."""
        white = (255, 255, 255)
        green = (67, 181, 75)
        height, width, channels = frame.shape

        # Black banner across the bottom of the frame plus the column headers.
        cv2.rectangle(frame, (0, int(height * 0.8)), (int(width), int(height*0.98)), (0,0,0), -1)
        cv2.putText(frame, "Ground truth", (int(width*0.1),int(height*0.85)), cv2.FONT_HERSHEY_DUPLEX, 0.5, white, 1)
        cv2.putText(frame, "Accuracy", (int(width*0.75),int(height*0.85)), cv2.FONT_HERSHEY_DUPLEX, 0.5, white, 1)
        cv2.putText(frame, "Prediction", (int(width*0.4),int(height*0.85)), cv2.FONT_HERSHEY_DUPLEX, 0.5, white, 1)

        # Ground-truth labels active at this timestamp. Annotation times are in
        # seconds, self.time in milliseconds. item[0][1:] drops the label's
        # first character (presumably the leading 'c' of ids like 'c092').
        truths = [
            item[0][1:]
            for item in self.feature.get(video_name, [])
            if len(item) >= 3 and float(item[1])*1000 < self.time < float(item[2])*1000
        ]

        # Predicted labels and their accuracies active at this timestamp.
        activities = []
        accuracies = []
        for item in self.accuracydata[video_name]['actions']:
            if item[1]*1000 < self.time < item[2]*1000:
                activities.append(item[0])
                accuracies.append(item[3])

        # Each column is rendered as two text rows, so split every list in half.
        truth_rows = np.array_split(truths, 2)
        activity_rows = np.array_split(activities, 2)
        accuracy_rows = np.array_split(accuracies, 2)
        row_heights = (0.90, 0.95)

        for row, y_ratio in zip(truth_rows, row_heights):
            cv2.putText(frame, ', '.join(row), (int(width*0.1),int(height*y_ratio)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, white, 1)

        for truth_row, activity_row, y_ratio in zip(truth_rows, activity_rows, row_heights):
            # A prediction row is drawn green when its printed form equals the
            # printed form of the matching ground-truth row, white otherwise.
            colour = green if str(truth_row) == str(activity_row) else white
            cv2.putText(frame, ', '.join([str(item) for item in activity_row]), (int(width*0.4),int(height*y_ratio)), cv2.FONT_HERSHEY_DUPLEX, 0.4, colour, 1)

        for row, y_ratio in zip(accuracy_rows, row_heights):
            cv2.putText(frame, ', '.join([str(item) for item in row]), (int(width*0.75),int(height*y_ratio)), cv2.FONT_HERSHEY_SIMPLEX, 0.4, white, 1)

    def createVideo(self):
        """Write an annotated copy of every requested video to ``self.output_path``.

        A video that cannot be opened is reported and skipped; the remaining
        videos are still processed. Output is always written at 25 fps.
        """
        os.makedirs(self.output_path, exist_ok=True)

        for video_name in self.videopaths:
            cap = cv2.VideoCapture('data/rgbVideos/'+video_name+'.mp4')
            # Check the capture BEFORE creating the writer so a missing input
            # does not leave an empty output file or an unreleased handle.
            if not cap.isOpened():
                print("Error opening video stream or file")
                cap.release()
                continue

            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(self.output_path+video_name+'.mp4', fourcc, 25, frame_size)

            try:
                while cap.isOpened():
                    ret, frame = cap.read()

                    if not ret:
                        print(f'Video annotation for {video_name} is process complete.')
                        break

                    self.time = int(cap.get(cv2.CAP_PROP_POS_MSEC))
                    self._draw_overlay(frame, video_name)

                    out.write(frame)
                    # cv2.imshow('frame',frame)
                    # waitKey returns an int key code (-1 when nothing was
                    # pressed); 'q' or Esc (27) stops the current video.
                    key = cv2.waitKey(1)
                    if key & 0xFF in (ord('q'), 27):
                        break
            finally:
                # Always release both handles, even if annotation raised.
                cap.release()
                out.release()

            cv2.destroyAllWindows()


## **2. Feature Extraction**
This section performs feature extraction from jpeg folders of videos via [pytorch-i3d](https://github.com/piergiaj/pytorch-i3d).

### Run the feature extraction file
Step 1: Add the JPEG frame folders of the videos you want to process to `/content/drive/MyDrive/MSTCT/pytorch-i3d/Charades_v1_rgb`

Step 2: Select `rgb` stream and `rgb_charades` 

Step 3: Click on the `Extract Features` button

Step 4: Retrieve the numpy files from `/content/drive/MyDrive/MSTCT/pytorch-i3d/output`

In [None]:
#@title
# UI for I3D feature extraction: pick a stream and a pretrained model, then
# run pytorch-i3d's extract_features.py from the button callback.
%cd /content/drive/MyDrive/MSTCT/pytorch-i3d
# to be replaced with files with /models/ later
models_list = ['flow_charades','flow_imagenet','rgb_charades','rgb_imagenet']

# NOTE(review): 'both' is offered here but is passed straight to -mode below;
# confirm extract_features.py accepts it.
label_stream = widgets.Label("Select Stream:", layout = label_layout)
ddl_stream = widgets.Select(
    options=['rgb', 'flow', 'both'],
    value='rgb',
    layout = ddl_layout)

label_models = widgets.Label("Select model:", layout = label_layout)
ddl_models = widgets.Dropdown(
    options= models_list,
    value='rgb_charades',
    layout = ddl_layout)

btn_extract = widgets.Button(description="Extract Features", layout = btn_layout, button_style='info')

# Output area that captures everything printed by the callback.
extraction_output = widgets.Output()

def extract_feature(b):
    """Button callback: run extract_features.py with the selected stream and model.

    `b` is the clicked button (unused). The `$name.attr` references in the
    %run line are expanded by IPython when the callback runs, so the current
    widget selections are used.
    """
    with extraction_output:
        extraction_output.clear_output()
        print('run extraction ...')
        %cd /content/drive/MyDrive/MSTCT/pytorch-i3d
        # Frames are read from ./Charades_v1_rgb and features saved to ./output;
        # the model is loaded from ./models/<selected model name>.
        %run ./extract_features.py -mode $ddl_stream.value -gpu "0" -root "./Charades_v1_rgb" -save_dir "./output" -load_model "./models/"$ddl_models.value
 
btn_extract.on_click(extract_feature)

# Display
feature_box = widgets.VBox([widgets.HBox([label_stream, ddl_stream]), widgets.HBox([label_models, ddl_models]), btn_extract, extraction_output])
feature_box  

## MSTCT Repository

### Run the training (instead of the shell script)
Step 1: Configure the training options using the widgets

Step 2: Rename the `pickle output file` to be used later

Step 3: Click on `Train Charades` to run training model

Step 4: The pickle file is saved in `/content/drive/MyDrive/MSTCT/MS-TCT/save_logit/`

In [None]:
#@title
# Widgets that collect the command-line options passed to MS-TCT's train.py.
%cd /content/drive/MyDrive/MSTCT/MS-TCT

# to be replaced with files with /models/ later
models_list = ['MS_TCT']

# # select training t/f
# label_train = widgets.Label("Run training", layout = label_layout)
# ddl_train = widgets.Select(
#     options=['True','False'],
#     value='True',
#     layout = ddl_layout)

# select model
label_models = widgets.Label("Select model:", layout = label_layout)
ddl_models_training = widgets.Dropdown(
    options= models_list,
    value = "MS_TCT",
    layout = ddl_layout)

# select stream
label_stream = widgets.Label("Select Stream:", layout = label_layout)
ddl_stream_training = widgets.Select(
    options=['rgb', 'flow'],
    value='rgb',
    layout = ddl_layout)


# select batch size
label_batch_size = widgets.Label("batch_size:", layout = label_layout)
batch_size_training = widgets.BoundedIntText(
    min=1,
    # max=1000, # to be defined and added as necessary
    step=1,
    value=32,
    disabled=False
)

# select epoch
label_epochs = widgets.Label("epoch:", layout = label_layout)
epochs_training = widgets.BoundedIntText(
    min=1,
    # max=1000, # to be defined and added as necessary
    step=1,
    value=50,
    disabled=False
)

# select unisize
label_unisize= widgets.Label("unisize:", layout = label_layout)
ddl_unisize = widgets.Select(
    options=[True,False],
    value=True,
    layout = ddl_layout)

# select alpha_1
# NOTE(review): an integer-only widget is used here; confirm train.py's
# -alpha_l never needs a fractional value.
label_alpha= widgets.Label("alpha_1:", layout = label_layout)
text_alpha = widgets.BoundedIntText(
    placeholder='Specify alpha_1 value',
    value = 1,
    disabled=False
)

# select beta_1
label_beta= widgets.Label("beta_1:", layout = label_layout)
text_beta = widgets.BoundedFloatText(
    placeholder='Specify beta_1 value',
    value = 0.05,
    disabled=False
)

# select comp_info
label_comp_info= widgets.Label("comp_info:", layout = label_layout)
ddl_comp_info = widgets.Select(
    options=[True,False],
    value=True,
    layout = ddl_layout)

# select skip
label_skip= widgets.Label("skip:", layout = label_layout)
ddl_skip = widgets.Select(
    options=[0,1],
    value=0,
    layout = ddl_layout)

# select lr
# No min/max/step is given, so the widget's own default bounds apply.
label_lr= widgets.Label("lr:", layout = label_layout)
lr_text_lr = widgets.BoundedFloatText(
    placeholder='Specify lr value',
    value=0.0001,
    disabled=False
)

# Name of the output file; train_features refuses to run while this is empty.
label_save_file = widgets.Label("file name (output):", layout = label_layout)
text_save_file = widgets.Text(
    placeholder='Specify file name',
    disabled=False
)

training_Confirm_Button  = widgets.Button(description="Train Charades", 
                                          layout = btn_layout, 
                                          button_style='info')

# Output area that captures everything printed during training.
mstct_training_output = widgets.Output()


def train_features(b):
    with mstct_training_output:
        mstct_training_output.clear_output()
        %cd /content/drive/MyDrive/MSTCT/MS-TCT
        if text_save_file.value == '':
          print('Make a unique file name')
          return
        print('run training ...')
        %run train.py -dataset "charades" -gpu 0 -mode $ddl_stream_training.value -model $ddl_models_training.value -train True -num_clips 256 -skip $ddl_skip.value -lr $lr_text_lr.value -comp_info $ddl_comp_info.value -epoch $epochs_training.value -unisize $ddl_unisize.value -alpha_l $text_beta.value -beta_l $text_beta.value -batch_size $batch_size_training.value -save_file_name $text_save_file.value
        CaptioningMST().getGroundTruth()
        
# Run train_features whenever the "Train Charades" button is clicked.
training_Confirm_Button.on_click(train_features)


# Display
# One row per option (label + input), followed by the button and the output area.
feature_box = widgets.VBox([widgets.HBox([label_models, ddl_models_training]),
                            widgets.HBox([label_stream, ddl_stream_training]), 
                            widgets.HBox([label_batch_size, batch_size_training]), 
                            widgets.HBox([label_epochs, epochs_training]), 
                            widgets.HBox([label_unisize, ddl_unisize]), 
                            widgets.HBox([label_alpha, text_alpha]), 
                            widgets.HBox([label_beta, text_beta]), 
                            widgets.HBox([label_comp_info, ddl_comp_info]), 
                            widgets.HBox([label_skip, ddl_skip]), 
                            widgets.HBox([label_lr, lr_text_lr]), 
                            widgets.HBox([label_save_file, text_save_file]), 
                            training_Confirm_Button,
                            mstct_training_output])
feature_box
    

### Testing for MSTCT

Step 1: Select the pickle file to test from the drop-down list, which is populated from the `/content/drive/MyDrive/MSTCT/MS-TCT/save_logit/` directory

Step 2: Run the test and see the probability for each video

Step 3: Put the videos into `/content/drive/MyDrive/MSTCT/MS-TCT/data/rgbVideos`

Step 4: The accuracies for each batch of videos will be stored in `/content/drive/MyDrive/MSTCT/MS-TCT/annotations/json_data.json`

Step 5: Wait for the videos to be generated in `/content/drive/MyDrive/MSTCT/MS-TCT/videosCaptionOutput`

In [None]:
#@title


def populateList(fileDirectory, fileType):
    """Return the sorted names of files in a folder that end with *fileType*.

    Args:
        fileDirectory: Folder to scan. When it is empty or not an existing
            directory (for example a relative path that does not resolve from
            the current working directory), the MS-TCT ``save_logit`` folder
            on the mounted Drive is scanned instead.
        fileType: Required file-name suffix, e.g. ``".pkl"``.

    Returns:
        Alphabetically sorted list of matching file names (not full paths).

    Raises:
        FileNotFoundError: if the fallback folder does not exist either.
    """
    default_directory = "/content/drive/MyDrive/MSTCT/MS-TCT/save_logit"
    if fileDirectory and os.path.isdir(fileDirectory):
        directory = fileDirectory
    else:
        directory = default_directory

    folder_files = os.listdir(directory)
    print("This Folder contains {len_folder} file(s).".format(len_folder=len(folder_files)))
    # os.listdir order is arbitrary; sort so the drop-down order is stable.
    return sorted(name for name in folder_files if name.endswith(fileType))

pickleList = populateList('MS-TCT/save_logit', ".pkl")
        
## dropdown list from the ./save_logit/ to allow user to select the output files
label_pickleList = widgets.Label("Select Pickle file:", layout = label_layout)
ddl_pickleList = widgets.Dropdown(
    options= pickleList,
    value= pickleList[0],
    layout = ddl_layout)

button_execute = widgets.Button(description="Generate Video", 
                                          layout = btn_layout, 
                                          button_style='info')


generate_video_output = widgets.Output()

# To add onclick function here
def execute(b):
    with generate_video_output:
      generate_video_output.clear_output()
      %cd /content/drive/MyDrive/MSTCT/MS-TCT
      %run Evaluation.py -pkl_path ./save_logit/$ddl_pickleList.value
        
button_execute.on_click(execute)

# Display
feature_box = widgets.VBox([widgets.HBox([label_pickleList, ddl_pickleList]),
                            button_execute, generate_video_output])
feature_box