### **1. Import libraries**

In [1]:
# all necessary imports
import numpy as np 
import pandas as pd 
import re
from collections import Counter
import os
import json
import string
import spacy
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize
import transformers
import torch
from torch.utils.data import Dataset
from datasets import Dataset
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import torch as T
import numpy as np
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
import torch.nn as nn
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from argparse import Namespace
from transformers import BertModel
from transformers import AdamW
from PIL import Image
import requests
import cv2 
from transformers import CLIPProcessor, CLIPModel
import shutil 
import math
import torch.optim as optim
from PIL import Image
import cv2 
from transformers import CLIPProcessor, CLIPModel
import pickle 

In [2]:
# Set the TOKENIZERS_PARALLELISM environment variable to "false" for this process.
# NOTE(review): presumably meant to silence the Hugging Face tokenizers fork warning
# when DataLoader workers are used - confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
!pip install datasets --upgrade

  pid, fd = os.forkpty()




In [4]:
!pip install ipywidgets --upgrade
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
Paths used for configuration of notebook: 
    	/root/.jupyter/nbconfig/notebook.json
Paths used for configuration of notebook: 
    	
      - Validating: [32mOK[0m
Paths used for configuration of notebook: 
    	/root/.jupyter/nbconfig/notebook.json


### **2. MSRVTT-RETRIEVAL PREPROCESSING**

In [5]:
# Input annotation files (JSON Lines), one per split; each is converted to .json below.
MSRVTT_MC = "/kaggle/input/msrvtt-retrieval/mc_test.jsonl"
MSRVTT_TRAIN = "/kaggle/input/msrvtt-retrieval/train.jsonl"
MSRVTT_VAL = "/kaggle/input/msrvtt-retrieval/val.jsonl"
MSRVTT_TEST = "/kaggle/input/msrvtt-retrieval/test.jsonl"
# MSRVTT_TRAIN_VAL_VIDEO = "/kaggle/input/msrvttqa/archive/TrainValVideo"
# Folder with the raw videos that are scanned when building the matching dataset.
MSRVTT_TEST_VIDEO = "/kaggle/input/msrvtt-test-video/MSRVTT/videos/all"
# Working folder that receives the converted .json annotation files.
MSRVTT_OUTPUT = "/kaggle/working/msrvtt"
# Working folder that receives the copies of the videos matched to a caption.
MSRVTT_VIDEO_PREPROCESSED = "/kaggle/working/msrvtt_video"

In [6]:
# Create the annotation output folder and report whether it was already there.
try:
    os.makedirs(MSRVTT_OUTPUT)
except FileExistsError:
    print(f"Directory '{MSRVTT_OUTPUT}' already exists.")
else:
    print(f"Directory '{MSRVTT_OUTPUT}' created successfully.")

Directory '/kaggle/working/msrvtt' created successfully.


In [7]:
def convert_jsonl_to_json(input_jsonl_file, output_json_folder):
    """Convert a JSON Lines file into a single JSON file holding one array.

    The output file keeps the input's base name with a ``.json`` extension and
    is written inside ``output_json_folder`` (created when missing).

    Args:
        input_jsonl_file: Path of the ``.jsonl`` file; one JSON document per line.
        output_json_folder: Folder that receives the converted file.

    Returns:
        The path of the written ``.json`` file.

    Blank lines are skipped. Lines that are not valid JSON are reported on
    stdout with their line number and skipped as well.
    """
    # Ensure the output folder exists
    os.makedirs(output_json_folder, exist_ok=True)

    # Determine the output JSON filename
    stem = os.path.splitext(os.path.basename(input_jsonl_file))[0]
    output_json_file = os.path.join(output_json_folder, stem + '.json')

    # Read the JSONL file and aggregate the records.
    # The encoding is explicit so the result does not depend on the platform default.
    records = []
    with open(input_jsonl_file, 'r', encoding='utf-8') as jsonl_file:
        for line_number, raw_line in enumerate(jsonl_file, start=1):
            line = raw_line.strip()
            if not line:  # Skip empty lines
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as e:
                # Best effort: report the broken line and keep converting the rest
                print(f"Error decoding JSON on line {line_number}: {e}")

    # Write to the JSON file
    with open(output_json_file, 'w', encoding='utf-8') as json_file:
        json.dump(records, json_file, indent=4)

    print(f"Converted {input_jsonl_file} to {output_json_file}")
    return output_json_file


In [8]:
# Convert the four annotation files in order; each call returns the path of the .json it wrote.
MSRVTT_MC_JSON, MSRVTT_TRAIN_JSON, MSRVTT_VAL_JSON, MSRVTT_TEST_JSON = (
    convert_jsonl_to_json(jsonl_path, MSRVTT_OUTPUT)
    for jsonl_path in (MSRVTT_MC, MSRVTT_TRAIN, MSRVTT_VAL, MSRVTT_TEST)
)

Converted /kaggle/input/msrvtt-retrieval/mc_test.jsonl to /kaggle/working/msrvtt/mc_test.json
Converted /kaggle/input/msrvtt-retrieval/train.jsonl to /kaggle/working/msrvtt/train.json
Converted /kaggle/input/msrvtt-retrieval/val.jsonl to /kaggle/working/msrvtt/val.json
Converted /kaggle/input/msrvtt-retrieval/test.jsonl to /kaggle/working/msrvtt/test.json


In [9]:
# Loading the converted JSON files straight into DataFrames
def _read_json_frame(json_path):
    """Read one converted annotation file and return its records as a DataFrame."""
    # Explicit encoding: do not depend on the platform default when reading JSON.
    with open(json_path, 'r', encoding='utf-8') as json_file:
        return pd.DataFrame(json.load(json_file))


# Each name is bound once, to a DataFrame (never to a file handle or a raw list).
msrvtt_mc = _read_json_frame(MSRVTT_MC_JSON)
msrvtt_train = _read_json_frame(MSRVTT_TRAIN_JSON)
msrvtt_val = _read_json_frame(MSRVTT_VAL_JSON)
msrvtt_test = _read_json_frame(MSRVTT_TEST_JSON)

msrvtt_mc

Unnamed: 0,qid,clip_name,title,answer,options
0,mc0,video9770,msr143897,0,"[the boy is trying to fix the problem, a movie..."
1,mc1,video9771,msr169115,2,[a man dismisses a group of soldiers and a sce...
2,mc2,video7020,msr160593,2,[a wining team celebrates their victory at the...
3,mc3,video9773,msr145898,3,"[a man yells at a faucet then punches it, peop..."
4,mc4,video7026,msr162877,2,"[basically humans have helping intensity, a ro..."
...,...,...,...,...,...
2985,mc2985,video7937,msr195060,2,[a male singer performs before a live audience...
2986,mc2986,video7930,msr183709,4,"[a journalist talking to a guest, a woman head..."
2987,mc2987,video7931,msr174474,2,[animated characters from the beauty and the b...
2988,mc2988,video7932,msr178667,1,[a man is laying in bed next to a woman and sh...


In [10]:
# Bring msrvtt_mc into the notebook's standard layout:
# one column per answer option, 'qid' renamed to 'id', fixed column order.
option_columns = ['a1', 'a2', 'a3', 'a4', 'a5']

# Expand each 'options' list into five columns aligned on the original index
options_wide = pd.DataFrame(
    msrvtt_mc['options'].tolist(),
    index=msrvtt_mc.index,
    columns=option_columns,
)

msrvtt = (
    pd.concat([msrvtt_mc, options_wide], axis=1)
    .rename(columns={'qid': 'id'})
    .loc[:, ['title', 'id', 'clip_name', *option_columns, 'answer']]
)

# Show the reordered DataFrame
msrvtt

Unnamed: 0,title,id,clip_name,a1,a2,a3,a4,a5,answer
0,msr143897,mc0,video9770,the boy is trying to fix the problem,a movie trailer shows various scenes from a movie,asian man discusses technology in the younger ...,two men on wave runner in ocean rescuing a surfer,a group is dancing,0
1,msr169115,mc1,video9771,a man dismisses a group of soldiers and a scen...,a woman pushing a stroller,a young girl is in the gym,a woman is putting items into a miniature toy ...,a game show host hosting a game,2
2,msr160593,mc2,video7020,a wining team celebrates their victory at the ...,halo warriors music video,a woman wraps a baby doll in some fake leaves,people are playingg match,a person solving the rubik s cube,2
3,msr145898,mc3,video9773,a man yells at a faucet then punches it,people on a video laughing,a group of people on stage on the voice,an advertisement for a driving video game,a person is playing a video game,3
4,msr162877,mc4,video7026,basically humans have helping intensity,a rock band preforming a song,the announcer talks about the interior feature...,many women are walking on a runway in brown an...,a man carries a green block,2
...,...,...,...,...,...,...,...,...,...
2985,msr195060,mc2985,video7937,a male singer performs before a live audience,a male narrating a video game in the image,a curious gerbil peeks out of a white tube loo...,a group of women preform on stage,several people taste testing something,2
2986,msr183709,mc2986,video7930,a journalist talking to a guest,a woman headbutts a man,two girls going in a taxi while both speaking ...,a cartoon character is carrying a gun,a young woman in a red and white striped shirt...,4
2987,msr174474,mc2987,video7931,animated characters from the beauty and the be...,the man talks about how 7 million people are l...,the person talks about the woman,a boy is describing the back of a computer,someone is making food,2
2988,msr178667,mc2988,video7932,a man is laying in bed next to a woman and she...,a promo for a tv show or movie with spoken wor...,a young man in an ill fitting suit gives a pre...,there is a brown hair woman talking from the k...,a person zooms into the welcome board of a pla...,1


In [11]:
# Stack the validation and test annotations into one caption table
msrvtt_total = pd.concat([msrvtt_val, msrvtt_test])

# Left join on clip_name: every multiple-choice row is kept and receives its caption
# when one exists; rows without a match get NaN in 'caption'.
caption_lookup = msrvtt_total[['clip_name', 'caption']]
final_msrvtt = msrvtt.merge(caption_lookup, how='left', on='clip_name')

print(final_msrvtt.isnull().sum())
final_msrvtt

title          0
id             0
clip_name      0
a1             0
a2             0
a3             0
a4             0
a5             0
answer         0
caption      990
dtype: int64


Unnamed: 0,title,id,clip_name,a1,a2,a3,a4,a5,answer,caption
0,msr143897,mc0,video9770,the boy is trying to fix the problem,a movie trailer shows various scenes from a movie,asian man discusses technology in the younger ...,two men on wave runner in ocean rescuing a surfer,a group is dancing,0,a person is connecting something to system
1,msr169115,mc1,video9771,a man dismisses a group of soldiers and a scen...,a woman pushing a stroller,a young girl is in the gym,a woman is putting items into a miniature toy ...,a game show host hosting a game,2,a little girl does gymnastics
2,msr160593,mc2,video7020,a wining team celebrates their victory at the ...,halo warriors music video,a woman wraps a baby doll in some fake leaves,people are playingg match,a person solving the rubik s cube,2,a woman creating a fondant baby and flower
3,msr145898,mc3,video9773,a man yells at a faucet then punches it,people on a video laughing,a group of people on stage on the voice,an advertisement for a driving video game,a person is playing a video game,3,a boy plays grand theft auto 5
4,msr162877,mc4,video7026,basically humans have helping intensity,a rock band preforming a song,the announcer talks about the interior feature...,many women are walking on a runway in brown an...,a man carries a green block,2,a man is giving a review on a vehicle
...,...,...,...,...,...,...,...,...,...,...
2985,msr195060,mc2985,video7937,a male singer performs before a live audience,a male narrating a video game in the image,a curious gerbil peeks out of a white tube loo...,a group of women preform on stage,several people taste testing something,2,
2986,msr183709,mc2986,video7930,a journalist talking to a guest,a woman headbutts a man,two girls going in a taxi while both speaking ...,a cartoon character is carrying a gun,a young woman in a red and white striped shirt...,4,
2987,msr174474,mc2987,video7931,animated characters from the beauty and the be...,the man talks about how 7 million people are l...,the person talks about the woman,a boy is describing the back of a computer,someone is making food,2,
2988,msr178667,mc2988,video7932,a man is laying in bed next to a woman and she...,a promo for a tv show or movie with spoken wor...,a young man in an ill fitting suit gives a pre...,there is a brown hair woman talking from the k...,a person zooms into the welcome board of a pla...,1,


In [12]:
# The caption becomes the 'question'; rows left without a caption by the merge are dropped,
# then the columns are put in their final order.
question_layout = ['title', 'id', 'clip_name', 'question', 'a1', 'a2', 'a3', 'a4', 'a5', 'answer']
final_msrvtt = (
    final_msrvtt
    .rename(columns={'caption': 'question'})
    .dropna(subset=["question"])
    .loc[:, question_layout]
)

print(final_msrvtt.isnull().sum())
final_msrvtt

title        0
id           0
clip_name    0
question     0
a1           0
a2           0
a3           0
a4           0
a5           0
answer       0
dtype: int64


Unnamed: 0,title,id,clip_name,question,a1,a2,a3,a4,a5,answer
0,msr143897,mc0,video9770,a person is connecting something to system,the boy is trying to fix the problem,a movie trailer shows various scenes from a movie,asian man discusses technology in the younger ...,two men on wave runner in ocean rescuing a surfer,a group is dancing,0
1,msr169115,mc1,video9771,a little girl does gymnastics,a man dismisses a group of soldiers and a scen...,a woman pushing a stroller,a young girl is in the gym,a woman is putting items into a miniature toy ...,a game show host hosting a game,2
2,msr160593,mc2,video7020,a woman creating a fondant baby and flower,a wining team celebrates their victory at the ...,halo warriors music video,a woman wraps a baby doll in some fake leaves,people are playingg match,a person solving the rubik s cube,2
3,msr145898,mc3,video9773,a boy plays grand theft auto 5,a man yells at a faucet then punches it,people on a video laughing,a group of people on stage on the voice,an advertisement for a driving video game,a person is playing a video game,3
4,msr162877,mc4,video7026,a man is giving a review on a vehicle,basically humans have helping intensity,a rock band preforming a song,the announcer talks about the interior feature...,many women are walking on a runway in brown an...,a man carries a green block,2
...,...,...,...,...,...,...,...,...,...,...
2972,msr141250,mc2972,video9737,a news clip from rt of a terror attack,a bloomberg analyst talking about growth in ma...,three girls at a public place and young man sh...,the girl is inside the gaming side and the jol...,a news video showing the aftermath of a terror...,an education analyst speaks about the data sci...,3
2977,msr152848,mc2977,video8002,a man is folding a piece of paper,a person is folding origami,a man explains about a disease called kuru dis...,shrimp is being shown and it seems that they a...,slide show of couples in love,nba star tracy mcgrady is being interviewed in...,0
2981,msr159368,mc2981,video8006,a person flying in a helicopter and is going t...,a woman talking about the movie brooklyn nine ...,a person is preparing a food on a pan,a reel of sports highlights,a video game plane is flying over water,a person is folding paper,3
2982,msr158401,mc2982,video7934,some women are dancing on a stage,a video game about car race and shooting,red headed woman is travelling in europe by ai...,a girl group performance on stage,chefs and waiters move about in a very large k...,game scene is shown here,2


In [13]:
# Checking for empty strings
# DataFrame.equals("") compares the whole frame against a single string and is
# therefore always False; an element-wise comparison is needed to detect empty cells.
print(bool(final_msrvtt.eq("").any().any()))

False


In [14]:
# Lower-case every text column (all columns except the numeric-looking 'answer')
text_columns = ['title','id', 'clip_name','question', 'a1', 'a2', 'a3', 'a4', 'a5']

for column in text_columns:
    final_msrvtt[column] = final_msrvtt[column].str.lower()

final_msrvtt

Unnamed: 0,title,id,clip_name,question,a1,a2,a3,a4,a5,answer
0,msr143897,mc0,video9770,a person is connecting something to system,the boy is trying to fix the problem,a movie trailer shows various scenes from a movie,asian man discusses technology in the younger ...,two men on wave runner in ocean rescuing a surfer,a group is dancing,0
1,msr169115,mc1,video9771,a little girl does gymnastics,a man dismisses a group of soldiers and a scen...,a woman pushing a stroller,a young girl is in the gym,a woman is putting items into a miniature toy ...,a game show host hosting a game,2
2,msr160593,mc2,video7020,a woman creating a fondant baby and flower,a wining team celebrates their victory at the ...,halo warriors music video,a woman wraps a baby doll in some fake leaves,people are playingg match,a person solving the rubik s cube,2
3,msr145898,mc3,video9773,a boy plays grand theft auto 5,a man yells at a faucet then punches it,people on a video laughing,a group of people on stage on the voice,an advertisement for a driving video game,a person is playing a video game,3
4,msr162877,mc4,video7026,a man is giving a review on a vehicle,basically humans have helping intensity,a rock band preforming a song,the announcer talks about the interior feature...,many women are walking on a runway in brown an...,a man carries a green block,2
...,...,...,...,...,...,...,...,...,...,...
2972,msr141250,mc2972,video9737,a news clip from rt of a terror attack,a bloomberg analyst talking about growth in ma...,three girls at a public place and young man sh...,the girl is inside the gaming side and the jol...,a news video showing the aftermath of a terror...,an education analyst speaks about the data sci...,3
2977,msr152848,mc2977,video8002,a man is folding a piece of paper,a person is folding origami,a man explains about a disease called kuru dis...,shrimp is being shown and it seems that they a...,slide show of couples in love,nba star tracy mcgrady is being interviewed in...,0
2981,msr159368,mc2981,video8006,a person flying in a helicopter and is going t...,a woman talking about the movie brooklyn nine ...,a person is preparing a food on a pan,a reel of sports highlights,a video game plane is flying over water,a person is folding paper,3
2982,msr158401,mc2982,video7934,some women are dancing on a stage,a video game about car race and shooting,red headed woman is travelling in europe by ai...,a girl group performance on stage,chefs and waiters move about in a very large k...,game scene is shown here,2


In [15]:
# Trim leading and trailing whitespace in every text column
for column in text_columns:
    final_msrvtt[column] = final_msrvtt[column].str.strip()

# Removing Punctuations
PUNCT_TO_REMOVE = string.punctuation
# Translation table built once instead of on every call (the function is mapped over every cell).
_PUNCT_TABLE = str.maketrans('', '', PUNCT_TO_REMOVE)


def remove_punctuation(text):
    """Return `text` with every character of `PUNCT_TO_REMOVE` deleted.

    Args:
        text: The string to clean.

    Returns:
        A new string without ASCII punctuation; all other characters are kept in order.
    """
    return text.translate(_PUNCT_TABLE)

# Strip punctuation from each text column, one cell at a time
for column in text_columns:
    final_msrvtt[column] = final_msrvtt[column].map(remove_punctuation)
final_msrvtt.head()

Unnamed: 0,title,id,clip_name,question,a1,a2,a3,a4,a5,answer
0,msr143897,mc0,video9770,a person is connecting something to system,the boy is trying to fix the problem,a movie trailer shows various scenes from a movie,asian man discusses technology in the younger ...,two men on wave runner in ocean rescuing a surfer,a group is dancing,0
1,msr169115,mc1,video9771,a little girl does gymnastics,a man dismisses a group of soldiers and a scen...,a woman pushing a stroller,a young girl is in the gym,a woman is putting items into a miniature toy ...,a game show host hosting a game,2
2,msr160593,mc2,video7020,a woman creating a fondant baby and flower,a wining team celebrates their victory at the ...,halo warriors music video,a woman wraps a baby doll in some fake leaves,people are playingg match,a person solving the rubik s cube,2
3,msr145898,mc3,video9773,a boy plays grand theft auto 5,a man yells at a faucet then punches it,people on a video laughing,a group of people on stage on the voice,an advertisement for a driving video game,a person is playing a video game,3
4,msr162877,mc4,video7026,a man is giving a review on a vehicle,basically humans have helping intensity,a rock band preforming a song,the announcer talks about the interior feature...,many women are walking on a runway in brown an...,a man carries a green block,2


In [16]:
# Text Cleaning

# A "special character" is anything other than an ASCII letter, a digit, '_', '-' or a space.
# Raw string on purpose: '\-' inside a normal string literal is an invalid escape sequence.
SPECIAL_CHAR_PATTERN = r'[^a-zA-Z0-9_\- ]'

# Flag the cells of the text columns that contain a special character.
# Only `text_columns` are scanned because the `.str` accessor raises on non-string columns.
# NOTE(review): 'answer' looks numeric in the displayed frames - confirm.
special_char_mask = final_msrvtt[text_columns].apply(
    lambda col: col.str.contains(SPECIAL_CHAR_PATTERN, na=False)
)

# Rows with at least one special character, kept in a variable so they can be inspected
rows_with_special_chars = final_msrvtt[special_char_mask.any(axis=1)]

# Remove special characters from the text columns
final_msrvtt[text_columns] = final_msrvtt[text_columns].apply(
    lambda col: col.str.replace(SPECIAL_CHAR_PATTERN, '', regex=True)
)

# Sanity check: after the replacement no text cell may still match the pattern
assert not final_msrvtt[text_columns].apply(
    lambda col: col.str.contains(SPECIAL_CHAR_PATTERN, na=False)
).any().any(), "special characters remain after cleaning"

In [17]:
# Removing Stopwords
stop = set(stopwords.words('english'))


def remove_stopwords(text):
    """Return `text` with every whitespace-separated word found in `stop` removed.

    The remaining words are joined back with single spaces.
    """
    kept_words = (word for word in text.split() if word not in stop)
    return ' '.join(kept_words)


stopword_columns = ['title','id', 'clip_name','question', 'a1', 'a2', 'a3', 'a4', 'a5']
for column in stopword_columns:
    final_msrvtt[column] = final_msrvtt[column].map(remove_stopwords)

In [18]:
# New frame with a fresh 0..n-1 index; final_msrvtt keeps its original labels
last_msrvtt = final_msrvtt.reset_index(drop=True)
last_msrvtt

Unnamed: 0,title,id,clip_name,question,a1,a2,a3,a4,a5,answer
0,msr143897,mc0,video9770,person connecting something system,boy trying fix problem,movie trailer shows various scenes movie,asian man discusses technology younger generat...,two men wave runner ocean rescuing surfer,group dancing,0
1,msr169115,mc1,video9771,little girl gymnastics,man dismisses group soldiers scene soldiers wa...,woman pushing stroller,young girl gym,woman putting items miniature toy oven,game show host hosting game,2
2,msr160593,mc2,video7020,woman creating fondant baby flower,wining team celebrates victory end game,halo warriors music video,woman wraps baby doll fake leaves,people playingg match,person solving rubik cube,2
3,msr145898,mc3,video9773,boy plays grand theft auto 5,man yells faucet punches,people video laughing,group people stage voice,advertisement driving video game,person playing video game,3
4,msr162877,mc4,video7026,man giving review vehicle,basically humans helping intensity,rock band preforming song,announcer talks interior features car,many women walking runway brown red dresses,man carries green block,2
...,...,...,...,...,...,...,...,...,...,...
1995,msr141250,mc2972,video9737,news clip rt terror attack,bloomberg analyst talking growth major cities,three girls public place young man shouts medi...,girl inside gaming side jolly mood,news video showing aftermath terrorist attack ...,education analyst speaks data science,3
1996,msr152848,mc2977,video8002,man folding piece paper,person folding origami,man explains disease called kuru disease,shrimp shown seems cooked,slide show couples love,nba star tracy mcgrady interviewed beyond glor...,0
1997,msr159368,mc2981,video8006,person flying helicopter going shoot people,woman talking movie brooklyn nine nine,person preparing food pan,reel sports highlights,video game plane flying water,person folding paper,3
1998,msr158401,mc2982,video7934,women dancing stage,video game car race shooting,red headed woman travelling europe airplane,girl group performance stage,chefs waiters move large kitchen,game scene shown,2


In [19]:
# Word-frequency tables over the question and the five answer options
frequency_columns = ['question', 'a1', 'a2', 'a3', 'a4', 'a5']

# One string per row: the six text fields joined with spaces
combined_text = last_msrvtt[frequency_columns].apply(lambda row: ' '.join(row), axis=1)

# Count every whitespace-separated word across all rows
word_counts = Counter(word for row_text in combined_text for word in row_text.split())
word_count_dict = dict(word_counts)

# Same counts as a dictionary ordered from the most to the least frequent word
sorted_word_count_dict = dict(word_counts.most_common())

In [20]:
# Work on a copy so the token lists written below do not overwrite the cleaned strings in last_msrvtt.
tokenized_text = last_msrvtt.copy() 
def tokenize_text(text):
    """Tokenize `text` with `word_tokenize`; any non-string input yields an empty list."""
    if not isinstance(text, str):
        # Nothing to tokenize for non-string input
        return []
    return word_tokenize(text)


# Replace every cell of the text columns with its list of tokens
for column in text_columns:
    tokenized_text[column] = tokenized_text[column].map(tokenize_text)

tokenized_text


Unnamed: 0,title,id,clip_name,question,a1,a2,a3,a4,a5,answer
0,[msr143897],[mc0],[video9770],"[person, connecting, something, system]","[boy, trying, fix, problem]","[movie, trailer, shows, various, scenes, movie]","[asian, man, discusses, technology, younger, g...","[two, men, wave, runner, ocean, rescuing, surfer]","[group, dancing]",0
1,[msr169115],[mc1],[video9771],"[little, girl, gymnastics]","[man, dismisses, group, soldiers, scene, soldi...","[woman, pushing, stroller]","[young, girl, gym]","[woman, putting, items, miniature, toy, oven]","[game, show, host, hosting, game]",2
2,[msr160593],[mc2],[video7020],"[woman, creating, fondant, baby, flower]","[wining, team, celebrates, victory, end, game]","[halo, warriors, music, video]","[woman, wraps, baby, doll, fake, leaves]","[people, playingg, match]","[person, solving, rubik, cube]",2
3,[msr145898],[mc3],[video9773],"[boy, plays, grand, theft, auto, 5]","[man, yells, faucet, punches]","[people, video, laughing]","[group, people, stage, voice]","[advertisement, driving, video, game]","[person, playing, video, game]",3
4,[msr162877],[mc4],[video7026],"[man, giving, review, vehicle]","[basically, humans, helping, intensity]","[rock, band, preforming, song]","[announcer, talks, interior, features, car]","[many, women, walking, runway, brown, red, dre...","[man, carries, green, block]",2
...,...,...,...,...,...,...,...,...,...,...
1995,[msr141250],[mc2972],[video9737],"[news, clip, rt, terror, attack]","[bloomberg, analyst, talking, growth, major, c...","[three, girls, public, place, young, man, shou...","[girl, inside, gaming, side, jolly, mood]","[news, video, showing, aftermath, terrorist, a...","[education, analyst, speaks, data, science]",3
1996,[msr152848],[mc2977],[video8002],"[man, folding, piece, paper]","[person, folding, origami]","[man, explains, disease, called, kuru, disease]","[shrimp, shown, seems, cooked]","[slide, show, couples, love]","[nba, star, tracy, mcgrady, interviewed, beyon...",0
1997,[msr159368],[mc2981],[video8006],"[person, flying, helicopter, going, shoot, peo...","[woman, talking, movie, brooklyn, nine, nine]","[person, preparing, food, pan]","[reel, sports, highlights]","[video, game, plane, flying, water]","[person, folding, paper]",3
1998,[msr158401],[mc2982],[video7934],"[women, dancing, stage]","[video, game, car, race, shooting]","[red, headed, woman, travelling, europe, airpl...","[girl, group, performance, stage]","[chefs, waiters, move, large, kitchen]","[game, scene, shown]",2


In [21]:
# Inspect the row at position 1999.
# NOTE(review): presumably the last row of the frame; the hard-coded position raises
# IndexError if the number of rows changes - confirm whether iloc[-1] is meant.
last_msrvtt.iloc[1999]

title                                         msr166104
id                                               mc2984
clip_name                                     video7936
question        soccer players hugging celebrating goal
a1           old footage wwf wrestling match hulk hogan
a2                                person preparing food
a3                                 soccer game progress
a4                               old clip baseball game
a5                              man garage showing cars
answer                                                2
Name: 1999, dtype: object

#### **2.1 Creation of Matching Dataset**

In [22]:
# Number of entries in the raw video folder (uses the path constant instead of repeating the literal)
print(len(os.listdir(MSRVTT_TEST_VIDEO)))

2990


In [23]:
# Create the folder for the matched videos and report whether it was already there.
try:
    os.makedirs(MSRVTT_VIDEO_PREPROCESSED)
except FileExistsError:
    print(f"Directory '{MSRVTT_VIDEO_PREPROCESSED}' already exists.")
else:
    print(f"Directory '{MSRVTT_VIDEO_PREPROCESSED}' created successfully.")

Directory '/kaggle/working/msrvtt_video' created successfully.


In [24]:
# Use the GPU when PyTorch reports an available CUDA device; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [25]:
def move_matched_video(src_folder, des_folder, clip_names=None):
    """Copy the ``.mp4`` files of `src_folder` whose clip name is wanted into `des_folder`.

    Despite its name, the function copies the files (``shutil.copy``); the
    sources are left in place. The clip name of a file is its name without the
    ``.mp4`` extension, whatever `src_folder` is.

    Args:
        src_folder: Folder scanned for video files (not recursive).
        des_folder: Existing folder that receives the copies.
        clip_names: Optional iterable of clip names to keep. When omitted, the
            ``clip_name`` column of the notebook-level ``last_msrvtt`` frame is used.

    Returns:
        None. The number of copied files is printed as ``count <n>``.
    """
    if clip_names is None:
        clip_names = last_msrvtt["clip_name"].values
    # A set gives constant-time membership tests instead of a scan per file
    wanted = set(clip_names)

    count = 0
    for video in os.listdir(src_folder):
        video_name, extension = os.path.splitext(video)
        # Real extension test: a bare substring check would also accept names like "x_mp4.txt"
        if extension != '.mp4' or video_name not in wanted:
            continue
        shutil.copy(os.path.join(src_folder, video), os.path.join(des_folder, video))
        count += 1
    print("count", count)

In [26]:
# Copy into MSRVTT_VIDEO_PREPROCESSED every video of MSRVTT_TEST_VIDEO whose clip name appears in last_msrvtt.
move_matched_video(MSRVTT_TEST_VIDEO, MSRVTT_VIDEO_PREPROCESSED)

count 2000
