In [1]:
import tensorflow.compat.v1 as tf
import pandas as pd
import numpy as np
import os
import sys
import ipdb
import time
import cv2
from tensorflow.keras.preprocessing import sequence
import matplotlib.pyplot as plt
from tensorflow.keras.layers import LSTM
import fire
from elapsedtimer import ElapsedTimer
from pathlib import Path
print('tensorflow version:',tf.__version__)
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed, BatchNormalization, Activation, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.sparse import SparseTensor

from tqdm import tqdm
from tensorflow.keras.applications.inception_v3 import InceptionV3
import json
import pickle
from sklearn.utils import shuffle

import argparse
tf.compat.v1.disable_eager_execution()
tf.disable_v2_behavior()

tensorflow version: 2.11.0
Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
def get_data(text_path, feat_path):
    """Load the caption corpus and keep the rows usable for training.

    The CSV at ``text_path`` must provide the columns ``Language``,
    ``VideoID``, ``Start``, ``End`` and ``Description``.  A row is kept when
    its language is ``'English'``, a feature file named
    ``<VideoID>_<Start>_<End>.npy`` exists directly inside ``feat_path`` and
    its description is a string.

    Args:
        text_path: Path of the comma-separated caption file.
        feat_path: Directory holding the per-video ``.npy`` feature files.

    Returns:
        pandas.DataFrame: The filtered rows with two extra columns,
        ``video_name`` (feature file name) and ``video_path`` (``feat_path``
        joined with that name).
    """
    text_data = pd.read_csv(text_path, sep=',')

    # Take an explicit copy of the filtered rows: the column assignments
    # below must not write into a slice of the frame that was read.
    text_data = text_data[text_data['Language'] == 'English'].copy()

    # Feature files are named after the clip: <VideoID>_<Start>_<End>.npy
    text_data['video_name'] = (
        text_data['VideoID']
        + '_' + text_data['Start'].astype(int).astype(str)
        + '_' + text_data['End'].astype(int).astype(str)
        + '.npy'
    )

    # Keep only clips whose feature file is present (set => O(1) membership).
    available = {
        f for f in os.listdir(feat_path)
        if f.endswith('.npy') and os.path.isfile(os.path.join(feat_path, f))
    }
    text_data = text_data[text_data['video_name'].isin(available)]

    # Missing descriptions are read as NaN (a float); drop them.
    text_data = text_data[text_data['Description'].map(lambda x: isinstance(x, str))].copy()

    text_data['video_path'] = text_data['video_name'].map(lambda x: os.path.join(feat_path, x))

    return text_data


In [3]:
class VideoCaptioning:
    
    
    def __init__(self,path_prj,caption_file,dfeat_dir,
                 cnn_feat_dim=4096,h_dim=512,
                 lstm_steps=80,video_steps=80,
                 out_steps=20, frame_step=80,
                 batch_size=8,learning_rate=1e-4,
                 epochs=10,model_path=None,
                 mode='train'):

        self.dim_image = cnn_feat_dim
        self.dim_hidden = h_dim
        self.batch_size = batch_size
        self.lstm_steps = lstm_steps
        self.video_lstm_step=video_steps
        self.caption_lstm_step=out_steps
        self.path_prj = Path(path_prj)
        self.mode = mode
        if mode == 'train':
            self.train_text_path = self.path_prj / caption_file
            self.train_feat_path = self.path_prj / dfeat_dir
        else:
            self.test_text_path = self.path_prj / caption_file
            self.test_feat_path = self.path_prj / dfeat_dir
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.frame_step = frame_step
        
        self.model_path = model_path



        
    def build_model(self):
        """Build the training graph of the two-layer LSTM captioning model.

        A fresh ``tf.Graph`` is created and populated with the word embedding,
        the frame-encoding projection, two ``LSTMCell`` layers, the output
        projection, the input placeholders, a loss that sums the per-step
        mean cross-entropy over ``self.caption_lstm_step`` decoding steps, and
        an Adam training op.

        ``self.n_words`` must be set by the caller before this method runs;
        it is not assigned in ``__init__``.

        Returns:
            tuple: ``(loss, video, video_mask, caption, caption_mask, probs,
            train_op)``.  ``probs`` is a list holding the logits tensor of
            every decoding step.
        """
        graph = tf.Graph()
        with graph.as_default():
            # Defining the weights associated with the Network
            with tf.device('/cpu:0'): 
                self.word_emb = tf.Variable(tf.random.uniform([self.n_words, self.dim_hidden], -0.1, 0.1), name='word_emb')
            self.lstm1 = tf.keras.layers.LSTMCell(self.dim_hidden)
            self.lstm2 = tf.keras.layers.LSTMCell(self.dim_hidden)
            # Linear projection of a frame feature (dim_image) to dim_hidden.
            self.encode_W = tf.Variable(tf.random.uniform([self.dim_image,self.dim_hidden], -0.1, 0.1), name='encode_W')
            self.encode_b = tf.Variable(tf.zeros([self.dim_hidden]), name='encode_b')

            # Linear projection of the second LSTM's output to vocabulary logits.
            self.word_emb_W = tf.Variable(tf.random.uniform([self.dim_hidden,self.n_words], -0.1,0.1), name='word_emb_W')
            self.word_emb_b = tf.Variable(tf.zeros([self.n_words]), name='word_emb_b')

            # Defining the Placeholders for the Model
            # NOTE(review): video_mask and caption_mask are returned to the caller
            # but never used in any computation inside this method.
            video = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.lstm_steps, self.dim_image])
            video_mask = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.lstm_steps])
            caption = tf.compat.v1.placeholder(tf.int32, [self.batch_size, self.lstm_steps+1])
            caption_mask = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.lstm_steps+1])

            # NOTE(review): the initializer is created and run here, before the
            # rest of the graph is built; train() runs its own initializer again
            # after this method returns.  The session opened here is neither
            # returned nor closed by this method.
            init = tf.compat.v1.global_variables_initializer()
            sess = tf.compat.v1.InteractiveSession(graph=graph)
            sess.run(init)

            # Encoding the Video
            # Flatten to (batch*steps, dim_image), project, then restore the step axis.
            video_flat = tf.reshape(video, [-1, self.dim_image])
            image_emb = tf.matmul(video_flat, self.encode_W) + self.encode_b         
            image_emb = tf.reshape(image_emb, [self.batch_size, self.lstm_steps, self.dim_hidden])


            # Initializing the States and Padding for the LSTM Cells
            state1 = self.lstm1.get_initial_state(batch_size=self.batch_size, dtype=tf.float32)
            state2 = self.lstm2.get_initial_state(batch_size=self.batch_size, dtype=tf.float32)
            # Zero input standing in for the word embedding (encoding stage) or
            # for the frame embedding (decoding stage).
            padding = tf.zeros([self.batch_size, self.dim_hidden])

            probs = []
            loss = 0.0

            #  Encoding Stage 
            # Frames go through lstm1; lstm2 receives [zeros, lstm1 output].
            for i in range(self.lstm_steps):
                if i > 0:
                    # NOTE(review): presumably a leftover from tf.nn.rnn_cell code;
                    # whether it has any effect on the Keras cells is unclear - confirm.
                    tf.compat.v1.get_variable_scope().reuse_variables()

                output1, state1 = self.lstm1(image_emb[:, i, :], state1)
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)
            # Decoding Stage to generate Captions
            # Column i of `caption` is the input word and column i+1 the target;
            # only the first caption_lstm_step+1 columns of `caption` are read.
            for i in range(self.caption_lstm_step):
                with tf.device("/cpu:0"):
                    current_embed = tf.nn.embedding_lookup(self.word_emb, caption[:, i])
                with tf.compat.v1.variable_scope("LSTM1", reuse=tf.compat.v1.AUTO_REUSE):
                    output1, state1 = self.lstm1(padding, state1)

                with tf.compat.v1.variable_scope("LSTM2", reuse=tf.compat.v1.AUTO_REUSE):
                    output2, state2 = self.lstm2(tf.concat([current_embed, output1], axis=1), state2)

                labels = caption[:, i+1]
                # NOTE(review): onehot_labels is never used below.
                onehot_labels = tf.one_hot(labels, depth=self.n_words)

                logit_words = tf.matmul(output2, self.word_emb_W) + self.word_emb_b

                # Computing the loss     
                # NOTE(review): caption_mask is not applied, so target positions
                # past the end of a caption also contribute - confirm this is intended.
                loss += tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logit_words))


                probs.append(logit_words)
            optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=self.learning_rate)
            # NOTE(review): `tape` is never used; the gradients come from
            # optimizer.compute_gradients.
            with tf.GradientTape() as tape:
                gradients = optimizer.compute_gradients(loss, var_list=tf.compat.v1.trainable_variables())
            with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope(), reuse=tf.compat.v1.AUTO_REUSE):
                train_op = optimizer.apply_gradients(gradients)

            return loss, video, video_mask, caption, caption_mask, probs, train_op


    def build_generator(self):
        """Build the inference graph that greedily decodes a caption.

        The same variables as in ``build_model`` are declared in a fresh
        ``tf.Graph``.  After the frames have been encoded, decoding starts from
        token id 1 (``'<bos>'`` in ``create_word_dict``) and feeds the arg-max
        word of every step back in as the next input, for
        ``self.caption_lstm_step`` steps.

        ``self.n_words`` must be set by the caller before this method runs.

        Returns:
            tuple: ``(video, video_mask, generated_words, probs, embeds)`` -
            the two placeholders, then one list entry per decoding step with
            the chosen word index, the logits and the fed-back embedding.
        """
        graph = tf.Graph()
        with graph.as_default():
            with tf.device('/cpu:0'): 
                self.word_emb = tf.Variable(tf.random.uniform([self.n_words, self.dim_hidden], -0.1, 0.1), name='word_emb')
                print('shape self.word_emb ',self.word_emb.shape)
            self.lstm1 = tf.keras.layers.LSTMCell(self.dim_hidden)
            self.lstm2 = tf.keras.layers.LSTMCell(self.dim_hidden)
            
            # Linear projection of a frame feature (dim_image) to dim_hidden.
            self.encode_W = tf.Variable(tf.random.uniform([self.dim_image,self.dim_hidden], -0.1, 0.1), name='encode_W')
            self.encode_b = tf.Variable(tf.zeros([self.dim_hidden]), name='encode_b')

            # Linear projection of the second LSTM's output to vocabulary logits.
            self.word_emb_W = tf.Variable(tf.random.uniform([self.dim_hidden,self.n_words], -0.1,0.1), name='word_emb_W')
            self.word_emb_b = tf.Variable(tf.zeros([self.n_words]), name='word_emb_b')

            # Defining the Placeholders for the Model
            # NOTE(review): video_mask is returned but never used in this graph.
            video = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.lstm_steps, self.dim_image])
            video_mask = tf.compat.v1.placeholder(tf.float32, [self.batch_size, self.lstm_steps])
            
            # NOTE(review): the initializer runs before the rest of the graph is
            # built, and this session is neither returned nor closed; inference()
            # opens its own session and restores the variables from a checkpoint.
            init = tf.compat.v1.global_variables_initializer()
            sess = tf.compat.v1.InteractiveSession(graph=graph)
            sess.run(init)

            # Flatten to (batch*steps, dim_image), project, then restore the step axis.
            video_flat = tf.reshape(video, [-1, self.dim_image])
            image_emb = tf.matmul(video_flat, self.encode_W) + self.encode_b         
            image_emb = tf.reshape(image_emb, [self.batch_size, self.lstm_steps, self.dim_hidden])

            state1 = self.lstm1.get_initial_state(batch_size=self.batch_size, dtype=tf.float32)
            state2 = self.lstm2.get_initial_state(batch_size=self.batch_size, dtype=tf.float32)
            # Zero input standing in for the missing modality at each stage.
            padding = tf.zeros([self.batch_size, self.dim_hidden])

            generated_words = []

            probs = []
            embeds = []

            # Encoding stage.
            # NOTE(review): iterates over video_lstm_step although the placeholder
            # is sized with lstm_steps - the two must agree; confirm.
            for i in range(0, self.video_lstm_step):
                if i > 0:
                    tf.compat.v1.get_variable_scope().reuse_variables()

                output1, state1 = self.lstm1(image_emb[:, i, :], state1)
                output2, state2 = self.lstm2(tf.concat([padding, output1], 1), state2)

            # Decoding stage (greedy).
            for i in range(0, self.caption_lstm_step):
                tf.compat.v1.get_variable_scope().reuse_variables()

                if i == 0:
                    # First input word: token id 1 for every batch row.
                    with tf.device('/cpu:0'):
                        current_embed = tf.nn.embedding_lookup(self.word_emb, tf.ones([self.batch_size], dtype=tf.int64))

                with tf.compat.v1.variable_scope("LSTM1"):
                    output1, state1 = self.lstm1(padding, state1)

                with tf.compat.v1.variable_scope("LSTM2"):
                    output2, state2 = self.lstm2(tf.concat([current_embed, output1], axis=1), state2)

                logit_words = tf.matmul(output2, self.word_emb_W) + self.word_emb_b
                # NOTE(review): only the first batch row's arg-max is kept and the
                # fed-back embedding gets a leading dimension of 1, so this looks
                # like it assumes batch_size == 1 - confirm.
                max_prob_index = tf.argmax(logit_words, 1)[0]
                generated_words.append(max_prob_index)
                probs.append(logit_words)

                with tf.device("/cpu:0"):
                    current_embed = tf.nn.embedding_lookup(self.word_emb, max_prob_index)
                    current_embed = tf.expand_dims(current_embed, 0)

                embeds.append(current_embed)

            return video, video_mask, generated_words, probs, embeds


    def get_data(text_path, feat_path):
        text_data = pd.read_csv(text_path, sep=',')

        # Filter by English language
        text_data = text_data[text_data['Language'] == 'English']

        # Construct video names as concatenation of VideoID, Start, and End features
        text_data['video_name'] = text_data['VideoID'] + '_' + text_data['Start'].astype(int).astype(str) + '_' + text_data['End'].astype(int).astype(str) + '.npy'

        # Filter by videos with available feature files
        npy_files = [f for f in os.listdir(feat_path) if os.path.isfile(os.path.join(feat_path, f)) and f.endswith('.npy')]
        text_data = text_data[text_data['video_name'].isin(npy_files)]

        # Filter out rows with non-string descriptions
        text_data = text_data[text_data['Description'].map(lambda x: isinstance(x, str))]

        # Add video_path column
        text_data['video_path'] = text_data['video_name'].map(lambda x: os.path.join(feat_path, x))

        return text_data

    def train_test_split(self,data,test_frac=0.2):
        indices = np.arange(len(data))
        np.random.shuffle(indices)
        train_indices_rec = int((1 - test_frac)*len(data))
        indices_train = indices[:train_indices_rec]
        indices_test = indices[train_indices_rec:]
        data_train, data_test = data.iloc[indices_train],data.iloc[indices_test]
        print(data_train.head())
        print(data_test.head())
        data_train.reset_index(inplace=True)
        data_test.reset_index(inplace=True)
        return data_train,data_test

    def get_test_data(self,text_path,feat_path):
        text_data = pd.read_csv(text_path, sep=',')
        text_data = text_data[text_data['Language'] == 'English']
        text_data['video_path'] = text_data.apply(lambda row: row['VideoID']+'_'+str(int(row['Start']))+'_'+str(int(row['End']))+'.npy', axis=1)
        text_data['video_path'] = text_data['video_path'].map(lambda x: os.path.join(feat_path, x))
        text_data = text_data[text_data['video_path'].map(lambda x: os.path.exists( x ))]
        text_data = text_data[text_data['Description'].map(lambda x: isinstance(x, str))]
    
        unique_filenames = sorted(text_data['video_path'].unique())
        test_data = text_data[text_data['video_path'].map(lambda x: x in unique_filenames)]
        return test_data       
        
    def create_word_dict(self,sentence_iterator, word_count_threshold=5):
        
        word_counts = {}
        sent_cnt = 0
        
        for sent in sentence_iterator:
            sent_cnt += 1
            for w in sent.lower().split(' '):
                word_counts[w] = word_counts.get(w, 0) + 1
        vocab = [w for w in word_counts if word_counts[w] >= word_count_threshold]
        
        idx2word = {}
        idx2word[0] = '<pad>'
        idx2word[1] = '<bos>'
        idx2word[2] = '<eos>'
        idx2word[3] = '<unk>'
    
        word2idx = {}
        word2idx['<pad>'] = 0
        word2idx['<bos>'] = 1
        word2idx['<eos>'] = 2
        word2idx['<unk>'] = 3
    
        for idx, w in enumerate(vocab):
            word2idx[w] = idx+4
            idx2word[idx+4] = w
    
        word_counts['<pad>'] = sent_cnt
        word_counts['<bos>'] = sent_cnt
        word_counts['<eos>'] = sent_cnt
        word_counts['<unk>'] = sent_cnt
    
        return word2idx,idx2word
        
        
        
    
    def train(self):
        """Train the captioning model and checkpoint it periodically.

        Steps:
            1. Load the corpus from ``self.train_text_path`` /
               ``self.train_feat_path`` with the module-level ``get_data`` and
               split it 80/20; both parts are written to ``train.csv`` and
               ``test.csv`` inside ``self.path_prj``.
            2. Build the vocabulary from the training captions and save it as
               ``word2idx.npy`` / ``idx2word.npy`` inside ``self.path_prj``.
            3. Build the graph with ``build_model`` and run ``self.epochs``
               epochs of mini-batch training, one caption per video and epoch.
            4. Append every batch loss to ``loss.txt`` in the working
               directory, save a loss-curve image ``<epoch>.png`` inside
               ``self.path_prj`` after every epoch, and save a checkpoint
               ``model-<epoch>`` every ninth epoch and after the last one.

        Raises:
            ValueError: If there are fewer distinct training videos than
                ``self.batch_size`` (no batch could be formed).
        """
        # Punctuation removed from every caption before tokenisation.
        strip_table = str.maketrans('', '', '.,"\n?!\\/')
        checkpoint_every = 9

        # Use the locations given to the constructor rather than fixed paths.
        data = get_data(self.train_text_path, self.train_feat_path)
        self.train_data,self.test_data = self.train_test_split(data,test_frac=0.2)

        train_csv = self.path_prj / 'train.csv'
        test_csv = self.path_prj / 'test.csv'
        self.train_data.to_csv(train_csv,index=False)
        self.test_data.to_csv(test_csv,index=False)
        print(f'Processed train file written to {train_csv}')
        print(f'Processed test file written to {test_csv}')

        captions = [cap.translate(strip_table) for cap in self.train_data['Description'].values]
        self.word2idx,self.idx2word = self.create_word_dict(captions, word_count_threshold=0)

        np.save(self.path_prj/ "word2idx",self.word2idx)
        np.save(self.path_prj/ "idx2word" ,self.idx2word)
        self.n_words = len(self.word2idx)

        # The placeholders have a fixed batch dimension, so at least one full
        # batch of distinct videos is needed.
        n_videos = self.train_data['video_path'].nunique()
        if n_videos < self.batch_size:
            raise ValueError(
                f'Need at least batch_size={self.batch_size} training videos, found {n_videos}')

        tf_loss, tf_video,tf_video_mask,tf_caption,tf_caption_mask, tf_probs,train_op= self.build_model()
        sess = tf.compat.v1.InteractiveSession()

        # One saver for the whole run; creating a new one per checkpoint would
        # add save/restore ops to the graph every time.
        saver = tf.compat.v1.train.Saver(max_to_keep=100, write_version=tf.train.SaverDef.V2)
        tf.compat.v1.global_variables_initializer().run()

        val_loss = []
        unk_idx = self.word2idx['<unk>']

        try:
            with open('loss.txt', 'w') as loss_out:
                for epoch in range(0,self.epochs):
                    val_loss_epoch = []

                    # Reorder the rows so that groupby(...).first() below can
                    # pick a different caption per video in each epoch.
                    order = np.random.permutation(len(self.train_data))
                    self.train_data = self.train_data.iloc[order]

                    current_train_data = self.train_data.groupby(['video_path']).first().reset_index()

                    # Every full batch, including the last one.
                    last_start = len(current_train_data) - self.batch_size
                    for start in range(0, last_start + 1, self.batch_size):
                        end = start + self.batch_size
                        start_time = time.time()

                        current_batch = current_train_data[start:end]
                        current_videos = current_batch['video_path'].values

                        # Zero-padded features plus a mask marking the real frames.
                        current_feats = np.zeros((self.batch_size, self.video_lstm_step,self.dim_image))
                        current_video_masks = np.zeros((self.batch_size,self.video_lstm_step))
                        for ind, vid in enumerate(current_videos):
                            feat = np.load(vid)[:self.video_lstm_step]
                            current_feats[ind][:len(feat)] = feat
                            current_video_masks[ind][:len(feat)] = 1

                        # '<bos>' + at most caption_lstm_step-2 caption tokens + '<eos>'.
                        current_caption_ind = []
                        for raw in current_batch['Description'].values:
                            words = ('<bos> ' + raw.translate(strip_table)).lower().split(' ')
                            if len(words) >= self.caption_lstm_step:
                                words = words[:self.caption_lstm_step-1]
                            words.append('<eos>')
                            current_caption_ind.append([self.word2idx.get(w, unk_idx) for w in words])

                        current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=self.lstm_steps)
                        current_caption_matrix = np.hstack( [current_caption_matrix, np.zeros( [len(current_caption_matrix), 1] ) ] ).astype(int)

                        # Mask covers the non-padding tokens plus one extra position.
                        current_caption_masks = np.zeros(current_caption_matrix.shape)
                        nonzeros = (current_caption_matrix != 0).sum(axis=1) + 1
                        for ind, n_tokens in enumerate(nonzeros):
                            current_caption_masks[ind, :n_tokens] = 1

                        _, loss_val = sess.run(
                                [train_op, tf_loss],
                                feed_dict={
                                    tf_video: current_feats,
                                    tf_video_mask : current_video_masks,
                                    tf_caption: current_caption_matrix,
                                    tf_caption_mask: current_caption_masks
                                    })
                        val_loss_epoch.append(loss_val)

                        print('Batch starting index: ', start, " Epoch: ", epoch, " loss: ", loss_val, ' Elapsed time: ', str((time.time() - start_time)))
                        loss_out.write('epoch ' + str(epoch) + ' loss ' + str(loss_val) + '\n')

                    # draw loss curve every epoch, on a fresh figure each time
                    val_loss.append(np.mean(val_loss_epoch))
                    fig, ax = plt.subplots()
                    ax.plot(range(len(val_loss)),val_loss, color='g')
                    ax.grid(True)
                    fig.savefig(self.path_prj / (str(epoch) + '.png'))
                    plt.close(fig)

                    if epoch % checkpoint_every == 0 or epoch == self.epochs - 1:
                        print("Epoch ", epoch, " is done. Saving the model ...")
                        saver.save(sess, os.path.join(self.path_prj, 'model'), global_step=epoch)
                        print("Epoch ", epoch, " model Saved!")
        finally:
            sess.close()
        
        
    
    def inference(self):
        """Caption every test video and write the results to a text file.

        The test corpus is loaded with ``get_test_data``, the vocabulary is
        read from ``idx2word.npy`` inside ``self.path_prj``, the generator
        graph is built with ``build_generator`` and its variables are restored
        from ``self.model_path``.  Videos whose feature array does not have
        exactly ``self.frame_step`` frames are skipped.  Each caption is
        printed and appended to ``video_captioning_results.txt`` inside
        ``self.path_prj``.
        """
        self.test_data = self.get_test_data(self.test_text_path,self.test_feat_path)
        test_videos = self.test_data['video_path'].unique()

        # NOTE: allow_pickle=True unpickles the saved dict; only load
        # vocabulary files that this project wrote itself.
        self.idx2word = pd.Series(np.load(self.path_prj / "idx2word.npy", allow_pickle=True).tolist())

        self.n_words = len(self.idx2word)
        video_tf, video_mask_tf, caption_tf, probs_tf, last_embed_tf = self.build_generator()

        sess = tf.compat.v1.InteractiveSession()
        try:
            saver = tf.compat.v1.train.Saver()
            saver.restore(sess,self.model_path)

            # `with` guarantees the results are flushed and the file is closed.
            with open(f'{self.path_prj}/video_captioning_results.txt', 'w') as results:
                for video_feat_path in test_videos:
                    # Add a leading batch axis of size 1.
                    video_feat = np.load(video_feat_path)[None,...]
                    if video_feat.shape[1] != self.frame_step:
                        continue
                    video_mask = np.ones((video_feat.shape[0], video_feat.shape[1]))

                    gen_word_idx = sess.run(caption_tf, feed_dict={video_tf:video_feat, video_mask_tf:video_mask})
                    gen_words = list(self.idx2word[gen_word_idx])

                    # Cut at the first '<eos>'.  If none was generated the whole
                    # sequence is kept (the former argmax-based cut returned
                    # position 0 in that case and kept only the first word).
                    if '<eos>' in gen_words:
                        gen_words = gen_words[:gen_words.index('<eos>')]
                    gen_sent = ' '.join(word for word in gen_words if word != '<bos>')

                    print(f'Video path {video_feat_path} : Generated Caption {gen_sent}')
                    print(gen_sent,'\n')
                    results.write(video_feat_path + '\n')
                    results.write(gen_sent + '\n\n')
        finally:
            sess.close()

    def process_main(self):
        if self.mode == 'train':
            self.train()
        else:
            self.inference()
            #aaa
       


# To modify: model path

In [None]:
if __name__ == '__main__':
    # Run the captioner in 'test' mode against a previously saved checkpoint
    # and time the whole run.
    with ElapsedTimer('Video Captioning'):
        VideoCaptioning(**{
            # Project layout
            'path_prj': 'C:/Users/User/Documents/P2M/test1',
            'caption_file': 'video_corpus.csv',
            'dfeat_dir': 'feature',
            # Network dimensions and sequence lengths
            'cnn_feat_dim': 4096,
            'h_dim': 512,
            'lstm_steps': 80,
            'video_steps': 80,
            'out_steps': 20,
            'frame_step': 80,
            # Run settings
            'batch_size': 1,
            'learning_rate': 1e-4,
            'epochs': 100,
            'model_path': r"C:\Users\User\Documents\P2M\test1\model-99",
            'mode': 'test',
        }).process_main()

shape self.word_emb  (12022, 512)




INFO:tensorflow:Restoring parameters from C:\Users\User\Documents\P2M\test1\model-99
Video path C:\Users\User\Documents\P2M\test1\feature\mv89psg6zh4_33_46.npy : Generated Caption a bird is running in a sink
a bird is running in a sink 

Video path C:\Users\User\Documents\P2M\test1\feature\ZbzDGXEwtGc_6_15.npy : Generated Caption a plane is flying
a plane is flying 

Video path C:\Users\User\Documents\P2M\test1\feature\g36ho6UrBz0_5_20.npy : Generated Caption a man is playing a guitar
a man is playing a guitar 

Video path C:\Users\User\Documents\P2M\test1\feature\04Gt01vatkk_248_265.npy : Generated Caption a woman is cutting an onion
a woman is cutting an onion 

Video path C:\Users\User\Documents\P2M\test1\feature\buJ5HDCinrM_150_166.npy : Generated Caption a woman is applying makeup to her face
a woman is applying makeup to her face 

Video path C:\Users\User\Documents\P2M\test1\feature\M_TP6UTrh30_343_358.npy : Generated Caption a band is performing on stage
a band is performing on

Video path C:\Users\User\Documents\P2M\test1\feature\3SKyc0aKx70_46_52.npy : Generated Caption a man is doing a rope
a man is doing a rope 

Video path C:\Users\User\Documents\P2M\test1\feature\6t0BpjwYKco_46_53.npy : Generated Caption a woman is peeling an apple
a woman is peeling an apple 

Video path C:\Users\User\Documents\P2M\test1\feature\PuQVs2Ch1LY_5_15.npy : Generated Caption a girl is playing the flute
a girl is playing the flute 

Video path C:\Users\User\Documents\P2M\test1\feature\elQqQfux7Po_12_22.npy : Generated Caption a dog is eating watermelon
a dog is eating watermelon 

Video path C:\Users\User\Documents\P2M\test1\feature\zFIn8DeV5PM_20_33.npy : Generated Caption a man is playing a piano
a man is playing a piano 

Video path C:\Users\User\Documents\P2M\test1\feature\dEn5E-TNezw_13_23.npy : Generated Caption a hedgehog is playing with a towel
a hedgehog is playing with a towel 

Video path C:\Users\User\Documents\P2M\test1\feature\J3_hkgu6MGc_43_52.npy : Generated Ca

Video path C:\Users\User\Documents\P2M\test1\feature\yC4eEuURH8c_19_28.npy : Generated Caption a cat is meowing
a cat is meowing 

Video path C:\Users\User\Documents\P2M\test1\feature\30GeJHYoerk_80_85.npy : Generated Caption a man is doing tricks with his feet
a man is doing tricks with his feet 

Video path C:\Users\User\Documents\P2M\test1\feature\aRo8x3xXeEA_3_8.npy : Generated Caption a man is riding a skateboard
a man is riding a skateboard 

Video path C:\Users\User\Documents\P2M\test1\feature\-rvjK0lE3z4_33_43.npy : Generated Caption a woman is cutting a potato
a woman is cutting a potato 

Video path C:\Users\User\Documents\P2M\test1\feature\yyxtyCaEVqk_329_360.npy : Generated Caption a man is putting butter on a tortilla
a man is putting butter on a tortilla 

Video path C:\Users\User\Documents\P2M\test1\feature\Q-soOsOLIGQ_0_22.npy : Generated Caption a man is playing with a tripod
a man is playing with a tripod 

Video path C:\Users\User\Documents\P2M\test1\feature\msCidKHO

Video path C:\Users\User\Documents\P2M\test1\feature\hW8TKz2Aea4_5_12.npy : Generated Caption a person is holding a hedgehog
a person is holding a hedgehog 

Video path C:\Users\User\Documents\P2M\test1\feature\V6Fxclwdfxc_56_66.npy : Generated Caption a parrot is playing with a ball
a parrot is playing with a ball 

Video path C:\Users\User\Documents\P2M\test1\feature\jlahRlo4jlU_30_36.npy : Generated Caption a man is walking down the road
a man is walking down the road 

Video path C:\Users\User\Documents\P2M\test1\feature\QT8iCDc7NGU_18_23.npy : Generated Caption a woman is cleaning a freezer
a woman is cleaning a freezer 

Video path C:\Users\User\Documents\P2M\test1\feature\p69d3UBdpR8_98_109.npy : Generated Caption a girl is applying makeup
a girl is applying makeup 

Video path C:\Users\User\Documents\P2M\test1\feature\nBFhvrAOFqY_89_103.npy : Generated Caption a monkey is sitting off his head
a monkey is sitting off his head 

Video path C:\Users\User\Documents\P2M\test1\featur

Video path C:\Users\User\Documents\P2M\test1\feature\6t0BpjwYKco_53_59.npy : Generated Caption a woman is cutting a potato
a woman is cutting a potato 

Video path C:\Users\User\Documents\P2M\test1\feature\KPPCwmU5OHQ_480_486.npy : Generated Caption a person is stirring a pot of chili
a person is stirring a pot of chili 

Video path C:\Users\User\Documents\P2M\test1\feature\V--abChiVVc_30_40.npy : Generated Caption a man is dancing
a man is dancing 

Video path C:\Users\User\Documents\P2M\test1\feature\nBJV56WUDng_38_47.npy : Generated Caption a man is cutting a banana
a man is cutting a banana 

Video path C:\Users\User\Documents\P2M\test1\feature\77iDIp40m9E_3_9.npy : Generated Caption a dog is running on the ground
a dog is running on the ground 

Video path C:\Users\User\Documents\P2M\test1\feature\qhknaG9ifbs_122_127.npy : Generated Caption a man is playing with a sword
a man is playing with a sword 

Video path C:\Users\User\Documents\P2M\test1\feature\hNECyt6Bo0A_5_10.npy : Gene

Video path C:\Users\User\Documents\P2M\test1\feature\_0nX-El-ySo_83_93.npy : Generated Caption a man is cutting a paper
a man is cutting a paper 

Video path C:\Users\User\Documents\P2M\test1\feature\IhwPQL9dFYc_124_129.npy : Generated Caption a woman is cutting a carrot
a woman is cutting a carrot 

Video path C:\Users\User\Documents\P2M\test1\feature\D2FbgK_kkE8_121_151.npy : Generated Caption a woman is cutting a pineapple
a woman is cutting a pineapple 

Video path C:\Users\User\Documents\P2M\test1\feature\BIqVvRh_cEY_143_149.npy : Generated Caption a boy is running
a boy is running 

Video path C:\Users\User\Documents\P2M\test1\feature\GWQTAe64m-0_91_94.npy : Generated Caption a man is dancing
a man is dancing 

Video path C:\Users\User\Documents\P2M\test1\feature\TwQzFqBqgeA_0_10.npy : Generated Caption a man is sitting on the beach
a man is sitting on the beach 

Video path C:\Users\User\Documents\P2M\test1\feature\qPXynwa_2iM_15_25.npy : Generated Caption a dog is running
a dog

Video path C:\Users\User\Documents\P2M\test1\feature\KIEWo_xJ5nU_0_7.npy : Generated Caption a baby is feeding a baby animal
a baby is feeding a baby animal 

Video path C:\Users\User\Documents\P2M\test1\feature\6owu8Mow0_g_527_534.npy : Generated Caption a man is putting pizza on pizza
a man is putting pizza on pizza 

Video path C:\Users\User\Documents\P2M\test1\feature\nhm_APPwhWk_6_12.npy : Generated Caption a man is riding a motorcycle
a man is riding a motorcycle 

Video path C:\Users\User\Documents\P2M\test1\feature\qvg9eM4Hmzk_1_9.npy : Generated Caption a man is lifting a truck
a man is lifting a truck 

Video path C:\Users\User\Documents\P2M\test1\feature\FA3OfhJK0mI_195_203.npy : Generated Caption a man and woman are kissing
a man and woman are kissing 

Video path C:\Users\User\Documents\P2M\test1\feature\WpB4-eLyiBc_10_20.npy : Generated Caption a man is playing a guitar
a man is playing a guitar 

Video path C:\Users\User\Documents\P2M\test1\feature\9xVqwy_4pmQ_36_49.npy 

Video path C:\Users\User\Documents\P2M\test1\feature\rnawC5C8gSI_82_90.npy : Generated Caption a man is riding a skateboard
a man is riding a skateboard 

Video path C:\Users\User\Documents\P2M\test1\feature\FHwC2THZJfA_0_10.npy : Generated Caption a slow loris is playing
a slow loris is playing 

Video path C:\Users\User\Documents\P2M\test1\feature\Kf8oZGHDTt4_4_13.npy : Generated Caption two men are playing
two men are playing 

Video path C:\Users\User\Documents\P2M\test1\feature\bQJQGoJF7_k_162_169.npy : Generated Caption a man is putting some meat in a bag
a man is putting some meat in a bag 

Video path C:\Users\User\Documents\P2M\test1\feature\vMoOpQQy6sI_22_55.npy : Generated Caption two girls are dancing
two girls are dancing 

Video path C:\Users\User\Documents\P2M\test1\feature\EjAoBKagWQA_132_145.npy : Generated Caption a group of people are kissing
a group of people are kissing 

Video path C:\Users\User\Documents\P2M\test1\feature\IhwPQL9dFYc_143_147.npy : Generated Capti

Video path C:\Users\User\Documents\P2M\test1\feature\XNV7hKVu-Xg_40_47.npy : Generated Caption a group of people are riding horses
a group of people are riding horses 

Video path C:\Users\User\Documents\P2M\test1\feature\-t-ZWaJeH-o_0_15.npy : Generated Caption two girls are singing
two girls are singing 

Video path C:\Users\User\Documents\P2M\test1\feature\xgIIcPSh4EU_0_6.npy : Generated Caption a boat is moving through water
a boat is moving through water 

Video path C:\Users\User\Documents\P2M\test1\feature\GY5Dl00LrEI_2_13.npy : Generated Caption a woman is shooting a gun
a woman is shooting a gun 

Video path C:\Users\User\Documents\P2M\test1\feature\-uT_1VDvXok_8_15.npy : Generated Caption a baby rhino is walking
a baby rhino is walking 

Video path C:\Users\User\Documents\P2M\test1\feature\ptHkvYrH9fY_2_10.npy : Generated Caption a man is playing a flute
a man is playing a flute 

Video path C:\Users\User\Documents\P2M\test1\feature\Ted3imMggC0_48_58.npy : Generated Caption a

Video path C:\Users\User\Documents\P2M\test1\feature\pQYEZTwSVbQ_12_18.npy : Generated Caption a man is talking to the woman
a man is talking to the woman 

Video path C:\Users\User\Documents\P2M\test1\feature\fjDvKHkmxs0_72_87.npy : Generated Caption a man is pushing a car
a man is pushing a car 

Video path C:\Users\User\Documents\P2M\test1\feature\8MVo7fje_oE_125_130.npy : Generated Caption a man is pouring a lid on a plastic container
a man is pouring a lid on a plastic container 

Video path C:\Users\User\Documents\P2M\test1\feature\Zqmx-nXhLAg_0_10.npy : Generated Caption a man is holding a baby
a man is holding a baby 

Video path C:\Users\User\Documents\P2M\test1\feature\ao-9B8IV9_E_72_77.npy : Generated Caption a woman is riding a horse
a woman is riding a horse 

Video path C:\Users\User\Documents\P2M\test1\feature\qeKX-N1nKiM_52_59.npy : Generated Caption a woman is cutting a shrimp
a woman is cutting a shrimp 

Video path C:\Users\User\Documents\P2M\test1\feature\pUPKsHTDZT

Video path C:\Users\User\Documents\P2M\test1\feature\0vmoZEaN_-o_5_21.npy : Generated Caption a cat is playing with a watermelon
a cat is playing with a watermelon 

Video path C:\Users\User\Documents\P2M\test1\feature\Kxa0mnDj0bs_15_20.npy : Generated Caption a cat is playing with a dog
a cat is playing with a dog 

Video path C:\Users\User\Documents\P2M\test1\feature\yREFkmrrYiw_51_57.npy : Generated Caption a woman is peeling a potato
a woman is peeling a potato 

Video path C:\Users\User\Documents\P2M\test1\feature\KPPCwmU5OHQ_258_266.npy : Generated Caption a man is pouring oil into a pot
a man is pouring oil into a pot 

Video path C:\Users\User\Documents\P2M\test1\feature\SzEbtbNSg04_88_93.npy : Generated Caption a man is cutting a cucumber
a man is cutting a cucumber 

Video path C:\Users\User\Documents\P2M\test1\feature\9HTUcMjWB3g_122_128.npy : Generated Caption a woman is slicing a carrot
a woman is slicing a carrot 

Video path C:\Users\User\Documents\P2M\test1\feature\LuQ0

Video path C:\Users\User\Documents\P2M\test1\feature\RX6NSOuCCAE_13_27.npy : Generated Caption a man is shooting a gun
a man is shooting a gun 

Video path C:\Users\User\Documents\P2M\test1\feature\gHyXstpe_N8_95_100.npy : Generated Caption a man is opening a box of pizza
a man is opening a box of pizza 

Video path C:\Users\User\Documents\P2M\test1\feature\WTf5EgVY5uU_15_19.npy : Generated Caption a woman is cutting a piece of meat
a woman is cutting a piece of meat 

Video path C:\Users\User\Documents\P2M\test1\feature\PqSZ89FqpiY_65_75.npy : Generated Caption a person is cooking
a person is cooking 

Video path C:\Users\User\Documents\P2M\test1\feature\WEJfT-oB4v4_20_35.npy : Generated Caption a dog is playing with a puppy
a dog is playing with a puppy 

Video path C:\Users\User\Documents\P2M\test1\feature\Oq3FV_zdyy0_843_855.npy : Generated Caption a girl is singing
a girl is singing 

Video path C:\Users\User\Documents\P2M\test1\feature\ACOmKiJDkA4_57_65.npy : Generated Caption a 

Video path C:\Users\User\Documents\P2M\test1\feature\PeUHy0A1GF0_99_103.npy : Generated Caption a woman is cutting a vegetable
a woman is cutting a vegetable 

Video path C:\Users\User\Documents\P2M\test1\feature\r0E-0ntoNWo_20_30.npy : Generated Caption a hamster is eating
a hamster is eating 

Video path C:\Users\User\Documents\P2M\test1\feature\4JVpbYmqfcI_5_19.npy : Generated Caption a man is playing with cards
a man is playing with cards 

Video path C:\Users\User\Documents\P2M\test1\feature\g2IYQq7IkXc_124_132.npy : Generated Caption two bears are fighting
two bears are fighting 

Video path C:\Users\User\Documents\P2M\test1\feature\u_6tre9_99Q_13_25.npy : Generated Caption elephants are walking
elephants are walking 

Video path C:\Users\User\Documents\P2M\test1\feature\1N_Ic2pBM1o_2_23.npy : Generated Caption a woman is playing a guitar
a woman is playing a guitar 

Video path C:\Users\User\Documents\P2M\test1\feature\kWLNZzuo3do_192_196.npy : Generated Caption a woman is cutti

Video path C:\Users\User\Documents\P2M\test1\feature\kWLNZzuo3do_38_47.npy : Generated Caption a woman is slicing a vegetable
a woman is slicing a vegetable 

Video path C:\Users\User\Documents\P2M\test1\feature\0vmoZEaN_-o_4_12.npy : Generated Caption a cat is playing with a watermelon
a cat is playing with a watermelon 

Video path C:\Users\User\Documents\P2M\test1\feature\27mxaePkYB4_8_14.npy : Generated Caption a giraffe is walking
a giraffe is walking 

Video path C:\Users\User\Documents\P2M\test1\feature\kWLNZzuo3do_38_42.npy : Generated Caption a woman is cutting a carrot
a woman is cutting a carrot 

Video path C:\Users\User\Documents\P2M\test1\feature\LyYJiKxHZz0_0_10.npy : Generated Caption a group of people are running on a road
a group of people are running on a road 

Video path C:\Users\User\Documents\P2M\test1\feature\62I8I4sCPPQ_12_30.npy : Generated Caption a man is diving into a cliff
a man is diving into a cliff 

Video path C:\Users\User\Documents\P2M\test1\feature\

Video path C:\Users\User\Documents\P2M\test1\feature\PeUHy0A1GF0_28_32.npy : Generated Caption a woman is mixing flour
a woman is mixing flour 

Video path C:\Users\User\Documents\P2M\test1\feature\Jfu-WkuCgD0_140_150.npy : Generated Caption a man is slicing a potato
a man is slicing a potato 

Video path C:\Users\User\Documents\P2M\test1\feature\K1ZM0LSLVw8_481_487.npy : Generated Caption a man is cutting a cucumber
a man is cutting a cucumber 

Video path C:\Users\User\Documents\P2M\test1\feature\9Q0JfdP36kI_167_170.npy : Generated Caption a woman is putting meat on a grill
a woman is putting meat on a grill 

Video path C:\Users\User\Documents\P2M\test1\feature\ecVwxlXc1PQ_0_12.npy : Generated Caption a man is lifting weights
a man is lifting weights 

Video path C:\Users\User\Documents\P2M\test1\feature\s20OlIRK340_125_138.npy : Generated Caption a woman is peeling a potato
a woman is peeling a potato 

Video path C:\Users\User\Documents\P2M\test1\feature\UbmZAe5u5FI_111_115.npy : 

a man is cleaning his hands 

Video path C:\Users\User\Documents\P2M\test1\feature\5PpPOGU_0aA_65_75.npy : Generated Caption a man and woman are dancing
a man and woman are dancing 

Video path C:\Users\User\Documents\P2M\test1\feature\HkuOqWzXYVg_0_12.npy : Generated Caption a polar bear is sitting on the bed
a polar bear is sitting on the bed 

Video path C:\Users\User\Documents\P2M\test1\feature\_9iG5Ge01PM_3_11.npy : Generated Caption a kitten is sitting on its face
a kitten is sitting on its face 

Video path C:\Users\User\Documents\P2M\test1\feature\wON-YuA1GjA_3_63.npy : Generated Caption a woman is slicing a vegetable
a woman is slicing a vegetable 

Video path C:\Users\User\Documents\P2M\test1\feature\1pw5ZdRhiig_50_59.npy : Generated Caption a man is playing a flute
a man is playing a flute 

Video path C:\Users\User\Documents\P2M\test1\feature\TsagxA4DKuc_28_41.npy : Generated Caption a man is cleaning the floor
a man is cleaning the floor 

Video path C:\Users\User\Document

Video path C:\Users\User\Documents\P2M\test1\feature\i3cHNObcEh8_0_10.npy : Generated Caption a cat is eating watermelon
a cat is eating watermelon 

Video path C:\Users\User\Documents\P2M\test1\feature\OvRmRN1-O0Q_21_25.npy : Generated Caption two women are dancing
two women are dancing 

Video path C:\Users\User\Documents\P2M\test1\feature\O_NWtDShLeg_21_25.npy : Generated Caption a man is putting a car
a man is putting a car 

Video path C:\Users\User\Documents\P2M\test1\feature\EBWPZIjtnTM_1_6.npy : Generated Caption a man and a woman are talking on a stage
a man and a woman are talking on a stage 

Video path C:\Users\User\Documents\P2M\test1\feature\-DRy7rBg0IQ_31_37.npy : Generated Caption a woman is swimming in the water
a woman is swimming in the water 

Video path C:\Users\User\Documents\P2M\test1\feature\WTf5EgVY5uU_18_23.npy : Generated Caption a woman is cutting a piece of meat
a woman is cutting a piece of meat 

Video path C:\Users\User\Documents\P2M\test1\feature\4PcL6-

Video path C:\Users\User\Documents\P2M\test1\feature\MHWxjWwAbwM_10_25.npy : Generated Caption a dog is walking on the stairs
a dog is walking on the stairs 

Video path C:\Users\User\Documents\P2M\test1\feature\auFXfsCzAvQ_1_15.npy : Generated Caption a dog is walking
a dog is walking 

Video path C:\Users\User\Documents\P2M\test1\feature\ZL7P_XmSnEY_205_209.npy : Generated Caption a umbrella is floating in water
a umbrella is floating in water 

Video path C:\Users\User\Documents\P2M\test1\feature\TF6LNmwDr_0_2_10.npy : Generated Caption a cat is licking a lollipop
a cat is licking a lollipop 

Video path C:\Users\User\Documents\P2M\test1\feature\88DOMJ11q2M_34_43.npy : Generated Caption a woman is dancing
a woman is dancing 

Video path C:\Users\User\Documents\P2M\test1\feature\veE0E79dEEc_18_45.npy : Generated Caption a man is cutting a potato
a man is cutting a potato 

Video path C:\Users\User\Documents\P2M\test1\feature\6ZZkO14S7GY_5_15.npy : Generated Caption a girl is playing 

Video path C:\Users\User\Documents\P2M\test1\feature\W_iR4YVCxUY_8_15.npy : Generated Caption a dog is playing with a toy
a dog is playing with a toy 

Video path C:\Users\User\Documents\P2M\test1\feature\z0Si1XxMibg_0_30.npy : Generated Caption a baby is playing with a dog
a baby is playing with a dog 

Video path C:\Users\User\Documents\P2M\test1\feature\Rq_VfjGH7kg_104_110.npy : Generated Caption a man and woman are riding on a motorcycle
a man and woman are riding on a motorcycle 

Video path C:\Users\User\Documents\P2M\test1\feature\qLwgb3F0aPU_298_305.npy : Generated Caption a group of people are running in the field
a group of people are running in the field 

Video path C:\Users\User\Documents\P2M\test1\feature\pRpeEdMmmQ0_1_18.npy : Generated Caption a soccer player is kicking a soccer ball
a soccer player is kicking a soccer ball 

Video path C:\Users\User\Documents\P2M\test1\feature\fKqBnl8D1Qo_5_44.npy : Generated Caption people are dancing
people are dancing 

Video path C

a boy is playing with a window 

Video path C:\Users\User\Documents\P2M\test1\feature\lGk1MA6YP-M_36_48.npy : Generated Caption a woman is riding a horse
a woman is riding a horse 

Video path C:\Users\User\Documents\P2M\test1\feature\oeaVXK2GAyc_4_21.npy : Generated Caption a girl is talking on the phone
a girl is talking on the phone 

Video path C:\Users\User\Documents\P2M\test1\feature\fMXfphSi6Yw_7_12.npy : Generated Caption a girl is doing a boat
a girl is doing a boat 

Video path C:\Users\User\Documents\P2M\test1\feature\ejgwQqCHN1E_7_12.npy : Generated Caption a man is writing
a man is writing 

Video path C:\Users\User\Documents\P2M\test1\feature\4Hhdr1IPOGs_90_98.npy : Generated Caption a baby is sitting on the couch
a baby is sitting on the couch 

Video path C:\Users\User\Documents\P2M\test1\feature\Fe4tO5vW9_E_64_70.npy : Generated Caption a person is pouring water into a pot
a person is pouring water into a pot 

Video path C:\Users\User\Documents\P2M\test1\feature\_O9kW

Video path C:\Users\User\Documents\P2M\test1\feature\0xx13BuvVmo_25_36.npy : Generated Caption a girl is playing with a shower
a girl is playing with a shower 

Video path C:\Users\User\Documents\P2M\test1\feature\eb-Zp4pJLKk_2_19.npy : Generated Caption a baby is walking
a baby is walking 

Video path C:\Users\User\Documents\P2M\test1\feature\j1Z890_Q3so_131_138.npy : Generated Caption a woman is mixing a bowl
a woman is mixing a bowl 

Video path C:\Users\User\Documents\P2M\test1\feature\QsvBYKaCXEc_5_15.npy : Generated Caption a shark is swimming in the water
a shark is swimming in the water 

Video path C:\Users\User\Documents\P2M\test1\feature\7M-jsjLB20Y_11_30.npy : Generated Caption a woman is playing with a rabbit
a woman is playing with a rabbit 

Video path C:\Users\User\Documents\P2M\test1\feature\08pVpBq706k_175_212.npy : Generated Caption a cat is playing with a toy
a cat is playing with a toy 

Video path C:\Users\User\Documents\P2M\test1\feature\uiLr9bdOL0M_23_30.npy : G

Video path C:\Users\User\Documents\P2M\test1\feature\AJJ-iQkbRNE_97_109.npy : Generated Caption a man is cutting a pineapple
a man is cutting a pineapple 

Video path C:\Users\User\Documents\P2M\test1\feature\UqBafx3iRKw_22_29.npy : Generated Caption a baby tiger is playing with a ball
a baby tiger is playing with a ball 

Video path C:\Users\User\Documents\P2M\test1\feature\TdYN7cePiRI_6_13.npy : Generated Caption a man is lifting weights
a man is lifting weights 

Video path C:\Users\User\Documents\P2M\test1\feature\B4foOe9kUgY_0_8.npy : Generated Caption a panda is playing
a panda is playing 

Video path C:\Users\User\Documents\P2M\test1\feature\c_-eFL7Sfw4_19_35.npy : Generated Caption a man is riding a motorcycle
a man is riding a motorcycle 

Video path C:\Users\User\Documents\P2M\test1\feature\D1tTBncIsm8_198_205.npy : Generated Caption a man is lifting a bag
a man is lifting a bag 

Video path C:\Users\User\Documents\P2M\test1\feature\MW21lp833Vo_143_149.npy : Generated Caption

Video path C:\Users\User\Documents\P2M\test1\feature\4VLrxtf7Z_8_0_7.npy : Generated Caption a man is riding a horse
a man is riding a horse 

Video path C:\Users\User\Documents\P2M\test1\feature\tPkZK-PzeQE_10_28.npy : Generated Caption a man is slicing a tomato
a man is slicing a tomato 

Video path C:\Users\User\Documents\P2M\test1\feature\kIZanu909lw_67_80.npy : Generated Caption a man is cutting an onion
a man is cutting an onion 

Video path C:\Users\User\Documents\P2M\test1\feature\BtQEY2SL4g4_126_132.npy : Generated Caption a car is moving
a car is moving 

Video path C:\Users\User\Documents\P2M\test1\feature\kquB3rIgfGk_197_202.npy : Generated Caption a man is pouring a pitcher of water into a bowl
a man is pouring a pitcher of water into a bowl 

Video path C:\Users\User\Documents\P2M\test1\feature\zSPBC8EO6dY_97_110.npy : Generated Caption a man is pouring a liquid on the head of a man
a man is pouring a liquid on the head of a man 

Video path C:\Users\User\Documents\P2M\te

a man and woman are talking on a stage 

Video path C:\Users\User\Documents\P2M\test1\feature\k06Ge9ANKM8_5_16.npy : Generated Caption a dog is playing
a dog is playing 

Video path C:\Users\User\Documents\P2M\test1\feature\hJFBXHtxKIc_298_303.npy : Generated Caption a man is pouring sauce into a bowl of pasta
a man is pouring sauce into a bowl of pasta 

Video path C:\Users\User\Documents\P2M\test1\feature\8e0yXMa708Y_24_33.npy : Generated Caption a man is climbing a rock
a man is climbing a rock 

Video path C:\Users\User\Documents\P2M\test1\feature\lm0z7eLsbbw_9_23.npy : Generated Caption a monkey is smoking
a monkey is smoking 

Video path C:\Users\User\Documents\P2M\test1\feature\IY1Fn796MoY_7_10.npy : Generated Caption a man is cutting grass with scissors
a man is cutting grass with scissors 

Video path C:\Users\User\Documents\P2M\test1\feature\R24RktgUi-s_5_15.npy : Generated Caption a soccer player is playing a soccer ball
a soccer player is playing a soccer ball 

Video path 

Video path C:\Users\User\Documents\P2M\test1\feature\8PQiaurIiDM_247_255.npy : Generated Caption a man is cutting a sword
a man is cutting a sword 

Video path C:\Users\User\Documents\P2M\test1\feature\QTAqjSbMkXU_65_75.npy : Generated Caption a woman is putting some meat
a woman is putting some meat 

Video path C:\Users\User\Documents\P2M\test1\feature\-mAoVOhKy0c_4_9.npy : Generated Caption a train is moving on a track
a train is moving on a track 

Video path C:\Users\User\Documents\P2M\test1\feature\Z3rj9LbCOaM_0_12.npy : Generated Caption two baby otters are playing in the water
two baby otters are playing in the water 

Video path C:\Users\User\Documents\P2M\test1\feature\E4k0Aylzdyo_97_104.npy : Generated Caption a girl is riding a motorcycle
a girl is riding a motorcycle 

Video path C:\Users\User\Documents\P2M\test1\feature\D1X6qXkLjYE_41_49.npy : Generated Caption a man is dancing
a man is dancing 

Video path C:\Users\User\Documents\P2M\test1\feature\UbmZAe5u5FI_88_98.npy :

Video path C:\Users\User\Documents\P2M\test1\feature\umjc1CkO4JA_290_305.npy : Generated Caption a man is cutting a tree
a man is cutting a tree 

Video path C:\Users\User\Documents\P2M\test1\feature\9LHg5RUGukI_58_63.npy : Generated Caption a man is taking a box of a box
a man is taking a box of a box 

Video path C:\Users\User\Documents\P2M\test1\feature\k9Brw_0gncU_14_33.npy : Generated Caption a band is performing on stage
a band is performing on stage 

Video path C:\Users\User\Documents\P2M\test1\feature\ficwZQYmRLE_5_20.npy : Generated Caption a dog is playing with a toy
a dog is playing with a toy 

Video path C:\Users\User\Documents\P2M\test1\feature\hNPZmTlY_3Q_0_8.npy : Generated Caption a man is doing karate arts
a man is doing karate arts 

Video path C:\Users\User\Documents\P2M\test1\feature\0hyZ__3YhZc_380_384.npy : Generated Caption a man is pouring sauce into a pot
a man is pouring sauce into a pot 

Video path C:\Users\User\Documents\P2M\test1\feature\ul9Xvjt83eI_111_