In [1]:

# from torchsummary import summary
# !pip install opencv-python
import torchvision
import model.c2d as c2d
import os
import numpy as np
from torchvision.utils import save_image, make_grid
from torchvision.models import vgg19

import torch.nn as nn
import torch
import math
import cv2
from datetime import datetime
import time
import pandas as pd
from torch.utils.data import DataLoader

# torch.cuda.get_device_name()

In [2]:
class trafficvidset(torch.utils.data.Dataset):
    """Map-style dataset pairing fixed-length video clips with one CSV label row.

    Every ``.mp4`` in ``vid_folder`` must be named ``%Y-%m-%d_%H-%M-%S.mp4``; the
    name is parsed and converted with ``time.mktime`` (i.e. interpreted as local
    time) to the epoch second at which the recording starts.  Clip start offsets
    are enumerated every ``clip_delta`` seconds, and a clip is labelled with the
    CSV row whose column 0 equals
    ``video_start + clip_start + ceil(duration / 2)``.

    NOTE(review): a later cell of this notebook defines another class with the
    same name; once that cell has run it replaces this definition.

    Args:
        vid_folder: directory scanned (non-recursively) for ``.mp4`` files.
        audio_file: header-less CSV; column 0 is matched against epoch seconds.
        vid_fps: frames per second used to turn seconds into frame indices.
        duration: clip length in seconds.
        cols: CSV columns returned as the label; defaults to all but column 0.
        clip_delta: stride, in seconds, between consecutive clip starts.
        normalize_examples: stored as ``norm_eg``; not used anywhere else here.
    """

    def __init__(self,vid_folder,audio_file, vid_fps, duration,cols=None,clip_delta=1,normalize_examples=320):
        self.vid_fps = vid_fps
        self.duration = duration
        self.df = pd.read_csv(audio_file,header=None)
        self.cols = cols
        # Honour the constructor argument (it used to be overridden by a literal 320).
        self.norm_eg = normalize_examples
        self.curr_eg = 0
        self.running_norm = None
        self.running_std = None
        if self.cols is None:
            self.cols = self.df.columns[1:]
        # Each entry is [clip_start_second, vid_tuple]; vid_tuple is shared by
        # all clips of one file: [capture, frame_count, start_epoch, last_second].
        self.clips = []
        for vid_file in os.listdir(vid_folder):
            if vid_file[-4:] != '.mp4':
                continue
            vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
            vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
            frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
            # Last start second that still leaves a full clip; negative for
            # videos shorter than one clip, which then contribute no clips.
            last_second = int(frame_count/vid_fps) - duration
            vid_tuple = [vid_obj,frame_count,vid_time_stamp,last_second]
            for i in range(0,last_second+1,clip_delta):
                self.clips.append([i,vid_tuple])
        print("no. of clips - " + str(len(self.clips)))

    def __len__(self):
        """Number of enumerated clips (including ones that may lack a label)."""
        return(len(self.clips))

    def _read_frames(self,vid_tuple,start_second):
        """Read one clip's frames; return the list, or None if any read fails."""
        first = start_second*self.vid_fps
        last = first + (self.duration*self.vid_fps)
        vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,first)
        frames = []
        for j in range(first,last):
            retval,frame = vid_tuple[0].read()
            if not retval:
                # vid_tuple[3] is the stored last_second of this video.
                print(retval,first,j,vid_tuple[1],start_second,vid_tuple[3])
                return None
            frames.append(frame)
        return frames or None

    def __getitem__(self,id):
        """Return ``(x, y)`` for clip ``id`` or for the next usable clip.

        A clip is usable when the CSV has a row for its label timestamp and all
        of its frames can be read.  Unusable clips are skipped by moving to the
        following index (wrapping to 0), so the returned sample may belong to a
        different index than the one requested.

        Returns:
            x: float32 array; the clip's frames are stacked on the channel axis,
               the left and right halves of the width are stacked on the channel
               axis as well, and channels are moved first.  Assuming (H, W, C)
               frames with even W this is ``(2*C*n_frames, H, W//2)``.
            y: float32 vector holding the first matching CSV row (``cols`` only).

        Raises:
            IndexError: ``id`` is outside the clip list.
            RuntimeError: no clip in the whole dataset is usable.
        """
        n_clips = len(self.clips)
        for _ in range(max(n_clips,1)):
            start_second,vid_tuple = self.clips[id]
            label_tstamp = vid_tuple[2] + start_second + math.ceil(self.duration/2)
            y = self.df[self.df[0] == label_tstamp][self.cols].to_numpy()
            frames = self._read_frames(vid_tuple,start_second) if y.shape[0] >= 1 else None
            if frames is not None:
                break
            id = id + 1 if id < (n_clips-1) else 0
        else:
            # Every clip was tried once; the original code looped forever here.
            raise RuntimeError("no clip with both a label row and readable frames was found")

        x_arr = np.concatenate(frames,axis=2).astype(np.float32)
        half = int(x_arr.shape[1]/2)
        x_arr = np.concatenate([x_arr[...,:half,:],x_arr[...,half:,:]],axis=2)
        return np.transpose(x_arr,(2,0,1)),y.astype(np.float32)[0]
    

class trafficvidset_optflow(torch.utils.data.Dataset):
    """Variant of the clip dataset that keeps only channels 0 and 2 of each frame.

    File naming, clip enumeration and label lookup follow the same scheme as
    ``trafficvidset``: ``.mp4`` files named ``%Y-%m-%d_%H-%M-%S.mp4`` are parsed
    (as local time) into a start epoch, clips start every ``clip_delta``
    seconds, and the label is the CSV row whose column 0 equals
    ``video_start + clip_start + ceil(duration / 2)``.

    Args:
        vid_folder: directory scanned (non-recursively) for ``.mp4`` files.
        audio_file: header-less CSV; column 0 is matched against epoch seconds.
        vid_fps: frames per second used to turn seconds into frame indices.
        duration: clip length in seconds.
        cols: CSV columns returned as the label; defaults to all but column 0.
        clip_delta: stride, in seconds, between consecutive clip starts.
    """

    def __init__(self,vid_folder,audio_file, vid_fps, duration,cols=None,clip_delta=1):
        self.vid_fps = vid_fps
        self.duration = duration
        self.df = pd.read_csv(audio_file,header=None)
        self.cols = cols
        if self.cols is None:
            self.cols = self.df.columns[1:]
        # Each entry is [clip_start_second, vid_tuple]; vid_tuple is shared by
        # all clips of one file: [capture, frame_count, start_epoch, last_second].
        self.clips = []
        for vid_file in os.listdir(vid_folder):
            if vid_file[-4:] != '.mp4':
                continue
            vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
            vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
            frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
            last_second = int(frame_count/vid_fps) - duration
            vid_tuple = [vid_obj,frame_count,vid_time_stamp,last_second]
            for i in range(0,last_second+1,clip_delta):
                self.clips.append([i,vid_tuple])
        print("no. of clips - " + str(len(self.clips)))

    def __len__(self):
        """Number of enumerated clips (including ones that may lack a label)."""
        return(len(self.clips))

    def _read_frames(self,vid_tuple,start_second):
        """Read one clip, keeping channels 0 and 2; None if any read fails."""
        first = start_second*self.vid_fps
        last = first + (self.duration*self.vid_fps)
        vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,first)
        frames = []
        for j in range(first,last):
            retval,frame = vid_tuple[0].read()
            if not retval:
                # vid_tuple[3] is the stored last_second of this video.
                print(retval,first,j,vid_tuple[1],start_second,vid_tuple[3])
                return None
            # NOTE(review): presumably the two flow components are stored in
            # channels 0 and 2 of the encoded frame - confirm with the encoder.
            frames.append(frame[...,[0,2]])
        return frames or None

    def __getitem__(self,id):
        """Return ``(x, y)`` for clip ``id`` or for the next usable clip.

        Clips without a label row or with unreadable frames are skipped by
        moving to the following index (wrapping to 0).

        Returns:
            x: float32 array, channels first.  Two channels per frame are
               stacked on the channel axis, then the left and right halves of
               the width are stacked on it too: ``(4*n_frames, H, W//2)`` for
               even W.
            y: float32 vector holding the first matching CSV row (``cols`` only).

        Raises:
            IndexError: ``id`` is outside the clip list.
            RuntimeError: no clip in the whole dataset is usable.
        """
        n_clips = len(self.clips)
        for _ in range(max(n_clips,1)):
            start_second,vid_tuple = self.clips[id]
            label_tstamp = vid_tuple[2] + start_second + math.ceil(self.duration/2)
            y = self.df[self.df[0] == label_tstamp][self.cols].to_numpy()
            frames = self._read_frames(vid_tuple,start_second) if y.shape[0] >= 1 else None
            if frames is not None:
                break
            id = id + 1 if id < (n_clips-1) else 0
        else:
            # Every clip was tried once; the original code looped forever here.
            raise RuntimeError("no clip with both a label row and readable frames was found")

        x_arr = np.concatenate(frames,axis=2).astype(np.float32)
        half = int(x_arr.shape[1]/2)
        x_arr = np.concatenate([x_arr[...,:half,:],x_arr[...,half:,:]],axis=2)
        return np.transpose(x_arr,(2,0,1)),y[0].astype(np.float32)
        

        
class trafficvidset(torch.utils.data.Dataset):
    """Clip dataset that additionally estimates per-channel frame statistics.

    Every ``.mp4`` in ``vid_folder`` must be named ``%Y-%m-%d_%H-%M-%S.mp4``; the
    name is parsed and converted with ``time.mktime`` (i.e. interpreted as local
    time) to the recording's start epoch.  Clips start every ``clip_delta``
    seconds and are labelled with the CSV row whose column 0 equals
    ``video_start + clip_start + ceil(duration / 2)``.

    After the scan, randomly sampled frames are used to compute the mean and
    standard deviation per colour channel of the left half (``norm1``/``std1``)
    and of the right half (``norm2``/``std2``) of the frame.  These statistics
    are only stored: ``__getitem__`` does not apply them.  They are ``None``
    when no frame could be sampled.

    NOTE(review): this definition reuses the name of a class defined earlier in
    the notebook and replaces it.

    Args:
        vid_folder: directory scanned (non-recursively) for ``.mp4`` files.
        audio_file: header-less CSV; column 0 is matched against epoch seconds.
        vid_fps: frames per second used to turn seconds into frame indices.
        duration: clip length in seconds.
        cols: CSV columns returned as the label; defaults to all but column 0.
        clip_delta: stride, in seconds, between consecutive clip starts.
        normalize_clips: number of clips drawn (with replacement) for statistics.
        frames_per_clip: random frames read from the video of each drawn clip.
    """

    def __init__(self,vid_folder,audio_file, vid_fps, duration,cols=None,clip_delta=1,normalize_clips=150,frames_per_clip=2):
        self.vid_fps = vid_fps
        self.duration = duration
        self.df = pd.read_csv(audio_file,header=None)
        self.cols = cols
        if self.cols is None:
            self.cols = self.df.columns[1:]
        # Each entry is [clip_start_second, vid_tuple]; vid_tuple is shared by
        # all clips of one file: [capture, frame_count, start_epoch, last_second].
        self.clips = []
        for vid_file in os.listdir(vid_folder):
            if vid_file[-4:] != '.mp4':
                continue
            vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
            vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
            frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
            last_second = int(frame_count/vid_fps) - duration
            vid_tuple = [vid_obj,frame_count,vid_time_stamp,last_second]
            for i in range(0,last_second+1,clip_delta):
                self.clips.append([i,vid_tuple])
        print("no. of clips - " + str(len(self.clips)))
        self._compute_norm_stats(normalize_clips,frames_per_clip)

    def _compute_norm_stats(self,normalize_clips,frames_per_clip):
        """Estimate left/right-half channel mean and std from random frames."""
        self.norm1 = self.std1 = self.norm2 = self.std2 = None
        if not self.clips:
            return  # randint(0, 0) would raise
        metric_frames = []
        for clip_id in np.random.randint(0,len(self.clips),normalize_clips):
            capture = self.clips[clip_id][1][0]
            n_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
            if n_frames < 1:
                continue
            for frame_id in np.random.randint(0,n_frames,frames_per_clip):
                capture.set(cv2.CAP_PROP_POS_FRAMES,int(frame_id))
                retval,frame = capture.read()
                # Check the flag before touching the frame: it is None on failure.
                if retval:
                    metric_frames.append(frame)
        if not metric_frames:
            return
        # Stacks to (n, H, W, C); assumes all sampled frames share one size.
        metric_frames = np.array(metric_frames)
        half = int(metric_frames.shape[2]/2)
        self.norm1 = np.mean(metric_frames[...,:half,:],(0,1,2),keepdims=True)
        self.std1 = np.std(metric_frames[...,:half,:],(0,1,2),keepdims=True)
        self.norm2 = np.mean(metric_frames[...,half:,:],(0,1,2),keepdims=True)
        self.std2 = np.std(metric_frames[...,half:,:],(0,1,2),keepdims=True)

    def __len__(self):
        """Number of enumerated clips (including ones that may lack a label)."""
        return(len(self.clips))

    def _read_frames(self,vid_tuple,start_second):
        """Read one clip's frames; return the list, or None if any read fails."""
        first = start_second*self.vid_fps
        last = first + (self.duration*self.vid_fps)
        vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,first)
        frames = []
        for j in range(first,last):
            retval,frame = vid_tuple[0].read()
            if not retval:
                # vid_tuple[3] is the stored last_second of this video.
                print(retval,first,j,vid_tuple[1],start_second,vid_tuple[3])
                return None
            frames.append(frame)
        return frames or None

    def __getitem__(self,id):
        """Return ``(x, y)`` for clip ``id`` or for the next usable clip.

        Clips without a label row or with unreadable frames are skipped by
        moving to the following index (wrapping to 0), so the returned sample
        may belong to a different index than the one requested.

        Returns:
            x: un-normalised float32 array, channels first; frames and then the
               left/right width halves are stacked on the channel axis, giving
               ``(2*C*n_frames, H, W//2)`` for (H, W, C) frames with even W.
            y: float32 vector holding the first matching CSV row (``cols`` only).

        Raises:
            IndexError: ``id`` is outside the clip list.
            RuntimeError: no clip in the whole dataset is usable.
        """
        n_clips = len(self.clips)
        for _ in range(max(n_clips,1)):
            start_second,vid_tuple = self.clips[id]
            label_tstamp = vid_tuple[2] + start_second + math.ceil(self.duration/2)
            y = self.df[self.df[0] == label_tstamp][self.cols].to_numpy()
            frames = self._read_frames(vid_tuple,start_second) if y.shape[0] >= 1 else None
            if frames is not None:
                break
            id = id + 1 if id < (n_clips-1) else 0
        else:
            # Every clip was tried once; the original code looped forever here.
            raise RuntimeError("no clip with both a label row and readable frames was found")

        x_arr = np.concatenate(frames,axis=2).astype(np.float32)
        half = int(x_arr.shape[1]/2)
        x_arr = np.concatenate([x_arr[...,:half,:],x_arr[...,half:,:]],axis=2)
        return np.transpose(x_arr,(2,0,1)),y.astype(np.float32)[0]

In [None]:
 
# Smoke test: build the train/test datasets and print the label shape of one
# test batch.
duration = 5      # clip length in seconds
vid_fps = 5       # frames per second used to index the videos
batch_size = 16
# audio_file = '/home/s.saini/data/1653167477-1651577468.csv'
# train_vid_folder = '/blue/h.azad/s.saini/data/videos'
# test_vid_folder = '/home/s.saini/data/Videos/optflow/test'
audio_file = '/home/s.saini/data/Videos/audio/1649855468-1649880078.csv'
train_vid_folder = '/home/s.saini/data/Videos/train'
test_vid_folder = '/home/s.saini/data/Videos/test'
dset = trafficvidset(train_vid_folder,audio_file,vid_fps,duration,clip_delta=5)
tset = trafficvidset(test_vid_folder,audio_file,vid_fps,duration)
# Use the configured batch_size instead of repeating the literal 16.
dloader = DataLoader(dset, batch_size=batch_size, shuffle=True,num_workers=0)
tloader = DataLoader(tset, batch_size=batch_size, shuffle=True,num_workers=0)
for i, data in enumerate(tloader):
# for data in dloader:
    print(data[1].shape)
    break

In [3]:
# Training setup: datasets, loaders, model, loss and optimiser.
duration = 5      # clip length in seconds
vid_fps = 5       # frames per second used to index the videos
batch_size = 16
audio_file = '/home/s.saini/data/1653167477-1651577468.csv'
train_vid_folder = '/blue/h.azad/s.saini/data/videos'
test_vid_folder = '/home/s.saini/data/Videos/optflow/test'
cols = None       # None -> the dataset uses every CSV column except column 0
test_dset = trafficvidset(test_vid_folder,audio_file,vid_fps,duration,cols=cols)
train_dset = trafficvidset(train_vid_folder,audio_file,vid_fps,duration,cols=cols,clip_delta=4)

# Use the configured batch_size instead of repeating the literal 16.
loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True,num_workers=0)
test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=True,num_workers=0)
# loader = vid_dataloader(train_vid_folder,audio_file,vid_fps,duration,batch_size)

# test_loader = vid_dataloader(test_vid_folder,audio_file,vid_fps,duration,batch_size)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
# frames per clip x 2 width halves x 3 channels per frame, matching how the
# dataset stacks frames and halves on the channel axis.
in_channels = duration*vid_fps*2*3
# NOTE(review): must equal the number of label columns returned by the dataset
# (the R^2 printout of the training cell shows 32 values) - confirm for new CSVs.
out_dimension = 32

res = c2d.Resnet(in_channels,out_dimension).to(device)#[10,15,15,12]
# res.model.train()
criterion_loss = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(res.model.parameters(), lr = 2e-4)

no. of clips - 1384
no. of clips - 31055
cuda


In [None]:
# Endless training loop (stop it by interrupting the kernel).  Every 200
# iterations it prints the mean training loss of the window and a per-target
# R^2 computed from the window's per-target MSE and target variance.
v_loss = []   # stays empty: the validation pass sketched for this loop is disabled
t_loss = []
while True:
    res.model.train()
    losses = []
    losses_freq = []
    y_archive = []
    for i, data in enumerate(loader):
        x, y = data[0].to(device), data[1].to(device)

        res.model.zero_grad()
        y_pred = res.model(x)
        loss = criterion_loss(y_pred,y)

        # Bookkeeping for the periodic report: scalar loss, per-target MSE of
        # this batch, and the targets themselves.
        losses.append(loss.item())
        losses_freq.append(np.array(torch.mean(torch.square(y-y_pred),axis=0).cpu().detach()))
        y_archive.append(np.array(y.cpu().detach()))

        loss.backward()
        optimizer.step()

        if i%200:
            continue

        # ---- periodic report (also fires at i == 0) ----
        print(i)
        losses_freq_mean = np.mean(np.array(losses_freq),axis=0)
        t_loss.append(np.mean(losses))
        print('Train' + ' ' + str(t_loss[-1]))

        # R^2 per target: 1 - MSE / variance of the targets seen in the window.
        y_epoch = np.concatenate(y_archive)
        y_mean = np.mean(y_epoch,axis=0)
        r2 = 1 - (losses_freq_mean / np.mean(np.square(y_epoch - y_mean),axis=0))
        print(r2)

        # Start a fresh reporting window.
        y_archive = []
        losses = []
        losses_freq = []
        res.model.train()

    print('epoch')
                
            



0
Train 1985.143798828125
[  -1.6094079   -4.5146437   -8.614045   -25.659155   -41.326275
  -88.88975    -81.40795    -80.88844    -52.20608    -40.87087
 -105.41285    -83.05992   -133.17926   -137.77327   -119.404884
 -147.99915   -120.47618   -130.72835   -132.69266   -160.33684
 -156.75934   -166.6955    -166.52518   -148.93886   -135.65538
  -77.92169    -61.75796    -41.996468   -26.11232    -17.446608
  -12.068918    -1.9822409]
200
Train 454.43415592193605
[ -0.19869065  -0.36822605  -1.2475212   -2.756872    -5.7569127
  -7.4303246  -13.254303   -10.257144   -12.745881   -18.956455
 -21.10823    -22.951708   -33.883343   -36.864017   -33.31846
 -31.278683   -28.896944   -29.827515   -26.90622    -24.450657
 -24.69064    -22.048666   -19.43655    -19.835896   -16.669313
 -11.217428    -8.479362    -5.5189877   -3.300562    -2.0782127
  -1.0868933   -0.2834972 ]
400
Train 32.59372864246368
[-0.00541949 -0.00417793  0.002675    0.00158006  0.00190622  0.0186621
  0.00721669  0.0

In [None]:
import seaborn as sns
# Loss curves: report index on the x-axis, mean loss on the y-axis.  x and y
# are passed by keyword because seaborn's lineplot takes them keyword-only;
# the original positional call also put the loss on the x-axis.
sns.lineplot(x=list(range(len(t_loss))),y=t_loss)
sns.lineplot(x=list(range(len(v_loss))),y=v_loss)

In [None]:
import seaborn as sns
# Print the standard deviation of every column of the label CSV, one per line
# (column 0 included).
audio_file = '/home/s.saini/data/Videos/audio/1649855468-1649880078.csv'
df = pd.read_csv(audio_file,header=None)
for col, column_values in df.items():
    print(np.std(column_values))

In [None]:
import os
from operator import itemgetter, attrgetter


def get_files_in_interval(strt,end,path):
    """List the files in ``path`` whose name encodes a time in ``[strt, end)``.

    The file name without its last four characters (the extension) is parsed as
    ``%Y-%m-%d_%H-%M-%S`` and converted with ``time.mktime``, i.e. interpreted
    as local time.  Names that do not match the pattern are ignored instead of
    aborting the scan.

    Args:
        strt: inclusive lower bound, epoch seconds.
        end: exclusive upper bound, epoch seconds.
        path: directory to scan (non-recursive).

    Returns:
        A list of ``[epoch_second, file_name, full_path]`` entries sorted by
        time (then name), so index 0 is always the earliest file.
    """
    file_list = []
    for file in os.listdir(path):
        try:
            file_time = int(time.mktime(datetime.strptime(file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
        except ValueError:
            # Not a timestamp-named file (e.g. a stray text or system file).
            continue
        if file_time >= strt and file_time < end:
            file_list.append([file_time,file,os.path.join(path,file)])
    # os.listdir order is arbitrary; callers treat the first entry as earliest.
    file_list.sort(key=lambda entry: (entry[0], entry[1]))
    return file_list


# Walk two folders of timestamp-named videos in time order and, whenever the
# earlier video is still running when the later one starts, seek the earlier
# video to the frame at which the later one begins.
path1 ='G:/Shared drives/UF-AI-Catalyst/UF AI Code/test_data/bosch'
path2 = 'G:/Shared drives/UF-AI-Catalyst/UF AI Code/test_data/iteris'
strt_time = 1649999413
end_time = 1650137713
fps = 5


# Sorted so that the entry at each cursor is the earliest not yet handled.
file_list1 = sorted(get_files_in_interval(strt_time,end_time,path1))
file_list2 = sorted(get_files_in_interval(strt_time,end_time,path2))

print(len(file_list1))
print(len(file_list2))

# Cursors replace the original never-changing list-length test, which could
# not terminate; the lists themselves are left untouched.
idx1 = 0
idx2 = 0
while idx1 < len(file_list1) and idx2 < len(file_list2):
    if file_list1[idx1][0] > file_list2[idx2][0]:
        efile = file_list2[idx2]
        lfile = file_list1[idx1]
        idx2 += 1
    else:
        efile = file_list1[idx1]
        lfile = file_list2[idx2]
        idx1 += 1

    evid = cv2.VideoCapture(efile[2])
    # Local names: the original overwrote the notebook-wide `fps` and
    # `duration`, which other cells also read.
    clip_fps = evid.get(cv2.CAP_PROP_FPS)
    frame_count = int(evid.get(cv2.CAP_PROP_FRAME_COUNT))
    # A zero fps (unreadable file) would divide by zero; treat it as no overlap.
    if clip_fps > 0 and (efile[0] + frame_count/clip_fps) > lfile[0]:
        lvid = cv2.VideoCapture(lfile[2])
        # Offset in frames = fps * seconds between the two start timestamps.
        evid.set(cv2.CAP_PROP_POS_FRAMES,clip_fps*(lfile[0]-efile[0]))
        # TODO: consume the aligned frames of evid/lvid here.
        lvid.release()
    evid.release()
        




In [None]:
# import cProfile

# def test_func():
#     duration = 5
#     vid_fps = 5
#     batch_size = 64
#     vid_folder = 'G:/.shortcut-targets-by-id/1VaRYG8M-m7nfxKooARU6qTiHpVPhNHuV/out'
#     audio_file = 'G:/Shared drives/UF-AI-Catalyst/UF AI Code/test_data/1649855468-1649880078.csv'

#     loader = vid_dataloader(vid_folder,audio_file,vid_fps,duration,batch_size)
#     for data in loader:
#         # print(data[0].shape, data[1].shape,data[0].dtype)
#         break
    

# cProfile.run('test_func()')

In [None]:

# transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
#                                             torchvision.transforms.Normalize(0.5, 0.5)])
# batch_size  = 100 
# dataset = torchvision.datasets.MNIST(root='\\data',download=True,transform=transform)
# params = {'batch_size': batch_size,
#           'shuffle': True}


# loader = torch.utils.data.DataLoader(dataset,**params)

In [None]:
import seaborn as sns
# Distribution of CSV column 19 over a short time window [strt_time, end_time).
audio_file = '/home/s.saini/data/Videos/audio/1649855468-1649880078.csv'
df = pd.read_csv(audio_file,header=None)
# strt_time = 1649862360
# end_time = 1649862432


strt_time = 1649862480
end_time = 1649862490
in_window = (df[0] >= strt_time) & (df[0] < end_time)
df = df[in_window]
# for col in range(17,25):
#     sns.displot(df[col])
sns.displot(df[19])

In [None]:
# Data Loader

def vid_dataloader(vid_folder, audio_file, vid_fps, duration,batch_size,cols=None):
    """Endless generator of shuffled ``[x, y]`` batches of video clips.

    ``.mp4`` files named ``%Y-%m-%d_%H-%M-%S.mp4`` are scanned once; a clip
    starts at every whole second that leaves ``duration`` seconds of video.
    Each pass over the clips uses a fresh random permutation and ends with
    ``print("epoch")``.  Clips without a CSV row for
    ``video_start + clip_start + ceil(duration / 2)`` or with unreadable frames
    are dropped from their batch, so batches can be smaller than ``batch_size``;
    batches that end up empty are not yielded.

    Args:
        vid_folder: directory scanned (non-recursively) for ``.mp4`` files.
        audio_file: header-less CSV; column 0 is matched against epoch seconds.
        vid_fps: frames per second used to turn seconds into frame indices.
        duration: clip length in seconds.
        batch_size: maximum number of clips per batch.
        cols: CSV columns used as the label; defaults to the columns that were
            previously hard-coded (10-21 and 23-25).

    Yields:
        ``[x, y]`` float32 tensors.  ``x`` is built as (B, H, W//2, channels)
        and then has axes 1 and 3 swapped, i.e. it is (B, channels, W//2, H).
        NOTE(review): the dataset classes return (channels, H, W//2); the
        spatial axes are in the opposite order here.

    Raises:
        RuntimeError: a complete pass produced no batch at all.
    """
    if cols is None:
        cols = [10,11,12,13,14,15,16,17,18,19,20,21,23,24,25]
    df = pd.read_csv(audio_file,header=None)
    # Each entry is [clip_start_second, vid_tuple]; vid_tuple is shared by all
    # clips of one file: [capture, frame_count, start_epoch, last_second].
    clips = []
    for vid_file in os.listdir(vid_folder):
        if vid_file[-4:] != '.mp4':
            continue
        vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
        vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
        frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
        last_second = int(frame_count/vid_fps) - duration
        vid_tuple = [vid_obj,frame_count,vid_time_stamp,last_second]
        for i in range(0,last_second+1):
            clips.append([i,vid_tuple])

    frames_per_clip = duration*vid_fps
    while(1):
        indices = np.random.permutation(len(clips))
        produced = False
        for k in range(0,len(indices),batch_size):
            x_batch = []
            y_batch = []
            for clip_index in indices[k:k+batch_size]:
                vid_start_second,vid_tuple = clips[clip_index]
                audio_start_tstamp = vid_tuple[2] + vid_start_second + math.ceil(duration/2)
                y = df[df[0] == audio_start_tstamp][cols].to_numpy()
                if y.shape[0] < 1:
                    continue

                first = vid_start_second*vid_fps
                vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,first)
                frames = []
                for j in range(first,first+frames_per_clip):
                    retval,frame = vid_tuple[0].read()
                    if not retval:
                        # vid_tuple[3] is this video's last_second (the
                        # original printed the value of the last scanned file).
                        print(retval,first,j,vid_tuple[1],vid_start_second,vid_tuple[3])
                        break
                    frames.append(frame)
                if not frames or len(frames) != frames_per_clip:
                    continue  # incomplete clip: a None frame cannot be stacked
                x_batch.append(np.concatenate(frames,axis=2))
                y_batch.append(y[0])
            if not x_batch:
                continue  # every clip of this batch was dropped
            y_batch = np.array(y_batch).astype(np.float32)
            x_batch = np.array(x_batch).astype(np.float32)
            width = x_batch.shape[2]
            # Stack the left and right halves of the width on the channel axis.
            x_batch = np.concatenate([x_batch[...,:int(width/2),:],x_batch[...,int(width/2):,:]],axis=3)
            produced = True
            yield [torch.transpose(torch.tensor(x_batch),1,3),torch.tensor(y_batch)]
        if not produced:
            # Without this the generator would spin forever without yielding.
            raise RuntimeError("no clip with both a label row and readable frames was found")
        print("epoch")
                

#     while(1):
#         indices = np.random.randint(0,len(vid_files),size=batch_size)
#         x_batch = []
#         y_batch = []
#         for i in indices:
#             vid_tuple = vid_files[i]
# #             last_second = int(vid_tuple[1]/vid_fps) - duration
#             if last_second < 0:
#                 continue

#             # print(int(vid_tuple[1]/vid_fps))
#             # print(last_second)
#             vid_start_second =  np.random.randint(0,last_second+1)
#             audio_start_tstamp = vid_tuple[2] + vid_start_second + math.ceil(duration/2)
#             video_snippet_start_index = vid_start_second*vid_fps
#             video_snippet_end_index = video_snippet_start_index + (duration*vid_fps)
#             vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,video_snippet_start_index)

# #             y = df[df[0] == audio_start_tstamp].drop(columns=[1]).to_numpy()
# #             y = df[df[0] == audio_start_tstamp][14]
#             y = df[df[0] == audio_start_tstamp][[14,15,16,17,18,19,20]].to_numpy()

# #             print(y.shape)
#             if y.shape[0] < 1:
# #             if len(y) < 1:
#                 continue
# #             y = y.to_numpy()[np.newaxis,...]

#             frames = []
#             for j in range(video_snippet_start_index,video_snippet_end_index):
                
#                 retval,frame = vid_tuple[0].read()
#                 if not retval:                    
#                     print(retval,video_snippet_start_index,j,vid_tuple[1],vid_start_second,last_second)
#                 frames.append(frame)
#             x_batch.append(np.concatenate(frames,axis=2))
#             y_batch.append(y[0])
#         y_batch = np.array(y_batch).astype(np.float32)
#         x_batch = np.array(x_batch).astype(np.float32)
#         width = x_batch.shape[2]
#         x_batch = np.concatenate([x_batch[...,:int(width/2),:],x_batch[...,int(width/2):,:]],axis=3)
#         # yield [np.array(x_batch),np.array(y_batch)]
#         yield [torch.transpose(torch.tensor(x_batch),1,3),torch.tensor(y_batch)]






In [None]:
def seq_vid_dataloader(vid_folder, audio_file, vid_fps, duration,batch_size,cols=None,max_empty_batches=100):
    """Endless generator of ``[x, y]`` batches taken as sliding windows.

    Every video keeps its own frame cursor.  For each batch, ``batch_size``
    videos are drawn at random (with replacement); each draw takes the window of
    ``vid_fps * duration`` frames starting at that video's cursor and then
    advances the cursor by ``vid_fps`` frames (one second).  When fewer frames
    than one window remain, the cursor wraps to 0.  The label is the CSV row
    whose column 0 equals ``video_start + window_start_second +
    ceil(duration / 2)``; windows without a label or with unreadable frames are
    dropped, so batches may be smaller than ``batch_size``.

    Args:
        vid_folder: directory scanned for ``%Y-%m-%d_%H-%M-%S.mp4`` files.
        audio_file: header-less CSV; column 0 is matched against epoch seconds.
        vid_fps: frames per second used to turn frames into seconds.
        duration: window length in seconds.
        batch_size: number of draws per batch.
        cols: CSV columns used as the label; defaults to the columns that were
            previously hard-coded (10-21 and 23-25).
        max_empty_batches: consecutive empty batches tolerated before giving up.

    Yields:
        ``[x, y]`` float32 tensors.  ``x`` is built as (B, H, W//2, channels)
        and then has axes 1 and 3 swapped, i.e. it is (B, channels, W//2, H).

    Raises:
        ValueError: no video is long enough to hold one window.
        RuntimeError: more than ``max_empty_batches`` consecutive empty batches.
    """
    if cols is None:
        cols = [10,11,12,13,14,15,16,17,18,19,20,21,23,24,25]
    df = pd.read_csv(audio_file,header=None)
    window = vid_fps*duration
    # Entries: [capture, frame_count, start_epoch, frame_cursor].
    vid_files = []
    for vid_file in os.listdir(vid_folder):
        if vid_file[-4:] != '.mp4':
            continue
        vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
        vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
        frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
        if not frame_count or frame_count < window:
            # Empty or too short for a single window: never usable.
            vid_obj.release()
            continue
        vid_files.append([vid_obj,frame_count,vid_time_stamp,0])
    if not vid_files:
        raise ValueError("no video in %r holds a full window of frames" % (vid_folder,))

    empty_streak = 0
    while(1):

        indices = np.random.randint(0,len(vid_files),size=batch_size)
        x_batch = []
        y_batch = []
        for i in indices:
            vid_tuple = vid_files[i]
            # Wrap when the remaining frames cannot fill a window.  (The
            # original test was inverted and reset a local copy only.)
            if (vid_tuple[1] - vid_tuple[3]) < window:
                vid_tuple[3] = 0
            frame_counter = vid_tuple[3]
            # Advance unconditionally so a window without a label cannot stall
            # this video on the same position forever.
            vid_tuple[3] += vid_fps

            vid_start_second =  int(frame_counter/vid_fps)
            audio_start_tstamp = vid_tuple[2] + vid_start_second + math.ceil(duration/2)
            y = df[df[0] == audio_start_tstamp][cols].to_numpy()
            if y.shape[0] < 1:
                continue

            # Windows overlap (stride < length), so always seek to the cursor
            # instead of relying on where the previous read stopped.
            vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,frame_counter)
            frames = []
            for j in range(frame_counter,frame_counter+window):
                retval,frame = vid_tuple[0].read()
                if not retval:
                    print(retval,frame_counter,vid_tuple[1])
                    break
                frames.append(frame)
            if not frames or len(frames) != window:
                continue  # incomplete window: a None frame cannot be stacked

            x_batch.append(np.concatenate(frames,axis=2))
            y_batch.append(y[0])
        if not x_batch:
            empty_streak += 1
            if empty_streak > max_empty_batches:
                raise RuntimeError("no window with both a label row and readable frames was found")
            continue
        empty_streak = 0
        y_batch = np.array(y_batch).astype(np.float32)
        x_batch = np.array(x_batch).astype(np.float32)
        width = x_batch.shape[2]
        # Stack the left and right halves of the width on the channel axis.
        x_batch = np.concatenate([x_batch[...,:int(width/2),:],x_batch[...,int(width/2):,:]],axis=3)
        yield [torch.transpose(torch.tensor(x_batch),1,3),torch.tensor(y_batch)]




In [None]:

# duration = 5
# vid_fps = 5
# batch_size = 16
# audio_file = '/home/s.saini/data/Videos/audio/1649855468-1649880078.csv'
# train_vid_folder = '/home/s.saini/data/Videos/train'
# test_vid_folder = '/home/s.saini/data/Videos/test'

# loader = vid_dataloader(test_vid_folder,audio_file,vid_fps,duration,batch_size)


# for data in loader:
#     print(data[0].shape, data[1].shape,data[0].dtype)


In [None]:

# transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
#                                             torchvision.transforms.Normalize(0.5, 0.5)])
# batch_size  = 100 
# dataset = torchvision.datasets.MNIST(root='\\data',download=True,transform=transform)
# params = {'batch_size': batch_size,
#           'shuffle': True}


# loader = torch.utils.data.DataLoader(dataset,**params)

In [None]:
!pip install ffmpeg-python

In [None]:
import ffmpeg
import numpy as np

# Decode one video through an ffmpeg subprocess and print the shape of its
# first frame.
# NOTE(review): width/height are assumed to match the video's real frame size;
# with other values the raw byte stream is sliced at the wrong boundaries.
width = 480
height = 240
in_filename = '/home/s.saini/data/Videos/train/2022-04-13_11-59-49.mp4'
frame_bytes = width * height * 3   # one rgb24 frame
process1 = (
    ffmpeg
    .input(in_filename, loglevel='panic')
    .output('pipe:', format='rawvideo', pix_fmt='rgb24')
    .run_async(pipe_stdout=True)
)

try:
    while True:
        in_bytes = process1.stdout.read(frame_bytes)
        # Empty read = end of stream; a short read is a truncated frame that
        # cannot be reshaped.
        if len(in_bytes) < frame_bytes:
            break
        in_frame = (
            np
            .frombuffer(in_bytes, np.uint8)
            .reshape([height, width, 3])
        )
        print(in_frame.shape)
        break
finally:
    # The loop stops after the first frame, so ffmpeg is still running: close
    # the pipe and stop the process instead of leaving it behind.
    process1.stdout.close()
    process1.terminate()
    process1.wait()

In [None]:
# Fetch a single batch from `loader` and print the shape of its first element;
# `data` stays bound to that batch after the loop for the cells below.
for data in loader:
    print(data[0].shape)
    break
# print(torch.sum(data[0][0][0]))
# print(torch.sum(data[0][0][1]))
# print(torch.sum(data[0][0][2]))

In [None]:
# Sum of the slice at index 1 of the first sample in the batch currently held
# in `data` (whichever cell assigned `data` last).
print(torch.sum(data[0][0][1]))