In [34]:

# from torchsummary import summary
import torchvision
import model.c2d as c2d
import os
import numpy as np
from torchvision.utils import save_image, make_grid
from torchvision.models import vgg19

import torch.nn as nn
import torch
import math
import cv2
from datetime import datetime
import time
import pandas as pd
from torch.utils.data import DataLoader



In [43]:
class trafficvidset(torch.utils.data.Dataset):
    """Map-style dataset of fixed-length video clips paired with one label row.

    Every ``.mp4`` file in ``vid_folder`` must be named
    ``%Y-%m-%d_%H-%M-%S.mp4``; the name is parsed (local time) into the epoch
    second at which the recording starts.  Each video is cut into clips of
    ``duration`` seconds whose start seconds are ``clip_delta`` apart.

    The label of a clip is the row of ``audio_file`` (a header-less CSV) whose
    column 0 equals ``video_start + clip_start + ceil(duration / 2)``; only
    the columns listed in ``cols`` are returned.

    Args:
        vid_folder: directory containing the ``.mp4`` files.
        audio_file: path of the header-less CSV holding the label rows.
        vid_fps: frames per second used to convert seconds to frame indices.
        duration: clip length in seconds.
        cols: CSV columns used as the label; defaults to every column except
            the first one.
        clip_delta: step, in seconds, between the starts of consecutive clips.
    """

    def __init__(self,vid_folder,audio_file, vid_fps, duration,cols=None,clip_delta=1):
        self.vid_fps = vid_fps
        self.duration = duration
        self.df = pd.read_csv(audio_file,header=None)
        self.cols = cols
        if self.cols is None:
            self.cols = self.df.columns[1:]
        # Each entry is [clip_start_second, vid_tuple] where vid_tuple is
        # [capture, frame_count, start_timestamp, last_valid_start_second].
        self.clips = []
        for vid_file in os.listdir(vid_folder):
            if vid_file[-4:] != '.mp4':
                continue
            vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
            vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
            frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
            last_second = int(frame_count/vid_fps) - duration
            if last_second < 0:
                # Too short (or unreadable) to hold even one clip: free the handle.
                vid_obj.release()
                continue
            vid_tuple = [vid_obj,frame_count,vid_time_stamp,last_second]
            for i in range(0,last_second+1,clip_delta):
                self.clips.append([i,vid_tuple])
        print("no. of clips - " + str(len(self.clips)))

    def __len__(self):
        """Return the number of indexed clips."""
        return(len(self.clips))

    def __getitem__(self,id):
        """Return ``(x, y)`` for clip ``id``.

        ``x`` is a float32 array of shape ``(2 * C * T, H, W // 2)`` where
        ``T = duration * vid_fps`` frames of ``C`` channels are stacked along
        the channel axis and the left/right halves of the frame are stacked
        on top of that.  ``y`` is the float32 label row.

        If the requested clip has no label row, or one of its frames cannot
        be read, the next clip (wrapping around) is used instead.

        Raises:
            IndexError: ``id`` is outside ``[-len(self), len(self))``.
            RuntimeError: no clip in the whole dataset is usable.
        """
        n_clips = len(self.clips)
        if not -n_clips <= id < n_clips:
            # Keeps plain ``for sample in dataset`` iteration terminating.
            raise IndexError("clip index out of range")

        frames_per_clip = self.duration*self.vid_fps
        # Try every clip at most once so a dataset without labels cannot spin forever.
        for offset in range(n_clips):
            clip_start, vid_tuple = self.clips[(id + offset) % n_clips]
            capture = vid_tuple[0]
            audio_start_tstamp = vid_tuple[2] + clip_start + math.ceil(self.duration/2)
            y = self.df[self.df[0] == audio_start_tstamp][self.cols].to_numpy()
            if y.shape[0] < 1:
                continue

            video_snippet_start_index = clip_start*self.vid_fps
            capture.set(cv2.CAP_PROP_POS_FRAMES,video_snippet_start_index)
            frames = []
            for j in range(video_snippet_start_index,video_snippet_start_index + frames_per_clip):
                retval,frame = capture.read()
                if not retval:
                    print(retval,video_snippet_start_index,j,vid_tuple[1],clip_start,vid_tuple[3])
                    break
                frames.append(frame)
            if len(frames) < frames_per_clip:
                # A frame could not be decoded; fall through to the next clip.
                continue

            # Frames are assumed to be H x W x C arrays (as cv2 returns) -- TODO confirm.
            stacked = np.concatenate(frames,axis=2).astype(np.float32)
            half_width = int(stacked.shape[1]/2)
            halves = np.concatenate([stacked[:,:half_width,:],stacked[:,half_width:,:]],axis=2)
            return np.transpose(halves,(2,0,1)),y[0].astype(np.float32)

        raise RuntimeError("no clip with a matching label row and readable frames")
        
        

In [45]:
 
# Smoke test of the map-style dataset: build it over the training folder and
# print the shape of every batch the DataLoader produces.
duration = 5
vid_fps = 5
batch_size = 16
audio_file = '/home/s.saini/data/Videos/audio/1649855468-1649880078.csv'
train_vid_folder = '/home/s.saini/data/Videos/train'
test_vid_folder = '/home/s.saini/data/Videos/test'
dset = trafficvidset(train_vid_folder,audio_file,vid_fps,duration,clip_delta=5)
# Use the configured batch size instead of a second hard-coded 16, so that
# changing `batch_size` above actually takes effect.
dloader = DataLoader(dset, batch_size=batch_size, shuffle=True,num_workers=0)
for data in dloader:
    print(data[0].shape)
#     break

no. of clips - 6200
torch.Size([16, 150, 240, 240])


KeyboardInterrupt: 

In [6]:
# Data Loader

def vid_dataloader(vid_folder, audio_file, vid_fps, duration,batch_size,cols=None):
    """Endless generator of shuffled ``[x_batch, y_batch]`` tensor pairs.

    Every ``%Y-%m-%d_%H-%M-%S.mp4`` file in ``vid_folder`` is indexed into
    clips of ``duration`` seconds starting at every whole second.  The label
    of a clip is the row of the header-less CSV ``audio_file`` whose column 0
    equals ``video_start + clip_start + ceil(duration / 2)``.

    Args:
        vid_folder: directory containing the ``.mp4`` files.
        audio_file: path of the header-less label CSV.
        vid_fps: frames per second used to convert seconds to frame indices.
        duration: clip length in seconds.
        batch_size: maximum number of clips per yielded batch; clips without
            a label row or with unreadable frames are dropped, so a batch may
            be smaller.
        cols: CSV columns used as the label.  Defaults to the column list the
            original implementation hard-coded.

    Yields:
        ``[x, y]`` where ``x`` is a float32 tensor obtained by stacking the
        clip frames and the two frame halves along the channel axis and then
        swapping axes 1 and 3, and ``y`` is the float32 label tensor.

    Raises:
        ValueError: the folder contains no usable clip.
        RuntimeError: a whole epoch produced no batch (no clip has a label).
    """
    if cols is None:
        cols = [10,11,12,13,14,15,16,17,18,19,20,21,23,24,25]
    df = pd.read_csv(audio_file,header=None)
    frames_per_clip = duration*vid_fps

    # Each entry is [clip_start_second, vid_tuple] where vid_tuple is
    # [capture, frame_count, start_timestamp, last_valid_start_second].
    clips = []
    for vid_file in os.listdir(vid_folder):
        if vid_file[-4:] != '.mp4':
            continue
        vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
        vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
        frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
        last_second = int(frame_count/vid_fps) - duration
        if last_second < 0:
            # Too short (or unreadable) to hold one clip: free the handle.
            vid_obj.release()
            continue
        vid_tuple = [vid_obj,frame_count,vid_time_stamp,last_second]
        for i in range(0,last_second+1):
            clips.append([i,vid_tuple])

    if not clips:
        # Without this guard the epoch loop below would spin forever.
        raise ValueError("no usable .mp4 clips found in " + str(vid_folder))

    while(1):
        indices = np.random.permutation(len(clips))
        yielded_any = False
        for k in range(0,len(indices),batch_size):
            x_batch = []
            y_batch = []
            for clip_index in indices[k:k+batch_size]:
                vid_start_second, vid_tuple = clips[clip_index]
                audio_start_tstamp = vid_tuple[2] + vid_start_second + math.ceil(duration/2)
                y = df[df[0] == audio_start_tstamp][cols].to_numpy()
                if y.shape[0] < 1:
                    continue

                video_snippet_start_index = vid_start_second*vid_fps
                vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,video_snippet_start_index)
                frames = []
                for j in range(video_snippet_start_index,video_snippet_start_index + frames_per_clip):
                    retval,frame = vid_tuple[0].read()
                    if not retval:
                        # Report this clip's own video; the original printed a
                        # stale `last_second` left over from the indexing loop.
                        print(retval,video_snippet_start_index,j,vid_tuple[1],vid_start_second,vid_tuple[3])
                        break
                    frames.append(frame)
                if len(frames) < frames_per_clip:
                    # A None frame would crash np.concatenate; drop the clip.
                    continue
                x_batch.append(np.concatenate(frames,axis=2))
                y_batch.append(y[0])

            if not x_batch:
                # Every clip of this slice was dropped; nothing to yield.
                continue
            y_batch = np.array(y_batch).astype(np.float32)
            x_batch = np.array(x_batch).astype(np.float32)
            width = x_batch.shape[2]
            x_batch = np.concatenate([x_batch[...,:int(width/2),:],x_batch[...,int(width/2):,:]],axis=3)
            # NOTE(review): transpose(1, 3) also swaps the two spatial axes,
            # unlike trafficvidset, which uses np.transpose(..., (2, 0, 1)).
            # Kept as-is so existing callers see the same layout -- confirm intent.
            yielded_any = True
            yield [torch.transpose(torch.tensor(x_batch),1,3),torch.tensor(y_batch)]
        if not yielded_any:
            raise RuntimeError("no clip with a matching label row and readable frames")
        print("epoch")
                

#     while(1):
#         indices = np.random.randint(0,len(vid_files),size=batch_size)
#         x_batch = []
#         y_batch = []
#         for i in indices:
#             vid_tuple = vid_files[i]
# #             last_second = int(vid_tuple[1]/vid_fps) - duration
#             if last_second < 0:
#                 continue

#             # print(int(vid_tuple[1]/vid_fps))
#             # print(last_second)
#             vid_start_second =  np.random.randint(0,last_second+1)
#             audio_start_tstamp = vid_tuple[2] + vid_start_second + math.ceil(duration/2)
#             video_snippet_start_index = vid_start_second*vid_fps
#             video_snippet_end_index = video_snippet_start_index + (duration*vid_fps)
#             vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,video_snippet_start_index)

# #             y = df[df[0] == audio_start_tstamp].drop(columns=[1]).to_numpy()
# #             y = df[df[0] == audio_start_tstamp][14]
#             y = df[df[0] == audio_start_tstamp][[14,15,16,17,18,19,20]].to_numpy()

# #             print(y.shape)
#             if y.shape[0] < 1:
# #             if len(y) < 1:
#                 continue
# #             y = y.to_numpy()[np.newaxis,...]

#             frames = []
#             for j in range(video_snippet_start_index,video_snippet_end_index):
                
#                 retval,frame = vid_tuple[0].read()
#                 if not retval:                    
#                     print(retval,video_snippet_start_index,j,vid_tuple[1],vid_start_second,last_second)
#                 frames.append(frame)
#             x_batch.append(np.concatenate(frames,axis=2))
#             y_batch.append(y[0])
#         y_batch = np.array(y_batch).astype(np.float32)
#         x_batch = np.array(x_batch).astype(np.float32)
#         width = x_batch.shape[2]
#         x_batch = np.concatenate([x_batch[...,:int(width/2),:],x_batch[...,int(width/2):,:]],axis=3)
#         # yield [np.array(x_batch),np.array(y_batch)]
#         yield [torch.transpose(torch.tensor(x_batch),1,3),torch.tensor(y_batch)]






In [4]:
def seq_vid_dataloader(vid_folder, audio_file, vid_fps, duration,batch_size,cols=None):
    """Endless generator that walks each video sequentially, one second per pick.

    Every ``%Y-%m-%d_%H-%M-%S.mp4`` file in ``vid_folder`` keeps its own frame
    counter.  For each batch, ``batch_size`` videos are drawn at random (with
    replacement); each draw reads a clip of ``duration`` seconds starting at
    that video's counter and then advances the counter by ``vid_fps`` frames.
    When fewer than ``duration * vid_fps`` frames remain, the counter rewinds
    to 0.  Draws whose timestamp has no label row in the header-less CSV
    ``audio_file``, or whose frames cannot be read, are dropped, so a batch
    may hold fewer than ``batch_size`` clips.

    Args:
        vid_folder: directory containing the ``.mp4`` files.
        audio_file: path of the header-less label CSV.
        vid_fps: frames per second used to convert frames to seconds.
        duration: clip length in seconds.
        batch_size: number of random draws per batch.
        cols: CSV columns used as the label.  Defaults to the column list the
            original implementation hard-coded.

    Yields:
        ``[x, y]`` float32 tensors, built the same way as in ``vid_dataloader``.
        Batches in which every draw was dropped are skipped; if no timestamp
        ever has a label the generator keeps searching.

    Raises:
        ValueError: the folder contains no video long enough for one clip.
    """
    if cols is None:
        cols = [10,11,12,13,14,15,16,17,18,19,20,21,23,24,25]
    df = pd.read_csv(audio_file,header=None)
    frames_per_clip = vid_fps*duration

    # Each entry is [capture, frame_count, start_timestamp, frame_counter].
    vid_files = []
    for vid_file in os.listdir(vid_folder):
        if vid_file[-4:] != '.mp4':
            continue
        vid_time_stamp = int(time.mktime(datetime.strptime(vid_file[0:-4], '%Y-%m-%d_%H-%M-%S').timetuple()))
        vid_obj = cv2.VideoCapture(os.path.join(vid_folder,vid_file))
        frame_count = vid_obj.get(cv2.CAP_PROP_FRAME_COUNT)
        if not frame_count or frame_count < frames_per_clip:
            # Unreadable or shorter than one clip: it can never yield a sample.
            vid_obj.release()
            continue
        vid_files.append([vid_obj,frame_count,vid_time_stamp,0])

    if not vid_files:
        raise ValueError("no usable .mp4 files found in " + str(vid_folder))

    while(1):
        indices = np.random.randint(0,len(vid_files),size=batch_size)
        x_batch = []
        y_batch = []
        for i in indices:
            vid_tuple = vid_files[i]
            # Rewind only when the remaining frames cannot fill a clip (the
            # original comparison was inverted and rewound in the opposite case).
            if (vid_tuple[1] - vid_tuple[3]) < frames_per_clip:
                vid_tuple[3] = 0
            frame_counter = vid_tuple[3]
            # Advance before any `continue` so a missing label cannot pin this
            # video to the same position forever.
            vid_tuple[3] += vid_fps

            vid_start_second =  int(frame_counter/vid_fps)
            audio_start_tstamp = vid_tuple[2] + vid_start_second + math.ceil(duration/2)
            y = df[df[0] == audio_start_tstamp][cols].to_numpy()
            if y.shape[0] < 1:
                continue

            # Reading moves the capture by a whole clip while the counter moves
            # by one second, so always seek explicitly to the counter.
            vid_tuple[0].set(cv2.CAP_PROP_POS_FRAMES,frame_counter)
            frames = []
            for j in range(frame_counter,frame_counter+frames_per_clip):
                retval,frame = vid_tuple[0].read()
                if not retval:
                    print(retval,frame_counter,vid_tuple[1])
                    break
                frames.append(frame)
            if len(frames) < frames_per_clip:
                # A None frame would crash np.concatenate; drop the draw.
                continue

            x_batch.append(np.concatenate(frames,axis=2))
            y_batch.append(y[0])

        if not x_batch:
            # Every draw was dropped; an empty array has no axis 2 to split.
            continue
        y_batch = np.array(y_batch).astype(np.float32)
        x_batch = np.array(x_batch).astype(np.float32)
        width = x_batch.shape[2]
        x_batch = np.concatenate([x_batch[...,:int(width/2),:],x_batch[...,int(width/2):,:]],axis=3)
        yield [torch.transpose(torch.tensor(x_batch),1,3),torch.tensor(y_batch)]




In [43]:

# Smoke test of the generator-based loader on the test folder.
# These assignments rebind the notebook-wide configuration names.
duration = 5
vid_fps = 5
batch_size = 16
audio_file = '/home/s.saini/data/Videos/audio/1649855468-1649880078.csv'
train_vid_folder = '/home/s.saini/data/Videos/train'
test_vid_folder = '/home/s.saini/data/Videos/test'

loader = vid_dataloader(test_vid_folder,audio_file,vid_fps,duration,batch_size)


# vid_dataloader loops with `while(1)`, so this loop only stops when the
# kernel is interrupted.
for data in loader:
    print(data[0].shape, data[1].shape,data[0].dtype)


[mov,mp4,m4a,3gp,3g2,mj2 @ 0x563fea0f7e40] moov atom not found


files read
epoch
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([15, 150, 240, 240]) torch.Size([15, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.float32
torch.Size([16, 150, 240, 240]) torch.Size([16, 7]) torch.f

KeyboardInterrupt: 

In [2]:

# MNIST loader: images are converted to tensors and normalised with
# mean 0.5 / std 0.5.
# NOTE(review): this cell rebinds `batch_size` (to 100) and `loader`, names
# that the video cells also use -- confirm it is still needed.
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize(0.5, 0.5)])
batch_size  = 100 
# NOTE(review): root is the literal string '\data' (one backslash) -- confirm
# this is the intended download directory.
dataset = torchvision.datasets.MNIST(root='\\data',download=True,transform=transform)
params = {'batch_size': batch_size,
          'shuffle': True}


loader = torch.utils.data.DataLoader(dataset,**params)

In [51]:
# Training setup: configuration, datasets, loaders, model, loss and optimizer.
duration = 5
vid_fps = 5
batch_size = 16
audio_file = '/home/s.saini/data/Videos/audio/1649855468-1649880078.csv'
train_vid_folder = '/home/s.saini/data/Videos/train'
test_vid_folder = '/home/s.saini/data/Videos/test'

test_dset = trafficvidset(test_vid_folder,audio_file,vid_fps,duration)
train_dset = trafficvidset(train_vid_folder,audio_file,vid_fps,duration,clip_delta=4)

# Use the configured batch size instead of a second hard-coded 16, so that
# changing `batch_size` above actually takes effect.
loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True,num_workers=0)
test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=True,num_workers=0)
# loader = vid_dataloader(train_vid_folder,audio_file,vid_fps,duration,batch_size)

# test_loader = vid_dataloader(test_vid_folder,audio_file,vid_fps,duration,batch_size)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
# frames per clip * 3 colour channels * 2 frame halves, matching the channel
# stacking done in trafficvidset.__getitem__.
in_channels = duration*vid_fps*3*2
# NOTE(review): must equal the number of label columns the datasets return
# (all CSV columns but the first, since `cols` is not passed) -- confirm.
out_dimension = 32

res = c2d.Resnet(in_channels,out_dimension).to(device)
res.model.train()
criterion_loss = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(res.model.parameters(), lr = 2e-4)

[mov,mp4,m4a,3gp,3g2,mj2 @ 0x55a72b706940] moov atom not found


no. of clips - 1387
no. of clips - 7679


In [None]:
# Endless training loop: runs epoch after epoch until the kernel is interrupted.
# Every 50 training steps it records the mean training loss since the last
# report and the mean loss over (at most) four shuffled validation batches.
v_loss = []
t_loss = []
while True:
    res.model.train()
    losses = []

    for i, data in enumerate(loader):
        x, y = data[0].to(device), data[1].to(device)

        # One optimisation step.
        res.model.zero_grad()
        y_pred = res.model(x)
        loss = criterion_loss(y_pred, y)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()

        if i % 50:
            continue

        # --- periodic report -------------------------------------------
        print(i)
        t_loss.append(np.mean(losses))
        print('Train', t_loss[-1])
        losses = []

        # A fresh shuffled loader gives a different validation sample each time.
        test_loader = DataLoader(test_dset, batch_size=16, shuffle=True, num_workers=0)
        res.model.eval()
        for j, data in enumerate(test_loader):
            x, y = data[0].to(device), data[1].to(device)
            with torch.no_grad():
                y_pred = res.model(x)
                losses.append(criterion_loss(y_pred, y).item())
            if j == 3:
                break
        v_loss.append(np.mean(losses))
        print('Validation', v_loss[-1])

        # Reset the accumulator and go back to training mode.
        losses = []
        res.model.train()

    print('epoch')
                
            



0
Train 29.867095947265625
Validation 18.763875484466553
50
Train 29.365791664123535
Validation 40.09738826751709
100
Train 24.811059646606445
Validation 20.53251600265503
150
Train 25.244449768066406
Validation 19.45361590385437
200
Train 27.631628799438477
Validation 20.75678253173828
250
Train 25.9790878868103
Validation 48.27507781982422
300
Train 24.2585862159729
Validation 26.743857383728027
350
Train 27.565343189239503
Validation 25.345038414001465
400
Train 25.890446128845216
Validation 24.790374755859375
450
Train 24.697357921600343
Validation 17.58067488670349
epoch
0
Train 26.565811157226562
Validation 20.46248197555542
50
Train 27.192783203125
Validation 41.440839767456055
100
Train 27.59659429550171
Validation 32.611820697784424
150
Train 27.72354965209961
Validation 38.35366630554199
200
Train 22.457594242095947
Validation 19.780050039291382
250
Train 26.29104290008545
Validation 19.548136234283447
300
Train 26.134798336029053
Validation 27.871220111846924
350
Train 26.46

In [49]:
# Print the network output for the first batch of the test loader only.
# NOTE(review): this calls `res(...)` while the training cell calls
# `res.model(...)`, and it runs without torch.no_grad() -- confirm both are intended.
for data in test_loader:
    print(res(data[0].to(device)))
    break

tensor([[11.3670, 14.6446, 20.1131, 24.6436, 29.1910, 34.7048, 39.6444, 43.7930,
         46.8338, 52.6740, 53.6185, 55.8550, 59.0919, 60.1943, 62.4122, 63.4136,
         64.4053, 65.3156, 64.3676, 61.3923, 59.0562, 58.0792, 57.8750, 55.7629,
         51.7262, 49.4777, 44.4349, 40.5693, 34.4035, 27.3291, 18.9803, 12.5272],
        [11.6437, 14.7802, 19.7262, 24.3143, 28.6962, 34.3230, 39.1473, 43.2612,
         46.3080, 51.9514, 53.0182, 55.2673, 58.3882, 59.4199, 61.7377, 62.8702,
         64.0070, 65.0819, 64.2692, 61.3151, 59.0229, 57.9433, 57.5922, 55.3538,
         51.4185, 49.1825, 44.2971, 40.4270, 34.4392, 27.4759, 19.2649, 13.0582],
        [11.5743, 14.7191, 19.7172, 24.2840, 28.6738, 34.2699, 39.0946, 43.2008,
         46.2371, 51.8888, 52.9368, 55.1774, 58.3044, 59.3416, 61.6399, 62.7523,
         63.8676, 64.9177, 64.0912, 61.1429, 58.8519, 57.7882, 57.4578, 55.2413,
         51.3051, 49.0740, 44.1836, 40.3249, 34.3322, 27.3760, 19.1720, 12.9518],
        [11.8450, 14.9205

In [14]:
import os
from operator import itemgetter, attrgetter


def get_files_in_interval(strt,end,path):
    """List the timestamp-named files in ``path`` that start within ``[strt, end)``.

    A file name (without its extension) must have the form
    ``%Y-%m-%d_%H-%M-%S``; it is parsed as local time into epoch seconds.
    Files whose names do not match that pattern are ignored instead of
    aborting the whole scan.

    Args:
        strt: inclusive lower bound, epoch seconds.
        end: exclusive upper bound, epoch seconds.
        path: directory to scan.

    Returns:
        A list of ``[file_time, file_name, full_path]`` entries sorted by
        ``file_time`` (``os.listdir`` order is arbitrary, and the caller
        compares the first elements of two such lists).
    """
    file_list = []
    for file in os.listdir(path):
        stem = os.path.splitext(file)[0]
        try:
            parsed = datetime.strptime(stem, '%Y-%m-%d_%H-%M-%S')
        except ValueError:
            # Not a timestamp-named recording (e.g. a stray text file).
            continue
        file_time = int(time.mktime(parsed.timetuple()))
        if strt <= file_time < end:
            file_list.append([file_time,file,os.path.join(path,file)])
    file_list.sort()
    return file_list


# Align recordings from two cameras: walk both file lists in time order and,
# whenever the earlier file is still running when the later one starts, seek
# the earlier video to the frame at which the later one begins.
path1 ='G:/Shared drives/UF-AI-Catalyst/UF AI Code/test_data/bosch'
path2 = 'G:/Shared drives/UF-AI-Catalyst/UF AI Code/test_data/iteris'
strt_time = 1649999413
end_time = 1650137713
fps = 5


file_list1 = get_files_in_interval(strt_time,end_time,path1)
file_list2 = get_files_in_interval(strt_time,end_time,path2)

print(len(file_list1))
print(len(file_list2))

# The sweep below compares the heads of both lists, so they must be in
# chronological order.
file_list1.sort(key=itemgetter(0))
file_list2.sort(key=itemgetter(0))

while(len(file_list2) and len(file_list1)):
    if file_list1[0][0] > file_list2[0][0]:
        earlier_list = file_list2
        efile = file_list2[0]
        lfile = file_list1[0]
    else:
        earlier_list = file_list1
        efile = file_list1[0]
        lfile = file_list2[0]

    evid = cv2.VideoCapture(efile[2])
    # Local names: the original overwrote the notebook-wide `fps` and
    # `duration` that other cells read.
    evid_fps = evid.get(cv2.CAP_PROP_FPS)
    frame_count = int(evid.get(cv2.CAP_PROP_FRAME_COUNT))
    # An unreadable file reports 0 fps; treat it as having no duration.
    evid_duration = frame_count/evid_fps if evid_fps else 0
    if (efile[0] + evid_duration) > lfile[0]:
        # The later recording is `lfile`; the original reopened `efile` here.
        lvid = cv2.VideoCapture(lfile[2])
        # Offset between the two start timestamps, converted to frames.
        evid.set(cv2.CAP_PROP_POS_FRAMES,evid_fps*(lfile[0]-efile[0]))
        # TODO: consume the aligned frames from `evid` and `lvid` here.
        lvid.release()
    evid.release()

    # NOTE(review): the original loop never removed anything from either list
    # and so could not terminate; dropping the earlier file once handled is
    # the assumed intent -- confirm.
    earlier_list.pop(0)
        




IndentationError: expected an indented block (Temp/ipykernel_15916/3693431966.py, line 2)

In [None]:
# import cProfile

# def test_func():
#     duration = 5
#     vid_fps = 5
#     batch_size = 64
#     vid_folder = 'G:/.shortcut-targets-by-id/1VaRYG8M-m7nfxKooARU6qTiHpVPhNHuV/out'
#     audio_file = 'G:/Shared drives/UF-AI-Catalyst/UF AI Code/test_data/1649855468-1649880078.csv'

#     loader = vid_dataloader(vid_folder,audio_file,vid_fps,duration,batch_size)
#     for data in loader:
#         # print(data[0].shape, data[1].shape,data[0].dtype)
#         break
    

# cProfile.run('test_func()')

In [None]:

# transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
#                                             torchvision.transforms.Normalize(0.5, 0.5)])
# batch_size  = 100 
# dataset = torchvision.datasets.MNIST(root='\\data',download=True,transform=transform)
# params = {'batch_size': batch_size,
#           'shuffle': True}


# loader = torch.utils.data.DataLoader(dataset,**params)