In [1]:
import torch
import numpy as np
import os
import json
import joblib
from torch.utils.data import Dataset,DataLoader
from itertools import repeat
import pandas as pd
import math
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
class tdset(Dataset):
    """Toy dataset that serves random vectors of a per-instance length.

    The vector length is drawn once at construction (an integer in [5, 10))
    and every lookup returns a freshly sampled standard-normal vector of that
    length. The ``ids`` only determine how many items the dataset reports.
    """

    def __init__(self, ids):
        # ids are stored as given; the length is sampled from NumPy's global RNG.
        self.ids = ids
        self.sz = np.random.randint(5, 10)

    def __len__(self):
        """Return the number of ids supplied at construction."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return a new random vector of length ``self.sz`` (``idx`` is ignored)."""
        sample = np.random.randn(self.sz)
        return sample


In [10]:
# Two independent toy datasets over the same id values; each instance draws
# its own vector length, so the two may yield vectors of different sizes.
dset, dset2 = (tdset([0, 9, 8, 6, 8, 4]) for _ in range(2))

In [14]:
# The 50th-percentile length is 25 frames, so we use a video-frame sequence length of 25.

class TOYMODEL(pl.LightningModule):
    """Minimal LightningModule wrapping a single ``nn.Linear(15, 1)`` layer.

    Args:
        hparams: hyperparameters, forwarded to ``save_hyperparameters``.
    """

    def __init__(self,hparams):
        super().__init__()
        self.save_hyperparameters(hparams)
        self.lin = nn.Linear(15,1)

    def forward(self,x):
        """Apply the linear layer to ``x``."""
        return self.lin(x)

    def training_step(self,batch,batch_idx):
        """Run the linear layer on ``batch``.

        The previous version referenced an undefined name ``x`` and raised
        ``NameError`` as soon as training started; the batch passed in by the
        trainer is what must be fed to the layer.
        """
        out = self.lin(batch)
        # No loss is defined for this toy model yet, so (as before) nothing is
        # returned. NOTE(review): per the Lightning docs a ``None`` return makes
        # automatic optimization skip the batch -- compute and return a loss
        # from ``out`` once a target is available.
        return None
        

    


In [2]:
#utils

def get_vids(base_dir,split):
    """Read a split listing and return parallel lists of id prefixes and video names.

    Every non-blank line of the file at ``base_dir + split`` is expected to
    have the form ``<id>/<video>``.

    Args:
        base_dir: path prefix; it is concatenated verbatim with ``split``, so
            it must already end with a path separator.
        split: file name of the split listing.

    Returns:
        ``(ids, vids)``: two lists of strings in file order, where ``ids[i]``
        and ``vids[i]`` come from the same line.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ``/``.
    """
    trn_idlst = []
    trn_vidlst = []

    # The context manager closes the file even when a malformed line raises.
    with open(base_dir+split,'r') as f:
        for line in f:
            # Blank lines (typically a trailing newline at end of file) used to
            # crash the tuple unpacking below; they carry no entry, so skip them.
            if not line.strip():
                continue
            id_,vid = line.split('/')
            trn_idlst.append(id_)
            trn_vidlst.append(vid.strip('\n'))
    return trn_idlst,trn_vidlst

    
def get_features(data_dir,split='val',feat_dir='/common/users/vk405/feat_csv/'):
    """Locate one feature file per video of a split.

    The split listing ``<data_dir>splits/<split>_list.txt`` is read with
    ``get_vids``; for each ``(num, name)`` entry the directory
    ``<feat_dir><split>_frame_feat_csv/<num>/<name>/0001/`` is inspected.

    Args:
        data_dir: dataset root, concatenated verbatim (must end with a separator).
        split: one of ``'val'``, ``'train'`` or ``'test'``.
        feat_dir: root of the extracted feature tree (must end with a separator).

    Returns:
        ``(feat_list, vid_dtls)``: ``feat_list`` maps video name to the path of
        its feature file, and ``vid_dtls`` is the list of ``(num, name)`` pairs
        for which a feature file was found.

    Raises:
        NotImplementedError: for an unrecognised ``split``.
        AssertionError: if the two results differ in length (e.g. the same
            video name appears twice in the listing).
    """
    # split -> (feature sub-directory, listing file name)
    split_layout = {
        'val': ('val_frame_feat_csv/', 'val_list.txt'),
        'train': ('train_frame_feat_csv/', 'train_list.txt'),
        'test': ('test_frame_feat_csv/', 'test_list.txt'),
    }
    if split not in split_layout:
        raise NotImplementedError(f'unknown split: {split}')
    feat_subdir, list_name = split_layout[split]
    feat_split_dir = feat_dir+feat_subdir
    vid_num,vid_name = get_vids(data_dir+'splits/',list_name)

    feat_list = {}
    vid_dtls = []
    for num,name in zip(vid_num,vid_name):
        feat_loc = os.path.join(feat_split_dir, f'{num}/{name}/0001/')
        if not os.path.isdir(feat_loc):
            print(f"video : {num}/{name} not found")
            continue
        # os.listdir returns entries in arbitrary order; sorting makes the
        # chosen file deterministic when a directory holds more than one.
        feat_files = sorted(os.listdir(feat_loc))
        if not feat_files:
            # An empty directory used to raise IndexError; treat it like a
            # missing video instead of aborting the whole scan.
            print(f"video : {num}/{name} has no feature files")
            continue
        feat_list[name] = feat_loc + feat_files[0]
        vid_dtls.append((num,name))
    assert len(feat_list) == len(vid_dtls),"get-features is giving incorrect features"
    return feat_list,vid_dtls






def get_raw_labels(ids,annotns_file):
    """Collect raw segment annotations for the requested videos.

    The JSON file is expected to hold a top-level ``'database'`` mapping from
    video name to a record with ``'duration'`` and ``'annotations'``; each
    annotation provides ``'segment'`` and ``'sentence'``.

    Args:
        ids: iterable of 2-item entries; only the second item (the video name)
            is used.
        annotns_file: path to the annotation JSON file.

    Returns:
        dict mapping video name to a list of ``(segment, sentence, duration)``
        tuples, in annotation order. Videos absent from the file are reported
        on stdout and left out.
    """
    with open(annotns_file) as json_file:
        annotns = json.load(json_file)
    print(annotns.keys())

    label_info = {}
    for _unused,vidname in ids:
        if vidname not in annotns['database']:
            print(f"label for {vidname} not present")
            continue
        record = annotns['database'][vidname]
        duration = record['duration']
        label_info[vidname] = [
            (segment_info['segment'], segment_info['sentence'], duration)
            for segment_info in record['annotations']
        ]
    return label_info

def regress_labels(raw_labels):
    """Convert raw segment labels into length-normalised regression targets.

    Args:
        raw_labels: dict mapping a key to a list of ``(rng, sent, vidlen)``
            items, where ``rng`` is a numeric sequence and ``vidlen`` a number.

    Returns:
        dict with the same keys; each item becomes
        ``([mid_pred, duration_pred], sent)`` where ``mid_pred`` is
        ``sum(rng)/2`` and ``duration_pred`` is ``rng[-1]-rng[0]``, both
        multiplied by ``1/vidlen``.
    """
    def _to_target(item):
        rng,sent,vidlen = item
        centre = sum(rng)/2
        extent = rng[-1]-rng[0]
        scale = 1/vidlen  # express both quantities relative to the video length
        return ([scale*centre, scale*extent], sent)

    return {
        key: [_to_target(item) for item in raw_labels[key]]
        for key in raw_labels
    }
            
            
    
    
    


In [3]:
from sklearn.base import BaseEstimator, TransformerMixin

class LabelEncoder2(BaseEstimator, TransformerMixin):
    """Scale segment boundaries by a fixed ``max_len`` and back.

    Input labels are dicts mapping a key to a list of ``(rng, sent, vidlen)``
    items. ``transform`` divides the first and last element of every ``rng``
    by ``max_len``; ``decode`` multiplies by ``max_len`` and rounds.

    Attributes set by the methods:
        vidlens: array built in ``fit`` with one entry per segment.
        truebounds: array of the unscaled ``rng`` values seen by the most
            recent ``transform`` call.
    """

    def __init__(self,max_len=499):
        self.max_len = max_len
        self.vidlens = []
        self.truebounds = []

    def fit(self,raw_labels):
        """Record, per segment, the last field of the key's first item.

        Returns ``self`` so calls can be chained.
        """
        per_segment = []
        for key in raw_labels:
            items = raw_labels[key]
            # One copy of the key's value for each of its segments.
            per_segment.extend([items[0][-1]] * len(items))
        self.vidlens = np.array(per_segment)
        return self

    def transform(self,raw_labels):
        """Return the scaled labels; also refreshes ``self.truebounds``."""
        return self._regress_labels(raw_labels)

    def decode(self,outputs):
        """Map scaled outputs back by multiplying with ``max_len`` and rounding."""
        rescaled = outputs*self.max_len
        return np.round(rescaled)

    def _regress_labels(self,raw_labels):
        """Build ``{key: [([start/max_len, end/max_len], sent), ...]}``."""
        encoded = {}
        seen_bounds = []
        for key in raw_labels:
            per_key = []
            for rng,sent,vidlen in raw_labels[key]:
                scaled = [rng[0]/self.max_len,rng[-1]/self.max_len]
                seen_bounds.append(rng)
                per_key.append((scaled,sent))
            encoded[key] = per_key
        self.truebounds = np.array(seen_bounds)
        return encoded


In [4]:

def get_labels(ids,annotns_file):
    """Collect ``([start, end], sentence, duration)`` labels for the given videos.

    The JSON file is expected to map video names directly to a record with
    ``'duration'`` and ``'annotations'``; each annotation provides
    ``'segment'`` and ``'sentence'``. Only the first and last element of each
    segment are kept.

    Args:
        ids: iterable of 2-item entries; only the second item (the video name)
            is used.
        annotns_file: path to the annotation JSON file.

    Returns:
        dict mapping video name to its list of label tuples, in annotation
        order. Videos absent from the file are reported on stdout and left out.
    """
    with open(annotns_file) as json_file:
        annotns = json.load(json_file)

    label_info = {}
    for _unused,vidname in ids:
        if vidname not in annotns:
            print(f"label for {vidname} not present")
            continue
        record = annotns[vidname]
        duration = record['duration']
        labels = []
        for segment_info in record['annotations']:
            interval = segment_info['segment']
            bounds = [interval[0],interval[-1]]
            labels.append((bounds,segment_info['sentence'],duration))
        label_info[vidname] = labels
    return label_info

In [39]:
#dataset
# Dataset/loader
# This is newer version
class YoucookDset2(Dataset):
    """Dataset of (frame-feature window, sentence embedding, overlap score) samples.

    For every annotated segment of every video that has a feature file, one
    sample is created per window start ``id, id+seqlen, id+2*seqlen, ...`` whose
    window of ``seqlen`` frames ends at or before ``framecnt``. Each sample is
    indexed in ``self.data`` as
    ``(video name, feature CSV path, window start, segment index)``.

    Args:
        data_dir: dataset root, concatenated verbatim (must end with a separator).
        split: split name passed to ``get_features``; ``'test'`` is rejected.
        use_precomp_emb: load precomputed sentence embeddings from
            ``<data_dir>/emb.joblib``. ``__getitem__`` requires this.
        seqlen: number of frames per window.
        framecnt: exclusive upper bound on window end positions.
        id: offset of the first window start.

    Raises:
        NotImplementedError: when ``split == 'test'``.
    """

    def __init__(self,data_dir='/common/home/vk405/Projects/Crossmdl/Data/YouCookII/'\
        ,split='train',use_precomp_emb=True,seqlen=25,framecnt=500,id=0):
        self.id = id
        self.feat_locs = {}
        self.split = split
        self.data_dir = data_dir
        self.use_precomp_emb = use_precomp_emb
        # The embeddings live in `txt_emb`. It used to be left undefined when
        # use_precomp_emb was False because only the misspelled `text_emb` was
        # initialised; `text_emb` is kept so existing readers do not break.
        self.txt_emb = None
        self.text_emb = None
        self.seqlen = seqlen
        self.framecnt = framecnt
        if self.split != 'test':
            self.annotns_file = data_dir+'annotations/segment_youcookii_annotations_trainval.json'
        else:
            raise NotImplementedError(f"Split:{self.split},not yet correctly implemented")
        if self.use_precomp_emb:
            # NOTE(review): joblib.load unpickles the file and can execute
            # arbitrary code -- only load emb.joblib from a trusted source.
            self.txt_emb = joblib.load(os.path.join(self.data_dir,'emb.joblib'))

        self.feat_locs,vids = get_features(self.data_dir,split=self.split)
        assert len(vids) == len(self.feat_locs),"features are wrong"
        # Labels are used as returned by get_labels: ([start, end], sentence, duration).
        self.final_labels = get_labels(vids,self.annotns_file)

        # Window starts do not depend on the video or segment, so filter them
        # once: keep only windows that fit entirely below framecnt.
        window_starts = [
            stpnt for stpnt in np.arange(self.id,self.framecnt,self.seqlen)
            if stpnt+self.seqlen<=self.framecnt
        ]

        self.data = []
        for key in self.final_labels:
            if key not in self.feat_locs:
                print(f"video:{key} not found")
                continue
            file_loc = self.feat_locs[key]
            # Segment-major order: all windows of segment 0, then segment 1, ...
            for seg_ind in range(len(self.final_labels[key])):
                for stpnt in window_starts:
                    self.data.append((key,file_loc,stpnt,seg_ind))

    def __len__(self):
        """Return the number of (segment, window) samples."""
        return len(self.data)

    def getclass_prob(self,lbl_rng,frame_rng):
        """Overlap score between a label range and a frame window.

        Both ranges are expanded to the values
        ``first, first+1, ..., last`` (inclusive); the result is the size of
        their intersection divided by the size of their union, or ``0.0`` when
        they share no value.
        """
        lbl_ids = set(np.arange(lbl_rng[0],lbl_rng[-1]+1))
        frame_ids = set(np.arange(frame_rng[0],frame_rng[-1]+1))
        inter = lbl_ids & frame_ids
        if not inter:
            return 0.0
        return len(inter)/len(lbl_ids | frame_ids)

    def __getitem__(self,idx):
        """Return ``(frame_features, sentence_embedding, overlap_score)`` for sample ``idx``.

        ``frame_features`` are rows ``start:start+seqlen`` of the video's
        feature CSV as float32, ``sentence_embedding`` is the precomputed
        embedding of the sample's segment as float32, and ``overlap_score`` is
        ``getclass_prob`` of the segment's range against the window.

        Raises:
            NotImplementedError: when the dataset was built with
                ``use_precomp_emb=False``.
        """
        if not self.use_precomp_emb:
            raise NotImplementedError("not yet correctly implemented")
        vidname,file_loc,stid,seg_ind = self.data[idx]
        txt_info = self.final_labels[vidname][seg_ind]
        # The window covers frames stid .. stid+seqlen-1 inclusive.
        label_value = self.getclass_prob(txt_info[0],(stid,stid+self.seqlen-1))
        # The whole CSV is re-read on every access; only the window is kept.
        frames = pd.read_csv(file_loc).values.astype(np.float32)[stid:stid+self.seqlen,:]
        sent_emb = (self.txt_emb[vidname][seg_ind]).astype(np.float32)
        return frames,sent_emb,label_value

        

           



        

In [40]:

data_dir = '/common/home/vk405/Projects/Crossmdl/Data/YouCookII/'

# Pass data_dir explicitly: it was assigned but never used, so editing it had
# no effect and the dataset silently fell back to the constructor's default.
youcookdata = YoucookDset2(data_dir=data_dir,split='val')

In [47]:
# Load the dataset and dataloader once every epoch; this makes things a bit slower, but it is a simple approach.

In [48]:
# Inspect the sample index: each entry is
# (video name, feature CSV path, window start frame, segment index).
youcookdata.data

[('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet_34_feat_mscoco.csv',
  0,
  0),
 ('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet_34_feat_mscoco.csv',
  25,
  0),
 ('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet_34_feat_mscoco.csv',
  50,
  0),
 ('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet_34_feat_mscoco.csv',
  75,
  0),
 ('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet_34_feat_mscoco.csv',
  100,
  0),
 ('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet_34_feat_mscoco.csv',
  125,
  0),
 ('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet_34_feat_mscoco.csv',
  150,
  0),
 ('sdB8qBlLS2E',
  '/common/users/vk405/feat_csv/val_frame_feat_csv/405/sdB8qBlLS2E/0001/resnet

In [49]:
# Labels of one video as produced by get_labels:
# one ([start, end], sentence, duration) tuple per annotated segment.
youcookdata.final_labels['sdB8qBlLS2E']

[([18, 29], 'add cardamom seeds to a pan of hot oil and shake', 358.72),
 ([32, 53], 'add chili flakes to the pan and shake', 358.72),
 ([58, 78],
  'add turmeric cumin and coriander powder to the pan and shake',
  358.72),
 ([79, 111], 'add garlic ginger paste and onion  to the pan and stir', 358.72),
 ([139, 184], 'add lamb to the pan and stir', 358.72),
 ([222, 251], 'add chili powder to the pan and mix', 358.72),
 ([278, 285], 'add chopped tomato to the pan', 358.72),
 ([377, 406],
  'add green chilis and garam masala powder to the pan and stir',
  358.72),
 ([455, 465], 'add coriander leaves to the pan', 358.72)]

In [51]:
# Overlap score (last element of the sample tuple) of the first sample,
# i.e. the window starting at frame 0 against the video's first segment.
youcookdata[0][-1]

0.23333333333333334

In [52]:
# Overlap score of the second sample: the window starting at frame 25
# against the same (first) segment.
youcookdata[1][-1]

0.15625