In [2]:
# pad your sequences

from torch.nn.utils.rnn import pad_sequence
from pytorch_lightning.callbacks import LearningRateMonitor
import torch
import numpy as np
from PIL import Image
import os
from collections import defaultdict
import json
import joblib
from torch.utils.data import Dataset,DataLoader,random_split
from itertools import repeat
import pandas as pd
import math
import torch.nn as nn
import torch.nn.functional as F
from numpy import linalg as LA
from argparse import Namespace
from numpy import genfromtxt
import os
from torch.optim.lr_scheduler import ReduceLROnPlateau

import pytorch_lightning as pl
import wandb
import logging
from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger, WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

import clip


import wandb
import logging
from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger, WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import os

logger = logging.getLogger(__name__)
def wandb_logger(dir, version):
    """Create a WandbLogger named "wandb" that saves under `dir` for run `version`."""
    return WandbLogger(name="wandb", save_dir=dir, version=version)


def csvlogger(dir, version):
    """Create a CSVLogger rooted at `dir`, with name "csvlogs" and run `version`."""
    return CSVLogger(dir, name="csvlogs", version=version)


def tblogger(dir, version):
    """Create a TensorBoardLogger rooted at `dir`, with name "tblogs" and run `version`."""
    return TensorBoardLogger(dir, name="tblogs", version=version)

def get_loggers(dir,version,lis=["csv"]):
    """Instantiate the loggers requested in `lis`.

    Args:
        dir: Directory handed to every logger factory.
        version: Run/version identifier handed to every logger factory.
        lis: Collection of logger keys; recognised keys are "wandb", "csv"
            and "tb". Unrecognised keys are ignored.

    Returns:
        A list of logger objects, always ordered wandb, csv, tb regardless of
        the order of `lis`.
    """
    # Factories are wrapped in lambdas so that only the requested loggers are
    # looked up and constructed.
    builders = (
        ("wandb", lambda: wandb_logger(dir, version)),
        ("csv", lambda: csvlogger(dir, version)),
        ("tb", lambda: tblogger(dir, version)),
    )
    return [build() for key, build in builders if key in lis]







def get_vid_ids(split='training',
    annotns_file='/common/home/vk405/Projects/Crossmdl/Data/YouCookII/annotations/youcookii_annotations_trainval.json'):
    """Return the ids of all videos annotated as belonging to `split`.

    Args:
        split: Value compared against each entry's 'subset' field
            ('training' or 'validation').
        annotns_file: Path to the annotation JSON; it must contain a top-level
            'database' mapping of video id -> entry.

    Returns:
        List of video ids, in the order they appear in the 'database' mapping.
    """
    with open(annotns_file) as json_file:
        database = json.load(json_file)['database']
    return [vid_id for vid_id, entry in database.items() if entry['subset'] == split]


def get_split_files(split='training',
    annotns_file='/common/home/vk405/Projects/Crossmdl/Data/YouCookII/annotations/youcookii_annotations_trainval.json',
        data_dir = '/common/users/vk405/Youcook/'):
    """Collect the usable videos of a split together with their segments/sentences.

    A video is usable when its frame folder exists under `data_dir` and holds
    at least 495 entries.

    Args:
        split: Annotation subset to select ('training' or 'validation').
        annotns_file: Path to the annotation JSON.
        data_dir: Directory (string WITH trailing slash, since paths are built
            by concatenation) that holds one folder per video plus the
            `<id>global_segs.joblib` / `<id>global_sents.joblib` files.

    Returns:
        Tuple `(vid_locs, segs, sents, incomplete)`:
        vid_locs   - folder paths (trailing '/') of the usable videos;
        segs/sents - dicts keyed by video id; a video appears in both or in
                     neither;
        incomplete - ids whose folder holds fewer than 495 entries.
    """
    min_frames = 495  # folders with fewer entries are reported as incomplete
    total_ids = get_vid_ids(split,annotns_file)
    downloaded_ids = {name for name in os.listdir(data_dir) if 'joblib' not in name}
    vid_locs = []
    sents = {}
    segs = {}
    incomplete = []
    for vid_id in total_ids:
        if vid_id not in downloaded_ids:
            continue
        vid_loc = data_dir + vid_id + '/'
        if len(os.listdir(vid_loc)) < min_frames:
            incomplete.append(vid_id)
            continue
        vid_locs.append(vid_loc)
        seg = joblib.load(data_dir+f'{vid_id}global_segs.joblib')
        sent = joblib.load(data_dir+f'{vid_id}global_sents.joblib')
        try:
            # Resolve both lookups before storing either, so that `sents` and
            # `segs` can never disagree about which videos they contain.
            vid_sents, vid_segs = sent[vid_id], seg[vid_id]
        except (LookupError, TypeError):
            print(f"{vid_id} is no corresponding global sent/seg")
            continue
        sents[vid_id] = vid_sents
        segs[vid_id] = vid_segs
    return vid_locs,segs,sents,incomplete



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pathlib

FEAT_DIR = pathlib.Path('/common/users/vk405/CLIP_FEAT')
RAWFRAME_DIR = pathlib.Path('/common/users/vk405/Youcook/')

class Dset(Dataset):
    """Per-segment dataset of frame/sentence similarity profiles.

    Each item is one (video, segment) pair. `__getitem__` returns
    `(scores, start, end)` where `scores` holds the cosine similarity between
    every stored frame feature of the video and the segment's text feature,
    and `start`/`end` are the segment's first/last frame index divided by
    `NUM_FRAMES - 1`.

    Args:
        data_dir: pathlib.Path of the raw-frame root (one folder per video).
        feat_dir: pathlib.Path of the feature root; features are read from
            `feat_dir/split/vid_<id>.joblib` and `txt_<id>.joblib`.
        split: Split name, used both as the feature sub-directory and as the
            annotation subset.
    """

    # Annotation file used to enumerate the videos of a split.
    ANNOTATIONS_FILE = '/common/home/vk405/Projects/Crossmdl/Data/YouCookII/annotations/youcookii_annotations_trainval.json'
    # Number of rows every stored video feature matrix must have.
    NUM_FRAMES = 500

    def __init__(self,data_dir,feat_dir,split):
        self.data_dir = data_dir
        self.feat_dir = feat_dir
        self.split = split
        self.vid_ids,self.sents = self.get_ids()
        self.labels = self.getlabels()
        self.sanitycheck()
        self.data = self.getdata()

    def sanitycheck(self):
        """Drop videos whose segment, sentence and text-feature counts disagree.

        Mutates `self.vid_ids` in place and releases `self.sents` afterwards.
        """
        mis = []
        for key in self.labels.keys():
            txt_loc = self.feat_dir/self.split/f'txt_{key}.joblib'
            txt = joblib.load(txt_loc)
            # A video without sentences counts as a mismatch instead of
            # raising KeyError.
            sents = self.sents.get(key)
            if sents is None or not (len(self.labels[key]) == len(sents) == len(txt)):
                print(key)
                mis.append(key)
        print(f"segs are not matching:{mis}")
        for key in mis:
            self.vid_ids.remove(key)
        self.sents = None

    def __len__(self):
        return len(self.data)

    def __getitem__(self,idx):
        return self.load(self.data[idx])

    def getdata(self):
        """Return the flat list of `(video_id, segment_index)` items."""
        return [
            (vid_id, seg_idx)
            for vid_id in self.vid_ids
            for seg_idx in range(len(self.labels[vid_id]))
        ]

    @staticmethod
    def _similarity(vid, txt):
        """Cosine similarity between every row of `vid` and the vector `txt`.

        Args:
            vid: 2-D array, one feature vector per row.
            txt: Array holding a single feature vector of matching width.

        Returns:
            1-D array with one similarity per row of `vid`.
        """
        vid = vid / LA.norm(vid, axis=-1, keepdims=True)
        txt = txt / LA.norm(txt)
        return (vid @ txt.reshape(-1, 1)).reshape(-1)

    def load(self,data):
        """Load one item.

        Args:
            data: `(video_id, segment_index)` tuple as produced by `getdata`.

        Returns:
            `(scores, start, end)`; see the class docstring.

        Raises:
            IndexError: no text feature is stored for the segment index.
            ValueError: the video feature matrix does not have `NUM_FRAMES` rows.
        """
        vid_id,ind = data
        split_dir = self.feat_dir/self.split
        st,end = self.labels[vid_id][ind]
        vid = joblib.load(split_dir/f'vid_{vid_id}.joblib')
        txt_all = joblib.load(split_dir/f'txt_{vid_id}.joblib')
        # Fail loudly with context; a debugger trap here would hang DataLoader workers.
        if ind >= len(txt_all):
            raise IndexError(
                f"segment {ind} of video {vid_id} has no text feature "
                f"({len(txt_all)} stored)"
            )
        if vid.shape[0] != self.NUM_FRAMES:
            raise ValueError(
                f"video {vid_id} has {vid.shape[0]} frame features, "
                f"expected {self.NUM_FRAMES}"
            )
        out = self._similarity(vid, txt_all[ind])
        #regression outputs: frame indices scaled by the last valid index
        last_index = self.NUM_FRAMES - 1
        return out,st/last_index,end/last_index

    def getlabels(self):
        """Map every video id to its list of `(first_frame, last_frame)` segments."""
        return {vidid: self.extract_seg(self.data_dir/vidid) for vidid in self.vid_ids}

    def extract_seg(self,vid_loc):
        """Derive segment boundaries from the frame file names in `vid_loc`.

        File names are `<frame_index>_<tag>.<ext>`; frames whose tag is `n`
        are skipped, any other tag is parsed as an integer segment id.

        Returns:
            List of `(min_frame_index, max_frame_index)` tuples ordered by
            segment id.
        """
        imgs = sorted(os.listdir(vid_loc),key=lambda x: int(x.split('_')[0]))
        segs = defaultdict(list)
        for img in imgs:
            parts = img.split('_')
            ind,rem = int(parts[0]),parts[-1]
            if 'n.' not in rem:
                segs[int(rem.split('.')[0])].append(ind)
        return [(min(segs[segid]),max(segs[segid])) for segid in sorted(segs.keys())]

    def get_ids(self):
        """Return `(video_ids, sentences)` for videos that have cached features.

        Only videos with BOTH `vid_<id>.joblib` and `txt_<id>.joblib` in
        `feat_dir/split` are kept; the others are printed as missing.
        """
        # get_split_files builds paths by concatenation, so it needs a string
        # with a trailing slash.
        data_dir = str(self.data_dir).rstrip('/') + '/'
        vid_locs,_,sents,_ = get_split_files(self.split,self.ANNOTATIONS_FILE,data_dir)
        ids = [ele.split('/')[-2] for ele in vid_locs]
        files = set(os.listdir(self.feat_dir/self.split))
        finids = []
        missing = []
        for vid_id in ids:
            if f'vid_{vid_id}.joblib' in files and f'txt_{vid_id}.joblib' in files:
                finids.append(vid_id)
            else:
                missing.append(vid_id)
        print(f"missing:{missing}")
        return finids,sents

        

In [16]:
# annotns_file='/common/home/vk405/Projects/Crossmdl/Data/YouCookII/annotations/youcookii_annotations_trainval.json'
# data_dir = '/common/users/vk405/Youcook/'
# vid_locs,_,sents,_ = get_split_files('training',annotns_file,data_dir)
# mis = []
# for key in d.labels:
#     if len(d.labels[key]) == len(sents[key]):
#         pass
#     else:
#         print(key)
#         mis.append(key)


In [7]:
d  = Dset(RAWFRAME_DIR,FEAT_DIR,'training')

missing:['ukfCQQpZ0k4', 'NK2xHVWojgY', 'mixdagZ-fwI']
cwsDQ7M5OTI
uf65nfh6X2U
segs are not matching:['cwsDQ7M5OTI', 'uf65nfh6X2U']


In [9]:
#d.vid_ids

In [20]:
out,st,end = d[0]

In [30]:
#
trn_sz = int(len(d)*0.8)
val_sz = len(d)-trn_sz
trndset,valdset = random_split(d,[trn_sz,val_sz])

In [7]:
class BaselineModel(pl.LightningModule):
    """MLP that regresses a segment's start and end from a 500-value score vector.

    A shared `Linear(500, 250) + ReLU` trunk feeds two single-output linear
    heads (`start`, `end`). Training minimises the sum of the two MSE losses.

    Args:
        hparams: Hyper-parameter container saved via `save_hyperparameters`;
            `lr` is read from it by `configure_optimizers`.
    """

    def __init__(self,hparams):
        super().__init__()
        self.save_hyperparameters(hparams)
        self.shared = nn.Sequential(nn.Linear(500,250),nn.ReLU())
        self.start  = nn.Linear(250,1)
        self.end = nn.Linear(250,1)

    def forward(self,x):
        """Return `(start_pred, end_pred)` for input `x` of shape `(..., 500)`.

        Only the trailing singleton dimension is squeezed, so a batch of one
        keeps its batch dimension and still matches the target shape.
        """
        feats = self.shared(x)  # run the trunk once and reuse it for both heads
        return self.start(feats).squeeze(-1), self.end(feats).squeeze(-1)

    def _boundary_loss(self, batch):
        """Sum of the start and end MSE losses for one `(scores, start, end)` batch."""
        scores, st, end = batch
        p_st, p_end = self(scores)
        # Targets are cast to float to match the predictions' dtype.
        st_l = F.mse_loss(p_st.float(), st.float())
        end_l = F.mse_loss(p_end.float(), end.float())
        return st_l + end_l

    def training_step(self,batch,batch_idx):
        loss = self._boundary_loss(batch)
        self.log("train_loss",loss,on_step=True)
        return loss

    def validation_step(self,batch,batch_idx):
        loss = self._boundary_loss(batch)
        self.log("val_loss",loss,on_step=False)
        return loss

    def configure_optimizers(self):
        """Adam over all parameters with learning rate `self.hparams.lr`."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)

        

In [32]:
# Build the dataset and an 80/20 train/validation split with batch-64 loaders.
# Note: a cross-entropy loss over frame indices would not be optimal because
# it ignores the implicit ordering of the frames; BaselineModel regresses the
# boundaries with MSE instead.
# NOTE(review): random_split is called without a generator/seed, so the split
# membership presumably changes on every execution of this cell - confirm
# whether a fixed seed is wanted.
d = Dset(RAWFRAME_DIR,FEAT_DIR,'training')
trn_sz = int(len(d)*0.8)
val_sz = len(d)-trn_sz
trndset,valdset = random_split(d,[trn_sz,val_sz])
trnl = DataLoader(trndset,batch_size=64,shuffle=True)
vall = DataLoader(valdset,batch_size=64)

missing:['ukfCQQpZ0k4', 'NK2xHVWojgY', 'mixdagZ-fwI']
cwsDQ7M5OTI
uf65nfh6X2U
segs are not matching:['cwsDQ7M5OTI', 'uf65nfh6X2U']


In [50]:
#batch = next(iter(vall))
#for data in d:
    #pass

False

In [40]:
hparams = Namespace(
    lr = 1e-4
)

model = BaselineModel(hparams)
model

BaselineModel(
  (shared): Sequential(
    (0): Linear(in_features=500, out_features=250, bias=True)
    (1): ReLU()
  )
  (start): Linear(in_features=250, out_features=1, bias=True)
  (end): Linear(in_features=250, out_features=1, bias=True)
)

In [34]:
# Keep a NumPy copy of the model's first parameter tensor for inspection.
first_layer = []
for name,param in model.named_parameters():
    print(name)
    # list.append returns None, so append directly instead of printing its result.
    first_layer.append(param.detach().cpu().numpy())
    break



shared.0.weight
None


In [17]:
first_layer[0].shape

(250, 500)

In [8]:
def run(cfg):
    """Train a BaselineModel according to `cfg`.

    Args:
        cfg: Namespace-like config. Fields read here: `artifacts_loc`,
            `version`, `loggers`, `cbs`, `mode`, and - depending on those -
            `early_stop`, `checkpoint`, `RAWFRAME_DIR`, `FEAT_DIR`, `split`,
            `trn_split`, `trainer`. An optional `batch_size` (default 64)
            sets the loader batch size. `cfg` is also passed to the model as
            its hyper-parameters.

    Returns:
        The fitted `pl.Trainer` when `cfg.mode == 'train'`, otherwise None.
    """
    # NOTE(review): seeding is disabled although the Trainer is created with
    # deterministic=True; the train/val split below is therefore not
    # reproducible across runs.
    #pl.seed_everything(cfg.seed)
    artifacts_dir = cfg.artifacts_loc
    version = str(cfg.version)
    logger_list = get_loggers(artifacts_dir, version, cfg.loggers)
    cbs = []
    if "early_stop" in cfg.cbs:
        # Original author's note: early stopping does not really work at the moment.
        cbs.append(EarlyStopping(**cfg.early_stop, min_delta=0.00, verbose=False))
    if "checkpoint" in cfg.cbs:
        # os.path.join gives <artifacts_loc>/ckpts/<version> whether or not
        # artifacts_loc ends with a slash.
        store_path = os.path.join(artifacts_dir, "ckpts", version)
        os.makedirs(store_path, exist_ok=True)
        fname = "{epoch}-{train_loss:.2f}"
        cbs.append(ModelCheckpoint(**cfg.checkpoint, dirpath=store_path, filename=fname))

    #wandb.init(project="videoretrieval", config=cfg)
    if cfg.mode != 'train':
        # Only training is implemented.
        return None

    batch_size = getattr(cfg, "batch_size", 64)
    d = Dset(cfg.RAWFRAME_DIR,cfg.FEAT_DIR,cfg.split)
    trn_sz = int(len(d)*cfg.trn_split)
    val_sz = len(d)-trn_sz
    trndset,valdset = random_split(d,[trn_sz,val_sz])
    trnl = DataLoader(trndset,batch_size=batch_size,shuffle=True)
    vall = DataLoader(valdset,batch_size=batch_size)
    net = BaselineModel(cfg)
    trainer = pl.Trainer(
        logger=logger_list,callbacks=cbs,deterministic=True, **cfg.trainer
    )
    trainer.fit(net, trnl,vall)
    return trainer
    

In [9]:
from argparse import Namespace
FEAT_DIR = pathlib.Path('/common/users/vk405/CLIP_FEAT')
RAWFRAME_DIR = pathlib.Path('/common/users/vk405/Youcook/')

# Experiment configuration consumed by run() and get_model(); the whole
# namespace is also stored as the model's hyper-parameters.
cfg = Namespace(
    version = 'clip',  # logger version and checkpoint sub-directory name
    id = 0,  # not read by the code visible in this notebook
    FEAT_DIR = FEAT_DIR,
    RAWFRAME_DIR = RAWFRAME_DIR,
    artifacts_loc = "/common/home/vk405/Projects/Crossmdl/nbs/",  # root for logs and ckpts/
    data_dir = "/common/home/vk405/Projects/Crossmdl/Data/YouCookII/",  # not read by the code visible in this notebook
    trn_split = 0.8,  # fraction of the dataset used for training
    mode = 'train',  # run() only acts when this is 'train'
    split = 'training',
    loggers = ["csv"],  # keys understood by get_loggers: "wandb", "csv", "tb"
    seed = 0,  # currently unused: the seed_everything call in run() is commented out
    cbs = ["checkpoint","early_stop"],  # callbacks enabled in run()
    # extra keyword arguments forwarded to pl.Trainer
    trainer = {'log_every_n_steps': 1,
    'max_epochs': 30},
    # keyword arguments forwarded to ModelCheckpoint
    checkpoint = {"every_n_epochs": 1,
    "monitor": "val_loss"},
    # keyword arguments forwarded to EarlyStopping
    early_stop = {"monitor":"val_loss","mode":"min","patience":5},
    lr = 1e-4  # Adam learning rate read by BaselineModel.configure_optimizers

)

In [1]:
#run(cfg)

In [10]:
from pathlib import Path
def get_model(cfg):
    """Load the most recently written BaselineModel checkpoint for `cfg`.

    Checkpoints are looked up in `<cfg.artifacts_loc>/ckpts/<cfg.version>`,
    the directory run() writes to.

    Args:
        cfg: Config providing `artifacts_loc` and `version`.

    Returns:
        The BaselineModel restored from the newest `.ckpt` file.

    Raises:
        FileNotFoundError: the directory holds no `.ckpt` file.
    """
    ckpt_dir = Path(cfg.artifacts_loc)/'ckpts'/str(cfg.version)
    ckpts = [p for p in ckpt_dir.iterdir() if p.suffix == '.ckpt']
    if not ckpts:
        raise FileNotFoundError(f"no checkpoint found in {ckpt_dir}")
    # os.listdir order is arbitrary, so pick the newest file explicitly.
    latest = max(ckpts, key=lambda p: p.stat().st_mtime)
    print(f"loading ckpt:{latest.name}")
    # load_from_checkpoint is a classmethod; no throwaway instance is needed.
    return BaselineModel.load_from_checkpoint(checkpoint_path=str(latest))

In [11]:
model = get_model(cfg)

loading ckpt:epoch=26-train_loss=0.05.ckpt


In [12]:
# Rebuild the dataset and loaders to inspect predictions of the loaded model.
# NOTE(review): this draws a NEW random split (no generator/seed is passed),
# so `trndset`/`valdset` here presumably do not match the split used inside
# run(); samples in `vall` may have been seen during training - confirm
# before reporting validation numbers from these loaders.
d = Dset(cfg.RAWFRAME_DIR,cfg.FEAT_DIR,cfg.split)
trn_sz = int(len(d)*cfg.trn_split)
val_sz = len(d)-trn_sz
trndset,valdset = random_split(d,[trn_sz,val_sz])
trnl = DataLoader(trndset,batch_size=64,shuffle=False)
vall = DataLoader(valdset,batch_size=64)

missing:['ukfCQQpZ0k4', 'NK2xHVWojgY', 'mixdagZ-fwI']
cwsDQ7M5OTI
uf65nfh6X2U
segs are not matching:['cwsDQ7M5OTI', 'uf65nfh6X2U']


In [13]:
input,st,end = next(iter(trnl))

In [14]:
p_st,p_end = model(input)

In [15]:
p_st*499

tensor([191.7125, 258.3401, 180.0220, 186.2513, 212.2451, 251.2304, 266.2261,
        143.1521, 208.0229, 156.0505, 236.7549, 275.8039, 203.5645, 285.6291,
        210.5240, 278.6678, 198.1594, 248.0790, 226.1214, 141.7011, 197.1049,
        297.8026, 255.9364, 239.2966, 127.1581, 282.4768, 224.7016, 268.1040,
        187.8851, 221.5266, 213.4053, 282.1989, 331.4463, 279.1958, 290.1028,
        229.0420, 190.0243, 235.0196, 207.5753, 198.9007, 315.5528, 199.9480,
        213.5080, 209.4245, 269.1695, 265.9732, 344.6645, 247.7789, 227.4361,
        288.5319, 191.1913, 172.3463, 229.7609, 221.6632, 301.6812, 276.2428,
        298.5623, 145.8917, 222.5144, 250.6949, 152.6636, 252.0577, 260.4008,
        176.0404], grad_fn=<MulBackward0>)

In [16]:
st*499

tensor([153.0000, 222.0000, 175.0000,  96.0000, 320.0000, 231.0000, 208.0000,
        190.0000, 341.0000,  92.0000,  99.0000, 292.0000,  89.0000, 266.0000,
        232.0000, 429.0000, 135.0000, 223.0000, 262.0000, 190.0000,  99.0000,
        382.0000, 359.0000,  18.0000, 101.0000, 277.0000, 140.0000, 357.0000,
        263.0000, 130.0000, 106.0000, 382.0000, 248.0000, 285.0000, 351.0000,
        306.0000, 277.0000, 134.0000, 294.0000, 372.0000, 434.0000, 305.0000,
         84.0000, 361.0000, 346.0000, 273.0000, 329.0000, 317.0000, 414.0000,
         30.0000, 127.0000,  59.0000, 357.0000, 231.0000, 339.0000, 425.0000,
        306.0000, 160.0000, 358.0000, 294.0000, 261.0000, 158.0000, 307.0000,
        278.0000], dtype=torch.float64)

In [17]:
import pandas as pd
logs = pd.read_csv('/common/home/vk405/Projects/Crossmdl/nbs/csvlogs/clip/metrics.csv')

In [25]:
logs['val_loss'].dropna().tail(3)

3131    0.082000
3247    0.083049
3363    0.082151
Name: val_loss, dtype: float64

In [None]:
#inference

class Inference():
    """Work-in-progress loader mirroring Dset.load, with un-normalised labels.

    NOTE(review): an identical `Inference` class is defined again in a later
    cell and shadows this one.
    """
    def __init__(self,data_dir,feat_dir,split):
        self.data_dir = data_dir
        self.feat_dir = feat_dir
        self.split = split
        
        
    def load(self,data):
        """Return `(scores, st, end)` for a `(video_id, segment_index)` pair.

        `scores` is the product of the row-normalised video features with the
        normalised text feature; `st`/`end` are returned as stored in
        `self.labels`.
        """
        vid_id,ind = data
        vid_frames_loc = self.feat_dir/self.split/f'vid_{vid_id}.joblib'
        txt_loc = self.feat_dir/self.split/f'txt_{vid_id}.joblib'
        # NOTE(review): `self.labels` is never assigned in __init__, so this
        # line raises AttributeError unless the caller sets it beforehand.
        st,end = self.labels[vid_id][ind]
        vid = joblib.load(vid_frames_loc)
        try:
            txt = joblib.load(txt_loc)[ind]
        except:
            # NOTE(review): leftover debugging trap; after leaving the
            # debugger `txt` is still unbound.
            import pdb;pdb.set_trace()
        #normalize data
        #import pdb;pdb.set_trace()
        # The reshapes fix the expected sizes: 500 rows in `vid`, 512 values in `txt`.
        vid = vid/(LA.norm(vid,axis=-1)).reshape(500,1)
        txt = (txt/LA.norm(txt))
        out = np.squeeze(vid@txt.reshape(512,1))
        #regression outputs
        return out,st,end

In [41]:
# Use a distinct name here: `csvlogger` is the logger factory that
# get_loggers() calls, and rebinding it to a CSVLogger instance would make
# every later get_loggers(..., ["csv"]) call fail.
debug_csv_logger = CSVLogger('/common/home/vk405/Projects/Crossmdl/nbs')
trainer = pl.Trainer(
    logger=debug_csv_logger,  # actually attach the logger created above
    deterministic=True,log_every_n_steps =  1,
    max_epochs= 10
        )
#trainer.fit(model, trnl,vall)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(

  | Name   | Type       | Params
--------------------------------------
0 | shared | Sequential | 125 K 
1 | start  | Linear     | 251   
2 | end    | Linear     | 251   
--------------------------------------
125 K     Trainable params
0         Non-trainable params
125 K     Total params
0.503     Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Validation sanity check:  50%|█████     | 1/2 [00:00<00:00,  1.22it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


                                                                      

  rank_zero_warn(


Epoch 0:   0%|          | 0/144 [00:00<?, ?it/s] > [0;32m<ipython-input-39-d214a13063c5>[0m(20)[0;36mtraining_step[0;34m()[0m
[0;32m     18 [0;31m        [0;31m#loss_end = nn.CrossEntropyLoss()[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     19 [0;31m        [0;32mimport[0m [0mpdb[0m[0;34m;[0m[0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 20 [0;31m        [0mst_l[0m [0;34m=[0m [0mloss_st[0m[0;34m([0m[0mtorch[0m[0;34m.[0m[0msqueeze[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mstart[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mshared[0m[0;34m([0m[0minput[0m[0;34m)[0m[0;34m)[0m[0;34m)[0m[0;34m.[0m[0mfloat[0m[0;34m([0m[0;34m)[0m[0;34m,[0m[0mst[0m[0;34m.[0m[0mfloat[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     21 [0;31m        [0;31m#end_l = loss_end(self.end(input),end)[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     22 [0;3

BdbQuit: 

In [38]:
#inference
class Inference():
    """Loader that returns similarity scores with un-normalised segment labels.

    Args:
        data_dir: Raw-frame root directory (stored, not read by `load`).
        feat_dir: pathlib.Path of the feature root; features are read from
            `feat_dir/split/vid_<id>.joblib` and `txt_<id>.joblib`.
        split: Name of the feature sub-directory.
        labels: Optional mapping `video_id -> list of (start, end)`; defaults
            to an empty mapping. `load` needs an entry for every video it is
            asked for.
    """
    def __init__(self,data_dir,feat_dir,split,labels=None):
        self.data_dir = data_dir
        self.feat_dir = feat_dir
        self.split = split
        # `load` reads self.labels, so make sure the attribute always exists.
        self.labels = {} if labels is None else labels

    def load(self,data):
        """Return `(scores, st, end)` for a `(video_id, segment_index)` pair.

        `scores` holds the cosine similarity between every stored frame
        feature of the video and the segment's text feature; `st`/`end` are
        returned exactly as stored in `self.labels`.

        Raises:
            KeyError: no labels are registered for the video.
            IndexError: the segment index has no label or no text feature.
        """
        vid_id,ind = data
        split_dir = self.feat_dir/self.split
        st,end = self.labels[vid_id][ind]
        vid = joblib.load(split_dir/f'vid_{vid_id}.joblib')
        # Let lookup errors propagate instead of trapping into a debugger.
        txt = joblib.load(split_dir/f'txt_{vid_id}.joblib')[ind]
        #normalize data
        vid = vid/LA.norm(vid,axis=-1,keepdims=True)
        txt = txt/LA.norm(txt)
        out = np.squeeze(vid@txt.reshape(-1,1))
        #regression outputs
        return out,st,end

In [None]:
#clip.load(#)

In [90]:
# Print every video folder whose first listed entry is not a .png file.
data_dir = '/common/users/vk405/Youcook/'
for vid in d.vid_ids:
    vidloc = f'{data_dir}{vid}'
    imgs = os.listdir(vidloc)
    if '.png' not in imgs[0]:
        print(vidloc)

In [5]:
# List the dataset videos that lack a cached video or text feature file.
store_dir = '/common/users/vk405/CLIP_FEAT/'
split = 'training'
all_files = set(os.listdir(store_dir+split))
missing = []
for vid_id in d.vid_ids:
    a = f'vid_{vid_id}.joblib'
    b = f'txt_{vid_id}.joblib'
    # Both feature files must be present.
    if not {a, b} <= all_files:
        print(vid_id)
        missing.append(vid_id)

In [6]:
missing

[]

In [11]:
(data_dir+missing[0]+'/').split('/')

['', 'common', 'users', 'vk405', 'Youcook', 'ukfCQQpZ0k4', '']

In [7]:
# Set-up for (re)extracting CLIP features of the videos in `missing`.
annotns_file='/common/home/vk405/Projects/Crossmdl/Data/YouCookII/annotations/youcookii_annotations_trainval.json'
data_dir = '/common/users/vk405/Youcook/'
model_name = 'ViT-B/32'
# Ids that Dset reported as missing cached features.
missing = ['ukfCQQpZ0k4', 'NK2xHVWojgY', 'mixdagZ-fwI']
#useful_vids = missing
from tqdm import tqdm
# NOTE(review): this rebinds the global `model`, which other cells use for the
# BaselineModel instance; re-run get_model(cfg) before using those cells again.
model,preprocess = clip.load(model_name)
# Inference mode on the GPU (the recorded run failed here with CUDA out of memory).
model.eval().cuda()
# Per-video count of frames that could not be read, filled by the extraction loop.
error_cnt = {}

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.74 GiB total capacity; 75.01 MiB already allocated; 15.69 MiB free; 82.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [5]:

# Encode and cache CLIP text/frame features for every video listed in `missing`.
vid_locs,_,sents,_ = get_split_files(split,annotns_file,data_dir)
useful_vids  = [data_dir+ele+'/' for ele in missing]
for vidloc in tqdm(useful_vids):
    vid_id = vidloc.split('/')[-2]
    save_loc_vid = store_dir+split+'/'+f'vid_{vid_id}.joblib'
    save_loc_text = store_dir+split+'/'+f'txt_{vid_id}.joblib'
    print(save_loc_text)
    print(os.path.exists(save_loc_vid))
    if os.path.exists(save_loc_vid):
        # Video features are already cached.
        continue
    files = sorted(os.listdir(vidloc),key=lambda x:int(x.split('_')[0]))
    if len(files) != 500:
        # Only videos with exactly 500 frame files are encoded.
        continue

    imgs  = []
    cnt = 0
    for file in files:
        try:
            # The context manager closes the file handle once the frame is preprocessed.
            with Image.open(vidloc+file) as im:
                imgs.append(preprocess(im))
        except Exception as err:
            # An unreadable frame is replaced by the previous good frame; that
            # is impossible for the very first frame, so fail with a clear message.
            if not imgs:
                raise RuntimeError(
                    f"first frame of {vid_id} is unreadable: {vidloc+file}"
                ) from err
            cnt += 1
            imgs.append(imgs[-1])
    error_cnt[vid_id]  = cnt

    # assumes preprocess returns a torch tensor per frame - TODO confirm
    image_input = torch.stack(imgs).cuda()
    with torch.no_grad():
        text_tokens = clip.tokenize(sents[vid_id]).cuda()
        text_features = model.encode_text(text_tokens).float()
        joblib.dump(text_features.detach().cpu().numpy(),save_loc_text)
        # Encode in two halves; a single pass can throw a memory error.
        out1 = model.encode_image(image_input[:250]).float()
        out2 = model.encode_image(image_input[250:]).float()
        im_emb = torch.cat([out1,out2],dim=0)
        joblib.dump(im_emb.detach().cpu().numpy(),save_loc_vid)
    print(error_cnt)

RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 15.74 GiB total capacity; 203.54 MiB already allocated; 4.69 MiB free; 224.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [72]:
len([ele for ele in os.listdir(RAWFRAME_DIR) if 'joblib' not in ele])

1616

In [73]:
# Dataset video ids that have no raw-frame folder under RAWFRAME_DIR.
vids = {ele for ele in os.listdir(RAWFRAME_DIR) if 'joblib' not in ele}
missing = []
for ele in d.vid_ids:
    if ele not in vids:
        missing.append(ele)


In [77]:
missing[0]



'PJmzCZ3uw'

In [78]:
for ele in vids:
    if missing[0] in ele:
        print(ele)

iPJmzCZ3uwo


In [81]:
annotns_file='/common/home/vk405/Projects/Crossmdl/Data/YouCookII/annotations/youcookii_annotations_trainval.json'
data_dir = '/common/users/vk405/Youcook/'
vid_locs,_,sents,_ = get_split_files('training',annotns_file,data_dir)

In [84]:
vid_locs[0].split('/')[-2]

'GLd3aX16zBg'

In [61]:
vidloc.split('/')[-2]
d = Dset(RAWFRAME_DIR,FEAT_DIR,'training')

FileNotFoundError: [Errno 2] No such file or directory: '/common/users/vk405/Youcook/PJmzCZ3uw'

In [59]:
d.vid_ids

['E46hfYyQzD4',
 'VwBJ230pjSk',
 '9zqwA2munL0',
 'MIXkGU4TJp0',
 'PJmzCZ3uw',
 '2ICT6R4XpG8',
 'CElmXCveb48',
 '9Rrfeohr3Fk',
 'ZsAfIadkwj8',
 '8PFyEtZgSUs',
 'C67CT1mdkzI',
 'rAvuiOAC-V8',
 'KpzFf81iax8',
 'Z1ajYx0UIx',
 'v7xUF6_YFcc',
 '1U5GzTal-2Y',
 'aotPPzd8T_c',
 'L3x9HDsfpBY',
 'sLFCqYYhrZM',
 'R-_LEZ41Pt4',
 'UHmKlQ2OkTM',
 'gswKIbddBHw',
 'DOpNRTefVjE',
 'zwbCD3CUFIs',
 'Y4SijHJ02Zg',
 '-lMphXmWvbk',
 'EP2OBrPPWtU',
 'ueGKsfUW-PM',
 'VswrGW9b3ck',
 'Moh7iYf2rG',
 'BFz-pqB2Opw',
 '1LxRUKMCsrM',
 '91Fz5ZBgeL4',
 '-ORd4EBliqw',
 'OL_bDVVxmwA',
 'ORHuKU5vxRg',
 'DXYCXcuifM8',
 'TsrTU3CJn2c',
 'rrJbjpCOXs',
 'Vw0jI7kJtY',
 'QS0I7mLQIgc',
 'p2wKeNmzIJ0',
 'IJXdHAGqxhA',
 'pS5T4dCCclQ',
 'eyfZZWZwDOY',
 'hXkepSgN_z',
 'Hu1-l8xvJjU',
 'frCFxOt9390',
 'fIhIyOWIIz0',
 'JlXYqpEWUuA',
 'zudeWqL9ALU',
 'd_-261CDy18',
 '7PATXwoKFeI',
 'khmRPBMz6d',
 'Kbrkj7wZ4UA',
 'qaDMnLE6NBU',
 'MxcI4wQLvK4',
 'IvDEV7eROm4',
 'T3v9c2m0ni0',
 'KmqfNQG1BXk',
 'P0QvEM1pyMU',
 'nB-z3lmxFXM',
 'EN1Qf9vcnN0',


In [55]:
txt = [ele for ele in os.listdir(FEAT_DIR/'training') if 'txt' in ele]


'E46hfYyQzD4.joblib'

In [56]:
txt[0].strip('txt_').strip('.joblib')

'E46hfYyQzD4'

In [27]:
os.listdir(RAWFRAME_DIR/'SKRTpHStvT8')

['83_1.png',
 '175_3.png',
 '371_n.png',
 '455_n.png',
 '272_n.png',
 '102_2.png',
 '139_n.png',
 '154_3.png',
 '226_5.png',
 '317_7.png',
 '123_2.png',
 '474_n.png',
 '489_n.png',
 '188_4.png',
 '21_n.png',
 '253_n.png',
 '42_n.png',
 '360_8.png',
 '478_n.png',
 '485_n.png',
 '292_6.png',
 '184_4.png',
 '158_3.png',
 '459_n.png',
 '211_n.png',
 '63_n.png',
 '341_8.png',
 '436_n.png',
 '135_n.png',
 '179_3.png',
 '380_n.png',
 '230_5.png',
 '301_7.png',
 '58_n.png',
 '245_n.png',
 '37_n.png',
 '429_9.png',
 '288_6.png',
 '275_6.png',
 '462_n.png',
 '95_1.png',
 '367_n.png',
 '163_3.png',
 '264_n.png',
 '16_n.png',
 '334_8.png',
 '443_n.png',
 '140_n.png',
 '320_7.png',
 '79_n.png',
 '114_2.png',
 '268_n.png',
 '295_n.png',
 '338_8.png',
 '357_8.png',
 '75_n.png',
 '207_n.png',
 '118_2.png',
 '0_n.png',
 '396_n.png',
 '401_n.png',
 '54_n.png',
 '249_n.png',
 '425_9.png',
 '284_6.png',
 '192_4.png',
 '279_6.png',
 '493_n.png',
 '273_n.png',
 '323_8.png',
 '454_n.png',
 '138_n.png',
 '103

In [3]:
# Example of target with class indices
# Scratch check of nn.CrossEntropyLoss: 3 samples, 5 classes, integer targets.
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)  # NOTE: shadows the builtin `input`
target = torch.empty(3, dtype=torch.long).random_(5)  # random class index in [0, 5)
output = loss(input, target)
output.backward()


In [5]:
# import clip

# m,p = clip.load('ViT-B/32')

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.74 GiB total capacity; 75.01 MiB already allocated; 15.69 MiB free; 82.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Example of target with class probabilities
# Relies on `loss` (nn.CrossEntropyLoss) created in the class-indices example cell.
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)  # each row is a probability distribution
output = loss(input, target)
output.backward()