# Annotate Homographies
Large scale homography annotation for extraction of next best views.

In [2]:
import os
import cv2
import copy
import torch
import torchvision
import torchvision.io as io
from torchvision.datasets.video_utils import VideoClips
from torch.utils.data import DataLoader
import sys

sys.path.append('../')

from utils import load_yaml, save_yaml, dictListToCompose, recursiveMethodCallFromDictList

servers = load_yaml('../configs/servers.yml')
server = servers['local']

databases = load_yaml('../configs/cholec80_transforms.yml')
database = databases['databases'][0]

In [13]:
# load transforms and convert them to torch.functional methods
key_dict = {
    'Crop': 'crop',
    'Resize': 'resize'
}



functional_databases = copy.deepcopy(databases)
for db_idx, db in enumerate(databases['databases']):
    functional_databases['databases'][db_idx]['transforms'] = [] # delete old transforms
    for transforms in db['transforms']:
        functional_transforms = []
        for transform in transforms:
            # 'Crop' -> 'crop', shape -> height, width, top_left_corner -> top, left
            # 'Resize' -> 'resize', dsize -> size
            functional_transform = {}
            for key, value in transform.items():
                if key == 'Crop':
                    functional_transform[key_dict[key]] = {
                        'height': value['shape'][0],
                        'width': value['shape'][1],
                        'top': value['top_left_corner'][0],
                        'left': value['top_left_corner'][1]
                    }
                elif key == 'Resize':
                    functional_transform[key_dict[key]] = {
                        'size': value['dsize']
                    }
                else:
                    raise ValueError('Key not known')

            functional_transforms.append(functional_transform)                
        functional_databases['databases'][db_idx]['transforms'].append(functional_transforms)

save_yaml('../configs/cholec80_transforms_functional.yml', functional_databases)

In [5]:
functional_databases = load_yaml('../configs/cholec80_transforms_functional.yml')
functional_database = functional_databases['databases'][0]

paths = [
    os.path.join(
        server['database']['location'], 
        database['prefix'], 
        database['videos']['prefix'], 
        x
    ) for x in functional_database['videos']['files'][:1]
]

paths = ['/media/martin/Samsung_T5/data/endoscopic_data/SurgVisDom/train_1/Porcine/Dissection/DS_0001.mp4']
# transforms = load_yaml('../configs/cholec80_transforms_functional.yml')

# video reader not compiled yet https://github.com/pytorch/vision/issues/1446
# video reading https://github.com/pytorch/vision/blob/ed5b2dc3a5e7411d8b40cc7e526e151983e99cf9/torchvision/datasets/video_utils.py#L45-L69
# dataset example check https://github.com/pytorch/vision/blob/ed5b2dc3a5e7411d8b40cc7e526e151983e99cf9/torchvision/datasets/kinetics.py#L50-L78

vc = VideoClips(paths, clip_length_in_frames=2, frames_between_clips=1)

N = 25
videos = []
# re-batch BxHxWxC -> Bx2CxHxW: i0, i1, i2, ..., iN -> [i0, i1], [i1, i2], ..., [iN-1, iN]
for i in range(N):
    video, audio, info, video_idx = vc.get_clip(i)
    video = video.permute(0, 3, 1, 2)
    transforms = functional_database['transforms'][video_idx]
    video = recursiveMethodCallFromDictList(video, transforms, torchvision.transforms.functional)
    video = torch.cat((video[0], video[1])).unsqueeze(0)
    videos.append(video)

videos = torch.cat(videos)
print(video.shape)

# forward through model



    


# print(ds.metadata)
# clip = ds.get_clip(0)

# for idx, file in enumerate(database['videos']['files']):
#     path = os.path.join(server['database']['location'], database['prefix'], database['videos']['prefix'], file)



#     #  pts, video_fps = io.read_video_timestamps(path)
#     # print(pts, video_fps) # 3600
#     pts = 3600
#     N_start = 1500001
#     N_end = 1500002  # predictive horizon
#     vframe, aframe, info = io.read_video(path, N_start*pts, (N_end-1)*pts, pts_unit='str')

#     print(info)

#     transforms = database['transforms'][idx]
#     transforms = dict_list_to_compose(transforms)

#     print(vframe.shape)

#     for idx, frame in enumerate(vframe):
#         frame = frame.numpy()
#         frame = transforms(frame)
#         cv2.imshow('frame', frame)
#         cv2.waitKey()

# cv2.destroyAllWindows()


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]
torch.Size([1, 6, 640, 480])
