# Hand Labelled Homography
Generate a pipeline for quantitative analysis of homography estimation. Do
 - Randomly sample sequences from list of videos
 - Safe sequences into folder
 - Annotate some sequences
 - Create evaluation pipeline, precision, drift


In [1]:
import sys
import pandas as pd
from dotmap import DotMap

sys.path.append('../')

from utils.io import load_yaml

server = 'local'
servers = load_yaml('../config/servers.yml')
server = DotMap(servers[server])

meta_df = pd.read_pickle('../config/cholec80_transforms.pkl')
meta_df

Unnamed: 0,database,train,file,pre_transforms,aug_transforms,auxiliary
0,cholec80,True,"{'name': 'video01.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
1,cholec80,True,"{'name': 'video02.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
2,cholec80,True,"{'name': 'video03.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
3,cholec80,True,"{'name': 'video04.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
4,cholec80,True,"{'name': 'video05.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
...,...,...,...,...,...,...
70,cholec80,True,"{'name': 'video17.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
71,cholec80,True,"{'name': 'video18.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
72,cholec80,True,"{'name': 'video19.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}
73,cholec80,True,"{'name': 'video20.mp4', 'path': 'videos'}","[{'module': 'utils.transforms', 'type': 'Crop'...","[{'module': 'torchvision.transforms', 'type': ...",{}


## Randomly Sample Image Sequences

In [32]:
import cv2
import numpy as np
from kornia import tensor_to_image

from utils.transforms import anyDictListToCompose
from utils.sampling import RandomSequences
from utils.io import generate_path

debug = False

max_seq = 5
paths = meta_df.apply(lambda x: os.path.join(server.database.location, x.database, x.file['path'], x.file['name']), axis=1).tolist()
seq_len = 100
strides = [1]

# append to tensor transform, as meta_df is supposed to operate on tensors
to_tensor = {'module': 'torchvision.transforms', 'type': 'ToTensor', 'kwargs': {}}
transforms = meta_df.apply(lambda x: anyDictListToCompose([to_tensor] + x.pre_transforms), axis=1).tolist()

random_sequences = RandomSequences(
    max_seq=max_seq,
    paths=paths,
    seq_len=seq_len,
    transforms=transforms,
    verbose=True
)

out_prefix = 'out/homography_labelling'

for seq, vid_idx, frame_idx in random_sequences:
    print('vid_idx: {}, frame_idx: {}'.format(vid_idx, frame_idx))
    for idx, frame in enumerate(seq):
        frame = (tensor_to_image(frame)*255).astype(np.uint8)
        if debug:
            cv2.imshow('random_frame', frame)  # show images
            cv2.waitKey()
        else:
            vid_path = os.path.join(out_prefix, 'vid_{}_frame_{}'.format(vid_idx. frame_idx))
            generate_path(vid_path)
            cv2.imwrite(os.path.join(vid_path, 'frame_{}.png'.format(frame_idx + idx*strides[0])), frame)
cv2.destroyAllWindows()

vid_idx: 14, frame_idx: 48924
vid_idx: 9, frame_idx: 43711
vid_idx: 0, frame_idx: 24431
vid_idx: 21, frame_idx: 1726
vid_idx: 18, frame_idx: 20768


### Dataset on Sampled Frames

In [33]:
import os

from utils.io import recursive_scan2df

prefix = 'out/homography_labelling'

# create a simplified dataframe
postfix = '.png'
df = recursive_scan2df(prefix, postfix)
df['vid'] = df.folder.apply(lambda x: int(x.split('_')[-1]))
df['frame'] = df.file.apply(lambda x: int(x.split('_')[-1].replace(postfix, '')))
df = df.sort_values(['vid', 'frame']).reset_index(drop=True)

out_path = 'light_log'
df.to_pickle(os.path.join(prefix, out_path + '.pkl'))

df

Unnamed: 0,folder,file,vid,frame
0,vid_0,frame_24431.png,0,24431
1,vid_0,frame_24432.png,0,24432
2,vid_0,frame_24433.png,0,24433
3,vid_0,frame_24434.png,0,24434
4,vid_0,frame_24435.png,0,24435
...,...,...,...,...
495,vid_21,frame_1821.png,21,1821
496,vid_21,frame_1822.png,21,1822
497,vid_21,frame_1823.png,21,1823
498,vid_21,frame_1824.png,21,1824


## Estimate Homographies on Sequences

### Forward-Backward Consistency

In [41]:
import cv2
import torch
import numpy as np
import pandas as pd
from kornia import tensor_to_image, warp_perspective
from torch.utils.data import DataLoader
import sys

sys.path.append('../')

from utils.viz import yt_alpha_blend
from utils.transforms import anyDictListToCompose
from utils.processing import forward_backward_sequence, image_edges, four_point_homography_to_matrix
from datasets import ImageSequenceDataset
from lightning_modules import DeepImageHomographyEstimationModuleBackbone

def visualize(fw_img, fw_wrp, bw_img, bw_wrp, fw_duv, bw_duv):
    fw_uv, bw_uv = image_edges(fw_img), image_edges(bw_img)
    fw_H, bw_H = four_point_homography_to_matrix(fw_uv, fw_duv), four_point_homography_to_matrix(bw_uv, bw_duv)

    fw_pred_wrp = warp_perspective(fw_img, torch.inverse(fw_H), fw_img.shape[-2:])
    bw_pred_wrp = warp_perspective(bw_img, torch.inverse(bw_H), bw_img.shape[-2:])
    
    fw_blend, bw_blend = yt_alpha_blend(fw_wrp, fw_pred_wrp), yt_alpha_blend(bw_wrp, bw_pred_wrp)
    fw_blend, bw_blend = tensor_to_image(fw_blend), tensor_to_image(bw_blend)

    cv2.imshow('fw_blend', fw_blend[0])
    cv2.imshow('bw_blend', bw_blend[0])
    cv2.waitKey()

prefix = 'out/homography_labelling'
df = pd.read_pickle(os.path.join(prefix, 'light_log.pkl'))
seq_len = 48

transforms = [
    {'module': 'torchvision.transforms', 'type': 'ConvertImageDtype', 'kwargs': {'dtype': torch.float}}
]
transforms = [anyDictListToCompose(transforms) for _ in range(len(meta_df))]

ds = ImageSequenceDataset(
    df=df,
    prefix=prefix,
    seq_len=seq_len,
    transforms=transforms
)

# load network
prefix = '/home/martin/Tresors/homography_imitation_learning_logs/deep_image_homography_estimation_backbone/version_2'
configs = load_yaml(os.path.join(prefix, 'configs.yml'))
model = DeepImageHomographyEstimationModuleBackbone.load_from_checkpoint(os.path.join(prefix, 'checkpoints/epoch=49.ckpt'), shape=configs['model']['shape'])

device = 'cpu'
if torch.cuda.is_available():
    print('Running with CUDA backend.')
    device = 'cuda'

model.to(device)
model = model.eval()
model.freeze()


batch_size = 1
num_workers = 0

dl = DataLoader(ds, batch_size=batch_size, num_workers=num_workers)

for batch in dl:
    fw, bw = forward_backward_sequence(batch)

    # create pairs
    fw_img, fw_wrp = fw[:,:-1:].view((-1,) + fw.shape[-3:]).to(device), fw[:,1::].view((-1,) + fw.shape[-3:]).to(device) 
    bw_img, bw_wrp = bw[:,:-1:].view((-1,) + bw.shape[-3:]).to(device), bw[:,1::].view((-1,) + bw.shape[-3:]).to(device)
    
    fw_duv = model(fw_img, fw_wrp)
    bw_duv = model(bw_img, bw_wrp)

    # compute error
    print(fw_duv.mean(axis=0).mean())
    print(bw_duv.mean(axis=0).mean())
    duv = torch.cat((fw_duv, bw_duv))
    print(duv.mean(axis=0).mean())
    break

    # visualize(fw_img, fw_wrp, bw_img, bw_wrp, fw_duv, bw_duv)
cv2.destroyAllWindows()
    
    # for fw_frame, bw_frame in zip(fw[0], bw[0]):
    #     fw_frame, bw_frame = tensor_to_image(fw_frame), tensor_to_image(bw_frame)

    #     cv2.imshow('fw', fw_frame)
    #     cv2.imshow('bw', bw_frame)
    #     cv2.waitKey()
    

Running with CUDA backend.
tensor(-1.2412, device='cuda:0')
tensor(1.1628, device='cuda:0')
tensor(-0.0392, device='cuda:0')
