In [1]:
import numpy as np
import cv2
import math
import torch
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import flow_transforms
import models
import datasets
from multiscaleloss import multiscaleEPE, realEPE
from util import flow2rgb, AverageMeter, save_checkpoint



In [2]:
mean=[0.45,0.432,0.411]
mean=[0,0,0]
input_transform = transforms.Compose([
    flow_transforms.ArrayToTensor(),
    transforms.Normalize(mean=[0,0,0], std=[255,255,255]),
    transforms.Normalize(mean=mean, std=[1,1,1])
])
target_transform = transforms.Compose([
    flow_transforms.ArrayToTensor(),
    transforms.Normalize(mean=[0,0],std=[20*0+1,20*0+1])
])


In [3]:
co_transform = flow_transforms.Compose([
            flow_transforms.RandomTranslate(10),
            flow_transforms.RandomRotate(10,5),
            flow_transforms.RandomCrop((320,448)),
            flow_transforms.RandomVerticalFlip(),
            flow_transforms.RandomHorizontalFlip()
             ])

In [4]:
data = r"KITTI_split\training_80"

In [5]:
print("=> fetching img pairs in '{}'".format(data))
train_set, test_set = datasets.__dict__["KITTI_noc"](
    data,
    transform=input_transform,
    target_transform=target_transform,
    co_transform=None,
    split=0.8
)
print('{} samples found, {} train samples and {} test samples '.format(len(test_set)+len(train_set),
                                                                        len(train_set),
                                                                        len(test_set)))

=> fetching img pairs in 'KITTI_split\training_80'
144 samples found, 115 train samples and 29 test samples 


In [6]:
train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=4,
        num_workers=1, pin_memory=True, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    test_set, batch_size=4,
    num_workers=1, pin_memory=True, shuffle=False)

In [7]:
input, target = next(iter(train_loader))

In [8]:
i = 0
im1 = input[0][i].numpy().transpose(1, 2, 0)
im2 = input[1][i].numpy().transpose(1, 2, 0)
im3 = input[0][i+1].numpy().transpose(1, 2, 0)
im4 = input[1][i+1].numpy().transpose(1, 2, 0)

In [9]:
cv2.imshow("1", im1)
cv2.imshow("2", im2)
cv2.waitKey(0)
cv2.destroyAllWindows()

## MSE of two input images

In [10]:
def mse(imageA, imageB):
	# the 'Mean Squared Error' between the two images is the
	# sum of the squared difference between the two images;
	# NOTE: the two images must have the same dimension
	err = np.sum((imageA.astype("float") - imageB.astype("float")) ** 2)
	err /= float(imageA.shape[0] * imageA.shape[1])
	
	# return the MSE, the lower the error, the more "similar"
	# the two images are
	return err

In [11]:
mse(im1,im2)

0.19613990740005707

## Applying Flow Map on Image 1

In [12]:
tg1 = target[0].permute(1,2,0)

In [13]:
recon = np.zeros_like(im1)

In [14]:
width = len(im1[0])
height = len(im1)
width, height

(1242, 375)

In [15]:
print(f"image 1 shape: \t{im1.shape}")
print(f"recon shape: \t{recon.shape}")
print(f"target shape: \t{tg1.numpy().shape}")

image 1 shape: 	(375, 1242, 3)
recon shape: 	(375, 1242, 3)
target shape: 	(375, 1242, 2)


In [19]:
%%time

# recon = np.zeros_like(im1)
recon = im1.copy()

for h in range(height):
    for w in range(width):
        vec = tg1[h,w,:]
        dx = round(vec[0].item())
        dy = round(vec[1].item())
        h_ = h+dy if h+dy < height else height-1
        w_ = w+dx if w+dx < width else width-1
        
        recon[h_, w_, :] = im1[h, w, :]


Wall time: 8.23 s


In [23]:
cv2.imshow("image2", im2)
cv2.imshow("recon", recon)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [22]:
print(f"mse im1 and im2: {mse(im1,im2):.2f}")
print(f"mse im2 and recon: {mse(im2,recon)}")

mse im1 and im2: 0.20
mse im2 and recon: 0.18737329857721532
