In [4]:
import numpy as np
import cv2
import math
import torch
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import flow_transforms
import models
import datasets
from multiscaleloss import multiscaleEPE, realEPE
from util import flow2rgb, AverageMeter, save_checkpoint



In [5]:
mean=[0.45,0.432,0.411]
mean=[0,0,0]
input_transform = transforms.Compose([
    flow_transforms.ArrayToTensor(),
    transforms.Normalize(mean=[0,0,0], std=[255,255,255]),
    transforms.Normalize(mean=mean, std=[1,1,1])
])
target_transform = transforms.Compose([
    flow_transforms.ArrayToTensor(),
    transforms.Normalize(mean=[0,0],std=[20*0+1,20*0+1])
])


In [6]:
co_transform = flow_transforms.Compose([
            flow_transforms.RandomTranslate(10),
            flow_transforms.RandomRotate(10,5),
            flow_transforms.RandomCrop((320,448)),
            flow_transforms.RandomVerticalFlip(),
            flow_transforms.RandomHorizontalFlip()
             ])

In [7]:
data = r"KITTI_split\training_80"

In [8]:
print("=> fetching img pairs in '{}'".format(data))
train_set, test_set = datasets.__dict__["KITTI_noc"](
    data,
    transform=input_transform,
    target_transform=target_transform,
    co_transform=None,
    split=1.0
)
print('{} samples found, {} train samples and {} test samples '.format(len(test_set)+len(train_set),
                                                                        len(train_set),
                                                                        len(test_set)))

=> fetching img pairs in 'KITTI_split\training_80'
144 samples found, 144 train samples and 0 test samples 


In [9]:
len(train_set)

144

In [10]:
train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=1,
        num_workers=1, pin_memory=True, shuffle=True)
# val_loader = torch.utils.data.DataLoader(
#     test_set, batch_size=0,
#     num_workers=1, pin_memory=True, shuffle=False)

In [11]:
for i, (input, target) in enumerate(train_loader):
     

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143


In [12]:
input, target = next(iter(train_loader))

In [36]:
i = 0
im1 = input[0][i].numpy().transpose(1, 2, 0)
im2 = input[1][i].numpy().transpose(1, 2, 0)
im3 = input[0][i+1].numpy().transpose(1, 2, 0)
im4 = input[1][i+1].numpy().transpose(1, 2, 0)

In [37]:
im1 = im1[...,::-1].copy()
im2 = im2[...,::-1].copy()
im3 = im3[...,::-1].copy()
im4 = im4[...,::-1].copy()

In [38]:
cv2.imshow("1", im1)
cv2.imshow("2", im2)
cv2.waitKey(0)
cv2.destroyAllWindows()

## MSE of two input images

In [39]:
def mse(imageA, imageB):
	# the 'Mean Squared Error' between the two images is the
	# sum of the squared difference between the two images;
	# NOTE: the two images must have the same dimension
	err = np.sum((imageA.astype("float") - imageB.astype("float")) ** 2)
	err /= float(imageA.shape[0] * imageA.shape[1])
	
	# return the MSE, the lower the error, the more "similar"
	# the two images are
	return err

In [40]:
mse(im1,im2)

0.16918404435096754

## Applying Flow Map on Image 1

In [41]:
tg1 = target[0].permute(1,2,0)

In [42]:
recon = np.zeros_like(im1)

In [43]:
width = len(im1[0])
height = len(im1)
width, height

(1242, 375)

In [44]:
print(f"image 1 shape: \t{im1.shape}")
print(f"recon shape: \t{recon.shape}")
print(f"target shape: \t{tg1.numpy().shape}")

image 1 shape: 	(375, 1242, 3)
recon shape: 	(375, 1242, 3)
target shape: 	(375, 1242, 2)


In [45]:
%%time

# recon = np.zeros_like(im1)
recon = im2.copy()

for h in range(height):
    for w in range(width):
        vec = tg1[h,w,:]   # 2D vector containing OF of pixel (h,w)
        dx = round(vec[0].item()) # x part of vector
        dy = round(vec[1].item()) # y part of vector
        h_new = h+dy if h+dy < height else height-1 # make sure index stays in bounds
        w_new = w+dx if w+dx < width else width-1
        
        recon[h_new, w_new, :] = im1[h, w, :]


Wall time: 11.1 s


In [46]:
cv2.imshow("image1", im1)
cv2.imshow("image2", im2)
cv2.imshow("recon", recon)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [19]:
print(f"mse im1 and im2: {mse(im1,im2):.2f}")
print(f"mse im2 and recon: {mse(im2,recon)}")

mse im1 and im2: 0.17
mse im2 and recon: 0.15307593408474898
