# alignment.py -- RANSAC-based alignment of matched image features
# (pure translation or full homography motion models).
import math
import random
import cv2
import numpy as np
eTranslate = 0
eHomography = 1
def computeHomography(f1, f2, matches, A_out=None):
    '''
    Input:
        f1 -- list of cv2.KeyPoint objects in the first image
        f2 -- list of cv2.KeyPoint objects in the second image
        matches -- list of cv2.DMatch objects
            DMatch.queryIdx: The index of the feature in the first image
            DMatch.trainIdx: The index of the feature in the second image
            DMatch.distance: The distance between the two features
        A_out -- ignore this parameter. If computeHomography is needed
            in other TODOs, call computeHomography(f1,f2,matches)
    Output:
        H -- 2D homography (3x3 matrix)

    Estimates a homography from image 1 to image 2 via the Direct Linear
    Transform: each match contributes two rows to the homogeneous system
    A h = 0, whose least-squares solution is the right singular vector of
    A with the smallest singular value.
    '''
    num_rows = 2 * len(matches)
    A = np.zeros((num_rows, 9))

    for row, match in enumerate(matches):
        a_x, a_y = f1[match.queryIdx].pt
        b_x, b_y = f2[match.trainIdx].pt
        # Two DLT rows per correspondence (x- and y-constraints).
        A[2 * row] = [a_x, a_y, 1, 0, 0, 0,
                      -b_x * a_x, -b_x * a_y, -b_x]
        A[2 * row + 1] = [0, 0, 0, a_x, a_y, 1,
                          -b_y * a_x, -b_y * a_y, -b_y]

    # Rows of Vt are the eigenvectors of A^T A; the last row corresponds
    # to the smallest singular value and minimizes ||A h|| for ||h|| = 1.
    _, _, Vt = np.linalg.svd(A)

    if A_out is not None:
        A_out[:] = A

    return Vt[-1].reshape(3, 3)
def alignPair(f1, f2, matches, m, nRANSAC, RANSACthresh):
    '''
    Input:
        f1 -- list of cv2.KeyPoint objects in the first image
        f2 -- list of cv2.KeyPoint objects in the second image
        matches -- list of cv2.DMatch objects
            DMatch.queryIdx: The index of the feature in the first image
            DMatch.trainIdx: The index of the feature in the second image
            DMatch.distance: The distance between the two features
        m -- MotionModel (eTranslate, eHomography)
        nRANSAC -- number of RANSAC iterations
        RANSACthresh -- RANSAC distance threshold
    Output:
        M -- inter-image transformation matrix

    Repeat for nRANSAC iterations:
        Choose a minimal set of feature matches.
        Estimate the transformation implied by these matches.
        Count the number of inliers.
    For the transformation with the maximum number of inliers,
    compute the least squares motion estimate using the inliers,
    and return it as a transformation matrix M.
    '''
    # Minimal sample size: one match determines a translation,
    # four matches determine a full homography.
    num_samples = 1 if (m == eTranslate) else 4

    best_inliers = []
    for _ in range(nRANSAC):
        samples = random.sample(matches, num_samples)
        if m == eTranslate:
            # BUG FIX: the original read matches[0] here instead of the
            # randomly drawn sample, so every RANSAC iteration tested the
            # same fixed translation and the random sampling had no effect.
            (ax, ay) = f1[samples[0].queryIdx].pt
            (bx, by) = f2[samples[0].trainIdx].pt
            hypothesis = np.eye(3)
            hypothesis[0, 2] = bx - ax
            hypothesis[1, 2] = by - ay
        else:
            hypothesis = computeHomography(f1, f2, samples)

        inlier_indices = getInliers(f1, f2, matches, hypothesis, RANSACthresh)
        if len(inlier_indices) > len(best_inliers):
            best_inliers = inlier_indices

    # Refit on the best inlier set to get the final least-squares estimate.
    return leastSquaresFit(f1, f2, matches, m, best_inliers)
def getInliers(f1, f2, matches, M, RANSACthresh):
    '''
    Input:
        f1 -- list of cv2.KeyPoint objects in the first image
        f2 -- list of cv2.KeyPoint objects in the second image
        matches -- list of cv2.DMatch objects
            DMatch.queryIdx: The index of the feature in the first image
            DMatch.trainIdx: The index of the feature in the second image
            DMatch.distance: The distance between the two features
        M -- inter-image transformation matrix
        RANSACthresh -- RANSAC distance threshold
    Output:
        inlier_indices -- inlier match indices (indexes into 'matches')

    Transform the matched features in f1 by M and return the indices of
    matches whose transformed feature lands within Euclidean distance
    RANSACthresh of its counterpart in f2.
    '''
    inliers = []
    for idx, match in enumerate(matches):
        src_x, src_y = f1[match.queryIdx].pt
        dst_x, dst_y = f2[match.trainIdx].pt

        # Map the image-1 point through M in homogeneous coordinates,
        # then perform the perspective divide back to Euclidean (x, y).
        tx, ty, tw = M @ np.array([src_x, src_y, 1])
        proj_x = tx / tw
        proj_y = ty / tw

        # Strictly-less-than keeps the original threshold semantics.
        error = np.sqrt((dst_x - proj_x) ** 2 + (dst_y - proj_y) ** 2)
        if error < RANSACthresh:
            inliers.append(idx)

    return inliers
def leastSquaresFit(f1, f2, matches, m, inlier_indices):
    '''
    Input:
        f1 -- list of cv2.KeyPoint objects in the first image
        f2 -- list of cv2.KeyPoint objects in the second image
        matches -- list of cv2.DMatch objects
            DMatch.queryIdx: The index of the feature in the first image
            DMatch.trainIdx: The index of the feature in the second image
            DMatch.distance: The distance between the two features
        m -- MotionModel (eTranslate, eHomography)
        inlier_indices -- inlier match indices (indexes into 'matches')
    Output:
        M - transformation matrix

    Compute the transformation matrix from f1 to f2 using only the
    inliers and return it. Returns the identity when the inlier set is
    empty (no motion evidence), instead of crashing.
    '''
    # Two motion models are handled: pure translation (eTranslate)
    # and full homography (eHomography).
    M = np.eye(3)

    # Robustness fix: the original divided by len(inlier_indices)
    # unconditionally, raising ZeroDivisionError when RANSAC found no
    # inliers. With no evidence, fall back to the identity transform.
    if not inlier_indices:
        return M

    if m == eTranslate:
        # A spherically-warped pair is related by a translation with only
        # two degrees of freedom, so the least-squares estimate is simply
        # the mean displacement over all inlier matches.
        u = 0.0
        v = 0.0
        for idx in inlier_indices:
            (ax, ay) = f1[matches[idx].queryIdx].pt
            (bx, by) = f2[matches[idx].trainIdx].pt
            u += bx - ax
            v += by - ay
        M[0, 2] = u / len(inlier_indices)
        M[1, 2] = v / len(inlier_indices)
    elif m == eHomography:
        # Re-estimate the homography from all inlier matches at once.
        inlier_matches = [matches[idx] for idx in inlier_indices]
        M = computeHomography(f1, f2, inlier_matches)
    else:
        # ValueError is a subclass of Exception, so existing callers
        # catching Exception still work; message unchanged.
        raise ValueError("Error: Invalid motion model.")

    return M