forked from surenkum/uq_gaussian_processes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pose_estimation.tex
365 lines (326 loc) · 14.3 KB
/
pose_estimation.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
\section[Pose Estimation]{Model-less Pose Estimation}
\begin{frame}{Problem Statement}
\begin{figure}
\centering
\includegraphics[height=0.6\linewidth,trim = 60mm 5mm 35mm 20mm,clip]{figures/human_figure3d}
\end{figure}
\end{frame}
\begin{frame}{Parts of Pose Estimator}
\begin{itemize}
\item Motion Model: Second Order Motion Continuity
\item Observation Model: Mapping features to pose using Gaussian Processes
\item Estimation: Kalman Filter
\end{itemize}
\end{frame}
\begin{frame}{Previous Work}
\begin{columns}
\begin{column}{0.5\textwidth}
Model Based Optimization [Agarwal et~al., RSS 2012]:
\centering
\includegraphics[width=1\textwidth]{figures/person_model}
\begin{itemize}
\item Computationally expensive
\item Model Required: Articulation, CAD
\item Often requires background subtraction
\end{itemize}
\end{column}
\begin{column}{0.5\textwidth}
Template Based Methods [Reiter et~al., CARS 2012]:
\begin{columns}
\begin{column}{0.3\textwidth}
\centering
\includegraphics[width=1\textwidth]{figures/tool_model}
\end{column}
\begin{column}{0.7\textwidth}
\centering
\includegraphics[width = 1\textwidth]{figures/tool_image}
\end{column}
\end{columns}
\begin{itemize}
\item Curse of Dimensionality
\item Model Required: Articulation, CAD, Visual
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\subsection[]{GPR Pose Estimation}
\begin{frame}{Model-less Pose Estimation}
\begin{block}{Guiding Principles}
\begin{itemize}
\item Real-time performance
\item Measure of confidence in estimates
\end{itemize}
\end{block}
\begin{figure}
\includegraphics[scale=0.5,trim=5mm 100mm 60mm 50mm,clip]{figures/surgical/flow_chart}
\end{figure}
\begin{itemize}
%\item Requires ground truth poses for generalization
\item Model Required: Articulation
\item Map generic visual features $x$ to tool end-effector pose estimate $y$
\item Tool Pose $y$ represents orientation and end-effector opening angle
\end{itemize}
\end{frame}
\section{Object Tracking}
\subsection[]{Product of Tracking Experts (PoTE Model)}
\begin{frame}{Parts of the Tracker}
\begin{itemize}
\item Motion Model: Second Order Motion Continuity
\item Observation Model: Multiple observers
\item Interaction Model: Constant Velocity assumption
\item Estimation: Depends on the observation model
\end{itemize}
\end{frame}
\begin{frame}{Product of Tracking Experts}
\begin{figure}
%\includegraphics[scale=0.4,trim=5mm 105mm 30mm 40mm,clip]{figures/trackerfusion.pdf}
\includegraphics[scale=0.4,trim=5mm 105mm 30mm 40mm,clip]{figures/converted/trackerfusion}
\label{fig:systemflow}
\end{figure}
Mixture Models:
\begin{align*}
P(x) &= \sum_{j=1}^M \pi_{j}p_{j}(x|\theta_j),\hspace{5pt} \sum_{j=1}^M \pi_{j} = 1
\end{align*}
Product of Experts Model
\begin{align*}
P(x) &= \frac{1}{Z} \prod\limits_{j=1}^M f_{j}(x|\theta_j),\hspace{5pt} Z = \mathlarger{\int} \prod\limits_{j=1}^M f_{j}(x|\theta_j) \mathrm{d}x
\end{align*}
\end{frame}
\begin{frame}{Product of Experts Model}
\begin{itemize}
\item Expert 1: $\mu = [245,270]^T$, $\Sigma = \mathrm{diag}([16.66,25])$
\item Expert 2: $\mu = [255,275]^T$, $\Sigma = \mathrm{diag}([16.66,25])$
\end{itemize}
\begin{figure}[h]
\centering
\subfloat{\includegraphics[width=0.35\linewidth, trim = 20mm 80mm 40mm 85mm,clip]{figures/two_experts.pdf}}
\subfloat{\includegraphics[width=0.35\linewidth, trim = 20mm 80mm 40mm 85mm,clip]{figures/two_experts_contour.pdf}}
\end{figure}
Resulting Output: $\mu = [250,272.5]^T$, $\Sigma = \mathrm{diag}([8.33,12.5])$
\begin{itemize}
\item Merges output of various trackers
\item Can faithfully use information from detectors
\item Allows complete densities to be used for inference in tracking
\end{itemize}
\end{frame}
\begin{frame}{Uncertainty Representation}
\begin{align*}
\begin{split}
\mu &= [x_{CB},y_{CB}]^T,
\Sigma = \frac{1}{6} \begin{bmatrix}
w_{B} & 0\\
0 & h_{B}
\end{bmatrix}
\end{split}
\end{align*}
PoTE Model with $K$ Gaussian Experts ($\underline{\mu_k},\Sigma_k$)
\begin{align*}
p(\underline{x}|\theta_{T_1},\theta_{T_2},\dots,\theta_{T_K}) = \frac{\prod\limits_{k=1}^{K}\frac{1}{2\pi|\Sigma_k|^{\frac{1}{2}}}\exp\left(-\frac{1}{2}[\underline{x}-\underline{\mu_k}]^T\Sigma_k^{-1}[\underline{x}-\underline{\mu_k}]\right)}{\int \prod\limits_{k=1}^{K} p_k(\underline{x}|\theta_{T_k})\,\mathrm{d}\underline{x}}
%& = \frac{\exp(\sum\limits_{k=1}^{K}-\frac{1}{2}[\underline{x}-\underline{\mu_k}]^T\mathbf{\Sigma_k}^{-1}[\underline{x}-\underline{\mu_k}])}{Z}
\end{align*}
\begin{align*}
&p(\underline{x}|\theta_{T_1},\theta_{T_2},\dots,\theta_{T_K}) \sim \mathcal{N}(\underline{\mu},\Sigma), \text{ where }\\
&\Sigma^{-1} = \sum_{k=1}^{K}\Sigma_k^{-1},
\underline{\mu} = \Sigma\left(\sum_{k=1}^{K} \Sigma_k^{-1}\underline{\mu_k}\right)
\end{align*}
\end{frame}
\begin{frame}{Commonly used Tracking Experts}
\begin{description}[leftmargin=*]
\item [Kanade Lucas Tracking] Point Feature Tracking for bounding box prediction
\item [Background Subtraction] Helpful for stationary camera
\item [Motion Prediction] Second order motion continuity
\item [Object Detector] High Confidence detection from a detector
\item [Dense Optical Flow] Texture-less objects
\end{description}
\end{frame}
\begin{frame}{Surgical Tracking Results}
\begin{center}
\movie[autostart,loop,showcontrols]{\includegraphics[width=0.8\linewidth]{videos/clamp_tracking}}{videos/clamp_tracking.avi}
\end{center}
\end{frame}
\begin{frame}{Person Tracking Results}
\begin{center}
\movie[autostart,loop,showcontrols]{\includegraphics[width=0.8\linewidth]{videos/ubchockey}}{videos/ubchockey.avi}
\end{center}
\end{frame}
\section{Gaussian Process based Pose Estimation}
\begin{frame}{Gaussian Process Regression}
\begin{block}{Problem with Regression Models}
\begin{itemize}
\item Parametric Models
\item Prediction is dependent on the chosen model
\end{itemize}
\end{block}
%Gaussian process regression essentially defines a distribution over function with inference taking place in the space of functions, thus avoiding the need to estimate the weights/parameters associated with traditional regression methods.
\begin{block}{Gaussian Process Regression}
$f(x) \sim \mathbb{GP}(m(x),k(x,x'))$
\begin{itemize}
\item Inference takes place in space of functions
\item Makes minimal assumptions on the underlying data distribution
\item High variance in regions with sparse ground truth data
\end{itemize}
\end{block}
\end{frame}
%\begin{frame}{Gaussian Process Regression}
%$f(x): x \mapsto y$.
%\begin{itemize}
%\item Input: Ground truth data from $(\mathbf{X},\mathbf{y})$ from $n$ observations
%\item Output: Pose $y^*$ for a new image with associated feature vector $x^*$, $p(y^*|x^*)$.
%\item Measurement Process: $y = f(x)+\epsilon $, $\epsilon \sim \mathcal{N}(0,\sigma^2)$
%\end{itemize}
%Inference:
%\begin{itemize}
%\item $f(x) \sim \mathbb{GP}(m(x),k(x,x'))$
%\item $\mathrm{cov}(f(x_p),f(x_q)) = k(x_p,x_q) = \exp\left(-\frac{1}{2l}(x_p-x_q)^2\right)$
%\item For two observation, $y_p,y_q$, we get, $\mathrm{cov}(y_p,y_q)=k(x_p,x_q)+\sigma^{2}\delta_{pq} $
%\end{itemize}
%Prediction: Marginalize training data,
%\begin{align}
%\mathbf{f}^*|\mathbf{X},\mathbf{y},X^*&\sim \mathcal{N}(\mathbf{\bar{f}}^*,\mathrm{cov}(\mathbf{f}^*)) \label{eq:final_prediction}\\
%\mathbf{\bar{f}}^* = &K(X^*,X)[K(X,X)+\sigma^2I]^{-1}\mathbf{y} \nonumber \\
%\mathrm{cov}(\mathbf{f}^*) = &K(X^*,X^*)- K(X^*,X)[K(X,X)+\sigma^2I]^{-1}K(X,X^*) \nonumber
%\end{align}
%%Intuitively this algorithm gives high confidence estimates in the region of feature space with lot of ground truth data, and estimates with high variance in regions with sparse ground truth data.
%\end{frame}
%
\subsection[]{Smoothing Predictions}
\begin{frame}{Improving Predictions}
\begin{itemize}
\item Regression process predicts pose solely using one image
\item Smoothness in surgical actions
\end{itemize}
\begin{block}{Motion Continuity}
Consider the $k$-th element of the pose state, $y(k)$:
\begin{align}
\begin{bmatrix}
y(k)_{t}\\
\dot{y}(k)_{t}
\end{bmatrix}=
\begin{bmatrix}
1 & \delta t\\
0 & 1
\end{bmatrix}\begin{bmatrix}
y(k)_{t-1}\\
\dot{y}(k)_{t-1}
\end{bmatrix}+
\begin{bmatrix}
\frac{1}{2} \delta t^2 \\
\delta t
\end{bmatrix}\ddot{y}(k)_{t-1} \label{eq:state_prop}
\end{align}
Acceleration is modeled as zero-mean Gaussian white noise, $\ddot{y}(k)_t \sim \mathcal{N}(0,\sigma_a^2) \;\; \forall t$.
\end{block}
\end{frame}
\begin{frame}{Improving Predictions}
The observation model is the Gaussian process regression framework which can be represented by
\begin{align}
z_t = \begin{bmatrix}
1&0
\end{bmatrix}\begin{bmatrix}
y(k)_{t}\\
\dot{y}(k)_{t}
\end{bmatrix}+v_t \label{eq:state_obs}
\end{align}
\begin{figure}
\centering
\includegraphics[width = 0.5\linewidth,trim= 20mm 80mm 35mm 85mm,clip]{figures/surgical/gaussian_estimation_1}
\caption{Gaussian Process regression with $3\sigma$ bounds plotted with the true value of the tool opening angle}\label{fig:three_sigma}
\end{figure}
\end{frame}
\begin{frame}{Visual Features}
\begin{block}{Ideal Features}
Unique, Invariant, Computationally Efficient
\end{block}
Histograms of Oriented Gradients (HOG) and Local Binary Patterns (LBP)
\begin{figure}
\centering
\includegraphics[width=0.45\linewidth,trim=40mm 106mm 80mm 100mm,clip]{figures/surgical/feature_pic}
\includegraphics[width=0.45\linewidth,trim=50mm 93mm 40mm 70mm,clip]{figures/surgical/feature_pic_hog}
\caption{An example image with tool and corresponding HOG feature} \label{fig:hog}
\end{figure}
\end{frame}
\subsection[]{Experiments}
\begin{frame}{Experimental Data}
\begin{itemize}
\item Both the moving and fixed parts of a tool are tracked at $50$ fps
\item Endoscopic camera: $640 \times 480$ pixels at $15$ fps
\item Entire dataset has $4346$ different tool poses
\item Sensing Noise: Motion blur, partial occlusions, lighting variation
\end{itemize}
\begin{figure}[h]
\centering
\includegraphics[height=0.4\columnwidth,trim= 40mm 50mm 30mm 35mm,clip]{figures/surgical/box}
\caption{Customized Box Trainer Setup Retrofitted with Optical Reflective Markers}
\label{fig:boxTrainer}
\end{figure}
\end{frame}
\begin{frame}{Results}
\begin{table}
\begin{tabular}{| c| c| c| }\hline
Features & Orientation Error & Opening Angle Error \\ \hline
HOG& 2.42 & 2.49\\ \hline
LBP & 1.92 & 2.48\\ \hline
\end{tabular}
\caption{Tool pose estimate angular accuracy in degrees using different visual features}
\end{table}
\begin{figure}
\centering
\includegraphics[width = 0.5\columnwidth,trim = 20mm 80mm 40mm 80mm,clip]{figures/surgical/kalman_filter}
\caption{True value, regression mean estimate, and filtered estimate of the tool opening angle} \label{fig:kalman_filter}
\end{figure}
\end{frame}
\begin{frame}{Visual Results}
\begin{figure}
\centering
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_20_128}}
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_25_966}}
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_30_623}}
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_31_151}}
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_37_728}}\\
\vspace{-10pt}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_52}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_139}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_209}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_217}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_285}}\\
\vspace{-10pt}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_52}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_139}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_209}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_217}}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_285}}\\
%\caption{Representative video frames for a ``dark" sequence in the collected dataset obtained using GPR to estimate tool opening angle. First row shows the image frame, second row shows the orientation and opening angle of the left tool and the third row shows the orientation and opening angle of the right tool using LBP features.}
%\label{fig:dark_results_pose}
\end{figure}
\end{frame}
\begin{frame}{Conclusion}
\begin{itemize}
\item Real-time method to predict tool pose using generic visual features
%that are not specific to any tool and environment setting.
\item Robust variance estimates along with mean predictions
\item Variance estimate is demonstrated to be useful for filtering
\item Experimental results using a customized box trainer demonstrate good tool pose prediction
\end{itemize}
\end{frame}
\begin{frame}{Publications}
\begin{block}{Journal}
\begin{enumerate}
\item \textbf{S. Kumar}, J. Sovizi, V. Krovi, ``Error Propagation on SE(3) for Surgical Tool Pose Filtering'' (In Preparation)
\item P. Agarwal, \textbf{S. Kumar}, J. Ryde, J. Corso, and V. Krovi, ``Estimating Dynamics On-the-fly Using Monocular Video For Vision-Based Robotics'', IEEE/ASME Transactions on Mechatronics, 2013.
\end{enumerate}
\end{block}
\begin{block}{Conference}
\begin{enumerate}
\item \textbf{S. Kumar}, J. Sovizi, M. S. Narayanan, V. Krovi, ``Surgical Tool Pose Estimation from Monocular Endoscopic Videos'', IEEE International Conference on Robotics and Automation (ICRA), 2015
\item P. Agarwal, \textbf{S. Kumar}, J. Ryde, J. Corso, and V. Krovi,
``Estimating Human Dynamics On-the-fly Using Monocular Video for Pose
Estimation'', Robotics: Science and Systems VIII, 2012
\end{enumerate}
\end{block}
\end{frame}
\begin{frame}{References}
\begin{itemize}
\item Gaussian Processes for Regression: A Quick Introduction, M. Ebden
\item Gaussian Processes for Machine Learning, Carl Edward Rasmussen and Chris Williams, the MIT Press, 2006
\end{itemize}
\end{frame}