% pose_estimation.tex


\section[Pose Estimation]{Model-less Pose Estimation}
\begin{frame}{Problem Statement}
\begin{figure}
\centering
\includegraphics[height=0.6\linewidth,trim = 60mm 5mm 35mm 20mm,clip]{figures/human_figure3d}
\end{figure}
\end{frame}

\begin{frame}{Parts of Pose Estimator}
\begin{itemize}
\item Motion Model: Second Order Motion Continuity
\item Observation Model: Mapping features to pose using Gaussian Processes
\item Estimation: Kalman Filter
\end{itemize}
\end{frame}

\begin{frame}{Previous Work }
  \begin{columns}
    \begin{column}{0.5\textwidth}
Model-Based Optimization [Agarwal et~al., RSS 2012]:
      \centering
      \includegraphics[width=1\textwidth]{figures/person_model}
\begin{itemize}
\item Computationally expensive
\item Model Required: Articulation, CAD
\item Often requires background subtraction
\end{itemize}
    \end{column}
\begin{column}{0.5\textwidth}
Template-Based Methods [Reiter et~al., CARS 2012]:
  \begin{columns}
    \begin{column}{0.3\textwidth}
      \centering
      \includegraphics[width=1\textwidth]{figures/tool_model}
    \end{column}
    \begin{column}{0.7\textwidth}
      \centering
      \includegraphics[width = 1\textwidth]{figures/tool_image}
    \end{column}
  \end{columns}
\begin{itemize}
\item Curse of Dimensionality
\item Model Required: Articulation, CAD, Visual
\end{itemize}
\end{column}
  \end{columns}
\end{frame}

\subsection[]{GPR Pose Estimation}
\begin{frame}{Model-less Pose Estimation}
\begin{block}{Guiding Principles}
\begin{itemize}
\item Real-time performance
\item Measure of confidence in estimates
\end{itemize}
\end{block}

\begin{figure}
\includegraphics[scale=0.5,trim=5mm 100mm 60mm 50mm,clip]{figures/surgical/flow_chart}
\end{figure}

\begin{itemize}
%\item Requires ground truth poses for generalization
\item Model Required: Articulation
\item Map generic visual features $x$ to tool end-effector pose estimate $y$
\item Tool Pose $y$ represents orientation, end-effector opening angle
\end{itemize}

\end{frame}

\section{Object Tracking}
\subsection[]{Product of Tracking Experts (PoTE Model)}

\begin{frame}{Parts of the Tracker}
\begin{itemize}
\item Motion Model: Second Order Motion Continuity
\item Observation Model: Multiple observers 
\item Interaction Model: Constant Velocity assumption
\item Estimation: Depends on the observation model
\end{itemize}
\end{frame}


\begin{frame}{Product of Tracking Experts}
\begin{figure}
	%\includegraphics[scale=0.4,trim=5mm 105mm 30mm 40mm,clip]{figures/trackerfusion.pdf}
	\includegraphics[scale=0.4,trim=5mm 105mm 30mm 40mm,clip]{figures/converted/trackerfusion}
	\label{fig:systemflow}
\end{figure}
Mixture Models:
\begin{align*}
P(x) &= \sum_{j=1}^M \pi_{j}p_{j}(x|\theta_j),\hspace{5pt}  \sum_{j=1}^M \pi_{j} = 1
\end{align*}
Product of Experts Model
\begin{align*}
P(x) &= \frac{1}{Z} \prod\limits_{j=1}^M f_{j}(x|\theta_j),\hspace{5pt} Z = \mathlarger{\int}  \prod\limits_{j=1}^M f_{j}(x|\theta_j) \mathrm{d}x
\end{align*}
\end{frame}

\begin{frame}{Product of Experts Model}
\begin{itemize}
\item Expert 1: $\mu = [245,270]^T$, $\Sigma = \mathrm{diag}([16.66,25])$
\item Expert 2: $\mu = [255,275]^T$, $\Sigma = \mathrm{diag}([16.66,25])$
\end{itemize}
\begin{figure}[h]
\centering
  \subfloat{\includegraphics[width=0.35\linewidth, trim = 20mm 80mm 40mm 85mm,clip]{figures/two_experts.pdf}}
  \subfloat{\includegraphics[width=0.35\linewidth, trim = 20mm 80mm 40mm 85mm,clip]{figures/two_experts_contour.pdf}}
\end{figure} 
Resulting Output: $\mu = [250,272.5]^T$, $\Sigma = \mathrm{diag}([8.33,12.5])$

\begin{itemize}
\item Merges output of various trackers
\item Can faithfully use information from detectors
\item Allows the use of complete densities for inference in tracking
\end{itemize}
\end{frame}


\begin{frame}{Uncertainty Representation}
\begin{align*}
\begin{split}
 \mu &= [x_{CB},y_{CB}]^T, 
\Sigma = \frac{1}{6} \begin{bmatrix}
 w_{B} & 0\\ 
0 & h_{B}
\end{bmatrix}
\end{split}
\end{align*}

PoTE Model with $K$ Gaussian Experts ($\underline{\mu_k},\Sigma_k$)

\begin{align*}
p(\underline{x}|\theta_{T_1},\theta_{T_2},\dots,\theta_{T_K}) = \frac{\prod\limits_{k=1}^{K}\frac{1}{2\pi|\Sigma_k|^{\frac{1}{2}}}\exp\left(-\frac{1}{2}[\underline{x}-\underline{\mu_k}]^T\Sigma_k^{-1}[\underline{x}-\underline{\mu_k}]\right)}{\int \prod\limits_{k=1}^{K} p_k(\underline{x}|\theta_{T_k})\,\mathrm{d}\underline{x}}
%& = \frac{\exp(\sum\limits_{k=1}^{K}-\frac{1}{2}[\underline{x}-\underline{\mu_k}]^T\mathbf{\Sigma_k}^{-1}[\underline{x}-\underline{\mu_k}])}{Z}
\end{align*}
\begin{align*}
&p(\underline{x}|\theta_{T_1},\theta_{T_2},\dots,\theta_{T_K}) \sim \mathcal{N}(\underline{\mu},\Sigma), \text{ where }\\
&\Sigma^{-1} = \sum_{k=1}^{K}\Sigma_k^{-1},\quad
\underline{\mu} = \Sigma\left(\sum_{k=1}^{K} \Sigma_k^{-1}\underline{\mu_k}\right)
\end{align*}
\end{frame}

\begin{frame}{Commonly used Tracking Experts}
\begin{description}[leftmargin=*]
\item [Kanade Lucas Tracking] Point Feature Tracking for bounding box prediction
\item [Background Subtraction] Helpful for stationary camera
\item [Motion Prediction] Second order motion continuity
\item [Object Detector] High Confidence detection from a detector
\item [Dense Optical Flow] Texture-less objects
\end{description}
\end{frame}

\begin{frame}{Surgical Tracking Results}
	\begin{center}
\movie[autostart,loop,showcontrols]{\includegraphics[width=0.8\linewidth]{videos/clamp_tracking}}{videos/clamp_tracking.avi}
\end{center}
\end{frame}

\begin{frame}{Person Tracking Results}
	\begin{center}
\movie[autostart,loop,showcontrols]{\includegraphics[width=0.8\linewidth]{videos/ubchockey}}{videos/ubchockey.avi}
\end{center}
\end{frame}

\section{Gaussian Process based Pose Estimation}
\begin{frame}{Gaussian Process Regression}
\begin{block}{Problem with Regression Models}
\begin{itemize}
\item Parametric Models
\item Prediction is dependent on the chosen model
\end{itemize}
\end{block}
%Gaussian process regression essentially defines a distribution over function with inference taking place in the space of functions, thus avoiding the need to estimate the weights/parameters associated with traditional regression methods.
\begin{block}{Gaussian Process Regression}
$f(x) \sim \mathcal{GP}(m(x),k(x,x'))$
\begin{itemize}
\item Inference takes place in space of functions
\item Makes minimal assumptions on the underlying data distribution
\item High variance in regions with sparse ground truth data
\end{itemize}
\end{block}
\end{frame}

%\begin{frame}{Gaussian Process Regression}
%$f(x): x \mapsto y$. 
%\begin{itemize}
%\item Input: Ground truth data  from $(\mathbf{X},\mathbf{y})$ from $n$ observations
%\item Output: Pose $y*$ for a new image with associated feature vector $x^*$, $p(y^*|x^*)$. 
%\item Measurement Process: $y = f(x)+\epsilon $, $\epsilon \sim \mathcal{N}(0,\sigma^2)$
%\end{itemize}
%Inference:
%\begin{itemize}
%\item $f(x) \sim \mathbb{GP}(m(x),k(x,x^1))$
%\item $\mathrm{cov}(f(x_p),f(x_q)) = k(x_p,x_q) = \exp^{-\frac{1}{2l}(x_p-x_q)^2}$
%\item For two observation, $y_p,y_q$, we get, $\mathrm{cov}(y_p,y_q)=k(x_p,x_q)+\sigma^{2}\delta_{pq} $
%\end{itemize}
%Prediction: Marginalize training data,
%\begin{align}
%\mathbf{f}^*|\mathbf{X},\mathbf{y},X^*&\sim \mathcal{N}(\mathbf{\bar{f}}^*,\mathrm{cov}(\mathbf{f}^*)) \label{eq:final_prediction}\\
%\mathbf{\bar{f}}^* = &K(X^*,X)[K(X,X)+\sigma^2I]^{-1}\mathbf{y} \nonumber \\
%\mathrm{cov}(\mathbf{f}^*) = &K(X^*,X^*)- K(X^*,X)[K(X,X)+\sigma^2I]^{-1}K(X,X^*) \nonumber 
%\end{align}
%%Intuitively this algorithm gives high confidence estimates in the region of feature space with lot of ground truth data, and estimates with high variance in regions with sparse ground truth data. 
%\end{frame}
%
\subsection[]{Smoothing Predictions}
\begin{frame}{Improving Prediction}
\begin{itemize}
\item Regression process predicts pose solely using one image
 \item Smoothness in surgical actions
\end{itemize}

\begin{block}{Motion Continuity}
 Consider the $k^{\text{th}}$ element of the pose state, $y(k)$,
\begin{align}
\begin{bmatrix}
y(k)_{t}\\
\dot{y}(k)_{t}
\end{bmatrix}=
\begin{bmatrix}
1 & \delta t\\
0 & 1
\end{bmatrix}\begin{bmatrix}
y(k)_{t-1}\\
\dot{y}(k)_{t-1}
\end{bmatrix}+
\begin{bmatrix}
\frac{1}{2} \delta t^2 \\
\delta t
\end{bmatrix}\ddot{y}(k)_{t-1} \label{eq:state_prop}
\end{align}
Acceleration is modeled as zero-mean Gaussian white noise, $\ddot{y}(k)_t \sim \mathcal{N}(0,\sigma_a^2) \;\; \forall t$.
\end{block}
\end{frame}

\begin{frame}{Improving Predictions}
The observation model is the Gaussian process regression framework which can be represented by 
\begin{align}
z_t = \begin{bmatrix}
1&0
\end{bmatrix}\begin{bmatrix}
y(k)_{t}\\
\dot{y}(k)_{t}
\end{bmatrix}+v_t \label{eq:state_obs}
\end{align}
\begin{figure}
\centering
\includegraphics[width = 0.5\linewidth,trim= 20mm 80mm 35mm 85mm,clip]{figures/surgical/gaussian_estimation_1}
\caption{Gaussian Process regression with $3\sigma$ bounds plotted against the true value of the tool opening angle}\label{fig:three_sigma}
\end{figure}
\end{frame}

\begin{frame}{Visual Features}
\begin{block}{Ideal Features}
Unique, Invariant, Computationally Efficient
\end{block}
Histograms of Oriented Gradients (HOG) and Local Binary Patterns (LBP)
\begin{figure}
\centering
\includegraphics[width=0.45\linewidth,trim=40mm 106mm 80mm 100mm,clip]{figures/surgical/feature_pic}
\includegraphics[width=0.45\linewidth,trim=50mm 93mm 40mm 70mm,clip]{figures/surgical/feature_pic_hog}
\caption{An example image with tool and corresponding HOG feature} \label{fig:hog}
\end{figure}
\end{frame}

\subsection[]{Experiments}
\begin{frame}{Experimental Data}
\begin{itemize}
\item Both the moving and fixed parts of a tool are tracked at $50$ fps
\item Endoscopic camera: $640 \times 480$ pixels at $15$ fps
\item Entire dataset has $4346$ different tool poses
\item Sensing Noise: Motion blur, partial occlusions, lighting variation
\end{itemize}
\begin{figure}[h]
\centering
\includegraphics[height=0.4\columnwidth,trim= 40mm 50mm 30mm 35mm,clip]{figures/surgical/box}
\caption{Customized Box Trainer Setup Retrofitted with Optical Reflective Markers}
\label{fig:boxTrainer}
\end{figure}
\end{frame}

\begin{frame}{Results}
\begin{table}
\begin{tabular}{| c| c| c| }\hline
 Features &  Orientation Error & Opening Angle \\ \hline
HOG& 2.42  & 2.49\\ \hline
  LBP & 1.92 & 2.48\\ \hline
\end{tabular}
\caption{Tool pose estimate angular accuracy in degrees using different visual features}
\end{table}

\begin{figure}
\centering
\includegraphics[width = 0.5\columnwidth,trim = 20mm 80mm 40mm 80mm,clip]{figures/surgical/kalman_filter}
\caption{True value, regression mean estimate, and filtered estimate of the tool opening angle} \label{fig:kalman_filter}
\end{figure}

\end{frame}

\begin{frame}{Visual Results}
\begin{figure}
\centering
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_20_128}}%
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_25_966}}%
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_30_623}}%
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_31_151}}%
\subfloat{\includegraphics[width = 0.2\linewidth,trim = 32mm 10mm 70mm 30mm,clip]{figures/dark_ims/23_16_37_728}}\\
\vspace{-10pt}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_52}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_139}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_209}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_217}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_left/frame_285}}\\
\vspace{-10pt}
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_52}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_139}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_209}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_217}}%
\subfloat{\includegraphics[width = 0.20\linewidth,trim = 32mm 15mm 102mm 30mm,clip]{figures/vr/scale_gt_dark_lbp_right/frame_285}}\\
%\caption{Representative video frames for a ``dark" sequence in the collected dataset obtained using GPR to estimate tool opening angle. First row shows the image frame, second row shows the orientation and opening angle of the left tool and the third row shows the orientation and opening angle of the right tool using LBP features.}
%\label{fig:dark_results_pose}
\end{figure}

\end{frame}

\begin{frame}{Conclusion}
\begin{itemize}
\item Real-time method to predict tool pose using generic visual features
%that are not specific to any tool and environment setting.
\item Robust variance estimates along with mean predictions
\item Variance estimate is demonstrated to be useful for filtering
\item Experimental results using a customized box trainer demonstrate good tool pose prediction 
\end{itemize}
\end{frame}

\begin{frame}{Publications}
\begin{block}{Journal}
\begin{enumerate}
\item \textbf{S. Kumar}, J. Sovizi, V. Krovi, ``Error Propagation on SE(3) for Surgical Tool Pose Filtering'' (In Preparation)
\item P. Agarwal, \textbf{S. Kumar}, J. Ryde, J. Corso, and V. Krovi, ``Estimating Dynamics On-the-fly Using Monocular Video For Vision-Based Robotics'', IEEE/ASME Transactions on Mechatronics, 2013.
\end{enumerate}
\end{block}

\begin{block}{Conference}
\begin{enumerate}
\item \textbf{S. Kumar}, J. Sovizi, M. S. Narayanan, V. Krovi, ``Surgical Tool Pose Estimation from Monocular Endoscopic Videos'', IEEE International Conference on Robotics and Automation (ICRA), 2015
\item P. Agarwal, \textbf{S. Kumar}, J. Ryde, J. Corso, and V. Krovi,
``Estimating Human Dynamics On-the-fly Using Monocular Video for Pose
Estimation'', Robotics: Science and Systems VIII, 2012
\end{enumerate}
\end{block}
\end{frame}

\begin{frame}{References}
    \begin{itemize}
        \item Gaussian Processes for Regression: A Quick Introduction, M. Ebden
        \item Gaussian Processes for Machine Learning, Carl Edward Rasmussen and Chris Williams, the MIT Press, 2006 
        \end{itemize}
\end{frame}