## Definitions

### Observables
+ time <span style="background-color: #F5F5F5">$t_{k}$</span>
+ observable <span style="background-color: #F5F5F5">$y_{k}$</span>

### Latent Variables
+ Continuous state <span style="background-color: #F5F5F5">$x_{k}$</span>
+ Discrete state <span style="background-color: #F5F5F5">$I_{k}$</span>

## Goal
+ Compute joint filter distribution $p(x_{k}, I_{k} \mid y_{1:k})$

## Filter Derivation

1. Use Bayes' theorem to define the posterior in terms of a data-derived likelihood and a prior ($\text{posterior} \propto \text{likelihood} * \text{prior}$)
$$
p(x_{k}, I_{k} \mid y_{1:k}) \propto p(y_{k} \mid x_{k}, I_{k}) * p(x_{k}, I_{k} \mid y_{1:k-1})
$$

2. Use Chapman-Kolmogorov to define prior in terms of previous time step's posterior and state transitions
$$
\begin{align*}
p(x_{k}, I_{k} \mid y_{1:k-1}) &= \sum_{I_{k-1}} \int p(x_{k}, x_{k-1}, I_{k}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1} \tag{Marg.}
\\ &= \sum_{I_{k-1}} \int p(x_{k}, I_{k} \mid x_{k-1}, I_{k-1}, y_{1:k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1} \tag{Cond. Prob.}
\\ &= \sum_{I_{k-1}} \int p(x_{k}, I_{k} \mid x_{k-1}, I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1} \tag{Markov}
\\ &= \sum_{I_{k-1}} \int p(x_{k} \mid x_{k-1}, I_{k}, I_{k-1}) * Pr(I_{k} \mid I_{k-1}, x_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1} \tag{Cond. Prob.}
\end{align*}$$

    where:
    + $p(x_{k-1}, I_{k-1} \mid y_{1:k-1})$ is the previous posterior
    + $Pr(I_{k} \mid I_{k-1}, x_{k-1})$ is the discrete state transition
    + $p(x_{k} \mid x_{k-1}, I_{k}, I_{k-1})$ is the continuous state transition


3. Final Filter
$$
p(x_{k}, I_{k} \mid y_{1:k}) \propto p(y_{k} \mid x_{k}, I_{k}) * \sum_{I_{k-1}} \int p(x_{k} \mid x_{k-1}, I_{k}, I_{k-1}) * Pr(I_{k} \mid I_{k-1}, x_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}
$$

## Smoother Derivation

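1. Express the smoothed posterior at time $k$ in terms of the filter posterior at time $k$ and the smoothed posterior at time $k+1$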
$$
\begin{align*}
p(x_{k}, I_{k} \mid y_{1:T}) &= \sum_{I_{k+1}} \int p(x_{k}, x_{k+1}, I_{k}, I_{k+1} \mid y_{1:T}) * dx_{k+1} \tag{Marg.}
\\ &= \sum_{I_{k+1}} \int p(x_{k}, I_{k} \mid x_{k+1}, I_{k+1}, y_{1:T}) * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1} \tag{Cond. Prob.}
\\ &= \sum_{I_{k+1}} \int p(x_{k}, I_{k} \mid x_{k+1}, I_{k+1}, y_{1:k}) * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1} \tag{Markov}
\\ &= \sum_{I_{k+1}} \int \frac{p(x_{k}, I_{k}, x_{k+1}, I_{k+1} \mid y_{1:k})}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1} \tag{Cond. Prob.}
\\ &= \sum_{I_{k+1}} \int \frac{p(x_{k+1}, I_{k+1} \mid x_{k}, I_{k}, y_{1:k}) * p(x_{k}, I_{k} \mid y_{1:k})}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1} \tag{Cond. Prob.}
\\ &= \sum_{I_{k+1}} \int \frac{p(x_{k+1}, I_{k+1} \mid x_{k}, I_{k}) * p(x_{k}, I_{k} \mid y_{1:k})}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1} \tag{Markov}
\\ &= p(x_{k}, I_{k} \mid y_{1:k}) * \sum_{I_{k+1}} \int \frac{p(x_{k+1}, I_{k+1} \mid x_{k}, I_{k})}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1} 
\\ &= p(x_{k}, I_{k} \mid y_{1:k}) * \sum_{I_{k+1}} \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}, I_{k}) * Pr(I_{k+1} \mid I_{k}, x_{k})}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1}
\end{align*}$$

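2. Compute the one-step-ahead prediction $p(x_{k+1}, I_{k+1} \mid y_{1:k})$ that appears in the denominator above, using the filter posterior and the state transitions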
$$
\begin{align*}
p(x_{k+1}, I_{k+1} \mid y_{1:k}) &= \sum_{I_{k}} \int p(x_{k+1}, I_{k+1}, x_{k}, I_{k} \mid y_{1:k}) * dx_{k} \tag{Marg.}
\\ &= \sum_{I_{k}} \int p(x_{k+1}, I_{k+1} \mid x_{k}, I_{k}, y_{1:k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k} \tag{Cond. Prob.}
\\ &= \sum_{I_{k}} \int p(x_{k+1}, I_{k+1} \mid x_{k}, I_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k} \tag{Markov}
\\ &= \sum_{I_{k}} \int p(x_{k+1} \mid x_{k}, I_{k+1}, I_{k}) * Pr(I_{k+1} \mid I_{k}, x_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k} \tag{Cond. Prob.}
\end{align*}$$

## Replay Trajectory Classification

Let:
+ the likelihood be the same for all discrete states $p(y_{k} \mid x_{k}, I_{k}) = p(y_{k} \mid x_{k})$
+ the continuous state transition depend only on the current discrete state $p(x_{k} \mid x_{k-1}, I_{k}, I_{k-1}) = p(x_{k} \mid x_{k-1}, I_{k})$
+ and the discrete state transition depend only on the previous discrete state $Pr(I_{k} \mid I_{k-1}, x_{k-1}) = Pr(I_{k} \mid I_{k-1})$.

Then the final filter simplifies to:
$$
p(x_{k}, I_{k} \mid y_{1:k}) \propto p(y_{k} \mid x_{k}) * \sum_{I_{k-1}} \int p(x_{k} \mid x_{k-1}, I_{k}) * Pr(I_{k} \mid I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}
$$

Define the state transition as:
$$
  \begin{equation}
    p(x_{k} \mid x_{k-1}, I_{k}) =
    \begin{cases}
      U(a, b), & \text{if}\ I_{k} = 1 \\
      \mathbb{1}, & \text{if}\ I_{k} = 2 \\
      N(x_{k-1}, \sigma), & \text{if}\ I_{k} = 3 \\
    \end{cases}
  \end{equation}
$$

### Computing the `predict` step

+ $I_{k} = 1$
$$
\sum_{I_{k-1}} \int p(x_{k} \mid x_{k-1}, I_{k}=1) * Pr(I_{k}=1 \mid I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}$$
$$
= U(a, b) * \sum_{I_{k-1}} \int Pr(I_{k}=1 \mid I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}  
$$
$$\begin{multline*}
= U(a, b) * \big[ Pr(I_{k}=1 \mid I_{k-1}=1) * \int p(x_{k-1}, I_{k-1}=1 \mid y_{1:k-1}) * dx_{k-1}\\ + Pr(I_{k}=1 \mid I_{k-1}=2) * \int p(x_{k-1}, I_{k-1}=2 \mid y_{1:k-1}) * dx_{k-1}\\ + Pr(I_{k}=1 \mid I_{k-1}=3) * \int p(x_{k-1}, I_{k-1}=3 \mid y_{1:k-1}) * dx_{k-1} \big]
\end{multline*}$$
$$\begin{multline*}
= U(a, b) * \big[ Pr(I_{k}=1 \mid I_{k-1}=1) * Pr(I_{k-1}=1 \mid y_{1:k-1})\\ + Pr(I_{k}=1 \mid I_{k-1}=2) * Pr(I_{k-1}=2 \mid y_{1:k-1})\\ + Pr(I_{k}=1 \mid I_{k-1}=3) * Pr(I_{k-1}=3 \mid y_{1:k-1}) \big]
\end{multline*}$$

+ $I_{k} = 2$
$$
\sum_{I_{k-1}} \int p(x_{k} \mid x_{k-1}, I_{k}=2) * Pr(I_{k}=2 \mid I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}
$$
$$
= \sum_{I_{k-1}} \int Pr(I_{k}=2 \mid I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}  
$$
$$\begin{multline*}
= Pr(I_{k}=2 \mid I_{k-1}=1) * Pr(I_{k-1}=1 \mid y_{1:k-1})\\ + Pr(I_{k}=2 \mid I_{k-1}=2) * Pr(I_{k-1}=2 \mid y_{1:k-1})\\ + Pr(I_{k}=2 \mid I_{k-1}=3) * Pr(I_{k-1}=3 \mid y_{1:k-1})
\end{multline*}$$

+ $I_{k} = 3$
$$
\sum_{I_{k-1}} \int p(x_{k} \mid x_{k-1}, I_{k}=3) * Pr(I_{k}=3 \mid I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}
$$
$$
=\sum_{I_{k-1}} \int N(x_{k-1}, \sigma) * Pr(I_{k}=3 \mid I_{k-1}) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1}
$$
$$
=\sum_{I_{k-1}} \big[ Pr(I_{k}=3 \mid I_{k-1}) * \int N(x_{k-1}, \sigma) * p(x_{k-1}, I_{k-1} \mid y_{1:k-1}) * dx_{k-1} \big]
$$

### Smoother `predict` step

Define the state transition as:
$$
  \begin{equation}
    p(x_{k+1} \mid x_{k}, I_{k+1}) =
    \begin{cases}
      U(a, b), & \text{if}\ I_{k+1} = 1 \\
      \mathbb{1}, & \text{if}\ I_{k+1} = 2 \\
      N(x_{k}, \sigma), & \text{if}\ I_{k+1} = 3 \\
    \end{cases}
  \end{equation}
$$

Then the predict step for the smoother is:
$$
p(x_{k+1}, I_{k+1} \mid y_{1:k}) = \sum_{I_{k}} \int p(x_{k+1} \mid x_{k}, I_{k+1}) * Pr(I_{k+1} \mid I_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k}
$$

+ $p(x_{k+1}, I_{k+1}=1 \mid y_{1:k})$
$$
= \sum_{I_{k}} \int p(x_{k+1} \mid x_{k}, I_{k+1}=1) * Pr(I_{k+1}=1 \mid I_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k}
$$
$$
= U(a,b) * \sum_{I_{k}} \int Pr(I_{k+1}=1 \mid I_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k}
$$
$$\begin{multline*}
= U(a,b) * \big[ Pr(I_{k+1}=1 \mid I_{k}=1) * \int p(x_{k}, I_{k}=1 \mid y_{1:k}) * dx_{k} \\ + Pr(I_{k+1}=1 \mid I_{k}=2) * \int p(x_{k}, I_{k}=2 \mid y_{1:k}) * dx_{k}\\ + Pr(I_{k+1}=1 \mid I_{k}=3) * \int p(x_{k}, I_{k}=3 \mid y_{1:k}) * dx_{k} \big]
\end{multline*}$$
$$\begin{multline*}
= U(a,b) * \big[ Pr(I_{k+1}=1 \mid I_{k}=1) * Pr(I_{k}=1 \mid y_{1:k}) \\ + Pr(I_{k+1}=1 \mid I_{k}=2) * Pr(I_{k}=2 \mid y_{1:k})\\ + Pr(I_{k+1}=1 \mid I_{k}=3) * Pr(I_{k}=3 \mid y_{1:k}) \big]
\end{multline*}$$

+ $p(x_{k+1}, I_{k+1}=2 \mid y_{1:k})$
$$
= \sum_{I_{k}} \int p(x_{k+1} \mid x_{k}, I_{k+1}=2) * Pr(I_{k+1}=2 \mid I_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k}
$$
$$
= \sum_{I_{k}} \int Pr(I_{k+1}=2 \mid I_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k}
$$
$$\begin{multline*}
= Pr(I_{k+1}=2 \mid I_{k}=1) * Pr(I_{k}=1 \mid y_{1:k}) \\ + Pr(I_{k+1}=2 \mid I_{k}=2) * Pr(I_{k}=2 \mid y_{1:k})\\ + Pr(I_{k+1}=2 \mid I_{k}=3) * Pr(I_{k}=3 \mid y_{1:k})
\end{multline*}$$

+ $p(x_{k+1}, I_{k+1}=3 \mid y_{1:k})$
$$
= \sum_{I_{k}} \int p(x_{k+1} \mid x_{k}, I_{k+1}=3) * Pr(I_{k+1}=3 \mid I_{k}) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k}
$$
$$
= \sum_{I_{k}} \big[ Pr(I_{k+1}=3 \mid I_{k}) * \int N(x_{k}, \sigma) * p(x_{k}, I_{k} \mid y_{1:k}) * dx_{k} \big]
$$

### Smoother `Backwards Update` step

$$
\sum_{I_{k+1}} \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}) * Pr(I_{k+1} \mid I_{k})}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1}
$$

+ $I_{k} = 1$
$$
\sum_{I_{k+1}} \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}) * Pr(I_{k+1} \mid I_{k}=1)}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1}
$$
$$\begin{multline*}
= \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=1) * Pr(I_{k+1}=1 \mid I_{k}=1)}{p(x_{k+1}, I_{k+1}=1 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=1 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=2) * Pr(I_{k+1}=2 \mid I_{k}=1)}{p(x_{k+1}, I_{k+1}=2 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=2 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=3) * Pr(I_{k+1}=3 \mid I_{k}=1)}{p(x_{k+1}, I_{k+1}=3 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=3 \mid y_{1:T}) * dx_{k+1}
\end{multline*}$$
$$\begin{multline*}
= \int \frac{U(a,b) * Pr(I_{k+1}=1 \mid I_{k}=1)}{p(x_{k+1}, I_{k+1}=1 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=1 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{Pr(I_{k+1}=2 \mid I_{k}=1)}{p(x_{k+1}, I_{k+1}=2 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=2 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{N(x_{k}, \sigma) * Pr(I_{k+1}=3 \mid I_{k}=1)}{p(x_{k+1}, I_{k+1}=3 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=3 \mid y_{1:T}) * dx_{k+1}
\end{multline*}$$

+ $I_{k} = 2$
$$
\sum_{I_{k+1}} \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}) * Pr(I_{k+1} \mid I_{k}=2)}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1}
$$
$$\begin{multline*}
= \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=1) * Pr(I_{k+1}=1 \mid I_{k}=2)}{p(x_{k+1}, I_{k+1}=1 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=1 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=2) * Pr(I_{k+1}=2 \mid I_{k}=2)}{p(x_{k+1}, I_{k+1}=2 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=2 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=3) * Pr(I_{k+1}=3 \mid I_{k}=2)}{p(x_{k+1}, I_{k+1}=3 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=3 \mid y_{1:T}) * dx_{k+1}
\end{multline*}$$
$$\begin{multline*}
= \int \frac{U(a,b) * Pr(I_{k+1}=1 \mid I_{k}=2)}{p(x_{k+1}, I_{k+1}=1 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=1 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{Pr(I_{k+1}=2 \mid I_{k}=2)}{p(x_{k+1}, I_{k+1}=2 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=2 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{N(x_{k}, \sigma) * Pr(I_{k+1}=3 \mid I_{k}=2)}{p(x_{k+1}, I_{k+1}=3 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=3 \mid y_{1:T}) * dx_{k+1}
\end{multline*}$$


+ $I_{k} = 3$
$$
\sum_{I_{k+1}} \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}) * Pr(I_{k+1} \mid I_{k}=3)}{p(x_{k+1}, I_{k+1} \mid y_{1:k})} * p(x_{k+1}, I_{k+1} \mid y_{1:T}) * dx_{k+1}
$$
$$\begin{multline*}
= \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=1) * Pr(I_{k+1}=1 \mid I_{k}=3)}{p(x_{k+1}, I_{k+1}=1 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=1 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=2) * Pr(I_{k+1}=2 \mid I_{k}=3)}{p(x_{k+1}, I_{k+1}=2 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=2 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{p(x_{k+1} \mid x_{k}, I_{k+1}=3) * Pr(I_{k+1}=3 \mid I_{k}=3)}{p(x_{k+1}, I_{k+1}=3 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=3 \mid y_{1:T}) * dx_{k+1}
\end{multline*}$$
$$\begin{multline*}
= \int \frac{U(a,b) * Pr(I_{k+1}=1 \mid I_{k}=3)}{p(x_{k+1}, I_{k+1}=1 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=1 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{Pr(I_{k+1}=2 \mid I_{k}=3)}{p(x_{k+1}, I_{k+1}=2 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=2 \mid y_{1:T}) * dx_{k+1}\\ + \int \frac{N(x_{k}, \sigma) * Pr(I_{k+1}=3 \mid I_{k}=3)}{p(x_{k+1}, I_{k+1}=3 \mid y_{1:k})} * p(x_{k+1}, I_{k+1}=3 \mid y_{1:T}) * dx_{k+1}
\end{multline*}$$