In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tck
import hypothesis as h
from plotting_tools import make_dirs_safe
import time
# Default (width, height) in inches for every figure created in this notebook.
plt.rcParams["figure.figsize"] = (10, 5)

# Analysis of the rank of the constraint matrix

In this notebook we use slightly rearranged definitions of the left and right parts of the constraint matrix:

Let us write $\pmb{f}_n$ as $[1\ \pmb{g}_n^\top]^\top$, and similarly
$\tilde{\pmb{a}}_m = [\pmb{a}_m^\top\ 1]^\top$. We can then 
write the whole constraint vector as:
$$\begin{bmatrix}
\text{vect}(\tilde{\pmb{a}}_m\pmb{f}_n^\top)^\top & \text{vect}( 
\pmb{g_n} \pmb{f_n}^\top)^\top
\end{bmatrix}$$
Then we have the needed constraints on the left, and the constraints added due to the 
regularisation on the right.
From the lemmas in the paper we know that we can write:
$$\text{vect}(\pmb{f_n f_n^\top}) = \pmb{R} \pmb{f_n^e},$$
where $\pmb{R}$ is some (sparse) matrix, and $\pmb{f_n^e}$ is a vector 
similar in structure to $\pmb{f_n}$, and can be written as a concatenation of  $\pmb{f_n}$ and $\pmb{f}_n^{r}$.
This means we can write our full system of equations as:
\begin{align}
b &=
\begin{bmatrix}
\text{vect}(\pmb{a_m f_n}^\top)^\top & (\pmb{f_n^e})^\top
\end{bmatrix}
\begin{bmatrix}
\pmb{C} \\
\pmb{R^\top L} \\\end{bmatrix}\\
\end{align}
Or using $\tilde{\pmb{a}}_m$:
\begin{align}
b &=
\begin{bmatrix}
\text{vect}(\tilde{\pmb{a}}_m \pmb{f}_n^\top)^\top & \pmb{f}_n^{r\top}
\end{bmatrix}
\begin{bmatrix}
\pmb{C} \\
\pmb{R}^\top \pmb{L} \\\end{bmatrix}
\end{align}

So, in this notebook, the left-hand side refers to $\text{vect}(\tilde{\pmb{a}}_m \pmb{f}_n^\top)^\top$ and the right-hand side refers to $\pmb{f}_n^{r\top}$.


### Rank vs number of measurements
In these experiments, the number of dimensions $D$, the number of constraints $K$ and the number of positions $N$ are fixed,
and for several different numbers of anchors the number of measurements is increased.

In [None]:
# --- Experiment configuration ---
n_dimensions = 2
n_constrains = 5
n_positions = 20
min_positions = 10
n_repetitions = 100
directory = "results/ranks/"

# Suffix that tags the saved figure with this configuration.
key = "_d{}_c{}_p{}".format(n_dimensions, n_constrains, n_positions)

frame = h.get_frame(n_constrains, n_positions)

# Anchor counts to compare: D + 1, followed by a few multiples of D.
n_anchors_list = [n_dimensions + 1] + [mult * n_dimensions for mult in (2, 4, 8, 100)]

# Measurement counts to sweep, both ends inclusive.
fewest_measurements = n_dimensions * n_constrains
most_measurements = (n_dimensions + 1) * min_positions
measuremets_range = list(range(fewest_measurements, most_measurements + 1))


def _sampled_left_rank(anchors, frame, n_anchors, n_positions, n_measurements):
    """Draw one random set of measurement indexes and return the rank of the
    left sub-matrix that `h.get_left_submatrix` builds from it."""
    idx_a, idx_f = h.random_indexes(n_anchors, n_positions, n_measurements)
    return np.linalg.matrix_rank(h.get_left_submatrix(idx_a, idx_f, anchors, frame))


# ranks[m, a, r]: rank for the m-th measurement count, a-th anchor count, r-th repetition.
ranks = np.zeros((len(measuremets_range), len(n_anchors_list), n_repetitions))
for a_idx, n_anchors in enumerate(n_anchors_list):
    anchors = h.get_anchors(n_anchors, n_dimensions)
    for m_idx, n_measurements in enumerate(measuremets_range):
        ranks[m_idx, a_idx, :] = [
            _sampled_left_rank(anchors, frame, n_anchors, n_positions, n_measurements)
            for _ in range(n_repetitions)
        ]

In [None]:
# Plot, per anchor count, the normalised mean rank (dashed) and the empirical
# probability of reaching full rank (steps) against the number of measurements.

# (D + 1) * K: the rank threshold, also used as the unit of the x axis.
max_rank = (n_dimensions + 1) * n_constrains
x = np.array(measuremets_range) / max_rank

f, ax = plt.subplots()
for a_idx, n_anchors in enumerate(n_anchors_list):
    ranks_for_anchor = ranks[:, a_idx, :]
    color = "C{}".format(a_idx)  # same colour for both curves of one anchor count
    ax.plot(
        x,
        np.mean(ranks_for_anchor, axis=1) / max_rank,
        label="mean rank, {} anchors".format(n_anchors),
        color=color,
        linestyle='dashed')
    # The mean of the boolean "rank reached max_rank" indicator over the
    # repetitions is the empirical probability of full rank.
    ax.step(
        x,
        np.mean(ranks_for_anchor >= max_rank, axis=1),
        label="probability, {} anchors".format(n_anchors),
        color=color,
        where='post')
ax.set_xlabel("number of measurements")
ax.grid()
# Ticks at integer multiples of (D+1)K, labelled as such.
ax.xaxis.set_major_formatter(tck.FormatStrFormatter('%g (D+1)K'))
ax.xaxis.set_major_locator(tck.MultipleLocator(base=1))
ax.legend()
fname = directory + "left_matrix_anchors" + key + ".pdf"
make_dirs_safe(fname)
f.savefig(fname)
plt.show()

### Rank vs number of positions
Here the number of dimensions $D$, the number of constraints $K$ and the total number of measurements are fixed.
In particular, the number of measurements is $(D+1)K$, and we increase the total number of sampling positions along the trajectory. We can see drastically different behaviour for exactly $D+1$ anchors than for more than $D+1$ anchors.

In [None]:
# Experiment: fix the number of measurements at (D + 1) * K and sweep the
# number of sampling positions, for several anchor counts.
n_dimensions = 2
n_constrains = 5
min_positions = 5
n_repetitions = 100
directory = "results/ranks/"

n_anchors_list = [n_dimensions + 1, n_dimensions + 2, n_dimensions + 3, n_dimensions + 4, n_dimensions + 20]
n_measurements = (n_dimensions + 1) * n_constrains
n_positions_list = list(range(min_positions, 10*min_positions+1))

# Suffix that tags the saved figure with this configuration.
key = "_d{}_c{}_m{}".format(n_dimensions, n_constrains, n_measurements)

start = time.time()
# ranks[p, a, r]: rank for the p-th position count, a-th anchor count, r-th repetition.
ranks = np.zeros((len(n_positions_list), len(n_anchors_list), n_repetitions))
for a_idx, n_anchors in enumerate(n_anchors_list):
    anchors = h.get_anchors(n_anchors, n_dimensions)
    for p_idx, n_positions in enumerate(n_positions_list):
        # The frame's arguments do not change across repetitions, so build it
        # once per position count instead of n_repetitions times.
        # NOTE(review): assumes h.get_frame is deterministic in its arguments,
        # as the first experiment already does by sharing one frame - confirm.
        frame = h.get_frame(n_constrains, n_positions)
        for r in range(n_repetitions):
            idx_a, idx_f = h.random_indexes(n_anchors, n_positions, n_measurements)
            constrains = h.get_left_submatrix(idx_a, idx_f, anchors, frame)
            ranks[p_idx, a_idx, r] = np.linalg.matrix_rank(constrains)
end = time.time()
print("elapsed time: {:.2f}s".format(end - start))

In [None]:
# Normalised mean rank (dashed) and empirical full-rank probability (steps)
# against the number of positions, one colour per anchor count.
max_rank = (n_dimensions + 1) * n_constrains

f, ax = plt.subplots()
for idx, n_anchors in enumerate(n_anchors_list):
    ranks_for_anchor = ranks[:, idx, :]
    colour = "C{}".format(idx)
    mean_rank_label = "mean rank, {} anchors".format(n_anchors)
    probability_label = "probability, {} anchors".format(n_anchors)

    ax.plot(
        n_positions_list,
        np.mean(ranks_for_anchor, axis=-1) / max_rank,
        label=mean_rank_label,
        color=colour,
        linestyle='dashed')
    ax.step(
        n_positions_list,
        np.mean(ranks_for_anchor >= max_rank, axis=-1),
        label=probability_label,
        color=colour,
        where='post')

ax.set_xlabel("number of positions")
ax.grid()
ax.legend()

fname = directory + "left_matrix_points2" + key + ".pdf"
make_dirs_safe(fname)
f.savefig(fname)
plt.show()

### Theory vs experiments

Comparison between theoretical and simulated probabilities for $D+1$ anchors. 

#### Theory
For exactly $D+1$ anchors, each anchor is 
"responsible" for a direction. Indeed, if we change the frame to the basis 
defined by our anchors, it will change the coefficients $\pmb{C}$ we want to 
recover, but won't change the $\pmb{f}_n$s. The constraint matrix will then 
have the form:
$$\begin{bmatrix}
\pmb{f_{n_1}} & \pmb{0} & \pmb{0} \\
\pmb{0} & \pmb{f_{n_2}} & \pmb{0} \\
\pmb{0} & \pmb{0} & \pmb{f_{n_3}}\\
\pmb{f_{n_4}} & \pmb{0} & \pmb{0} \\
\pmb{0} & \pmb{f_{n_5}} & \pmb{0} \\
\vdots & \vdots & \vdots \\ 
\pmb{0} & \pmb{0} & \pmb{f_{n_{(D+1)K}}}\\
\end{bmatrix},
$$
where $\pmb{0}$ is a vector of zeros of length $K$. Then we can see that 
in order to diagonalise the matrix, we need at least $K$ different 
$\pmb{f}_n$s for each anchor. Since the $\pmb{f}_n$s do not repeat for the 
same anchor, this is a sufficient and necessary condition. 

We can then calculate the probability of such a situation. Assume that 
we have $R = (D+1)K$ measurements, $M = D+1$ anchors and $N$ possible 
$\pmb{f}_n$s. We can pick $(D+1)K$ measurements out of $(D+1)N$ in:
$${(D+1)N}\choose{(D+1)K}$$
ways. On the other hand, there are:
$${{N}\choose{K}}^{D+1}$$
ways to choose the $K$ measurements corresponding to each anchor.
So the probability of having the right number of measurements for each anchor 
is:
$$
P_{N} = {{{N}\choose{K}}^{D+1}}\bigg/ {{{(D+1)N}\choose{(D+1)K}}}
$$
Note that for $N=K$ we just get $P_{K}=1$.

Using approximation formulas from Wikipedia, we get the limit for large $N$:
\begin{align*}
P_{N\rightarrow \infty} &\approx \frac{\sqrt{D+1}}{\sqrt{2\pi 
K}^{D}}
\end{align*}

In [None]:
# Experiment: for exactly D + 1 anchors and (D + 1) * K measurements, simulate
# the rank over an increasing number of positions and evaluate the closed-form
# probability from h.probability_few_anchors for the same position counts.
n_dimensions = 2
n_constrains = 5
n_repetitions = 1000
directory = "results/ranks/"

n_anchors_list = [n_dimensions + 1]
n_measurements = (n_dimensions + 1) * n_constrains
n_positions_list = list(range(n_constrains, 10*n_constrains))

# Suffix that tags the saved figure with this configuration.
key = "_d{}_c{}_m{}".format(n_dimensions, n_constrains, n_measurements)

start = time.time()
# ranks[p, a, r]: rank for the p-th position count, a-th anchor count, r-th repetition.
ranks = np.zeros((len(n_positions_list), len(n_anchors_list), n_repetitions))
for a_idx, n_anchors in enumerate(n_anchors_list):
    anchors = h.get_anchors(n_anchors, n_dimensions)
    for p_idx, n_positions in enumerate(n_positions_list):
        # The frame's arguments do not change across repetitions, so build it
        # once per position count instead of n_repetitions times.
        # NOTE(review): assumes h.get_frame is deterministic in its arguments,
        # as the first experiment already does by sharing one frame - confirm.
        frame = h.get_frame(n_constrains, n_positions)
        for r in range(n_repetitions):
            idx_a, idx_f = h.random_indexes(n_anchors, n_positions, n_measurements)
            constrains = h.get_left_submatrix(idx_a, idx_f, anchors, frame)
            ranks[p_idx, a_idx, r] = np.linalg.matrix_rank(constrains)
end = time.time()
print("elapsed time: {:.2f}s".format(end - start))

# Closed-form counterpart of the simulation, one value per position count.
probabilities = np.array(
    [h.probability_few_anchors(n_dimensions, n_constrains, n) for n in n_positions_list])

In [None]:
# Compare the simulated probability of reaching full rank with the value from
# h.probability_few_anchors and with the large-N limit.
max_rank = (n_dimensions + 1) * n_constrains

f, ax = plt.subplots()
idx = 0  # index into the anchor axis of `ranks` to plot
reached_full_rank = ranks[:, idx, :] >= max_rank
mean = np.mean(reached_full_rank, axis=-1)
std = np.std(reached_full_rank, axis=-1)
limit = h.probability_few_anchors_limit(n_dimensions, n_constrains)

ax.plot(
    n_positions_list,
    mean,
    label="estimated probability")
ax.fill_between(
    n_positions_list,
    np.maximum(mean - std, 0),
    mean + std,
    alpha=0.2,
    label="estimated +/- std")
ax.plot(
    n_positions_list,
    probabilities,
    label="calculated probability")
# Standard deviation of a Bernoulli variable, sqrt(p - p^2). The variance is
# clipped at 0 so that a probability rounding slightly above 1 cannot produce
# NaN in the shaded band.
std_true = np.sqrt(np.clip(probabilities - probabilities ** 2, 0, None))
ax.fill_between(
    n_positions_list,
    np.maximum(probabilities - std_true, 0),
    probabilities + std_true,
    alpha=0.2,
    label="calculated +/- std")
ax.axhline(
    limit,
    linestyle="--",
    color="g",
    label=r"limit for positions $\rightarrow \infty$")
ax.set_xlabel("number of positions")
ax.grid()
ax.legend()
# File name intentionally left unchanged so existing references to the saved
# figure keep resolving.
fname = directory + "estimated_claculated_100" + key + ".pdf"
# Create the output directory first, as the other plotting cells do, so this
# cell also works when run on its own.
make_dirs_safe(fname)
f.savefig(fname)
plt.show()