# Consistently estimating Markov Chains with Noisy Aggregated Data

## Notebook con esperimenti numerici per il seminario di fine corso di Metodi Numerici per le Catene di Markov

In [1]:
# Import some basic stuff
import numpy as np
import matplotlib.pyplot as plt
from utilities import P_mom_nonstationary, add_noise

In [2]:
# Experiment configuration: every value a reader may want to tune lives here.
# Number of repeated observations
K = 30
# Population size
N = 100
# Number of states
S = 20
# Number of timesteps
T = 2000
# Seed for NumPy's global random generator. The cells below draw from
# np.random.*, so seeding here makes "Restart & Run All" reproduce the same
# matrices, observations and error values on every run.
SEED = 0
np.random.seed(SEED)

#### Scrappy trials:

In [14]:
# Scratch check of the random-observation helper, using the sizes configured above.
# NOTE(review): `generate_random_y_t_array` is neither imported nor defined in
# the cells visible here, so this cell raises NameError on a fresh kernel.
# Presumably it lives in `utilities` -- confirm and import it, or drop this cell.
y_t_array = generate_random_y_t_array(
    S=S,
    N=N,
    K=K,
)
y_t_array

array([[ 9.,  6.,  5.,  3.,  0.,  7.,  5.,  2.,  8.,  8.,  8.,  1.,  5.,
         0.,  6.,  7.,  2.,  7.,  3.,  8.],
       [ 2.,  0.,  4.,  2.,  6., 10.,  0.,  0.,  4.,  0., 10.,  9.,  5.,
         2.,  5.,  8.,  8., 12.,  3., 10.],
       [ 3.,  2.,  7.,  5.,  5.,  9.,  1.,  2.,  5.,  5.,  8.,  8.,  6.,
         0.,  9.,  4.,  5.,  0.,  6., 10.]])

In [None]:
# Scratch: random row-stochastic matrix. Entries are uniform draws and each
# row is divided by its own sum (broadcast over the columns).
P_true = np.random.rand(S, S)
P_true = P_true / P_true.sum(axis=1, keepdims=True)

#### Less scrappy trials:

Let's generate the data

In [3]:
# True transition matrix: uniform random entries, then every row is divided
# by its sum (one broadcast division instead of a Python loop over rows) so
# that P is row-stochastic.
P = np.random.rand(S, S)
P /= P.sum(axis=1, keepdims=True)

# Initial distribution: uniform random weights rescaled to sum to 1.
pi_0 = np.random.rand(S)
pi_0 /= pi_0.sum()

# Sanity checks: the simulation below relies on both objects being properly
# normalised probability vectors/matrices.
assert P.shape == (S, S) and abs(P.sum(axis=1) - 1).max() < 1e-12
assert pi_0.shape == (S,) and abs(pi_0.sum() - 1) < 1e-12

Let's generate the observed data. For each timestep we draw all $K$ repeated observations at once.

In [14]:
# Simulate the chain for T steps. At every step we store the K clean
# multinomial count vectors and their noisy counterparts.
n_t_vector, y_t_vector = [], []
# pi_0 is one-dimensional, so it serves directly as the starting row vector
mu_t = pi_0
for t in range(T):
    # advance the distribution of x_t by one step: mu <- mu P
    mu_t = np.dot(mu_t, P)
    # K independent multinomial draws of N individuals with probabilities mu_t
    n_t = np.random.multinomial(N, mu_t, size=K)
    n_t_vector.append(n_t)
    # noisy version of the counts; the second value returned by add_noise is discarded
    y_t, _ = add_noise(n_t)
    y_t_vector.append(y_t)

**Note**: `n_t_vector` and `y_t_vector` are lists of length $T$; the item at position $t\in\{0,\dots,T-1\}$ is a $K\times S$ `np.ndarray` that contains the $K$ observations for timestep $t+1$.

In [19]:
# Let's fix a value of t, as an example: the middle of the simulated horizon.
# Deriving it from T (rather than hard-coding 1000) keeps this cell valid when
# T is changed in the configuration cell; with T = 2000 it is still t = 1000.
t = T // 2
# Both list positions t-1 and t must exist (and t-1 must not wrap around to -1).
assert 1 <= t < len(y_t_vector), "need at least two simulated timesteps"
# Moment-based estimate from the observations stored at positions t-1 and t,
# passing identity matrices for A_t and A_tp1.
P_mom_t = P_mom_nonstationary(y_t_array = y_t_vector[t-1],
                              y_tp1_array = y_t_vector[t],
                              A_t = np.eye(S),
                              A_tp1 = np.eye(S),
                              N = N)

In [20]:
# Row sums of the estimate; for a row-stochastic matrix each should equal 1.
row_sums = P_mom_t.sum(axis=1)
row_sums


array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1.])

In [21]:
# Distance between the estimate and the true transition matrix, using
# np.linalg.norm with its default `ord` (Frobenius norm for 2-D input).
estimation_gap = P_mom_t - P
np.linalg.norm(estimation_gap)

3.862222814883906