# IMPORTANT

Due to major changes in the project, this notebook has been turned into an archive.
See the new file `main` instead.

# Packages installation

In [None]:
#install packages needed 
!pip install matplotlib
!pip install particles

# Importations

In [4]:
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sb
import scipy.stats as stats

# Modules from particles
import particles 
from particles import distributions as dists # Where proba distributions are defined
from particles import state_space_models as ssm # Where state-space-models are defined
from particles.collectors import Moments

# Creating the model

In [6]:
# Definition of the negative binomial 

class NegativeBinomiale(dists.DiscreteDist):

    """
    Negative binomial distribution NB(n, p).
    Thin wrapper around scipy.stats.nbinom:
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.nbinom.html

    Parameters
    ----------
    n : first shape parameter passed to scipy.stats.nbinom (default 1)
    p : second shape parameter passed to scipy.stats.nbinom (default 0.5)
    """

    def __init__(self, n = 1, p = 0.5):
        self.n = n
        self.p = p

    # Random variable sampler
    def rvs(self, size = None):
        """Draw `size` samples from NB(n, p)."""
        # Calling `stats.nbinom(n, p, size=...)` does not sample: it tries to
        # build a frozen distribution object. Sampling goes through `.rvs`.
        return stats.nbinom.rvs(self.n, self.p, size = size)

    # Log probability density (mass) function
    def logpdf(self, x):
        """Log of the probability mass function evaluated at x."""
        return stats.nbinom.logpmf(x, self.n, self.p)

    # Percentile point function
    def ppf(self, u):
        """Quantile function (inverse of the cdf) evaluated at u."""
        return stats.nbinom.ppf(u, self.n, self.p)

In [None]:
class expression(ssm.StateSpaceModel):

    """
    State-space model of the expression data (work in progress).

    The hidden state X_t has two components: u_t (index 0) and s_t (index 1).

    Model parameters are read as attributes (self.eta, self.zeta, ...).
    No default values have been chosen yet; once they are, they should go in a
    `default_params` dict with the following keys:
    eta, alpha, beta, beta_0, gamma_u, gamma_d, zeta, lambda_u, lambda_d,
    alpha_s, kappa_s, epsilon_b, epsilon_0, b.
    """

    # Initial distribution of the data (X0)
    def PX0(self):
        """Distribution of X_0. Not chosen yet, so this raises NotImplementedError."""
        raise NotImplementedError("PX0: the initial distribution has not been chosen yet")

    # Transition kernel
    def PX(self, t, xp):
        """
        Distribution of X_t given X_{t-1} = xp (p = past).

        One uniform draw selects the "move" followed by u_t, a second uniform
        draw selects the move followed by s_t, and the two resulting laws are
        returned as an independent product.

        NOTE(review): the comparisons on xp[:, 0] are used as plain booleans,
        which presumably only works when xp holds a single particle -- confirm
        before running this with several particles.
        """
        # Here, X involves two things: u_t (0th index) and s_t (1st index)
        # Therefore, we have to define both

        # For u_t
        uni_u = dists.Uniform(a = 0, b = 1).rvs(size = 1) # Uniform to "choose a move"

        if xp[:, 0] == 0:
            if uni_u <= self.eta:
                # Dirac on xp = 0
                u_t = dists.Dirac(loc = 0)
            else:
                # Exponential law with rate zeta
                u_t = dists.Gamma(1, self.zeta)

        elif xp[:, 0] != 0:

            if uni_u <= self.alpha:
                # Dirac on xp
                u_t = dists.Dirac(loc = xp[:, 0])

            elif uni_u <= self.alpha + self.beta:
                # Exponential law with rate zeta
                u_t = dists.Gamma(1, self.zeta)

            elif uni_u <= self.alpha + self.beta + self.beta_0:
                # Dirac on 0
                u_t = dists.Dirac(loc = 0)

            elif uni_u <= self.alpha + self.beta + self.beta_0 + self.gamma_u:
                # Drift upward
                u_t = dists.LinearD(dists.Gamma(1, self.lambda_u / xp[:, 0]), a = 1, b = xp[:, 0])

            else:
                # Drift downward
                # NOTE(review): the draft was missing one closing parenthesis here;
                # it is placed so that a and b belong to LinearD, like the upward
                # drift above -- confirm. `InvD` is kept as written in the draft;
                # it is not defined in this notebook, so check that
                # particles.distributions provides it.
                u_t = dists.LinearD(dists.InvD(dists.Gamma(1, self.lambda_d)), a = 1, b = xp[:, 0])

        # For s_t
        uni_s = dists.Uniform(a = 0, b = 1).rvs(size = 1) # Uniform to "choose" a move

        if uni_s <= self.alpha_s:
            s_t = dists.Dirac(loc = xp[:, 1])
        else:
            s_t = dists.Gamma(a = self.kappa_s, b = self.kappa_s)

        # Return the independent product of u_t and s_t
        return dists.IndepProd(u_t, s_t)

    # Emission model
    def PY(self, t, xp, x):
        """
        Distribution of Y_t given X_t = x (and possibly X_{t-1} = xp).

        Not finished: the first two components of the mixture were never
        written, so this raises NotImplementedError.
        """
        # Draft of the mixture, kept for reference:
        #   dists.Mixture([1 - self.epsilon_b - self.epsilon_0, self.epsilon_b, self.epsilon_0],
        #                 <first component: TODO>,
        #                 <second component: TODO>,
        #                 dists.DiscreteUniform(lo = 0, hi = self.b + 1))
        # "In practice, b is set to max(y)"
        raise NotImplementedError("PY: the first two mixture components are not written yet")
    
                                    
                                    
    # We should use the first expression (the one not involving the infinite sum), but which involves x_t and a_t.
    # So we might need conditional distributions?
        
        

In [None]:
def abdist(xp):  # xp means X_{t-1}
    # Unfinished sketch of a structured law with entries 'u', 's', 'a' and 'x'.
    # NOTE(review): as written this cell does not parse -- the 'u' and 's' entries
    # have no value yet, and the closing brace of the dict sits inside a comment.
    # NOTE(review): kappa, theta, s and u are not defined in this function.
    d = {'u': #Rewrite the big expression,
         's': #Rewrite the simple kernel,
         'a': dists.Gamma(kappa, 1/theta),
         'x': dists.Cond(lambda x: dists.Poisson(x["u"]*s[u]/(kappa*theta)), # These parameters would still need to be added}
    return dists.StructDist(d)
                         

In [None]:
def usdist(xp):  # xp means X_{t-1}
    """
    Structured law with two entries, 'u' and 's'.

    'u' is Normal with location xp['a']; 's' is a Dirac located at
    xp['b'] plus the value just drawn for 'u'.

    NOTE(review): xp is still read through the keys 'a' and 'b', while the
    entries produced here are named 'u' and 's' -- confirm which keys the
    previous state is meant to carry before chaining this as a kernel.
    """
    # The conditional law receives the sample being built, whose entries are the
    # keys of this dict ('u', 's'); it therefore has to read x['u'], not x['a'].
    d = {'u': dists.Normal(loc=xp['a']),
         's': dists.Cond(lambda x: dists.Dirac(xp['b'] + x['u']))}
    return dists.StructDist(d)

class SillyModel(ssm.StateSpaceModel):
    """
    Toy state-space model whose state is drawn from `abdist`.

    NOTE(review): this class relies on `abdist`, not on the `usdist` function
    defined in the same cell -- confirm this is intended.
    """
    # The base class is reached through `ssm`, the alias used by the notebook's
    # import cell (`state_space_models as ssm`); `ssms` is not defined.

    def PX0(self):
        """Initial law: `abdist` started from a = 0, b = 0."""
        return abdist({'a': 0., 'b': 0.})

    def PX(self, t, xp):
        """Law of X_t given X_{t-1} = xp."""
        return abdist(xp)

    def PY(self, t, xp, x):
        """Law of Y_t given X_t = x: Normal centred on x['a'] with scale 0.3."""
        return dists.Normal(loc=x['a'], scale=0.3)  # whatever


# Older

What we have to do 
1. **Describe the model using the package particles.**
    - This means we have to "code" the initial law $X_0$ (which we will choose somehow; it is not that important), the Markov transition kernel of $X_t$ and the emission law of $Y_t$. The last two are described thoroughly in the **supplementary materials** of the paper, but we might run into technical difficulties when implementing them.
    - Then, we will be able to *generate* observations using the model.
   
2. **Implement a particle filter with a fixed $\Theta$**
    - That is, recovering $X_t$ based on $Y_t$, in a situation where all the parameter values are known.

3. **Implement two ways to do Bayesian inference on the model.**
    - That is, estimate the values of the parameters.
    - **Implementation 1**: Particle Gibbs (like in the paper)
    - **Implementation 2**: PMMH (simpler to implement but harder to calibrate). 
    - And compare those two implementations.  

## About the distributions 

Not all probability distributions are available in the package (see the documentation https://particles-sequential-monte-carlo-in-python.readthedocs.io/en/latest/distributions.html). However, it is possible to define **transformations** of distributions and, as such, to create new ones. The next two transformations will prove useful.

If $X \sim \mathcal{U}(0,1)$ then $-\frac{1}{\lambda} \log X \sim \mathcal{E}(\lambda)$ (here, $\lambda$ is the **rate**).

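If $Y \sim \mathcal{G}a(n, \frac{1-p}{p})$ (shape $n$ and **scale** $\frac{1-p}{p}$, i.e. rate $\frac{p}{1-p}$) and $X \mid Y = y \sim \mathcal{P}(y)$, then $X \sim \mathcal{N}eg(n,p)$ (with the parametrisation of `scipy.stats.nbinom`).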

- We can define our exponential laws more simply, as $\mathcal{E}(\lambda) = \mathcal{G}a(1, \lambda)$ (here the second parameter of the Gamma is the **rate**).
- For the Negative Binomial, we should define a new probability (using ProbDist) and interface it with scipy (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.nbinom.html) 

## On the use of uniforms to make "choices"

Here, we rely on the "particles" package for the distributions. 
The following is the code that allows to draw a number from a uniform continuous distribution on [0,1]. 
Note that we first *define* the law (as an object); then we can generate numbers from it.

In [3]:
# Define the U(0, 1) law once, as an object
Uniform = dists.Uniform(a = 0, b = 1)
# Generate one number from a (continuous) uniform(0,1)
x = Uniform.rvs(size = 1)
print(x)

# Equivalently, define the law and sample from it in a single expression
y = dists.Uniform(a = 0., b = 1.).rvs(size = 1)
print(y)

[0.6896325]
[0.14226246]


In [4]:
# Generate 10000 uniforms from the `Uniform` object defined in the previous cell
vector_x = Uniform.rvs(size = 10000)
# Optional visual check (histogram of the draws), left disabled:
#plt.hist(vector_x, density = True, bins = 100)
#plt.show()

# Equivalently, without going through the `Uniform` object
vector_y = dists.Uniform(a = 0., b = 1.).rvs(size = 10000)
# Optional visual check (histogram of the draws), left disabled:
#plt.hist(vector_y, density = True, bins = 100)
#plt.show()

In our case, uniforms have two different uses:
- We can create more complex distributions based on uniforms (see next section for more details)
- We can use them to make **choices**
    
The Markov transition kernel for $u_{t}$ consists of a mixture of different possible **moves**. If, for example, $u_{t-1} = 0$, $u_t$ either stays at 0 (with probability $1 - \eta$) or follows an exponential distribution (with probability $\eta$). In our implementation, it is necessary, at each step, to choose between these options.

One way to do it is to generate a uniform $u \sim \mathcal{U}(0,1)$, then to check if $ u < \eta$. If so, $u_{t}$ should follow an exponential, if not, it should stay at 0.

This procedure can be extended to multiple moves. Say we have 3 options, with respective probabilities $\alpha$, $\beta$ and $\gamma$ such that $\alpha + \beta + \gamma = 1$. First we generate $u \sim \mathcal{U}(0,1)$. Then we check:
- $u < \alpha$ ? If so: option 1; if not, move on to the next check.
- $u < \alpha + \beta$ ? If so: option 2; if not, move on to the next check.
- Given that probabilities sum up to 1, the only possibility left is option 3. 


In [None]:
class expression(ssm.StateSpaceModel):
    
    """
    # Work in progress. In the end, we should add default values of the parameters here. 
    default_params = {"eta":, "alpha":, "beta":, "beta_0", "gamma_u":, "gamma_d":,
                     "zeta":, "lamba_u":, "lambda_d": ,
                     "alpha_s":, "kappa_s":}
    """
    
    # Initial distribution of the data (X0)
    def PX0(self): 
    
    
    # Transition kernel 
    def PX(self, t, xp): # Distribution of X_t given X_{t-1} = xp (p = past)
        # Here, X involves two things: u_t (0th index) and s_t (1th index)
        # Therefore, we have to define both 
        
        # For u_t 
        
        uni_x = dists.Uniform(a = 0, b = 1).rvs(size = 1) # Uniform to "choose a move"
        
        if xp[:,0] == 0 :
            if uni <= self.eta:
                # Dirac on xp = 0 
                u_t =  dists.Dirac(loc = 0)
            else:
                # Exponential law with rate zeta 
                u_t = dists.LinearD(dists.LogD(dists.Uniform(a = 0, b = 1)), a = - 1 / zeta)
            
        elif xp[:,0] !=0:
            if uni <= alpha:
                # Dirac on xp
                u_t = dists.Dirac(loc = xp[:, 0])
                
            elif uni <= alpha + beta:
                # Exponential law with rate zeta 
                u_t = dists.LinearD(dists.LogD(dists.Uniform(a = 0, b = 1)), a = - 1 / zeta)
            
            elif uni <= alpha + beta + beta_0:
                # Dirac on 0
                u_t = dists.Dirac(loc = 0)
            
            elif uni <= alpha + beta + beta_0 + gamma_u:
                #
                u_t =  
            
            else:
                #
                u_t = 
                
        #For s_t
        
        uni_s = dists.Uniform(a = 0, b = 1).rvs(size = 1) # Uniform to "choose" a move 
        if uni_s <= alpha_s:
            s_t = dists.Dirac(loc = xp[:,1])
        else:
            s_t = dists.Gamma(a = kappa_s, b = kappa_s)
        
        # Return the independant product of u_t and s_t
        return dists.IndeProd(u_t, s_t)

            
    # Emission model
    def PY(self, t, xp, x): # Distribution of Y_t given X_t = x (and possibly X_{t-1} =xp)
        
        