In [41]:
#This code implements the binning algorithm that can be used to estimate pmfs from raw data
#We first give the general code, which estimates any conditional pmf f(Z|Y) (the variable Z is named X in the functions below), and then provide a tool to facilitate its use when estimating plant pmfs
#We only give the minimal interface and a few simple examples; please refer to the ReadMe file for more explanations on how to use the code

#Code author: Émiland Garrabé

In [None]:
import numpy as np

In [42]:
def discretize(X, Xdim, Xmin, Xstep):
    """Map a point X onto the tuple of bin indices of a regular grid.

    Parameters:
        X: sequence holding the coordinates of the point (at least Xdim entries).
        Xdim: number of coordinates to discretize.
        Xmin: lower bound of the grid along each axis.
        Xstep: bin width along each axis.

    Returns:
        A tuple of Xdim integers, directly usable to index a NumPy array.
        Axis k gets floor((X[k] - Xmin[k]) / Xstep[k]); no bounds check is made.
    """
    #Floor-divide the offset from the minimum by the bin width, axis by axis
    return tuple(int((X[axis] - Xmin[axis]) // Xstep[axis]) for axis in range(Xdim))

In [43]:
def getJointPMFs(data, Xdim, Xmin, Xstep, Xdiscr, Ydim, Ymin, Ystep, Ydiscr):
    """Estimate the joint pmf f(X,Y) and the marginal pmf f(Y) by counting samples in bins.

    Parameters:
        data: iterable of trajectories; each trajectory is indexable as
            [Xhist, Yhist], two equally long sequences of samples.
        Xdim, Ydim: dimensions of X and Y.
        Xmin, Ymin: minimal value along each axis of X and Y.
        Xstep, Ystep: bin width along each axis of X and Y.
        Xdiscr, Ydiscr: lists with the number of bins along each axis of X and Y.

    Returns:
        (fullJoint, Yjoint): arrays of shape Xdiscr + Ydiscr and Ydiscr.
        Each sums to 1, or is all zeros when `data` contains no sample.

    A sample above the grid raises IndexError when the arrays are indexed. A
    sample below its minimum gets a negative bin index, which NumPy counts
    from the end of the axis, so inputs are expected to lie inside the grid.
    """
    fullJoint = np.zeros(Xdiscr + Ydiscr) #Counts for f(X,Y), X axes first then Y axes
    Yjoint = np.zeros(Ydiscr) #Counts for f(Y)
    for history in data: #For each trajectory in the dataset
        Xhist = history[0]
        Yhist = history[1]
        for i in range(len(Xhist)): #For each data point in the trajectory
            Xind = discretize(Xhist[i], Xdim, Xmin, Xstep)
            #Y must be binned with its own minimum (Ymin), not with the minimum of X
            Yind = discretize(Yhist[i], Ydim, Ymin, Ystep)
            fullJoint[Xind + Yind] += 1
            Yjoint[Yind] += 1
    total = np.sum(fullJoint) #Number of samples counted (same total for both arrays)
    if total > 0: #Without samples, keep the zero arrays instead of dividing 0 by 0
        fullJoint = fullJoint/total
        Yjoint = Yjoint/np.sum(Yjoint)
    return (fullJoint, Yjoint)

In [44]:
def getConditionnal(fullJoint, Yjoint, Xdiscr, Ydiscr, Xdim):
    """Compute the conditional pmf f(X|Y) from the joint pmf f(X,Y) and the marginal f(Y).

    Parameters:
        fullJoint: array holding f(X,Y), X axes first, then Y axes.
        Yjoint: array holding f(Y).
        Xdiscr, Ydiscr: lists with the number of bins along each axis of X and Y.
        Xdim: number of leading axes of fullJoint that belong to X.

    Returns:
        Array of shape Xdiscr + Ydiscr holding f(X|Y); entries whose Y bin has
        zero marginal probability are set to 0.
    """
    result = np.zeros(Xdiscr + Ydiscr)
    #ndenumerate walks every multi-index whatever the number of dimensions
    for (position, jointValue) in np.ndenumerate(fullJoint):
        marginal = Yjoint[position[Xdim:]] #The trailing part of the index addresses Y
        #A zero marginal means the Y bin was never observed: store 0 rather than dividing
        result[position] = 0 if marginal == 0 else jointValue/marginal
    return result

In [45]:
###Use case example for a 2D X (the estimated variable) and a 1D Y (the conditioning variable)
#Dimensions of the variables
Xdim = 2
Ydim = 1

#Minimum values (as lists, one entry per axis)
Xmin = [0,0]
Ymin = [0]

#Discretization steps, i.e. bin widths (as lists, one entry per axis)
Xstep = [0.5,1]
Ystep = [0.5]

#Amount of discrete bins (as lists, one entry per axis)
#With the minimums and steps above, each axis covers the interval [0,5)
Xdiscr = [10,5]
Ydiscr = [10]

In [46]:
#Example dataset with only one (small) trajectory
#Xhist[i] and Yhist[i] are the i-th samples of X and Y, each given as a list of coordinates
Xhist = [[1,2],[1,2],[1,2],[1,2],[2,1],[2,1]]
Yhist = [[1],[1],[2],[2],[1],[1]]

#A trajectory is the pair [Xhist, Yhist]; the dataset is a list of trajectories
history = [Xhist, Yhist]
data = [history]

In [47]:
#Estimate the joint pmf f(X,Y) and the marginal pmf f(Y) from the example dataset
(full, Y) = getJointPMFs(data, Xdim, Xmin, Xstep, Xdiscr, Ydim, Ymin, Ystep, Ydiscr)

In [48]:
#Divide the joint pmf by the marginal to obtain the conditional pmf f(X|Y)
cond = getConditionnal(full, Y, Xdiscr, Ydiscr, Xdim)

In [50]:
#Resulting conditional pmf f(X|Y), indexed with the X bin indices first and the Y bin indices last
#Notice how a sparse dataset impacts the numerical pmf: only the bins visited by the data are non-zero
cond

array([[[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]],

       [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]],

       [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0.5, 0. , 1. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ]],

       [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0. , 0. , 0. , 0. , 0. , 

In [51]:
#Probability of X in bin (4,1) given Y in bin 2: the first Xdim=2 indices address X, the last one addresses Y
cond[(4,1,2)]

0.5

In [52]:
def _asList(value):
    """Return `value` as a plain Python list, so that `+` concatenates instead of adding.

    Lists are returned unchanged. Scalars (int, float, NumPy scalars) become
    one-element lists; tuples and NumPy arrays are converted with `tolist()`.
    """
    if isinstance(value, list):
        return value
    return np.atleast_1d(value).tolist()


def formatHistory(sysData):
    """Translate trajectories in (x,u) form to (X,Y) form, with X_k = x_k and Y_{k-1} = (x_{k-1}, u_k).

    Parameters:
        sysData: iterable of trajectories; each trajectory is indexable as
            [x, u], with x the sequence of states and u the sequence of inputs.
            States and inputs may be scalars (int, float, NumPy scalars) or
            sequences (lists, tuples, 1D arrays).

    Returns:
        A NumPy array with two rows per trajectory, in trajectory order: the
        row of X samples followed by the row of Y samples. A trajectory of
        N states yields N-1 samples. Each Y sample is a list made of the
        previous state followed by the current input. When the rows do not
        form a regular numeric array (the usual case, since Y samples are
        longer than X samples), the result has dtype=object.

    NOTE(review): the rows of all trajectories are stacked flat
    ([X1, Y1, X2, Y2, ...]) rather than grouped per trajectory. This layout is
    kept for backward compatibility; a consumer that expects a list of [X, Y]
    pairs has to regroup the rows itself.
    """
    data = [] #Initialize resulting data
    for hist in sysData: #For each (x,u) trajectory
        x = hist[0] #States
        u = hist[1] #Inputs
        L = len(x)-1 #Amount of data points (one less than the full length of the history)
        newX = [x[i+1] for i in range(L)] #Simply X_k = x_k
        #Scalars are wrapped in lists first: adding raw scalars (or arrays) would sum them instead of concatenating
        newY = [_asList(x[i]) + _asList(u[i+1]) for i in range(L)]
        data.extend([newX, newY]) #We add the X row and the Y row of the trajectory to the data
    try:
        return np.array(data)
    except ValueError:
        #Recent NumPy versions refuse to build ragged arrays implicitly; ask for an object array explicitly
        return np.array(data, dtype=object)

In [53]:
#Small use case example with 1D states and inputs
x = [1,2,3,4,5] #States
u = [5,4,3,2,1] #Inputs

#A trajectory is the pair [x, u]; the system data is a list of trajectories
H = [x,u]
#Note: this rebinds `data`, which the earlier cells used for the (X,Y) example dataset
data = [H]

formatHistory(data)

array([[2, 3, 4, 5],
       [list([1, 4]), list([2, 3]), list([3, 2]), list([4, 1])]],
      dtype=object)