In [None]:
import numpy as np

In [None]:
class Pendulum():
    """Torque-driven pendulum with additive Gaussian noise on angle and speed.

    The state is ``np.array([angle, speed])``. The angle is kept in
    ``[-pi, pi]`` and the speed is clipped to ``[-max_speed, max_speed]``.
    """

    def __init__(self):
        """Set the default physical, simulation and noise parameters."""
        self.l = 0.6                       # length used in the dynamics
        self.m = 1                         # mass used in the dynamics
        self.max_torque = 10               # inputs are clipped to [-max_torque, max_torque]
        self.max_speed = 5                 # speeds are clipped to [-max_speed, max_speed]
        self.state = np.array([0.0, 0.0])  # [angle, speed]; floats so the dtype never changes
        self.last_input = None             # last (clipped) input passed to step()
        self.g = 9.81                      # gravity constant used in the dynamics
        self.dt = 0.1                      # integration time step
        self.angle_std = 3*2*np.pi/360     # std of the angle noise: 3 degrees in radians (5 degrees was also tried)
        self.speed_std = 0.1               # std of the speed noise

    @staticmethod
    def _wrap_angle(angle):
        """Map ``angle`` onto ``[-pi, pi]``.

        Values already inside the interval are returned unchanged. Anything
        else is reduced modulo 2*pi, so angles that are several turns away are
        handled as well (a single +/- 2*pi correction is not enough for them).
        """
        if -np.pi <= angle <= np.pi:
            return angle
        return (angle + np.pi) % (2*np.pi) - np.pi

    def random_init(self):
        """Draw a uniformly random state (useful to cover the bins evenly)."""
        self.state = np.array([np.random.uniform(-np.pi, np.pi),
                               np.random.uniform(-self.max_speed, self.max_speed)])

    def step(self, u):
        """Advance the simulation by one time step ``dt`` under input ``u``.

        The input is clipped to the torque limits and stored in
        ``last_input``. Speed and angle are then updated with a forward-Euler
        step (the angle uses the speed from before the update), each perturbed
        by zero-mean Gaussian noise.
        """
        u = np.clip(u, -self.max_torque, self.max_torque)
        self.last_input = u

        angle, speed = self.state
        accel = u/(self.m*self.l*self.l) + self.g*np.sin(angle)/self.l  # dynamics

        # The speed noise is drawn before the angle noise.
        new_speed = speed + accel*self.dt + np.random.normal(0, self.speed_std)
        new_speed = np.clip(new_speed, -self.max_speed, self.max_speed)

        new_angle = angle + speed*self.dt + np.random.normal(0, self.angle_std)
        new_angle = self._wrap_angle(new_angle)  # the angle is modulo 2*pi

        self.state = np.array([new_angle, new_speed])

    def set_state(self, angle, speed):
        """Force the state to ``(angle, speed)`` after clipping/wrapping both.

        ``speed`` is clipped to the speed limits and ``angle`` is wrapped onto
        ``[-pi, pi]``, whatever its magnitude.
        """
        speed = np.clip(speed, -self.max_speed, self.max_speed)
        self.state = np.array([self._wrap_angle(angle), speed])


In [None]:
def discretize(Z, Zdim, Zmin, Zstep):
    """Return the bin index of ``Z`` along each of its first ``Zdim`` axes.

    For axis ``k`` the index is the floor of ``(Z[k] - Zmin[k]) / Zstep[k]``,
    converted to a plain ``int``. The indices are returned as a tuple so the
    result can be used directly to index an n-dimensional array. No range
    check is performed: values below ``Zmin[k]`` give negative indices.
    """
    return tuple(
        int((Z[axis] - Zmin[axis]) // Zstep[axis])  # floor division = bin number
        for axis in range(Zdim)
    )

In [None]:
def getJointPMFs(data, Zdim, Zmin, Zstep, Zdiscr, Ydim, Ymin, Ystep, Ydiscr):
    """Estimate the joint pmfs f(Z,Y) and f(Y) by counting samples in bins.

    Parameters
    ----------
    data : iterable of trajectories; ``trajectory[0]`` is the sequence of Z
        samples and ``trajectory[1]`` the matching sequence of Y samples.
    Zdim, Ydim : number of components of Z and Y.
    Zmin, Ymin : lower bound of the binning grid along each axis.
    Zstep, Ystep : bin width along each axis.
    Zdiscr, Ydiscr : number of bins along each axis.

    Returns
    -------
    (fullJoint, Yjoint) : arrays of shape ``Ydiscr + Zdiscr`` and ``Ydiscr``
        (Y axes come first in the joint array). Each sums to 1, or is all
        zeros when ``data`` contains no sample.

    Notes
    -----
    Bin indices are clamped to the valid range, so a sample lying on or beyond
    a grid edge is counted in the nearest edge bin. Without this, a value on
    the upper edge would raise an IndexError and a value below the minimum
    would produce a negative index, which NumPy silently wraps around to a
    bin at the other end of the axis.
    """
    Zshape = tuple(Zdiscr)
    Yshape = tuple(Ydiscr)
    fullJoint = np.zeros(Yshape + Zshape)  # f(Z,Y), indexed as [Y..., Z...]
    Yjoint = np.zeros(Yshape)              # f(Y)

    Zlast = [n - 1 for n in Zshape]  # highest valid index along each Z axis
    Ylast = [n - 1 for n in Yshape]  # highest valid index along each Y axis

    def _clamp(index, last):
        # Force every component of the index into [0, last] for its axis.
        return tuple(min(max(k, 0), top) for k, top in zip(index, last))

    for history in data:  # for each trajectory in the dataset
        Zhist = history[0]
        Yhist = history[1]
        for i in range(len(Zhist)):  # for each data point in the trajectory
            Zind = _clamp(discretize(Zhist[i], Zdim, Zmin, Zstep), Zlast)
            Yind = _clamp(discretize(Yhist[i], Ydim, Ymin, Ystep), Ylast)
            fullJoint[Yind + Zind] += 1  # index of the joint variable (Y, Z)
            Yjoint[Yind] += 1

    # Normalize the counts into probabilities; skip when nothing was counted
    # to avoid a 0/0 that would fill the arrays with NaN.
    fullTotal = np.sum(fullJoint)
    if fullTotal > 0:
        fullJoint = fullJoint/fullTotal
    Ytotal = np.sum(Yjoint)
    if Ytotal > 0:
        Yjoint = Yjoint/Ytotal
    return(fullJoint, Yjoint)

In [None]:
def getConditionnal(fullJoint, Yjoint, Zdiscr, Ydiscr, Zdim):
    """Compute the conditional pmf f(Z|Y) = f(Z,Y) / f(Y).

    Parameters
    ----------
    fullJoint : joint pmf f(Z,Y) whose leading axes index Y and whose
        remaining axes index Z.
    Yjoint : marginal pmf f(Y); its shape must match the leading axes of
        ``fullJoint``.
    Zdiscr, Ydiscr, Zdim : kept for backward compatibility; the shapes are
        read from the arrays themselves.

    Returns
    -------
    Array with the shape of ``fullJoint``. Entries whose conditioning bin has
    ``f(Y) == 0`` are set to 0 instead of dividing by zero.
    """
    fullJoint = np.asarray(fullJoint, dtype=float)
    Yjoint = np.asarray(Yjoint, dtype=float)

    # The number of Y axes is taken from Yjoint itself rather than from a
    # global variable, so the function does not depend on notebook state.
    Ydim = Yjoint.ndim

    # Append singleton axes so f(Y) broadcasts over every Z axis of f(Z,Y).
    Ymarginal = Yjoint.reshape(Yjoint.shape + (1,)*(fullJoint.ndim - Ydim))

    conditionnal = np.zeros(fullJoint.shape)
    # Divide only where f(Y) is non-zero; the other entries keep their 0.
    np.divide(fullJoint, Ymarginal, out=conditionnal, where=(Ymarginal != 0))
    return(conditionnal)

In [None]:
def formatHistory(sysData):
    """Translate trajectories from (x, u) form to (Z, Y) form.

    Each input trajectory is ``[x, u]`` with ``x`` the list of states and
    ``u`` the list of inputs (``u[0]`` is never read). For every step ``k`` the
    output contains ``Z_k := x_k`` and ``Y_{k-1} := (x_{k-1}, u_k)``, so a
    trajectory with ``n`` states yields ``n - 1`` data points.

    Scalar states or inputs are wrapped into one-element lists, and sequence
    values (lists, tuples, NumPy arrays) are converted to lists, so that
    building Y is always a list concatenation and never an element-wise sum.

    Returns
    -------
    A NumPy object array with one ``[Z_history, Y_history]`` entry per
    trajectory. ``dtype=object`` is requested explicitly because Z and Y
    samples have different lengths; recent NumPy versions refuse to build
    such a ragged array unless the object dtype is given.
    """
    def _as_list(value):
        # Numbers (Python or NumPy scalars) become one-element lists;
        # anything else is treated as a sequence of components.
        if isinstance(value, (int, float, np.generic)):
            return [value]
        return list(value)

    data = []
    for hist in sysData:  # for each (x, u) trajectory
        x = hist[0]  # states
        u = hist[1]  # inputs
        L = max(len(x) - 1, 0)  # one data point less than the number of states
        newZ = [_as_list(x[i+1]) for i in range(L)]
        newY = [_as_list(x[i]) + _as_list(u[i+1]) for i in range(L)]
        data.append([newZ, newY])  # append in place: no list copy per trajectory
    return(np.array(data, dtype=object))

In [None]:
#Binning grid. Z is the next state (angle, speed); Y is the previous state followed by the input,
#so every Y list below is the matching Z list extended with one entry for the input axis.

#Number of components of each variable
Zdim, Ydim = 2, 3

#Lower bound of the grid along each axis
Zmin = [-np.pi, -5]
Ymin = Zmin + [-2.5]

#Bin width along each axis
Zstep = [2*np.pi/50, 0.2]
Ystep = Zstep + [0.25]

#Number of bins along each axis
Zdiscr = [50, 50]
Ydiscr = Zdiscr + [20]

In [None]:
P = Pendulum()
#P.m = 0.5

#Simulation settings
N_TRAJECTORIES = 100000 #Number of simulated trajectories
TRAJECTORY_LENGTH = 100 #Number of states stored per trajectory
INPUT_BOUND = 2.5 #Inputs are drawn uniformly from [-INPUT_BOUND, INPUT_BOUND); matches the input axis of the binning grid

#NOTE: no random seed is set, so every run produces a different dataset.
sysData = [0]*N_TRAJECTORIES #One entry per trajectory
for j in range(N_TRAJECTORIES):
    x = [0]*TRAJECTORY_LENGTH #States of the trajectory
    u = [0]*TRAJECTORY_LENGTH #Inputs; u[0] stays a placeholder because there is one less input than states
    P.random_init() #Start from a random state
    x[0] = list(P.state)
    for i in range(TRAJECTORY_LENGTH - 1):
        uk = np.random.uniform(-INPUT_BOUND, INPUT_BOUND) #Pick a random action
        P.step(uk) #Simulation step
        x[i+1] = list(P.state)
        u[i+1] = uk #u[i+1] is the input that led to x[i+1]
    h = [x,u] #A history, or trajectory, is made of the state history and the input history
    sysData[j] = h #The dataset is a list of all trajectories
dt = formatHistory(sysData) #Give the data a form that the binning code can use

(full, Y) = getJointPMFs(dt, Zdim, Zmin, Zstep, Zdiscr, Ydim, Ymin, Ystep, Ydiscr) #Get the joint pmfs with the parameters
cond = getConditionnal(full, Y, Zdiscr, Ydiscr, Zdim) #Get the conditional pmf

In [None]:
np.save('plant_l06_m1.npy',cond) #Write the conditional pmf computed above to disk (file for the default pendulum parameters)
#np.save('plant_l06_m05.npy',cond) #Alternative output file, used when the pendulum is simulated with m = 0.5

In [None]:
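#To bin the plant for the m=0.5kg pendulum, uncomment the second line of each of the previous two cells,
#comment out the first np.save line (otherwise plant_l06_m1.npy is overwritten with the m=0.5kg result),
#then rerun both cells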