In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
import sobol_seq
from scipy.optimize import minimize,Bounds
from CartPole import CartPole

# Task 3.2, Non-Linear Model Linear Policy

In [None]:
def K_mat(dataA, dataB, sig_invert, kernel=None):
    """Build the kernel matrix between two collections of points.

    Parameters
    ----------
    dataA, dataB : array-like
        Collections of points indexed along their first axis.
    sig_invert : object
        Passed unchanged as the third argument of every kernel call.
    kernel : callable, optional
        ``kernel(a, b, sig_invert)`` returning a scalar. When omitted, the
        notebook-level ``kernel_func`` is used.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(dataA), len(dataB))`` whose entry ``[i, j]`` is
        ``kernel(dataA[i], dataB[j], sig_invert)``.
    """
    if kernel is None:
        # Looked up at call time, so the default only needs to exist once
        # K_mat is actually invoked.
        kernel = kernel_func
    n_rows = np.shape(dataA)[0]
    n_cols = np.shape(dataB)[0]
    K = np.zeros(shape=(n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            K[i, j] = kernel(dataA[i], dataB[j], sig_invert)
    return K

def kernel_func(x0, x1, sig):
    """Evaluate the weighted squared-exponential kernel between two points.

    The value is ``exp(-0.5 * sum_k sig[k] * q_k)`` where ``q_k`` is the
    squared difference ``(x0[k] - x1[k])**2``, except for component 2 where
    ``q_2 = sin((x0[2] - x1[2]) / 2)**2``. That substitution makes the kernel
    2*pi-periodic in component 2 (the pole angle in this notebook's state
    layout).

    Parameters
    ----------
    x0, x1 : array-like
        Points to compare; they must have the same length.
    sig : array-like
        One non-negative weight per compared component. Its length decides
        how many leading components of the points are used; any trailing
        components beyond ``len(sig)`` are ignored.

    Returns
    -------
    float
        Kernel value.

    Raises
    ------
    ValueError
        If the points have fewer components than there are weights.
    """
    weights = np.asarray(sig, dtype=float).ravel()
    delta_x = np.asarray(x0, dtype=float).ravel() - np.asarray(x1, dtype=float).ravel()
    n_dims = weights.size
    if delta_x.size < n_dims:
        raise ValueError(
            "kernel_func got %d weights but points with only %d components"
            % (n_dims, delta_x.size)
        )
    delta_x_2 = delta_x[:n_dims] ** 2
    if n_dims > 2:
        # Periodic distance for the angle-like component.
        delta_x_2[2] = np.sin(0.5 * delta_x[2]) ** 2
    # Both operands are 1-D, so the dot product is a true scalar.
    expo = (-0.5) * float(np.dot(weights, delta_x_2))
    return np.e ** expo
def r_range(i, minimum, maximum):
    """Rescale ``i`` linearly so that 0 maps to ``minimum`` and 1 to ``maximum``.

    The result has the shape of ``i``.
    """
    width = maximum - minimum
    base = np.full(np.shape(i), minimum)
    return base + i * width
def generate_x(n, ranges=None):
    """Generate ``n`` points from a Sobol sequence, rescaled per column.

    Parameters
    ----------
    n : int
        Number of points requested from ``sobol_seq.i4_sobol_generate``.
    ranges : sequence of (low, high) pairs, optional
        One pair per output column. When omitted, four columns are produced
        with ranges (-50, 50), (-10, 10), (-pi, pi) and (-15, 15).
        NOTE(review): code elsewhere in this notebook reads ``X[i, -1]`` as an
        action and uses five kernel weights; if a separate action column is
        wanted, pass a fifth range here - confirm the intended bounds.

    Returns
    -------
    numpy.ndarray
        Array with one row per point and one column per entry of ``ranges``.
    """
    if ranges is None:
        ranges = ((-50, 50), (-10, 10), (-(np.pi), np.pi), (-15, 15))
    ranges = [tuple(pair) for pair in ranges]
    x_data = sobol_seq.i4_sobol_generate(len(ranges), n)
    columns = [
        r_range(x_data[:, k], low, high)
        for k, (low, high) in enumerate(ranges)
    ]
    return np.column_stack(columns)


CartpoleState = CartPole()
N = 500   # number of training samples
M = 200   # number of kernel basis centres
# Per-dimension weights handed to the kernel (five entries).
one_over_sigma = [2.6366e-16, 1.62674e-06, 2.63, 1.3831e-02, 2.4694e-04]
X = generate_x(N)
X_M = generate_x(M)
lmd = 1e-6  # regularisation weight on KMM in the normal equations

# NOTE(review): generate_x(N) as called here yields four columns, so
# X[i, -1] below is the same column as X[i, 3], while one_over_sigma has five
# entries. If a separate action column is intended, the sampled data needs a
# fifth column - confirm before trusting the fit.

# Targets: one-step change in state, starting from a noise-perturbed sample.
Y = np.zeros(shape=(N, 4))
for i in range(N):
    noise = 0.05 * np.random.normal(0, 1, (4,))
    CartpoleState.setState(X[i, :4] + noise)
    CartpoleState.performAction(X[i, -1])
    Y[i] = CartpoleState.getState() - (X[i, :4] + noise)

KNN = K_mat(X, X, one_over_sigma)
KMN = K_mat(X_M, X, one_over_sigma)
KNM = K_mat(X, X_M, one_over_sigma)
KMM = K_mat(X_M, X_M, one_over_sigma)

# Solve (KMN KNM + lmd KMM) alpha_i = KMN y_i for each output dimension.
c = KMN @ KNM + lmd * KMM
d = np.zeros(shape=(4, M))
alphas = np.zeros(shape=(4, M))
for i in range(4):
    # The right-hand side belongs in d; writing it into c would corrupt the
    # system matrix and leave d at zero, giving all-zero coefficients.
    d[i] = KMN @ Y[:, i]
    alphas[i] = np.linalg.lstsq(c, d[i], rcond=-1)[0]



def model_pred_control(p, n_steps=15):
    """Accumulate the loss of a linear policy rolled out on the fitted model.

    The global ``CartpoleState`` is reset to ``[0, 0, 0.2, 0]``. At every step
    the current loss is added to the total, the action is ``np.dot(p, state)``,
    and the next state is the current state plus the model's predicted change
    (kernel features against ``X_M`` times ``alphas``) plus Gaussian noise of
    scale 0.05. The angle is remapped after each update.

    Parameters
    ----------
    p : array-like of length 4
        Linear policy gains applied to the state.
    n_steps : int, optional
        Number of steps to roll out (default 15).

    Returns
    -------
    Sum of ``CartpoleState.loss()`` over the visited states.
    """
    CartpoleState.setState([0, 0, 0.2, 0])
    total_loss = 0

    for _ in range(n_steps):
        noise = 0.05 * np.random.normal(0, 1, (4,))
        total_loss += CartpoleState.loss()
        state = np.asarray(CartpoleState.getState(), dtype=float)
        force = np.dot(p, state)
        # Model input is the four state values followed by the action.
        model_input = np.append(state, force)
        k_row = np.array([kernel_func(model_input, s, one_over_sigma) for s in X_M])
        new_pred = k_row @ alphas.transpose() + state + noise
        CartpoleState.setState(new_pred)
        CartpoleState.remap_angle()
    return total_loss


In [None]:

def _rollout_model_policy(init_state, n, p, noise_std=0.0):
    """Roll the fitted model forward under a linear policy and record it.

    Returns an array of shape ``(5, n)``: rows 0-3 hold the state at each step
    and row 4 the action ``np.dot(p, state)`` taken from it. When
    ``noise_std`` is non-zero, Gaussian noise of that scale is added to every
    predicted next state.
    """
    trajectory = np.zeros((5, n))
    CartpoleState.setState(init_state)
    for step in range(n):
        state = np.asarray(CartpoleState.getState(), dtype=float)
        force = np.dot(p, state)
        trajectory[:4, step] = state
        trajectory[4, step] = force
        model_input = np.append(state, force)
        # Features are taken against X_M, the centres alphas was fitted on.
        k_row = np.array(
            [kernel_func(model_input, centre, one_over_sigma) for centre in X_M]
        )
        next_state = k_row @ alphas.transpose() + state
        if noise_std:
            next_state = next_state + noise_std * np.random.normal(0, 1, (4,))
        CartpoleState.setState(next_state)
        CartpoleState.remap_angle()
    return trajectory


def plot_evolution_policy(n, p):
    """Plot model-predicted state trajectories under the linear policy ``p``.

    Four initial states are rolled out for ``n`` steps twice: once with
    Gaussian noise of scale 0.05 added to every predicted state, once without.
    The four state variables are drawn in stacked subplots, dashed for the
    noisy runs and solid for the noise-free ones. Only the rollouts for the
    initial states at indices 1 and 2 are drawn.

    Parameters
    ----------
    n : int
        Number of steps per rollout.
    p : array-like of length 4
        Linear policy gains applied to the state.
    """
    init_state_array = np.array([[0, 0, 0.2, 0], [0, 0, 0.2, 0], [0, 0, 0.1, 0], [0, 0, 0.15, 0.1]])

    # Stacked as (variable, initial state, step); noisy rollouts run first.
    X_noise = np.stack(
        [_rollout_model_policy(init, n, p, noise_std=0.05) for init in init_state_array],
        axis=1,
    )
    X_clean = np.stack(
        [_rollout_model_policy(init, n, p) for init in init_state_array],
        axis=1,
    )

    fig1, axs1 = plt.subplots(4, figsize=(10, 5))
    fig1.suptitle('Time Evolution of Variables with Noise in Actual Dynamics')

    # Exactly n samples spaced 0.2 apart; a float-step arange can come out one
    # element long or short. NOTE(review): 0.2 is presumably the simulator's
    # step duration - confirm.
    time = 0.2 * np.arange(n)
    states = ["location", "velocity", "pole angle", "pole velocity"]

    for var, ax in enumerate(axs1):
        ax.plot(time, X_noise[var][1], '--', color='royalblue', label='Noise [0, 0, 0.2, 0]')
        ax.plot(time, X_noise[var][2], '--', color='black', label='Noise  [0, 0, 0.1, 0]')

        ax.plot(time, X_clean[var][1], color='royalblue', label='[0, 0, 0.2, 0]')
        ax.plot(time, X_clean[var][2], color='black', label='[0, 0, 0.1, 0]')

        ax.set(xlabel="Time")
        ax.set(ylabel=states[var])
    axs1[0].legend(bbox_to_anchor=(1.1, 1.3), loc='upper right')
    plt.show()

In [None]:

# Plot 30-step rollouts for each candidate gain vector in turn.
for policy_gains in (
    [2, 3, 78.1, 11.1],
    [64.81, 8.21, 1.19, 17.2],
    [23.785527, 19.0731, 1.095621, 26.9653],
):
    plot_evolution_policy(30, policy_gains)
# Other gain vectors left here as notes (not plotted):
#   [2, 3, 78, 11]
#   [24.1498626, 18.42980936, 1.10772824, 26.2491273]

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=180073fb-9c10-447b-b721-e7b1d25fe36a' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>