In [1]:
# import CartPole.py from local directory
import CartPole, sf3utility
import matplotlib.collections
import matplotlib.pyplot as plt
import numpy as np
import scipy.interpolate, scipy.stats.qmc
import random

# render all figure text in the Georgia typeface
plt.rcParams["font.family"] = "Georgia"
# optional global overrides for figure size and resolution (currently disabled)
#plt.rcParams['figure.figsize'] = [9.0, 7.0]
#plt.rcParams['figure.dpi'] = 400

# store results for later
# NOTE(review): not referenced in the cells visible here - confirm it is still used further down
cache = {}

In [2]:
# allows nice plots that can be redrawn
%matplotlib notebook

# Task 2.1 - Nonlinear Modelling

## Generating Training Data

Now that I wish to improve the state change model with nonlinear basis functions, the first task is to generate training data. For task 1.3 I used uniformly distributed random datapoints, but this time I am using a Sobol sequence to generate more evenly spaced data.

In [3]:
# number of training states; random_base2 draws 2**m points, so N should be a
# power of two for the Sobol array below to end up with exactly N rows
N = 512

# --- uniformly random states, kept for comparison with the Sobol points ---

np.random.seed(4)

# one uniform draw per state dimension, rescaled to that dimension's range
random_positions  = np.random.rand( N ) * 20 - 10
random_velocities = np.random.rand( N ) * 20 - 10
random_angles     = np.random.rand( N ) * np.pi * 2 - np.pi
random_angvels    = np.random.rand( N ) * 30 - 15

# one row per state: ( position, velocity, angle, angular velocity )
X_random = initial_states = np.stack(
    [ random_positions, random_velocities, random_angles, random_angvels ],
    axis=1
)


# --- Sobol sequence states, used as the actual training inputs ---

# seeded generator so the same sequence is produced on every run
sobol_engine = scipy.stats.qmc.Sobol( d=4, seed=4 )

# half-width of the sampled range in each state dimension
half_ranges = np.array( [ 10, 10, np.pi, 15 ] )

# samples in the unit hypercube, shifted and scaled to +/- half_ranges
unit_samples = sobol_engine.random_base2( m=int(np.log2(N)) )
X = X_sobol = initial_states = ( unit_samples - 0.5 ) * 2 * half_ranges

# training targets: the change in state produced by CartPole.perform_action from each initial state
Y = np.array( [ CartPole.perform_action( start ) - start for start in initial_states ] )


# the first M Sobol points double as the kernel centres
M = 32
kernel_centres = X[:M]

## Sobol Sequence

In [4]:
# side-by-side scatter plots of the two sampling schemes, projected onto
# the ( cart position, cart velocity ) plane
fig, (ax1, ax2) = plt.subplots( 1, 2, num=1, sharey=True, figsize=(9,4.5) )
fig.subplots_adjust(wspace=0.05, bottom=0.15)

# colour every point by a crowding measure: the norm of the Gaussian-weighted
# coordinate differences between it and all points of the same set
c = np.array( [
    np.linalg.norm( np.exp( -3*(X_random[:,0:2] - point[0:2])**2 ) )
    for point in X_random
] )

ax1.scatter( X_random[:,0], X_random[:,1], s=2, c=c, cmap="cool"  )
ax1.set_title( "Randomly Selected Points" )


# same crowding measure for the Sobol points
c = np.array( [
    np.linalg.norm( np.exp( -3*(X_sobol[:,0:2] - point[0:2])**2 ) )
    for point in X_sobol
] )

ax2.scatter( X_sobol[:,0], X_sobol[:,1], s=2, c=c, cmap="cool"  )

# ring the points that are used as kernel centres (transparent fill, cyan edge)
ax2.scatter( kernel_centres[:,0], kernel_centres[:,1], s=50, color=[0,0,0,0], edgecolors="cyan", linewidths=2  )
ax2.set_title( "Sobol Sequence Points" )


# axis labels shared by both panels
fig.text( 0.06, 0.5, "Initial Cart Velocity", rotation="vertical", ha="center", va="center", fontsize=12 )
fig.text( 0.5, 0.05, "Initial Cart Position", ha="center", va="center", fontsize=12 )


<IPython.core.display.Javascript object>

Text(0.5, 0.05, 'Initial Cart Position')

Above are scatter plots of the datapoints generated by the two methods, projected onto the plane formed by the first two state vector dimensions. The randomly selected points can be bunched up in some areas, and absent from others. The sobol sequence provides much better coverage of the state space, meaning more useful information makes it into the model.

## Selection of Kernel Centres

The Sobol sequence method has another advantage: the first few generated points are also evenly spaced over the state space, so they work well as kernel centres (if two kernel centres were next to each other, they would explain the same part of the data, so one would be enough). This works best when the number of kernel centres is a power of two; above I have circled the first 32 points, which are the ones used as kernel centres (M = 32).

## Creating More Training Data

In [128]:
# eight times as many training states as before; still a power of two, so
# random_base2 below returns exactly N rows
N = 512 * 8

# --- uniformly random states, regenerated at the new size ---

np.random.seed(4)

# one uniform draw per state dimension, rescaled to that dimension's range
random_positions  = np.random.rand( N ) * 20 - 10
random_velocities = np.random.rand( N ) * 20 - 10
random_angles     = np.random.rand( N ) * np.pi * 2 - np.pi
random_angvels    = np.random.rand( N ) * 30 - 15

# N state vectors, one per row: ( position, velocity, angle, angular velocity )
X_random = initial_states = np.stack(
    [ random_positions, random_velocities, random_angles, random_angvels ],
    axis=1
)


# --- Sobol sequence states, used as the actual training inputs ---

# fresh generator with the same seed, so the sequence restarts from its beginning
sobol_engine = scipy.stats.qmc.Sobol( d=4, seed=4 )

# half-width of the sampled range in each state dimension
half_ranges = np.array( [ 10, 10, np.pi, 15 ] )

# N samples in the unit hypercube, shifted and scaled to +/- half_ranges
unit_samples = sobol_engine.random_base2( m=int(np.log2(N)) )
X = X_sobol = initial_states = ( unit_samples - 0.5 ) * 2 * half_ranges

# training targets: the change in state produced by CartPole.perform_action from each initial state
Y = np.array( [ CartPole.perform_action( start ) - start for start in initial_states ] )


# the first M Sobol points double as the kernel centres
M = 32 * 8
kernel_centres = X[:M]

## Linear Least Squares Fit

In [129]:
# Moore-Penrose pseudo-inverse of the design matrix X. np.linalg.pinv works from
# the SVD of X, so it avoids explicitly forming and inverting X.T @ X (which
# squares the condition number); for a full-column-rank X it equals
# inv(X.T @ X) @ X.T up to rounding.
Xplus = np.linalg.pinv( X )

# least squares coefficient matrix: X @ C is the best linear approximation of Y
C = Xplus @ Y

First I will generate the linear least squares fit to the data; this explains the overall trend of the data and then our kernel functions can explain the nonlinear undulations. I decided this will give a better result than asking the kernel functions to explain the whole data as the functions they can represent are inherently "wobbly", essentially consisting of different sized bumps centered at certain locations.

## Residuals of the Linear Fit

In [130]:
# linear model's prediction of the state change for every training input
XC = np.matmul( X, C )

# what the linear model fails to explain; this is the target for the kernel model
R = np.subtract( Y, XC )

In [131]:
# Sweep the initial pole angle and angular velocity over a regular grid (cart
# position and velocity held fixed) and record the resulting change in state.

# grid resolution along each swept dimension
Nsteps = 30

# values taken by each state dimension; only the last two actually vary
initial_cart_positions  = np.array( [2] )
initial_cart_velocities = np.array( [4] )
initial_pole_angles     = np.linspace( -np.pi, np.pi, num=Nsteps )
initial_pole_angvels    = np.linspace( -15, 15, num=Nsteps )

# (Nsteps, Nsteps, 4) array of initial states: angular velocity varies along
# the first axis, pole angle along the second
initial_states = np.array( np.meshgrid(
    initial_cart_positions,
    initial_cart_velocities,
    initial_pole_angles,
    initial_pole_angvels
)).T.squeeze()

# the same states as a flat list of rows, convenient for looping and matrix products
flat_states = initial_states.reshape( (Nsteps**2,4) )

# true change in state at every grid point, folded back onto the grid
state_changes = np.array(
    [ CartPole.perform_action( start ) - start for start in flat_states ]
).reshape( (Nsteps, Nsteps, 4) )

# part of the true change left unexplained by the linear model C
residuals = state_changes - ( flat_states @ C ).reshape( (Nsteps, Nsteps, 4) )

In [189]:
fig, (ax1, ax2) = plt.subplots(1, 2, num=2, sharey=True, figsize=(9.8,5))
fig.subplots_adjust(wspace=0.05, top=0.84, bottom=0.15, left=0.1)

# left: true change in pole angle; right: the same after removing the linear fit
panels = [
    ( ax1, state_changes, "Without Subtraction of Linear Trend" ),
    ( ax2, residuals,     "After Subtraction of Linear Trend" ),
]

for ax, field, panel_title in panels:

    # smooth colour map of component 2 (pole angle) with labelled contours on top
    ax.imshow( field[:,:,2], interpolation="bicubic", extent=(-np.pi, np.pi, -15, 15), aspect='auto', cmap="cool", origin='lower' )
    contour = ax.contour( initial_states[0,:,2], initial_states[:,0,3], field[:,:,2], colors="white", linewidths=1 )

    # label every other contour level to keep the plot readable
    ax.clabel( contour, contour.levels[1::2], inline=True, fontsize=12 )
    ax.set_title( panel_title )

# figure-level title and shared axis labels
fig.text(0.5, 0.94, 'Change in Pole Angle as a Function of Initial State', ha='center', va='center', fontsize=16)
fig.text(0.5, 0.06, 'Initial Pole Angle', ha='center', va='center', fontsize=14)
fig.text(0.05, 0.5, 'Initial Pole Angular Velocity', ha='center', va='center', rotation='vertical', fontsize=14)


<IPython.core.display.Javascript object>

Text(0.05, 0.5, 'Initial Pole Angular Velocity')

The above plot shows the change in pole angle (one of the things the model must explain) as a function of initial pole angle and angular velocity. The plot on the left shows the true values; on the right are the residuals of the linear model. The bottom to top linear trend is subtracted off, so all the kernels have to explain are the long bump structures left behind.

## Define the Nonlinear Kernel

In [147]:

# kernel length scales: twice the per-dimension standard deviation of the training inputs
sigma = 2 * np.std( X, axis=0 )

def K( X, Xp, sigma = sigma ):
    """Gaussian-style kernel between rows of X and rows of Xp.

    X and Xp are 2-D arrays with one state per row; they must broadcast
    against each other (for example many states against a single centre,
    or a single state against many centres). Each coordinate difference is
    divided by the matching entry of sigma and squared. Column 2 (the pole
    angle) is handled separately: sin( difference / 2 ) is used in place of
    the raw difference, so angles that differ by a full turn count as equal.

    The default for sigma is captured once, when this function is defined;
    changing the module-level sigma afterwards does not affect the default.

    Returns a 1-D array holding exp( -0.5 * sum of squared scaled differences )
    for every broadcast row.
    """

    delta = X - Xp

    # squared, length-scaled differences in every dimension
    scaled_sq = np.square( delta / sigma )

    # overwrite the angle column with its periodic counterpart
    scaled_sq[:,2] = np.square( np.sin( 0.5 * delta[:,2] ) / sigma[2] )

    return np.exp( -0.5 * scaled_sq.sum( axis=1 ) )
    

## Construction of Knm Matrices

In [148]:
# loop over the kernel centres and evaluate the K function across all the Xs at each
Kmn = np.zeros( (M,N) )
for i, kernel_centre in enumerate( kernel_centres ):
    
    Kmn[i] = K( X, kernel_centre[np.newaxis] )
    
# same as above but only use first M vectors from X
Kmm = np.zeros( (M,M) )
for i, kernel_centre in enumerate( kernel_centres ):
    
    Kmm[i] = K( X[:M], kernel_centre[np.newaxis] )

## Finding the Kernel Coefficients

In [153]:
# regularisation strength applied to the Kmm term
l = 1e-4

# left-hand side: (Kmn Knm + l Kmm), an M x M system matrix
KmnKnm = np.matmul( Kmn, Kmn.T )
a = l * Kmm + KmnKnm

# right-hand side: kernel activations projected onto the linear-fit residuals
b = np.matmul( Kmn, R )

# one column of kernel weights per state dimension
alpha_m = np.linalg.solve( a, b )

## Building the Model

In [154]:
def nonlinear_model( state ):
    """Kernel model's prediction for a single state vector.

    Evaluates the kernel K between `state` and every row of the module-level
    `kernel_centres`, then combines those values with the module-level
    weights `alpha_m`. Both globals are looked up when the function is
    called, so rebinding either of them changes subsequent predictions.

    Returns the weighted sums with length-one axes removed.
    """

    # one kernel value per centre
    activations = K( state[np.newaxis], kernel_centres )

    return ( activations @ alpha_m ).squeeze()

## Evaluating the Model

In [155]:
# Sweep the initial pole angle and angular velocity over a regular grid (cart
# position and velocity held fixed) and evaluate the nonlinear model at each point.

# grid resolution along each swept dimension
Nsteps = 30

# values taken by each state dimension; only the last two actually vary
initial_cart_positions  = np.array( [2] )
initial_cart_velocities = np.array( [4] )
initial_pole_angles     = np.linspace( -np.pi, np.pi, num=Nsteps )
initial_pole_angvels    = np.linspace( -15, 15, num=Nsteps )

# (Nsteps, Nsteps, 4) array of initial states: angular velocity varies along
# the first axis, pole angle along the second
initial_states = np.array( np.meshgrid(
    initial_cart_positions,
    initial_cart_velocities,
    initial_pole_angles,
    initial_pole_angvels
)).T.squeeze()

# the same states as a flat list of rows, convenient for looping
flat_states = initial_states.reshape( (Nsteps**2,4) )

# nonlinear model output at every grid point, folded back onto the grid
modelled_changes = np.array(
    [ nonlinear_model( start ) for start in flat_states ]
).reshape( (Nsteps, Nsteps, 4) )

In [162]:
fig, axs = plt.subplots(2, 4, num=4, figsize=(12,6), sharex=True, sharey=True )
fig.subplots_adjust(wspace=0.05, hspace=0.2, top=0.84, bottom=0.12, left=0.1, right=0.9)

# panel titles: first row for the target residuals, second row for the model output
titles = [["Cart Position - Actual", "Cart Velocity - Actual", "Pole Angle - Actual", "Pole Angular Velocity - Actual"],
          ["Cart Position - Predicted", "Cart Velocity - Predicted", "Pole Angle - Predicted", "Pole Angular Velocity - Predicted"]]

# top row shows the residuals of the linear fit, bottom row the nonlinear
# model's reproduction of them; one column per state dimension
for axrow, changes, row_titles in zip( axs, [ residuals, modelled_changes ], titles ):

    for col, ax in enumerate( axrow ):

        # smooth colour map with labelled contours drawn on top
        ax.imshow( changes[:,:,col], interpolation="bicubic", extent=(-np.pi, np.pi, -15, 15), aspect='auto', cmap="cool", origin='lower' )
        contour = ax.contour( initial_states[0,:,2], initial_states[:,0,3], changes[:,:,col], colors="white", linewidths=1 )

        # label every other contour level to keep the panels readable
        ax.clabel( contour, contour.levels[1::2], inline=True, fontsize=12 )

        ax.set_title( row_titles[col] )

# figure-level title and shared axis labels
fig.text(0.5, 0.94, 'Residuals of State Changes vs. Nonlinear Prediction Thereof', ha='center', va='center', fontsize=16)
fig.text(0.5, 0.05, 'Initial Pole Angle', ha='center', va='center', fontsize=14)
fig.text(0.06, 0.5, 'Initial Pole Angular Velocity', ha='center', va='center', rotation='vertical', fontsize=14)


<IPython.core.display.Javascript object>

Text(0.06, 0.5, 'Initial Pole Angular Velocity')

Above are the contour plots of the nonlinear predictions of the residuals from the linear model, across varying initial pole angle and angular velocity. The nonlinear structure has been captured by the model quite well, especially in the change in pole position. However, the nonlinear model suffers from poor prediction accuracy near the edges of the sampled state space; the contours here differ in shape and magnitude. I think this is due to lower data density in these regions combined with larger variations in shape. This problem could perhaps be alleviated by collecting data further outside the region we would like to make predictions in, pushing errors further away from where we are using the model for prediction. Another method could be to collect more datapoints in regions with larger errors, or assign different "weights" to different data points, making them more or less important for the model to explain well.

## Plotting Predicted Residuals Against Target Residuals

In [184]:
fig, ((ax1, ax2),(ax3,ax4)) = plt.subplots(2, 2, num=19, figsize=(9,9))
fig.subplots_adjust(wspace=0.16, hspace=0.16, top=0.92, bottom=0.08, left=0.08, right=0.96)

# one panel per state dimension
titles = ["Cart Position", "Cart Velocity", "Pole Angle", "Pole Angular Velocity"]

# full model = linear part plus kernel correction, evaluated on every training input
predictions = np.array( [ state @ C + nonlinear_model(state) for state in X ] )

for i, (ax, panel_title) in enumerate( zip( [ax1, ax2, ax3, ax4], titles ) ):

    x = predictions[:,i]
    y = Y[:,i]

    # colour each point by its absolute prediction error
    c = np.abs(x - y)

    # symmetric axis limits with 20% headroom
    # NOTE(review): based on the largest signed value, not the largest magnitude,
    # so points more negative than -extent would fall outside the axes
    extent = max( x.max(), y.max() ) * 1.2

    ax.scatter( y, x, s=1, c=c, cmap="cool" )
    ax.set_xlim(-extent, extent)
    ax.set_ylim(-extent, extent)

    # reference line for a perfect prediction
    ax.plot( [-extent, extent], [-extent, extent], color="black", linestyle="dotted" )

    ax.set_title( panel_title )

# figure-level title and shared axis labels
fig.text(0.5, 0.97, 'Predicted State Changes vs. Target State Changes', ha='center', va='center', fontsize=16)
fig.text(0.52, 0.03, 'Target State Change', ha='center', va='center', fontsize=14)
fig.text(0.03, 0.5, 'Predicted State Change', ha='center', va='center', rotation='vertical', fontsize=14)
    

<IPython.core.display.Javascript object>

Text(0.03, 0.5, 'Predicted State Change')

Above I have plotted the target state change against the predictions from the linear plus nonlinear model. The points are all lying much closer to the ideal line y=x, so we can expect this model to perform much better than the linear model from task 1.3.

In [230]:
# Refit the kernel model with an increasing number of kernel centres and plot
# predicted against target change in pole angular velocity for each fit.
#
# NOTE: this cell rebinds the module-level M, kernel_centres, Kmn, Kmm and
# alpha_m. nonlinear_model reads kernel_centres and alpha_m when it is called,
# so after this cell it uses the fit from the final loop iteration.

fig, axs = plt.subplots(1, 6, num=9, figsize=(12,5), sharey=True)
fig.subplots_adjust(wspace=0.05, top=0.86, bottom=0.12, left=0.06, right=0.99)

# The linear fit and its residuals do not depend on the number of kernels, so
# they are computed once rather than on every iteration. np.linalg.pinv works
# from the SVD of X and avoids explicitly inverting X.T @ X.
Xplus = np.linalg.pinv( X )
C = Xplus @ Y

XC = X @ C
R = Y - XC

# regularisation strength applied to the Kmm term
l = 1e-4

# number of training points drawn in each panel
Npoints = 1024

for p, ax in enumerate( axs ):

    # number of kernel centres for this panel: grows from 0 (linear model only) to 55
    M = int( 5*p**1.5 )
    kernel_centres = X[:M]

    # loop over the kernel centres and evaluate the K function across all the Xs at each
    Kmn = np.zeros( (M,N) )
    for i, kernel_centre in enumerate( kernel_centres ):

        Kmn[i] = K( X, kernel_centre[np.newaxis] )

    # same as above but only use first M vectors from X
    Kmm = np.zeros( (M,M) )
    for i, kernel_centre in enumerate( kernel_centres ):

        Kmm[i] = K( X[:M], kernel_centre[np.newaxis] )

    # regularised least squares for the kernel weights
    KmnKnm = Kmn @ Kmn.T
    a = KmnKnm + l * Kmm
    b = Kmn @ R

    alpha_m = np.linalg.solve( a, b )

    # linear part plus kernel correction on the first Npoints training inputs
    predictions = np.array( [ state @ C + nonlinear_model(state) for state in X[:Npoints] ] )

    # column 3 is the pole angular velocity
    x, y = predictions[:,3], Y[:Npoints,3]

    # colour each point by its absolute prediction error
    c = np.abs(x - y)

    # symmetric axis limits with 20% headroom
    extent = np.max( ( np.concatenate([x, y]) ) ) * 1.2

    ax.scatter( y, x, s=1, c=c, cmap="cool" )
    ax.set_xlim(-extent, extent)
    ax.set_ylim(-extent, extent)

    # reference line for a perfect prediction
    ax.plot( [-extent, extent], [-extent, extent], color="black", linestyle="dotted" )

    ax.set_title( f'{M} Kernels' )

# figure-level title and shared axis labels
fig.text(0.5, 0.96, 'Convergence of Model with Addition of Basis Functions - Predictions of Angular Velocity Change', ha='center', va='center', fontsize=16)
fig.text(0.52, 0.03, 'Target State Change', ha='center', va='center', fontsize=14)
fig.text(0.025, 0.5, 'Predicted State Change', ha='center', va='center', rotation='vertical', fontsize=14)



<IPython.core.display.Javascript object>

Text(0.025, 0.5, 'Predicted State Change')