In [1]:
from CartPole import CartPole, remap_angle
import numpy as np
import matplotlib
import pandas as pd
import seaborn as sns
matplotlib.use('TkAgg') 

import matplotlib.pyplot as plt

***Task 1.1***

We assume the system is Markov: the next state depends only on the current state and the applied force. For the cart-pole this follows directly from the equations of motion.

We also assume that the system is time-invariant: the dynamics do not depend explicitly on time, so a rollout started from the same state at any time instant evolves in the same way.

TODO: use a heatmap to show which initial conditions lead to the pole flipping over.

In [2]:
def rollout(initial_state, initial_force, num_steps, visual=True):
    """
    Roll the CartPole simulation forward under a constant force and log it.

    Args:
        initial_state (tuple): State handed to ``env.setState``, ordered as
            (cart_location, cart_velocity, pole_angle, pole_velocity).
        initial_force (float): Force passed to every ``performAction`` call.
        num_steps (int): Number of steps to simulate. The log holds
            ``num_steps + 1`` entries because the state is recorded before
            each action, starting with the state right after ``setState``.
        visual (bool): Forwarded to ``CartPole(visual=...)``. When true, the
            environment plot and the current matplotlib figure are closed
            once the rollout has finished.

    Returns:
        dict: Maps 'cart_location', 'cart_velocity', 'pole_angle' and
            'pole_velocity' to the list of values recorded at each step.
    """
    state_keys = ('cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity')

    env = CartPole(visual=visual)
    env.reset()
    env.setState(initial_state)

    data = {key: [] for key in state_keys}

    for _ in range(num_steps + 1):
        # Record first, then advance: entry k is the state before action k.
        for key in state_keys:
            data[key].append(getattr(env, key))

        env.performAction(initial_force)

        # Keep the pole angle between -pi and pi.
        env.remap_angle()

    if visual:
        env.close_plot()
        plt.close()

    return data

    

In [21]:
# Plot functions

def plot_x_y(x, y, xlabel, ylabel):
    """
    Draw ``y`` against ``x`` as a red line in a new figure and show it.

    Both axes are padded by 0.1 beyond the data range, and the title is
    built as '<ylabel> vs <xlabel>'.
    """
    pad = 0.1
    _, axis = plt.subplots()
    axis.plot(x, y, 'r-')
    axis.set_xlim(min(x) - pad, max(x) + pad)
    axis.set_ylim(min(y) - pad, max(y) + pad)
    axis.set_xlabel(xlabel)
    axis.set_ylabel(ylabel)
    axis.set_title(f'{ylabel} vs {xlabel}')
    axis.grid()
    plt.show()

def plot_y(y, ylabel):
    """
    Draw ``y`` against its sample index (iteration number) as a red line.

    The x-axis runs from 0 to ``len(y) + 2`` and the y-axis is padded by 0.1
    beyond the data range.
    """
    count = len(y)
    _, axis = plt.subplots()
    axis.plot(np.arange(0, count), y, 'r-')
    axis.set_xlim(0, count + 2)
    axis.set_ylim(min(y) - 0.1, max(y) + 0.1)
    axis.set_xlabel('Iterations')
    axis.set_ylabel(ylabel)
    axis.set_title(f'{ylabel} vs Iterations')
    axis.grid()
    plt.show()


def plot_data_time(data, verdict):
    """
    Plot every state variable of a rollout against the iteration number.

    Args:
        data (dict): Maps 'cart_location', 'cart_velocity', 'pole_angle' and
            'pole_velocity' to sequences of recorded values.
        verdict (str): Free-text description appended to the figure title.
    """
    panel_names = ('cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity')
    figure, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 5))

    # axes.flat walks the 2x2 grid row by row, matching panel_names order.
    for axis, name in zip(axes.flat, panel_names):
        series = data[name]
        axis.plot(np.arange(0, len(series)), series, 'r-')
        axis.set_title(name + ' vs Iterations')
        axis.set_xlabel('Iterations')
        axis.set_ylabel(name)
        axis.grid()

    # Figure-level title summarising the initial velocities and the verdict.
    figure.suptitle(f"Initial Carriage velocity: {data['cart_velocity'][0]:.2f} m/s \nPole angular velocity: {data['pole_velocity'][0]:.2f} rad/s \n{verdict}")
    figure.tight_layout(rect=[0, 0.03, 1, 1])  # leave room for the title
    plt.show()

def plot_state(x_data, y_data):
    """
    Plot the change in each state variable against that variable.

    Args:
        x_data (dict): Maps 'cart_location', 'cart_velocity', 'pole_angle'
            and 'pole_velocity' to the swept values (x-axis of each panel).
        y_data (dict): Maps the same names to the corresponding change in
            that variable (y-axis of each panel).
    """
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 5))
    titles = ['cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity']

    # ax.flat walks the 2x2 grid row by row, matching the order of titles.
    for panel, title in zip(ax.flat, titles):
        panel.plot(x_data[title], y_data[title], 'r-')
        panel.set_title("change in " + title + ' vs ' + title)
        panel.set_xlabel(title)
        # The y-axis carries the change in the variable, not the variable
        # itself, so label it to agree with the panel title.
        panel.set_ylabel("change in " + title)
        # Always show at least [-1, 1] so tiny changes do not fill the panel.
        panel.set_xlim(min(-1, min(x_data[title]) - 0.1), max(1, max(x_data[title]) + 0.1))
        panel.set_ylim(min(-1, min(y_data[title]) - 0.1), max(1, max(y_data[title]) + 0.1))
        panel.grid()

    fig.suptitle("Change in state vs State")
    fig.tight_layout(rect=[0, 0.05, 0.95, 1])  # leave room for the title
    plt.show()

def plot_pair_plot(data, verdict):
    """
    Plot all six pairings of the four state variables against each other.

    Args:
        data (dict): Maps 'cart_location', 'cart_velocity', 'pole_angle' and
            'pole_velocity' to sequences of recorded values.
        verdict (str): Free-text description appended to the figure title.
    """
    # (x, y) variable for each panel, listed row by row of the 3x2 grid.
    axis_pairs = (
        ('cart_location', 'cart_velocity'),
        ('cart_location', 'pole_angle'),
        ('cart_velocity', 'pole_angle'),
        ('cart_velocity', 'pole_velocity'),
        ('pole_angle', 'pole_velocity'),
        ('pole_velocity', 'cart_location'),
    )

    figure, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 12))
    for axis, (name_x, name_y) in zip(axes.flat, axis_pairs):
        axis.plot(data[name_x], data[name_y], 'r-')
        axis.set_title(name_y + ' vs ' + name_x)
        axis.set_xlabel(name_x)
        axis.set_ylabel(name_y)
        axis.grid()

    # Figure-level title summarising the initial velocities and the verdict.
    figure.suptitle(f"Initial Carriage velocity: {data['cart_velocity'][0]:.2f} m/s \nPole angular velocity: {data['pole_velocity'][0]:.2f} rad/s \n{verdict}")
    figure.tight_layout(rect=[0, 0.05, 0.95, 1])  # leave room for the title
    plt.show()

def plot_phase_plot(data, verdict):
    """
    Plot the two phase portraits of a rollout side by side.

    The left panel shows cart_velocity against cart_location, the right
    panel pole_velocity against pole_angle.

    Args:
        data (dict): Maps the four state names to sequences of values.
        verdict (str): Free-text description appended to the figure title.
    """
    portraits = (
        ('cart_location', 'cart_velocity'),
        ('pole_angle', 'pole_velocity'),
    )

    figure, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 4))
    for axis, (position, velocity) in zip(axes, portraits):
        axis.plot(data[position], data[velocity], 'r-')
        axis.set_title(velocity + ' vs ' + position)
        axis.set_xlabel(position)
        axis.set_ylabel(velocity)
        axis.grid()

    # Figure-level title summarising the initial velocities and the verdict.
    figure.suptitle(f"Initial Carriage velocity: {data['cart_velocity'][0]:.2f} m/s \nPole angular velocity: {data['pole_velocity'][0]:.2f} rad/s \n{verdict}")
    figure.tight_layout(rect=[0, 0.05, 0.95, 1])  # leave room for the title
    plt.show()

In [None]:
# Experiments: zero-force rollouts from different initial conditions.
# Experiment 1: state (cart_location=0, cart_velocity=0, pole_angle=pi,
# pole_velocity=5), force 0, 100 steps. Plots every state variable against
# the iteration number, then the cart and pole phase portraits.
data = rollout(initial_state=(0, 0, np.pi, 5), initial_force=0, num_steps=100)
plot_data_time(data, "Simple osscillation of pole")
plot_phase_plot(data, "Simple osscillation of pole")

In [None]:
# Experiment 2: Rollout with initial state (0, 0, pi, 13.85) and initial force 0.
# Same plots as the other experiments; only the initial pole velocity differs.
data = rollout(initial_state=(0, 0, np.pi, 13.85), initial_force=0, num_steps=100)
plot_data_time(data, "Complete rotation")
plot_phase_plot(data, "Complete rotation")

In [16]:
# Experiment 3: Rollout with initial state (0, 1, pi, 8) and initial force 0.
# Unlike experiments 1 and 2 the cart starts with a non-zero velocity.
data = rollout(initial_state=(0, 1, np.pi, 8), initial_force=0, num_steps=100)
plot_data_time(data, "Oscillation of pole and cart")
plot_phase_plot(data, "Oscillation of pole and cart")

In [17]:
# Experiment 4: Rollout with initial state (0, 10, pi, 14) and initial force 0.
# Both the cart velocity and the pole velocity start large.
data = rollout(initial_state=(0, 10, np.pi, 14), initial_force=0, num_steps=100)
plot_data_time(data, "complete rotation")
plot_phase_plot(data, "complete rotation")

In [None]:

def count_revolution(initial_state, initial_force, num_steps):
    """
    Count how often the pole enters a small window around angle zero.

    The simulation is advanced under a constant force. Each time the remapped
    pole angle moves from outside to inside ``|angle| < 0.3`` rad, one pass
    is counted. Angle zero is presumably the upright position, given that the
    experiments start the pole at pi; confirm against the CartPole class.

    Args:
        initial_state (tuple): State handed to ``env.setState``, ordered as
            (cart_location, cart_velocity, pole_angle, pole_velocity).
        initial_force (float): Force passed to every ``performAction`` call.
        num_steps (int): Number of steps to simulate. As in ``rollout``, the
            loop runs ``num_steps + 1`` times.

    Returns:
        int: The number of counted passes, reduced by one when more than one
            pass was counted (correction kept from the original code).
    """
    upright_tolerance = 0.3  # half-width of the detection window, in radians

    env = CartPole(visual=False)
    env.reset()

    # Set the initial state
    env.setState(initial_state)

    num_revolutions = 0
    # True while the pole is outside the window, so that several consecutive
    # samples inside the window are counted as a single pass.
    new_revolution = True

    for _ in range(num_steps + 1):
        # Perform the action
        env.performAction(initial_force)

        # Remap the angle to [-pi, pi] BEFORE testing it. The action can push
        # the raw angle past +/-pi, and a raw value close to +/-2*pi would
        # otherwise fail the window test below and the pass would be missed.
        env.remap_angle()

        if abs(env.pole_angle) < upright_tolerance:
            if new_revolution:
                num_revolutions += 1
            new_revolution = False
        else:
            new_revolution = True

    # NOTE(review): correction kept unchanged from the original; its intent
    # is not documented. It maps counts 1, 2, 3, ... to 1, 1, 2, ...
    if num_revolutions > 1:
        num_revolutions -= 1

    return num_revolutions

def generate_heatmap(num_steps, initial_force=0, sim_steps=None,
                     cart_velocity_range=(-10, 10),
                     pole_velocity_range=(-15, 15)):
    """
    Plot a heatmap of ``count_revolution`` over a grid of initial velocities.

    Every grid point starts from cart_location 0 and pole_angle pi; only the
    initial cart velocity (rows) and pole velocity (columns) are varied.

    Args:
        num_steps (int): Number of grid points along each velocity axis.
        initial_force (float): Constant force used in every simulation.
        sim_steps (int | None): Number of simulation steps per grid point.
            ``None`` keeps the historical behaviour of reusing ``num_steps``,
            which ties the simulated horizon to the grid resolution; pass a
            value to compare grids of different resolution fairly.
        cart_velocity_range (tuple): (min, max) initial cart velocity.
        pole_velocity_range (tuple): (min, max) initial pole velocity.

    Returns:
        None: the heatmap is shown with ``plt.show()``.
    """
    if sim_steps is None:
        sim_steps = num_steps

    cart_velocity = np.linspace(*cart_velocity_range, num_steps)
    pole_velocity = np.linspace(*pole_velocity_range, num_steps)

    revolution = np.zeros((num_steps, num_steps), dtype=int)
    for i, cv in enumerate(cart_velocity):
        for j, pv in enumerate(pole_velocity):
            # Build a fresh state per run instead of mutating one shared list.
            initial_state = [0, cv, np.pi, pv]
            revolution[i, j] = count_revolution(initial_state, initial_force, sim_steps)

    df = pd.DataFrame(revolution, index=cart_velocity, columns=pole_velocity)
    fig, ax = plt.subplots(figsize=(12, 8))
    # Draw on the axes created above rather than on the implicit current axes.
    sns.heatmap(df, annot=False, cmap="coolwarm", ax=ax)

    # Set axis labels
    ax.set_xlabel("initial pole velocity", fontsize=14)
    ax.set_ylabel("initial cart velocity", fontsize=14)
    ax.set_title("Revolutions", fontsize=16)

    # Label every 5th cell; + 0.5 puts the tick at the centre of the cell.
    tick_every = 5
    tick_positions = np.arange(0, num_steps, tick_every)

    ax.set_xticks(tick_positions + 0.5)
    ax.set_xticklabels([f"{pole_velocity[i]:.2f}" for i in tick_positions], rotation=45, fontsize=12)

    ax.set_yticks(tick_positions + 0.5)
    ax.set_yticklabels([f"{cart_velocity[i]:.2f}" for i in tick_positions], rotation=0, fontsize=12)

    # Show the plot
    plt.show()


generate_heatmap(num_steps=50)

    

In [None]:
def generate_heatmap_zoomed(num_steps, initial_force=0, sim_steps=None,
                            cart_velocity_range=(8, 10),
                            pole_velocity_range=(13, 15)):
    """
    Plot a zoomed-in heatmap of ``count_revolution`` over initial velocities.

    Every grid point starts from cart_location 0 and pole_angle pi; only the
    initial cart velocity (rows) and pole velocity (columns) are varied. The
    default ranges cover cart velocities 8..10 and pole velocities 13..15.

    Args:
        num_steps (int): Number of grid points along each velocity axis.
        initial_force (float): Constant force used in every simulation.
        sim_steps (int | None): Number of simulation steps per grid point.
            ``None`` keeps the historical behaviour of reusing ``num_steps``,
            which ties the simulated horizon to the grid resolution.
        cart_velocity_range (tuple): (min, max) initial cart velocity.
        pole_velocity_range (tuple): (min, max) initial pole velocity.

    Returns:
        None: the heatmap is shown with ``plt.show()``.
    """
    if sim_steps is None:
        sim_steps = num_steps

    cart_velocity = np.linspace(*cart_velocity_range, num_steps)
    pole_velocity = np.linspace(*pole_velocity_range, num_steps)

    revolution = np.zeros((num_steps, num_steps), dtype=int)
    for i, cv in enumerate(cart_velocity):
        for j, pv in enumerate(pole_velocity):
            # Build a fresh state per run instead of mutating one shared list.
            initial_state = [0, cv, np.pi, pv]
            revolution[i, j] = count_revolution(initial_state, initial_force, sim_steps)

    df = pd.DataFrame(revolution, index=cart_velocity, columns=pole_velocity)
    fig, ax = plt.subplots(figsize=(12, 8))
    # Draw on the axes created above rather than on the implicit current axes.
    sns.heatmap(df, annot=False, cmap="coolwarm", ax=ax)

    # Set axis labels
    ax.set_xlabel("initial pole velocity", fontsize=14)
    ax.set_ylabel("initial cart velocity", fontsize=14)
    ax.set_title("Revolutions", fontsize=16)

    # Label every 5th cell; + 0.5 puts the tick at the centre of the cell.
    tick_every = 5
    tick_positions = np.arange(0, num_steps, tick_every)

    ax.set_xticks(tick_positions + 0.5)
    ax.set_xticklabels([f"{pole_velocity[i]:.2f}" for i in tick_positions], rotation=45, fontsize=12)

    ax.set_yticks(tick_positions + 0.5)
    ax.set_yticklabels([f"{cart_velocity[i]:.2f}" for i in tick_positions], rotation=0, fontsize=12)

    # Show the plot
    plt.show()


generate_heatmap_zoomed(num_steps=50)

***Task 1.2***

Reducing the time step (delta time) makes the relationship between the current state and the next state much more linear; in fact, in the limit of an infinitesimal time step the relationship is perfectly linear. However, this would make the control very slow to respond. Also, if everything were linear, there would be no need for non-linear control.

In [5]:

def sweep(num_steps, initial_force):
    """
    Sweep each state variable in turn and plot its value after one action.

    For every state variable, the other three are held at random values while
    the swept one takes ``num_steps`` evenly spaced values. One action is
    performed from each resulting state, and the swept variable after the
    action is plotted against its value before.

    Args:
        num_steps (int): Number of points in each sweep.
        initial_force (float): Force passed to ``performAction``.

    Returns:
        None: one figure per state variable is shown via ``plot_x_y``.
    """
    def random_state():
        # Draw order matters for reproducibility: location, velocity,
        # angle, angular velocity.
        return [np.random.uniform(-10, 10), np.random.uniform(-10, 10),
                np.random.uniform(-np.pi, np.pi), np.random.uniform(-15, 15)]

    sweep_ranges = (
        ('cart_location', -10, 10),
        ('cart_velocity', -10, 10),
        ('pole_angle', -np.pi, np.pi),
        ('pole_velocity', -15, 15),
    )
    grids = {name: np.linspace(low, high, num_steps) for name, low, high in sweep_ranges}

    env = CartPole(visual=False)

    # Left disabled in the original, with the note that it makes the angle
    # relationship a lot more linear:
    # env.setSimParams(sim_steps=5, delta_time=0.02)

    state = random_state()

    for index, (name, _, _) in enumerate(sweep_ranges):
        after_action = []
        for value in grids[name]:
            state[index] = value
            env.reset()
            env.setState(state)
            env.performAction(initial_force)
            after_action.append(env.getState()[index])

        plot_x_y(grids[name], after_action, name, name + ' after action')

        # Use a new random background state for the next variable.
        state = random_state()

sweep(num_steps=100, initial_force=0)

In [22]:
def difference_sweep(num_steps, initial_force):
    """
    Sweep each state variable in turn and plot its change after one action.

    For every state variable, the other three are held at random values while
    the swept one takes ``num_steps`` evenly spaced values. One action is
    performed from each resulting state and the change in the swept variable
    (value after minus value before) is recorded.

    Args:
        num_steps (int): Number of points in each sweep.
        initial_force (float): Force passed to ``performAction``.

    Returns:
        None: a single 2x2 figure is shown via ``plot_state``.
    """
    def random_state():
        # Draw order matters for reproducibility: location, velocity,
        # angle, angular velocity.
        return [np.random.uniform(-10, 10), np.random.uniform(-10, 10),
                np.random.uniform(-np.pi, np.pi), np.random.uniform(-15, 15)]

    sweep_ranges = (
        ('cart_location', -10, 10),
        ('cart_velocity', -10, 10),
        ('pole_angle', -np.pi, np.pi),
        ('pole_velocity', -15, 15),
    )
    state_sweeps = {name: np.linspace(low, high, num_steps) for name, low, high in sweep_ranges}
    changes = {name: [] for name, _, _ in sweep_ranges}

    env = CartPole(visual=False)

    # Left disabled in the original, with the note that it makes the angle
    # relationship a lot more linear:
    # env.setSimParams(sim_steps=5, delta_time=0.02)

    state = random_state()

    for index, (name, _, _) in enumerate(sweep_ranges):
        for value in state_sweeps[name]:
            state[index] = value
            env.reset()
            env.setState(state)
            env.performAction(initial_force)
            changes[name].append(env.getState()[index] - state[index])

        # Use a new random background state for the next variable.
        state = random_state()

    plot_state(state_sweeps, changes)


difference_sweep(num_steps=100, initial_force=0)

In [None]:
def difference_sweep_2d(num_steps, initial_force):
    """
    Plot contour maps of the one-step state change over pairs of variables.

    For each of the six pairs of state variables, ``num_steps`` random points
    are scattered over the pair while the other two variables are held at
    random values. One action is performed from every point. Two filled
    contour plots are shown per pair: the change in the first variable and
    the change in the second.

    Args:
        num_steps (int): Number of random sample points per variable.
        initial_force (float): Force passed to ``performAction``.

    Returns:
        None: twelve figures are shown.
    """
    def random_state():
        # Draw order matters for reproducibility: location, velocity,
        # angle, angular velocity.
        return [np.random.uniform(-10, 10), np.random.uniform(-10, 10),
                np.random.uniform(-np.pi, np.pi), np.random.uniform(-15, 15)]

    def show_contours(x_name, y_name, values, changed_name):
        # One filled contour plot of ``values`` over the (x_name, y_name) samples.
        plt.figure()
        filled = plt.tricontourf(samples[x_name], samples[y_name], values, levels = 10)
        plt.title(f'Contours of change in {changed_name}\n')
        plt.xlabel(x_name)
        plt.ylabel(y_name)
        plt.colorbar(filled)
        plt.show()

    sample_ranges = (
        ('cart_location', -10, 10),
        ('cart_velocity', -10, 10),
        ('pole_angle', -np.pi, np.pi),
        ('pole_velocity', -15, 15),
    )
    names = [name for name, _, _ in sample_ranges]
    samples = {name: np.random.uniform(low, high, num_steps) for name, low, high in sample_ranges}

    env = CartPole(visual=False)
    state = random_state()

    pairs = [(a, b) for a in range(len(names)) for b in range(a + 1, len(names))]
    for first, second in pairs:
        name_first, name_second = names[first], names[second]

        change_first = []
        change_second = []
        for k in range(len(samples[name_first])):
            state[first] = samples[name_first][k]
            state[second] = samples[name_second][k]
            env.reset()
            env.setState(state)
            env.performAction(initial_force)
            new_state = env.getState()
            change_first.append(new_state[first] - state[first])
            change_second.append(new_state[second] - state[second])

        show_contours(name_first, name_second, change_first, name_first)
        show_contours(name_first, name_second, change_second, name_second)

        # Use a new random background state for the next pair.
        state = random_state()

difference_sweep_2d(num_steps=10000, initial_force=0)

***Task 1.3***

In [45]:
def convert_dict_to_array(data):
    """
    Turn a per-variable state dict into a 2D array with one row per sample.

    Columns are ordered cart_location, cart_velocity, pole_angle,
    pole_velocity. Rows are built with ``zip``, so the result has as many
    rows as the shortest of the four sequences.
    """
    column_order = ('cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity')
    rows = [row for row in zip(*(data[name] for name in column_order))]
    return np.array(rows)

def generate_data_rollout(num_steps, initial_force):
    """
    Build a training set from one rollout that starts at a fixed state.

    Args:
        num_steps (int): The number of steps to simulate.
        initial_force (float): The force applied to the cart at every step.

    Returns:
        X (np.ndarray): States along the rollout, shape (num_steps, 4).
        Y (np.ndarray): Change to the following state, shape (num_steps, 4).
    """
    # Alternative starting points tried in the original:
    # a random state: (np.random.uniform(-10, 10), np.random.uniform(-10, 10), np.random.uniform(-np.pi, np.pi), np.random.uniform(-15, 15))
    # [[0, 0, np.pi, 5], [0, 0, np.pi, 14], [0, 1, np.pi, 8], [0, 10, np.pi, 14], [0, -2, 3, 0]]
    initial_state = [0, 0, np.pi, 5]

    trajectory = rollout(initial_state=initial_state, initial_force=initial_force,
                         num_steps=num_steps, visual=False)
    states = convert_dict_to_array(trajectory)

    # NOTE(review): rollout remaps the pole angle after every step, so the
    # angle column of Y jumps by about 2*pi wherever the angle wraps.
    # Confirm this is intended for the regression targets.
    Y = np.array([following - current for current, following in zip(states[:-1], states[1:])])

    # The last state has no successor, so it is dropped from the inputs.
    X = states[:-1]

    print("shape of X:", X.shape, "\nshape of Y:", Y.shape)
    return X, Y

def generate_data_random(num_steps, initial_force):
    """
    Build a training set from independent, randomly drawn states.

    Each sample draws a random state, performs one action from it and
    records the state together with the change to the next state.

    Args:
        num_steps (int): Number of samples to generate.
        initial_force (float): Force passed to ``performAction``.

    Returns:
        X (np.ndarray): Sampled states, one row per sample.
        Y (np.ndarray): Next state minus sampled state, one row per sample.
    """
    columns = ('cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity')
    x_data = {name: [] for name in columns}
    y_data = {name: [] for name in columns}

    env = CartPole(visual=False)
    env.reset()

    for _ in range(num_steps):
        initial_state = [np.random.uniform(-10, 10), np.random.uniform(-10, 10),
                         np.random.uniform(-np.pi, np.pi), np.random.uniform(-15, 15)]
        env.reset()
        env.setState(initial_state)
        env.performAction(initial_force)

        # Remap the angle to be between -pi and pi.
        # NOTE(review): the angle change recorded below is taken after this
        # remap, so it jumps by about 2*pi wherever the angle wraps.
        # Confirm this is intended for the regression targets.
        env.remap_angle()

        next_state = env.getState()

        for position, name in enumerate(columns):
            x_data[name].append(initial_state[position])
            y_data[name].append(next_state[position] - initial_state[position])

    X = convert_dict_to_array(x_data)
    Y = convert_dict_to_array(y_data)

    print("shape of X:", X.shape, "\nshape of Y:", Y.shape)
    return X, Y

X, Y = generate_data_random(num_steps=500, initial_force=0)
# X, Y = generate_data_rollout(num_steps = 500, initial_force = 0)

shape of X: (500, 4) 
shape of Y: (500, 4)


In [None]:
# Average condition number of X^T X over several independent random data sets.
# The draws are bound to X_trial / Y_trial so that this diagnostic does not
# overwrite the training set X, Y that the regression cell fits on.

n_trials = 20  # number of independent data sets to average over
sum_k = 0

for _ in range(n_trials):
    X_trial, Y_trial = generate_data_random(num_steps=500, initial_force=0)
    condition_number = np.linalg.cond(X_trial.T @ X_trial)
    sum_k += condition_number

average_condition_number = sum_k / n_trials
print(average_condition_number)

In [46]:
def linear_regression(X, Y, intercept=False):
    """
    Fit a least-squares linear map from X to Y.

    Args:
        X (numpy.ndarray): Input data, one row per sample.
        Y (numpy.ndarray): Target data, one row per sample.
        intercept (bool): When true, a column of ones is prepended to X so
            that the first row of the returned weights is the bias term.

    Returns:
        W (numpy.ndarray): Least-squares weights such that ``X @ W``
            approximates ``Y`` (with the ones column included in X when
            ``intercept`` is true).
        Y_pred (numpy.ndarray): Predictions of the fitted model on X.
    """
    # Add a column of ones to X for the intercept term
    if intercept:
        X = np.hstack((np.ones((X.shape[0], 1)), X))

    # Solve with lstsq instead of the normal equations
    # (np.linalg.inv(X.T @ X) @ X.T @ Y), which also works when X.T @ X is
    # ill-conditioned. rcond=None selects the machine-precision cutoff
    # explicitly and avoids the FutureWarning raised by NumPy 1.x when the
    # argument is omitted.
    W = np.linalg.lstsq(X, Y, rcond=None)[0]

    # Make predictions
    Y_pred = X @ W

    return W, Y_pred

# Fit the linear one-step model (no intercept term) to the generated data.
W, Y_pred = linear_regression(X, Y, intercept=False)

In [47]:
def difference_sweep_predicted(num_steps, initial_force, W):
    """
    Sweep each state variable and record actual and predicted transitions.

    For every state variable, the other three are held at random values while
    the swept one takes ``num_steps`` evenly spaced values. From each state
    the simulator performs one action and the linear model ``W`` predicts the
    change in state.

    Args:
        num_steps (int): Number of points in each sweep.
        initial_force (float): Force passed to ``performAction``.
        W (numpy.ndarray): Linear model weights; ``state @ W`` is taken as
            the predicted change in state.

    Returns:
        tuple: Five dicts keyed by the swept variable name, each holding an
            array with one row per sweep point: the states, the actual change
            in state, the predicted change, the actual next state (angle
            remapped by the environment) and the predicted next state (angle
            remapped with ``remap_angle``).
    """
    def random_state():
        # Draw order matters for reproducibility: location, velocity,
        # angle, angular velocity.
        return [np.random.uniform(-10, 10), np.random.uniform(-10, 10),
                np.random.uniform(-np.pi, np.pi), np.random.uniform(-15, 15)]

    sweep_ranges = (
        ('cart_location', -10, 10),
        ('cart_velocity', -10, 10),
        ('pole_angle', -np.pi, np.pi),
        ('pole_velocity', -15, 15),
    )
    titles = [name for name, _, _ in sweep_ranges]
    state_sweeps = {name: np.linspace(low, high, num_steps) for name, low, high in sweep_ranges}

    env = CartPole(visual=False)

    state = random_state()

    x_data = {title: [] for title in titles}
    y_data = {title: [] for title in titles}
    y_data_pred = {title: [] for title in titles}
    actual_next_state_data = {title: [] for title in titles}
    pred_next_state_data = {title: [] for title in titles}

    for index, title in enumerate(titles):
        for value in state_sweeps[title]:
            state[index] = value
            current = np.array(state)
            x_data[title].append(current)

            # Ground truth from the simulator.
            env.reset()
            env.setState(state)
            env.performAction(initial_force)
            env.remap_angle()

            observed = np.array(env.getState())
            y_data[title].append(observed - current)
            actual_next_state_data[title].append(observed)

            # Prediction from the linear model.
            pred_step = current @ W
            y_data_pred[title].append(pred_step)
            pred_next_state = current + pred_step
            pred_next_state[2] = remap_angle(pred_next_state[2])
            pred_next_state_data[title].append(pred_next_state)

        # New random background state for the next variable. As in the
        # original, it is an ndarray from here on rather than a list.
        state = np.array(random_state())

    def stacked(records):
        # One 2D array per swept variable, one row per sweep point.
        return {title: np.array(records[title]) for title in titles}

    return (stacked(x_data), stacked(y_data), stacked(y_data_pred),
            stacked(actual_next_state_data), stacked(pred_next_state_data))

x_sweep, y_sweep, y_sweep_pred, actual_next_state_sweep, pred_next_state_sweep = difference_sweep_predicted(num_steps=100, initial_force=0, W=W)

In [48]:
def plot_linear_regression_sweep_1(x_data, y_data, y_data_pred, actual_next_state_sweep, pred_next_state_sweep, step):
    """
    Plot actual and predicted sweep results against the swept state variable.

    Args:
        x_data (dict): Per swept variable, array of states (one row each).
        y_data (dict): Per swept variable, actual change in state.
        y_data_pred (dict): Per swept variable, predicted change in state.
        actual_next_state_sweep (dict): Per swept variable, actual next state.
        pred_next_state_sweep (dict): Per swept variable, predicted next state.
        step (bool): True plots the change in state (with fixed axis limits);
            False plots the next state.
    """
    names = ('cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity')
    figure, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 5))

    # Panel k shows column k of the arrays recorded while sweeping variable k.
    for column, (axis, name) in enumerate(zip(axes.flat, names)):
        if step:
            actual_source, predicted_source = y_data[name], y_data_pred[name]
        else:
            actual_source, predicted_source = actual_next_state_sweep[name], pred_next_state_sweep[name]

        swept = x_data[name][:, column]
        actual = actual_source[:, column]
        predicted = predicted_source[:, column]

        axis.plot(swept, actual, 'r-', label = "actual")
        axis.plot(swept, predicted, 'b--', label = "pred")
        axis.set_title(name)
        axis.set_xlabel("current state")

        if step:
            axis.set_ylabel("next step")
            axis.set_xlim(min(swept) - 0.1, max(swept) + 0.1)
            # Always show at least [-1, 1] on the y-axis.
            lower = min([-1, predicted.min() - 0.1, actual.min() - 0.1])
            upper = max([1, predicted.max() + 0.1, actual.max() + 0.1])
            axis.set_ylim(lower, upper)
        else:
            axis.set_ylabel("next state")

        axis.grid()
        axis.legend()

    figure.suptitle("Forecast sweep")
    figure.tight_layout(rect=[0, 0.03, 1, 1])  # leave room for the title
    plt.show()

def plot_linear_regression_sweep_2(y_data, y_data_pred, actual_next_state_data, pred_next_state_data, step):
    """
    Plot predicted against actual sweep results with a Y = X reference line.

    Args:
        y_data (dict): Per swept variable, actual change in state.
        y_data_pred (dict): Per swept variable, predicted change in state.
        actual_next_state_data (dict): Per swept variable, actual next state.
        pred_next_state_data (dict): Per swept variable, predicted next state.
        step (bool): True plots the change in state (with fixed axis limits);
            False plots the next state.
    """
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 5))
    titles = ['cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity']

    # Panel k shows column k of the arrays recorded while sweeping variable k.
    for column, (panel, title) in enumerate(zip(ax.flat, titles)):
        if step:
            Y = y_data[title]
            Y_pred = y_data_pred[title]
        else:
            Y = actual_next_state_data[title]
            Y_pred = pred_next_state_data[title]

        actual = Y[:, column]
        predicted = Y_pred[:, column]

        panel.plot(actual, predicted, 'r-', label = "predicted")
        panel.plot(actual, actual, 'b--', label = "Y = X")
        panel.set_title(title)

        if step:
            panel.set_xlabel("actual step")
            panel.set_ylabel("pred step")
            # The x-axis carries the actual values, so take its limits from
            # them. The original read an undefined local ``X`` here, which
            # resolved to whatever global ``X`` existed in the notebook.
            panel.set_xlim(actual.min() - 0.1, actual.max() + 0.1)
            panel.set_ylim(min(-1, predicted.min() - 0.1), max(1, predicted.max() + 0.1))
        else:
            panel.set_xlabel("actual state")
            panel.set_ylabel("pred state")

        panel.legend()
        panel.grid()

    fig.suptitle("Forecast sweep")
    fig.tight_layout(rect=[0, 0.03, 1, 1])  # leave room for the title
    plt.show()

plot_linear_regression_sweep_1(x_sweep, y_sweep, y_sweep_pred, actual_next_state_sweep, pred_next_state_sweep, step = True)
plot_linear_regression_sweep_2(y_sweep, y_sweep_pred, actual_next_state_sweep, pred_next_state_sweep, step = False)

***Task 1.4***

In [None]:
def forecast(initial_state, num_steps, W):
    """
    Iterate the learned linear model to forecast a trajectory.

    Args:
        initial_state (list): The state to start from, ordered as
            (cart_location, cart_velocity, pole_angle, pole_velocity).
        num_steps (int): Number of states to record.
        W (numpy.ndarray): Learned weights; ``state @ W`` is the predicted
            change in state.

    Returns:
        X_forecast (dict): Maps each state name to a list of ``num_steps``
            forecasted values, the first being the initial state.
    """
    state_keys = ('cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity')
    X_forecast = {key: [] for key in state_keys}

    state = np.array(initial_state)

    # NOTE(review): this records num_steps states, whereas rollout records
    # num_steps + 1, so a forecast is one entry shorter than the rollout it
    # is compared against.
    for _ in range(num_steps):
        for position, key in enumerate(state_keys):
            X_forecast[key].append(state[position])

        # Predicted next state = current state + predicted change.
        state = state + (state @ W)

        # Remap the angle to be between -pi and pi.
        state[2] = remap_angle(state[2])

    return X_forecast

In [None]:
# Inspect the fitted weight matrix of the linear model.
print(W)

In [None]:
def plot_data_vs_forecast_time(data, forecast, graph_title):
    """
    Overlay an actual and a forecasted trajectory for every state variable.

    Args:
        data (dict): Actual trajectory; maps each state name to a sequence.
        forecast (dict): Forecasted trajectory with the same keys.
        graph_title (str): Title placed above the 2x2 figure.
    """
    names = ('cart_location', 'cart_velocity', 'pole_angle', 'pole_velocity')
    figure, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 5))

    for axis, name in zip(axes.flat, names):
        actual_series = data[name]
        forecast_series = forecast[name]
        # Each series is plotted against its own index, so the two may
        # differ in length.
        axis.plot(np.arange(0, len(actual_series)), actual_series, 'r-', label='actual')
        axis.plot(np.arange(0, len(forecast_series)), forecast_series, 'b-', label='forecast')
        axis.set_title(name)
        axis.set_xlabel('Iterations')
        axis.set_ylabel(name)
        axis.grid()
        axis.legend()

    figure.suptitle(graph_title)
    figure.tight_layout(rect=[0, 0.03, 1, 1])  # leave room for the title
    plt.show()

# Example initial states for testing, each ordered as
# (cart_location, cart_velocity, pole_angle, pole_velocity)
initial_states = [[0, 0, np.pi, 5], [0, 0, np.pi, 14], [0, 1, np.pi, 8], [0, 10, np.pi, 14], [0, -2, 3, 0]]

# set the initial force to 0
initial_force = 0

# set the number of steps to forecast
num_steps = 100

# Compare the linear model's forecast with the simulator for each initial state.
for initial_state in initial_states:
    # obtain the forecasted state (num_steps recorded states)
    X_forecast = forecast(initial_state, num_steps, W)

    # obtain the actual state (num_steps + 1 recorded states, no animation)
    X_actual = rollout(initial_state, initial_force, num_steps, visual=False)

    # plot with graph title as initial state formatted to 2 decimal places
    graph_title = f"Initial state: {initial_state[0]:.2f}, {initial_state[1]:.2f}, {initial_state[2]:.2f}, {initial_state[3]:.2f}"
    plot_data_vs_forecast_time(X_actual, X_forecast, graph_title=graph_title)

[0, 0, 3.141592653589793, 5]
[0, 0, 3.141592653589793, 14]
[0, 1, 3.141592653589793, 8]
[0, 10, 3.141592653589793, 14]
[0, -2, 3, 0]


***task 1.4***

Large $\sigma_j$ → the basis function is insensitive to dimension $j$, because that
dimension's term in the scaled sum of squares contributes almost nothing to the basis function.

lambda is a regulariser --> helps reduce overfitting
need trial and error to get the correct value of lambda

These are actually gaussian processes (number of basis functions goes to infinity)

more trial and error to choose sigmas for basis functions. rule of thumb width: standard deviation of data * scalar, where scalar can also be tuned

import jax.numpy as jnp

jit will compile code so the second time the code is run, the compiled code is run instead of redoing compilation (use @jit decorator)

Jax allows for GPU using device (check if there is mac)

Be ready for JAX to give errors

Using both the sine and the cosine of the angle allows you to reconstruct the angle and should give a really good fit

use as much data as possible, training will take a couple of minutes so use JIT

do not use for loops with JAX. use JAX scan to replace for loops (not needed for week 2, useful for week 3)