In [1]:
import os
import numpy as np
# from CartPole import CartPole
# no_tanh = False
from cartpole import CartPole
no_tanh = True

In [2]:
def safe_save(filename, array):
    """Save ``array`` in NumPy ``.npy`` format, refusing to overwrite a file.

    ``np.save`` silently appends ``.npy`` to a filename that does not already
    end with it, so the existence check has to be made against the path that
    will actually be written, not against the raw argument.

    Args:
        filename: Destination path (``str`` or ``os.PathLike``). A missing
            ``.npy`` suffix is added, mirroring ``np.save``.
        array: Array-like data to store.

    Raises:
        FileExistsError: If the resolved destination already exists.
    """
    target = os.fspath(filename)
    if not target.endswith(".npy"):
        # Resolve the name np.save would really write to.
        target += ".npy"

    if os.path.exists(target):
        raise FileExistsError(f"File '{target}' already exists. Aborting save.")

    np.save(target, array)
    print(f"Array saved to {target}")

In [3]:
# Headless simulator instance shared by the cells below.
env = CartPole(visual=False)

# Simulation settings; delta_time is also encoded in the dataset filenames.
# NOTE(review): presumably one performAction() call advances the simulation by
# sim_steps sub-steps covering delta_time in total -- confirm against cartpole.py.
env.sim_steps, env.delta_time = 1, 0.01

__NOTE__: the sampling ranges for the state and action variables in the next cell may need to be changed when generating a new dataset.

In [4]:
# Generate a dataset of one-step transitions.
#   X[k]: noisy observation of a randomly sampled state (plus the applied
#         action when include_action is True)
#   Y[k]: change of the noise-free state after one env.performAction() call
n_samples = 8000
include_action = True

X = []
Y = []

for i in range(n_samples):
    # Sample the initial state uniformly over the ranges below.
    cart_pos = np.random.uniform(-10, 10)
    cart_vel = np.random.uniform(-25, 25)
    pole_ang = np.random.uniform(-np.pi, np.pi)
    pole_vel = np.random.uniform(-25, 25)

    if include_action:
        # don't use tanh here even for no_tanh cuz we dont want training action
        # to be concentrated near 10 and -10
        action = np.random.uniform(-10, 10)
    else:
        # Without this fallback `action` is undefined (NameError on a fresh
        # kernel) or stale from a previous run. Apply no force, as the
        # action-free generator kept in the commented-out cell does.
        action = 0.0

    state = np.array([cart_pos, cart_vel, pole_ang, pole_vel])

    env.reset()
    env.setState(state.copy())

    # Read the noisy observation before stepping the simulation.
    # NOTE(review): presumably getNoisyState() returns the current state with
    # observation noise added -- confirm against cartpole.py.
    state_noisy = env.getNoisyState().copy()

    env.performAction(action)
    next_state = env.getState().copy()

    if include_action:
        state = np.append(state, action)

        # noisy
        state_noisy = np.append(state_noisy, action)

    # X.append(state)
    X.append(state_noisy)  # noisy
    # The target is the delta of the true (noise-free) state; state[:4] drops
    # the appended action when there is one.
    Y.append(next_state - state[:4])

    # Show the first few samples as a sanity check.
    if i < 5:
        print(state_noisy)
        print(X[-1])
        print(state)
        print(next_state - state[:4])
        print(Y[-1])
    # print(state)

X = np.array(X)
Y = np.array(Y)


# Encode the generation settings in the filename, e.g. dt 0.01 -> "dt0p010".
dt_str = f"{env.delta_time:.3f}".replace('.', 'p')
action_flag = 'act' if include_action else ''
notanh_flag = 'notanh' if no_tanh else ''
# fname_base = f"dt{dt_str}_N{n_samples}_{action_flag}_{notanh_flag}"
fname_base = f"dt{dt_str}_N{n_samples}_{action_flag}_{notanh_flag}_obsnoise"
fname_X = f"X_{fname_base}.npy"
fname_Y = f"Y_{fname_base}.npy"

# safe_save raises FileExistsError rather than overwriting an earlier dataset.
safe_save(fname_X, X)
safe_save(fname_Y, Y)

[ -4.60134735 -17.60932216   2.3264818  -22.7409456    0.2258428 ]
[ -4.60134735 -17.60932216   2.3264818  -22.7409456    0.2258428 ]
[ -4.53034693 -17.62051803   2.32035938 -22.8366259    0.2258428 ]
[-0.1701707   0.60344768 -0.2138252   1.45410594]
[-0.1701707   0.60344768 -0.2138252   1.45410594]
[ 0.1093054  20.05883305 -0.70042383 -1.48977165 -6.97246391]
[ 0.1093054  20.05883305 -0.70042383 -1.48977165 -6.97246391]
[ 0.17282039 20.08581465 -0.70602683 -1.46618654 -6.97246391]
[ 0.20017401 -0.06841378 -0.01500409 -0.03422237]
[ 0.20017401 -0.06841378 -0.01500409 -0.03422237]
[-5.8010176  -0.62712101  1.77181938 22.81904865 -4.31533696]
[-5.8010176  -0.62712101  1.77181938 22.81904865 -4.31533696]
[-5.78058207 -0.57837888  1.75267726 22.81861883 -4.31533696]
[3.25082912e-04 6.10887170e-01 2.34337843e-01 6.15165509e-01]
[3.25082912e-04 6.10887170e-01 2.34337843e-01 6.15165509e-01]
[ 3.53964943 -5.99987939  0.51385985 18.48195906  8.57572406]
[ 3.53964943 -5.99987939  0.51385985 18.4

In [22]:
# n_samples = 2000

# X = []
# Y = []

# for _ in range(n_samples):
#     cart_pos = np.random.uniform(-5, 5)
#     cart_vel = np.random.uniform(-10, 10)
#     pole_ang = np.random.uniform(-np.pi, np.pi)
#     pole_vel = np.random.uniform(-15, 15)

#     init_state = np.array([cart_pos, cart_vel, pole_ang, pole_vel])

#     env.reset()
#     env.setState(init_state.copy())
#     env.performAction(action=0.0)
#     next_state = env.getState().copy()

#     # Store the input-output pair
#     X.append(init_state.copy())
#     Y.append(next_state - init_state)

# X = np.array(X)
# Y = np.array(Y)

# dt_str = f"{env.delta_time:.3f}".replace('.', 'p')
# fname_base = f"dt{dt_str}_N{n_samples}"
# fname_X = f"X_{fname_base}.npy"
# fname_Y = f"Y_{fname_base}.npy"

# safe_save(fname_X, X)
# safe_save(fname_Y, Y)

In [None]:
# Solve for linear model C such that Y ≈ X @ C.T
# C_T, _, _, _ = np.linalg.lstsq(X, Y, rcond=None)
# C = C_T.T

# safe_save('linear_C.npy', C)

# print(C)

In [5]:
# Verify the saved dataset by replaying its first sample through the simulator.
# The stored inputs are the noisy observations, while the stored targets were
# computed from the noise-free state, so the replayed delta is only expected
# to be close to the stored one, not identical.
test_x = np.load('X_dt0p010_N8000_act_notanh_obsnoise.npy')
test_y = np.load('Y_dt0p010_N8000_act_notanh_obsnoise.npy')

sample_x = test_x[0]
observed_state = sample_x[:4]  # first four entries: the state part of the input
sample_action = sample_x[4]    # fifth entry: the action that was applied

env.reset()
env.setState(observed_state)
print(sample_x)
print(env.getState())

env.performAction(action=sample_action)
# Despite the name, this holds the state *change* produced by the replay.
next_state = env.getState().copy() - observed_state
print(next_state)
print(test_y[0])

[ -4.60134735 -17.60932216   2.3264818  -22.7409456    0.2258428 ]
[ -4.60134735 -17.60932216   2.3264818  -22.7409456 ]
[-0.17012306  0.59701609 -0.21293235  1.4477104 ]
[-0.1701707   0.60344768 -0.2138252   1.45410594]
