In [1]:
import numpy as np
import utils
# Disable line wrapping when arrays are printed, so each row stays on one line.
np.set_printoptions(linewidth=np.inf)

In [2]:
def get_pv(idx, num_p=500):
    """Split a 1-based flat state index into its ``(p, v)`` components.

    The index is first shifted to 0-based; ``p`` is then the remainder and
    ``v`` the quotient of the division by ``num_p`` (``p`` varies fastest).

    Args:
        idx: 1-based state index. Only elementwise arithmetic is applied, so
            a scalar or a NumPy array of indices can be passed.
        num_p: Number of distinct ``p`` values per ``v`` level. Defaults to
            500, the value that was previously hard-coded.

    Returns:
        Tuple ``(p, v)`` with the 0-based components.
    """
    # divmod yields (quotient, remainder) == (v, p) in a single step.
    v, p = divmod(idx - 1, num_p)
    return p, v


In [11]:
# Load the raw transition table. Its columns are consumed below as
# (state, action, reward, next_state).
data = utils.read_data('medium')
states = data[:, 0]
actions = data[:, 1]
rewards = data[:, 2]
next_states = data[:, 3]

# Index every distinct reward value. `return_inverse` gives, for each row, the
# position of its reward inside the sorted `unique_rewards` -- the same values
# a per-row dictionary lookup would produce, without a Python-level loop.
unique_rewards, reward_idx = np.unique(rewards, return_inverse=True)
reward_to_idx = {reward: idx for idx, reward in enumerate(unique_rewards)}
idx_to_reward = {idx: reward for reward, idx in reward_to_idx.items()}

# get_pv only uses elementwise arithmetic, so whole columns are decoded at once
# instead of calling it once per row. Each result has shape (N, 2): [p, v].
states_pv = np.column_stack(get_pv(states))
next_states_pv = np.column_stack(get_pv(next_states))
actions = actions.reshape(-1, 1)       # Shape (N, 1)
rewards = rewards.reshape(-1, 1)       # Shape (N, 1)

# Column layout of data_: [p, v, action, next_p, next_v, reward]
data_ = np.hstack([states_pv, actions, next_states_pv, rewards])

# Rows whose reward equals 100_000.
max_reward = data_[data_[:, -1] == 100_000]
# Per-column maximum over the whole table.
col_max = np.max(data_, axis=0)
# Transitions whose next p is at least 466, ordered by ascending reward.
achieved = data_[data_[:, 3] >= 466]
achieved = achieved[achieved[:, -1].argsort()]

In [34]:
# Distinct p (column 0) and v (column 1) values among current and next states.
u_s_p, u_s_v = np.unique(states_pv[:, 0]), np.unique(states_pv[:, 1])
u_ns_p, u_ns_v = np.unique(next_states_pv[:, 0]), np.unique(next_states_pv[:, 1])

# Report how many distinct values each one holds (p first, then v).
for distinct_values in (u_s_p, u_ns_p, u_s_v, u_ns_v):
    print(distinct_values.shape)


(466,)
(473,)
(98,)
(98,)


In [27]:
# Order the rows by (p, v, action); np.lexsort uses its LAST key as the
# primary sort key, hence the reversed column order.
sort_order = np.lexsort((data_[:, 2], data_[:, 1], data_[:, 0]))
sorted_data = data_[sort_order]

# Maps (p, v, action) -> {(next_p, next_v): number of occurrences}.
unique_combinations = {}
for row in sorted_data:
    state_action = tuple(row[:3])
    outcome = tuple(row[3:5])
    outcome_counts = unique_combinations.setdefault(state_action, {})
    outcome_counts[outcome] = outcome_counts.get(outcome, 0) + 1

# Keep only the state-action pairs that were seen with two or more outcomes.
multi_resulting_params = {
    state_action: outcome_counts
    for state_action, outcome_counts in unique_combinations.items()
    if len(outcome_counts) >= 2
}

for key, results in multi_resulting_params.items():
    print(f"Combination {key} has multiple resulting parameters with counts: {results}")



Combination (0.0, 50.0, 4.0) has multiple resulting parameters with counts: {(0.0, 51.0): 16, (0.0, 52.0): 1}
Combination (0.0, 50.0, 5.0) has multiple resulting parameters with counts: {(0.0, 51.0): 16, (1.0, 52.0): 2, (1.0, 51.0): 1}
Combination (0.0, 51.0, 4.0) has multiple resulting parameters with counts: {(1.0, 53.0): 7, (1.0, 52.0): 8}
Combination (0.0, 51.0, 5.0) has multiple resulting parameters with counts: {(1.0, 52.0): 48, (1.0, 53.0): 9, (2.0, 53.0): 1}
Combination (1.0, 45.0, 3.0) has multiple resulting parameters with counts: {(0.0, 50.0): 1, (0.0, 46.0): 1}
Combination (1.0, 47.0, 3.0) has multiple resulting parameters with counts: {(1.0, 48.0): 1, (0.0, 48.0): 1}
Combination (1.0, 52.0, 3.0) has multiple resulting parameters with counts: {(3.0, 53.0): 1, (2.0, 53.0): 5}
Combination (1.0, 52.0, 4.0) has multiple resulting parameters with counts: {(2.0, 54.0): 3, (3.0, 54.0): 1, (2.0, 53.0): 1}
Combination (1.0, 52.0, 5.0) has multiple resulting parameters with counts: {

In [None]:
# How often do state-action pairs have multiple resulting parameters?
# In data_, check which combinations of columns 0, 1, 2 map to several different combinations of columns 3, 4.


In [21]:
# Show the size of the raw table, then of the `achieved` subset.
for table in (data, achieved):
    print(table.shape)

(100000, 4)
(186, 6)


In [16]:
# Every distinct (p, v) row that occurs as either a current or a next state.
unique_pv = np.unique(
    np.concatenate((states_pv, next_states_pv), axis=0),
    axis=0,
)

In [29]:
# Quantile levels reported for every action.
q = [.25, .5, .75, .95, .99]
print(f"Observed state space quantiles, Q = {q}\n")

# Column 2 of data_ holds the action; columns 0 and 1 hold p and v.
action_column = data_[:, 2]
for action in range(1, 8):
    print(f"Action = {action}")
    action_rows = data_[action_column == action]
    print(f"Number of observations: {len(action_rows)}")
    p_quants = np.quantile(action_rows[:, 0], q)
    v_quants = np.quantile(action_rows[:, 1], q)
    print(f"P Quantiles: {list(p_quants)}")
    print(f"V Quantiles: {list(v_quants)}\n")


Observed state space quantiles, Q = [0.25, 0.5, 0.75, 0.95, 0.99]

Action = 1
Number of observations: 11614
P Quantiles: [164.0, 196.0, 221.0, 329.35000000000036, 369.0]
V Quantiles: [38.0, 45.0, 49.0, 57.0, 62.0]

Action = 2
Number of observations: 9080
P Quantiles: [178.0, 203.0, 226.0, 314.0, 399.2099999999991]
V Quantiles: [39.0, 46.0, 51.0, 58.0, 63.0]

Action = 3
Number of observations: 21760
P Quantiles: [159.0, 198.0, 237.0, 343.0499999999993, 415.0]
V Quantiles: [36.0, 46.0, 52.0, 72.0, 86.0]

Action = 4
Number of observations: 13003
P Quantiles: [174.0, 205.0, 238.0, 362.0, 429.0]
V Quantiles: [43.0, 50.0, 57.0, 77.0, 88.0]

Action = 5
Number of observations: 22742
P Quantiles: [167.0, 208.0, 263.0, 397.0, 451.0]
V Quantiles: [48.0, 56.0, 67.0, 86.0, 93.0]

Action = 6
Number of observations: 9394
P Quantiles: [183.0, 206.0, 232.0, 354.0, 430.0]
V Quantiles: [48.0, 53.0, 60.0, 80.0, 89.0]

Action = 7
Number of observations: 12407
P Quantiles: [173.0, 207.0, 257.0, 377.0, 443.0

In [None]:
# Rebuild the transition table from the raw file so that re-running this cell
# always starts from the same input.
data = utils.read_data('medium')
states = data[:, 0]
actions = data[:, 1]
rewards = data[:, 2]
next_states = data[:, 3]

# get_pv only uses elementwise arithmetic, so whole columns are decoded at once
# instead of calling it once per row. Each result has shape (N, 2): [p, v].
states_pv = np.column_stack(get_pv(states))
next_states_pv = np.column_stack(get_pv(next_states))

# Column layout: [p, v, action, next_p, next_v, reward]. column_stack turns the
# 1-D action / reward vectors into single columns.
data_ = np.column_stack([states_pv, actions, next_states_pv, rewards])

# Next, add self-absorbing states. From the EDA we know that the big reward is
# obtained at p = 466, so every (p, v, action) with p in [466, 500) gets a
# synthetic transition back onto itself with reward 0.
p_values = np.arange(466, 500)
v_values = np.arange(100)
# Separate name so the observed `actions` column is not temporarily clobbered.
absorbing_actions = np.arange(1, 8)

# 'ij' indexing + ravel enumerates the grid with p slowest and action fastest.
p_grid, v_grid, action_grid = np.meshgrid(p_values, v_values, absorbing_actions, indexing='ij')

p_flat = p_grid.ravel()
v_flat = v_grid.ravel()
action_flat = action_grid.ravel()

# Create the new rows by stacking the columns [p, v, action, p, v, 0]
new_rows = np.column_stack((p_flat, v_flat, action_flat, p_flat, v_flat, np.zeros_like(p_flat)))

data_ = np.vstack([data_, new_rows])

# Re-derive the per-column views so they include the synthetic rows.
states_pv = data_[:, [0, 1]]
actions = data_[:, 2]
next_states_pv = data_[:, [3, 4]]
rewards = data_[:, -1]

# `return_inverse` gives each row's position inside the sorted unique rewards,
# i.e. the same indices a per-row dictionary lookup would produce.
unique_rewards, reward_idx = np.unique(rewards, return_inverse=True)
reward_to_idx = {reward: idx for idx, reward in enumerate(unique_rewards)}
idx_to_reward = {idx: reward for reward, idx in reward_to_idx.items()}

In [15]:
# Display the stacked table; columns are [p, v, action, next_p, next_v, reward].
data_

array([[ 200.,   49.,    3.,  200.,   49.,  -25.],
       [ 200.,   49.,    2.,  200.,   48., -100.],
       [ 200.,   48.,    3.,  199.,   48.,  -25.],
       ...,
       [ 499.,   99.,    5.,  499.,   99.,    0.],
       [ 499.,   99.,    6.,  499.,   99.,    0.],
       [ 499.,   99.,    7.,  499.,   99.,    0.]])

In [16]:
# (rows, columns) of the raw table returned by utils.read_data.
data.shape

(100000, 4)

In [17]:
# (rows, columns) of the stacked [p, v, action, next_p, next_v, reward] table.
data_.shape

(123800, 6)