In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt

## Define the space

#### Define the actions that can be taken and the movement they result in

In [None]:
random.seed(0)  # seed Python's `random` module, which drives every walk below

# Action codes and the (dx, dy) displacement each one applies to a position.
# Row k of `delta` is the displacement for action code k.
action_space = np.arange(8)
delta = np.array([
    [-1, 0],   # 0: LEFT
    [0, 1],    # 1: UP
    [1, 0],    # 2: RIGHT
    [0, -1],   # 3: DOWN
    [-1, 1],   # 4: LEFT-UP
    [1, 1],    # 5: RIGHT-UP
    [1, -1],   # 6: RIGHT-DOWN
    [-1, -1],  # 7: LEFT-DOWN
])

#### Create arrays of invalid points for the 13x13 grid (interior coordinates 1–13; border cells lie at 0 and 14)

In [None]:
# Obstacle cells: a horizontal bar along y = 7 covering x = 4..10.
obstacle = np.array([[x, 7] for x in range(4, 11)])

# Border cells just outside the arena.
# Each array holds exactly the cells it needs: 5 along the top/bottom edges and
# 7 along the left/right edges. Pre-allocating larger zero-filled arrays would
# leave unused [0, 0] rows behind, which then end up in `invalid`/`invalid_all`
# and turn the concatenated arrays into floats.
top_border = np.array([[x, 14] for x in range(5, 10)])
bottom_border = np.array([[x, 0] for x in range(5, 10)])
left_border = np.array([[0, y] for y in range(4, 11)])
right_border = np.array([[14, y] for y in range(4, 11)])

# Invalid cells in the bottom-left corner of the grid.
invalid_bl = np.array([[1, 4], [1, 3], [2, 3], [2, 2], [3, 2], [3, 1], [4, 1], [5, 1]])
# Mirror about the middle axes (x -> 14 - x, y -> 14 - y) for the other corners.
# Top left: flip y.
invalid_tl = np.column_stack((invalid_bl[:, 0], 14 - invalid_bl[:, 1]))
# Top right: flip x of the top-left set.
invalid_tr = np.column_stack((14 - invalid_tl[:, 0], invalid_tl[:, 1]))
# Bottom right: flip x of the bottom-left set.
invalid_br = np.column_stack((14 - invalid_bl[:, 0], invalid_bl[:, 1]))

# `invalid_all` includes the obstacle cells; `invalid` leaves them out.
_edge_and_corner_cells = (top_border, bottom_border, left_border, right_border,
                          invalid_bl, invalid_tl, invalid_tr, invalid_br)
invalid_all = np.concatenate((obstacle,) + _edge_and_corner_cells, axis=0)
invalid = np.concatenate(_edge_and_corner_cells, axis=0)

#### Create array of invalid transitions for conditions 3 & 4

In [None]:
# Action codes (same encoding as `action_space`):
#   0 LEFT       1 UP         2 RIGHT        3 DOWN
#   4 LEFT-UP    5 RIGHT-UP   6 RIGHT-DOWN   7 LEFT-DOWN
#
# Each <name>_actions array lists the action codes that the generation cells
# reject when the move lands on a cell of the matching group (F, A, Z, ...).

# Groups used by condition 3
F_actions = np.array((2, 3, 6, 7))
A_actions = np.array((0, 3, 6, 7))

# Groups used by conditions 3 and 4
Z_actions = np.array((7,))
H_actions = np.array((0, 3, 7))
K_actions = np.array((2, 3, 6))
Y_actions = np.array((6,))

# Groups used by condition 4
V_actions = np.array((7, 0))
U_actions = np.array((2, 6))
X_actions = np.array((0, 3, 4, 7))
W_actions = np.array((2, 3, 5, 6))

# Obstacle: actions rejected when stepping onto it from outside (B_actions)
# and when stepping off it (B_exit_actions).
B_actions = np.array((3, 6, 7))
B_exit_actions = np.array((1, 4, 5))

## Generate the data

### Conditions 1 & 2

#### V1: does not record attempts to make invalid moves 
-> more similar to mouse data

In [None]:
# Conditions 1 & 2, V1: random walks in which rejected moves are NOT recorded.
# Only positions that were actually reached are written to `data`, so a step
# is counted only when a move succeeds.
num_trials = 8
num_steps = 1420
data = np.zeros([num_steps, 2, num_trials])  # axes: (step, [x, y], trial)

# Loop-invariant lookup, built once: every cell the agent may not occupy.
blocked_cells = {tuple(cell) for cell in invalid_all.tolist()}

for trial_count in range(num_trials):
    # Start each trial with all eight actions available, so a pruned action
    # set left behind by an earlier trial or cell cannot bias the first move.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = current_position
    step_count = 1
    while step_count < num_steps:
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        if tuple(candidate.tolist()) in blocked_cells:
            # Rejected: stay put and do not retry this action from this cell.
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = current_position
            step_count += 1
            action_space = np.arange(8)

#### V2: does record attempts to make invalid moves
-> more similar to how the RL database originally trains the agent

In [None]:
# Conditions 1 & 2, V2: random walks in which rejected moves ARE recorded.
# Every attempted move consumes one step. For each step, columns 0-1 of `data`
# hold the agent's position after the step and columns 2-3 hold the cell it
# tried to move to (the two pairs are identical when the move was accepted).
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 4, num_trials])  # axes: (step, [x, y, x_try, y_try], trial)

# Loop-invariant lookups, built once instead of on every step.
obstacle_cells = {tuple(cell) for cell in obstacle.tolist()}
border_cells = {tuple(cell) for cell in invalid.tolist()}
entry_blocked = set(B_actions.tolist())      # rejected when stepping onto the obstacle
exit_blocked = set(B_exit_actions.tolist())  # rejected when stepping off the obstacle

for trial_count in range(num_trials):
    # A trial can end on a rejected move, which leaves `action_space` pruned.
    # Reset it so the next trial starts with all eight actions available.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = np.concatenate((current_position, current_position))
    for step_count in range(1, num_steps):
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())
        move = int(action)
        from_obstacle = tuple(current_position.tolist()) in obstacle_cells
        onto_obstacle = target in obstacle_cells
        rejected = (
            (not from_obstacle and onto_obstacle and move in entry_blocked)
            or (from_obstacle and not onto_obstacle and move in exit_blocked)
            or target in border_cells
        )
        if rejected:
            # Record the attempt, stay put, and drop the action until a move succeeds.
            data[step_count, :, trial_count] = np.concatenate((current_position, candidate))
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            action_space = np.arange(8)

In [None]:
# With practice runs
# Conditions 1 & 2, V2 recording (rejected moves consume a step) plus scripted
# "practice" routes: when the agent lands on a trigger cell it may, with the
# listed probability, follow a fixed action sequence. Scripted moves are
# written to `data` without any validity check.
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 4, num_trials])  # axes: (step, [x, y, x_try, y_try], trial)

# Loop-invariant lookups, built once instead of on every step.
obstacle_cells = {tuple(cell) for cell in obstacle.tolist()}
border_cells = {tuple(cell) for cell in invalid.tolist()}
entry_blocked = set(B_actions.tolist())      # rejected when stepping onto the obstacle
exit_blocked = set(B_exit_actions.tolist())  # rejected when stepping off the obstacle

# Trigger cell -> list of (probability, scripted action sequence).
practice_routes = {
    (9, 12): [(0.2, [6, 3, 3, 6, 3])],
    (5, 12): [(0.2, [7, 3, 3, 7, 3])],
    # Two mirrored routes from the start cell. A single random draw selects
    # between them, so each really has probability 0.4 (drawing a second
    # random number for the second route would give it only 0.6 * 0.4 = 0.24).
    (7, 12): [(0.4, [7, 7, 7, 7, 3]), (0.4, [6, 6, 6, 6, 3])],
    (3, 7): [(0.2, [3, 6, 6, 6, 6, 3])],
    (11, 7): [(0.2, [3, 7, 7, 7, 7, 3])],
}

for trial_count in range(num_trials):
    # A trial can end on a rejected move, which leaves `action_space` pruned.
    # Reset it so the next trial starts with all eight actions available.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = np.concatenate((current_position, current_position))
    step_count = 1
    while step_count < num_steps:
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())

        if target in practice_routes:
            # Landing on a trigger cell is always accepted and recorded.
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            step_count += 1
            action_space = np.arange(8)
            draw = random.random()
            cumulative = 0.0
            for probability, route in practice_routes[target]:
                cumulative += probability
                if draw < cumulative:
                    for scripted_action in route:
                        # Stop writing once the trial's step budget is used up.
                        if step_count < num_steps:
                            current_position = current_position + delta[scripted_action]
                            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
                            step_count += 1
                    break
            continue

        move = int(action)
        from_obstacle = tuple(current_position.tolist()) in obstacle_cells
        onto_obstacle = target in obstacle_cells
        rejected = (
            (not from_obstacle and onto_obstacle and move in entry_blocked)
            or (from_obstacle and not onto_obstacle and move in exit_blocked)
            or target in border_cells
        )
        if rejected:
            # Record the attempt, stay put, and drop the action until a move succeeds.
            data[step_count, :, trial_count] = np.concatenate((current_position, candidate))
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            action_space = np.arange(8)
        step_count += 1

In [None]:
# Save the data
# NOTE: every generation cell above rebinds `data`, so this writes the output
# of whichever of those cells was executed most recently.
np.save('random_walk_data_1_2.npy', data)

### Condition 3

In [None]:
# Condition 3 tripwire cells, one array of [x, y] rows per group.
# A move landing on a cell of group G is rejected when its action code is
# listed in G_actions.
F = np.array([(4, 10), (9, 10)])
A = np.array([(5, 10), (10, 10)])
Z = np.array([(5, 9)])
H = np.array([(6, 9)])
K = np.array([(8, 9)])
Y = np.array([(9, 9)])

#### V1: does not record attempts to make invalid moves 
-> more similar to mouse data

In [None]:
# Condition 3, V1: random walks in which rejected moves are NOT recorded.
# A move is rejected when it lands on an invalid cell (`invalid_all`) or when
# it lands on a tripwire cell with an action listed for that cell's group.
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 2, num_trials])  # axes: (step, [x, y], trial)

# Loop-invariant lookups, built once instead of on every step.
blocked_cells = {tuple(cell) for cell in invalid_all.tolist()}
# Tripwire cell -> set of action codes rejected when landing on it.
tripwire_rules = {}
for cells, actions in ((F, F_actions), (A, A_actions), (Z, Z_actions),
                       (H, H_actions), (K, K_actions), (Y, Y_actions)):
    for cell in cells.tolist():
        tripwire_rules.setdefault(tuple(cell), set()).update(actions.tolist())

for trial_count in range(num_trials):
    # Start each trial with all eight actions available, so a pruned action
    # set left behind by an earlier trial or cell cannot bias the first move.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = current_position
    step_count = 1
    while step_count < num_steps:
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())
        if target in blocked_cells or int(action) in tripwire_rules.get(target, ()):
            # Rejected: stay put and do not retry this action from this cell.
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = current_position
            step_count += 1
            action_space = np.arange(8)

#### V2: does record attempts to make invalid moves
-> more similar to how the RL database originally trains the agent

In [None]:
# Condition 3, V2: random walks in which rejected moves ARE recorded.
# Every attempted move consumes one step. For each step, columns 0-1 of `data`
# hold the agent's position after the step and columns 2-3 hold the cell it
# tried to move to (the two pairs are identical when the move was accepted).
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 4, num_trials])  # axes: (step, [x, y, x_try, y_try], trial)

# Loop-invariant lookups, built once instead of on every step.
obstacle_cells = {tuple(cell) for cell in obstacle.tolist()}
border_cells = {tuple(cell) for cell in invalid.tolist()}
entry_blocked = set(B_actions.tolist())      # rejected when stepping onto the obstacle
exit_blocked = set(B_exit_actions.tolist())  # rejected when stepping off the obstacle
# Tripwire cell -> set of action codes rejected when landing on it.
tripwire_rules = {}
for cells, actions in ((F, F_actions), (A, A_actions), (Z, Z_actions),
                       (H, H_actions), (K, K_actions), (Y, Y_actions)):
    for cell in cells.tolist():
        tripwire_rules.setdefault(tuple(cell), set()).update(actions.tolist())

for trial_count in range(num_trials):
    # A trial can end on a rejected move, which leaves `action_space` pruned.
    # Reset it so the next trial starts with all eight actions available.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = np.concatenate((current_position, current_position))
    for step_count in range(1, num_steps):
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())
        move = int(action)
        from_obstacle = tuple(current_position.tolist()) in obstacle_cells
        onto_obstacle = target in obstacle_cells
        rejected = (
            move in tripwire_rules.get(target, ())
            or (not from_obstacle and onto_obstacle and move in entry_blocked)
            or (from_obstacle and not onto_obstacle and move in exit_blocked)
            or target in border_cells
        )
        if rejected:
            # Record the attempt, stay put, and drop the action until a move succeeds.
            data[step_count, :, trial_count] = np.concatenate((current_position, candidate))
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            action_space = np.arange(8)

In [None]:
# With practice runs
# Condition 3, V2 recording (rejected moves consume a step) plus scripted
# "practice" routes: when the agent lands on a trigger cell it follows a fixed
# action sequence with probability 0.2. Scripted moves are written to `data`
# without any validity check.
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 4, num_trials])  # axes: (step, [x, y, x_try, y_try], trial)

# Loop-invariant lookups, built once instead of on every step.
obstacle_cells = {tuple(cell) for cell in obstacle.tolist()}
border_cells = {tuple(cell) for cell in invalid.tolist()}
entry_blocked = set(B_actions.tolist())      # rejected when stepping onto the obstacle
exit_blocked = set(B_exit_actions.tolist())  # rejected when stepping off the obstacle
# Tripwire cell -> set of action codes rejected when landing on it.
tripwire_rules = {}
for cells, actions in ((F, F_actions), (A, A_actions), (Z, Z_actions),
                       (H, H_actions), (K, K_actions), (Y, Y_actions)):
    for cell in cells.tolist():
        tripwire_rules.setdefault(tuple(cell), set()).update(actions.tolist())

# Trigger cell -> scripted action sequence.
practice_routes = {
    (3, 7): [3, 6, 6, 6, 6, 3],
    (11, 7): [3, 7, 7, 7, 7, 3],
}
practice_probability = 0.2

for trial_count in range(num_trials):
    # A trial can end on a rejected move, which leaves `action_space` pruned.
    # Reset it so the next trial starts with all eight actions available.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = np.concatenate((current_position, current_position))
    step_count = 1
    while step_count < num_steps:
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())

        if target in practice_routes:
            # Landing on a trigger cell is always accepted and recorded.
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            step_count += 1
            action_space = np.arange(8)
            if random.random() < practice_probability:
                for scripted_action in practice_routes[target]:
                    # Stop writing once the trial's step budget is used up.
                    if step_count < num_steps:
                        current_position = current_position + delta[scripted_action]
                        data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
                        step_count += 1
            continue

        move = int(action)
        from_obstacle = tuple(current_position.tolist()) in obstacle_cells
        onto_obstacle = target in obstacle_cells
        rejected = (
            move in tripwire_rules.get(target, ())
            or (not from_obstacle and onto_obstacle and move in entry_blocked)
            or (from_obstacle and not onto_obstacle and move in exit_blocked)
            or target in border_cells
        )
        if rejected:
            # Record the attempt, stay put, and drop the action until a move succeeds.
            data[step_count, :, trial_count] = np.concatenate((current_position, candidate))
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            action_space = np.arange(8)
        step_count += 1

In [None]:
# Save the data
# NOTE: every generation cell above rebinds `data`, so this writes the output
# of whichever of those cells was executed most recently.
np.save('random_walk_data_3.npy', data)

### Condition 4

In [None]:
# Condition 4 restricted cells, one array of [x, y] rows per group.
# A move landing on a cell of group G is rejected when its action code is
# listed in G_actions.
# NOTE: Z, H, K and Y are rebound here with different coordinates than in the
# condition 3 section; rerun the condition 3 definitions before regenerating
# condition 3 data in the same kernel.
Z = np.array([(5, 10)])
H = np.array([(5, 11)])
K = np.array([(9, 11)])
Y = np.array([(9, 10)])
V = np.array([(6, 9)])
U = np.array([(8, 9)])
X = np.array([(6, 10)])
W = np.array([(8, 10)])

#### V1: does not record attempts to make invalid moves 
-> more similar to mouse data

In [None]:
# Condition 4, V1: random walks in which rejected moves are NOT recorded.
# A move is rejected when it lands on an invalid cell (`invalid_all`) or when
# it lands on a restricted cell with an action listed for that cell's group.
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 2, num_trials])  # axes: (step, [x, y], trial)

# Loop-invariant lookups, built once instead of on every step.
blocked_cells = {tuple(cell) for cell in invalid_all.tolist()}
# Restricted cell -> set of action codes rejected when landing on it.
restricted_rules = {}
for cells, actions in ((Z, Z_actions), (H, H_actions), (K, K_actions), (Y, Y_actions),
                       (V, V_actions), (U, U_actions), (X, X_actions), (W, W_actions)):
    for cell in cells.tolist():
        restricted_rules.setdefault(tuple(cell), set()).update(actions.tolist())

for trial_count in range(num_trials):
    # Start each trial with all eight actions available, so a pruned action
    # set left behind by an earlier trial or cell cannot bias the first move.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = current_position
    step_count = 1
    while step_count < num_steps:
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())
        if target in blocked_cells or int(action) in restricted_rules.get(target, ()):
            # Rejected: stay put and do not retry this action from this cell.
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = current_position
            step_count += 1
            action_space = np.arange(8)

#### V2: does record attempts to make invalid moves 
-> more similar to how the RL database originally trains the agent

In [None]:
# Condition 4, V2: random walks in which rejected moves ARE recorded.
# Every attempted move consumes one step. For each step, columns 0-1 of `data`
# hold the agent's position after the step and columns 2-3 hold the cell it
# tried to move to (the two pairs are identical when the move was accepted).
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 4, num_trials])  # axes: (step, [x, y, x_try, y_try], trial)

# Loop-invariant lookups, built once instead of on every step.
obstacle_cells = {tuple(cell) for cell in obstacle.tolist()}
border_cells = {tuple(cell) for cell in invalid.tolist()}
entry_blocked = set(B_actions.tolist())      # rejected when stepping onto the obstacle
exit_blocked = set(B_exit_actions.tolist())  # rejected when stepping off the obstacle
# Restricted cell -> set of action codes rejected when landing on it.
restricted_rules = {}
for cells, actions in ((Z, Z_actions), (H, H_actions), (K, K_actions), (Y, Y_actions),
                       (V, V_actions), (U, U_actions), (X, X_actions), (W, W_actions)):
    for cell in cells.tolist():
        restricted_rules.setdefault(tuple(cell), set()).update(actions.tolist())

for trial_count in range(num_trials):
    # A trial can end on a rejected move, which leaves `action_space` pruned.
    # Reset it so the next trial starts with all eight actions available.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = np.concatenate((current_position, current_position))
    for step_count in range(1, num_steps):
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())
        move = int(action)
        from_obstacle = tuple(current_position.tolist()) in obstacle_cells
        onto_obstacle = target in obstacle_cells
        rejected = (
            move in restricted_rules.get(target, ())
            or (not from_obstacle and onto_obstacle and move in entry_blocked)
            or (from_obstacle and not onto_obstacle and move in exit_blocked)
            or target in border_cells
        )
        if rejected:
            # Record the attempt, stay put, and drop the action until a move succeeds.
            data[step_count, :, trial_count] = np.concatenate((current_position, candidate))
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            action_space = np.arange(8)

In [None]:
# With practice runs
# Condition 4, V2 recording (rejected moves consume a step) plus scripted
# "practice" routes: when the agent lands on a trigger cell it follows a fixed
# action sequence with probability 0.2. Scripted moves are written to `data`
# without any validity check.
num_trials = 105
num_steps = 2400
data = np.zeros([num_steps, 4, num_trials])  # axes: (step, [x, y, x_try, y_try], trial)

# Loop-invariant lookups, built once instead of on every step.
obstacle_cells = {tuple(cell) for cell in obstacle.tolist()}
border_cells = {tuple(cell) for cell in invalid.tolist()}
entry_blocked = set(B_actions.tolist())      # rejected when stepping onto the obstacle
exit_blocked = set(B_exit_actions.tolist())  # rejected when stepping off the obstacle
# Restricted cell -> set of action codes rejected when landing on it.
restricted_rules = {}
for cells, actions in ((Z, Z_actions), (H, H_actions), (K, K_actions), (Y, Y_actions),
                       (V, V_actions), (U, U_actions), (X, X_actions), (W, W_actions)):
    for cell in cells.tolist():
        restricted_rules.setdefault(tuple(cell), set()).update(actions.tolist())

# Trigger cell -> scripted action sequence.
practice_routes = {
    (3, 7): [3, 6, 6, 6, 6, 3],
    (11, 7): [3, 7, 7, 7, 7, 3],
}
practice_probability = 0.2

for trial_count in range(num_trials):
    # A trial can end on a rejected move, which leaves `action_space` pruned.
    # Reset it so the next trial starts with all eight actions available.
    action_space = np.arange(8)
    current_position = np.array([7, 12])
    data[0, :, trial_count] = np.concatenate((current_position, current_position))
    step_count = 1
    while step_count < num_steps:
        action = random.choice(action_space)
        candidate = current_position + delta[action]
        target = tuple(candidate.tolist())

        if target in practice_routes:
            # Landing on a trigger cell is always accepted and recorded.
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            step_count += 1
            action_space = np.arange(8)
            if random.random() < practice_probability:
                for scripted_action in practice_routes[target]:
                    # Stop writing once the trial's step budget is used up.
                    if step_count < num_steps:
                        current_position = current_position + delta[scripted_action]
                        data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
                        step_count += 1
            continue

        move = int(action)
        from_obstacle = tuple(current_position.tolist()) in obstacle_cells
        onto_obstacle = target in obstacle_cells
        rejected = (
            move in restricted_rules.get(target, ())
            or (not from_obstacle and onto_obstacle and move in entry_blocked)
            or (from_obstacle and not onto_obstacle and move in exit_blocked)
            or target in border_cells
        )
        if rejected:
            # Record the attempt, stay put, and drop the action until a move succeeds.
            data[step_count, :, trial_count] = np.concatenate((current_position, candidate))
            action_space = action_space[action_space != action]
        else:
            current_position = candidate
            data[step_count, :, trial_count] = np.concatenate((current_position, current_position))
            action_space = np.arange(8)
        step_count += 1

In [None]:
# Save the data
# NOTE: every generation cell above rebinds `data`, so this writes the output
# of whichever of those cells was executed most recently.
np.save('random_walk_data_4.npy', data)

## Plot a trial

In [None]:
# Draw the x/y trajectory of a single trial from a saved data file.
data = np.load('random_walk_data_3.npy')

trial_index = 2         # index along the last axis of `data` (i.e. the third trial)
show_tripwires = False  # set to True to overlay the condition 3 tripwire cells

fig, ax = plt.subplots()
ax.plot(data[:, 0, trial_index], data[:, 1, trial_index])
if show_tripwires:
    # Dots at the positions of the condition 3 tripwires, on the full 0..14 grid.
    ax.scatter([4, 9, 5, 10, 5, 6, 8, 9], [10, 10, 10, 10, 9, 9, 9, 9], c='black')
    ax.set_xlim([0, 14])
    ax.set_ylim([0, 14])
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title(f'Random walk (trial index {trial_index})')
plt.show()

## Check for invalid points

In [None]:
# Load up the data if you've already saved something.
# This rebinds `data`, replacing whatever a generation cell left in memory.
data = np.load('random_walk_data_3.npy')

In [None]:
# Scan every trial in `data` for recorded positions (columns 0-1) that coincide
# with a cell listed in `invalid`.
# The trial count is taken from `data` itself: after loading a saved file,
# the global `num_trials` may describe a different run than the one in `data`.
for trial in range(data.shape[2]):
    positions = data[:, 0:2, trial]
    for point in invalid:
        hit_steps = np.flatnonzero(np.all(positions == point, axis=1))
        if hit_steps.size > 0:
            print(f"trial {trial}: invalid point {point.tolist()} at steps {hit_steps}")

#If nothing prints, all is well!