In [1]:
import gym
from gym import spaces
import numpy as np


In [2]:
class OrderImbalanceEnv(gym.Env):
    def __init__(self, data, lookback, fee):
        self.data = data
        self.lookback = lookback
        self.fee = fee
        self.action_space = spaces.Discrete(3) # 0 = sell, 1 = hold, 2 = buy
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(lookback, 3), dtype=np.float32)
        self.reset()
        
        self.reward = 0.0

    def step(self, action):
        if action == 0:
            self.balance *= 1 + reward
            self.shares -= self.shares * 2
        elif action == 2:
            self.balance *= 1 + reward
            self.shares += self.balance * 0.5 / self.data[self.current_step]
        
        self.get_rewards(action)
            
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        obs = self._next_observation()
        return obs, reward, done, {}

    def reset(self):
        self.current_step = 0
        self.balance = 1.0
        self.shares = 0.0
        return self._next_observation()

    def _next_observation(self):
        obs = np.array([
            self.data[self.current_step - self.lookback:self.current_step, 0],
            self.data[self.current_step - self.lookback:self.current_step, 1],
            self.data[self.current_step - self.lookback:self.current_step, 2]
        ]).T
        return obs
    
    def get_rewards(self, action):
        if action == 0 or action == 2:
            self.reward = (self.data[self.current_step+1] - self.data[self.current_step]) / self.data[self.current_step] - self.fee
        else:
            reward = (self.data[self.current_step+1] - self.data[self.current_step]) / self.data[self.current_step]

In [3]:


# generate sample data
data = np.random.normal(100, 10, size=(1000, 3))

# create environment
env = OrderImbalanceEnv(data, lookback=10, fee=0.01)

# reset environment and get initial observation
obs = env.reset()

# take random actions for 100 time steps
for i in range(100):
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    print(f"Step {i+1}: Action={action}, Reward={reward:.4f}, Balance={env.balance:.4f}, Shares={env.shares:.4f}")

    if done:
        print("Episode finished after {} timesteps".format(i+1))
        break

NameError: name 'reward' is not defined