In [67]:
import gym
from gym import spaces
import numpy as np
import random as rnd
import matplotlib.pyplot as plt

In [46]:
class OrderImbalanceEnv(gym.Env):
    def __init__(self, data, lookback, fee):
        self.data = data
        self.lookback = lookback
        self.fee = fee
        self.action_space = spaces.Discrete(3) # 0 = sell, 1 = hold, 2 = buy
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(lookback, 3), dtype=np.float32)
        self.reset()
        
        self.temp_data = []

    def step(self, action):
        if action == 0:
            self.balance *= 1 + reward
            self.shares -= self.shares * 2
        elif action == 2:
            self.balance *= 1 + reward
            self.shares += self.balance * 0.5 / self.data[self.current_step]
        
        self.get_rewards(action)
        print(f"action : {action} - balance : {self.balance} - shares : {self.shares}")
        
        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        obs = self._next_observation()
        print(f"observation : {obs}")
        return obs, self.reward, done, {}

    def reset(self):
        self.current_step = 0
        self.balance = 1.0
        self.shares = 0.0
        
        self.reward = 0.0
        return self._next_observation()

    def _next_observation(self):
        obs = np.array([
            self.data[self.current_step - self.lookback:self.current_step, 0],
            self.data[self.current_step - self.lookback:self.current_step, 1],
            self.data[self.current_step - self.lookback:self.current_step, 2]
        ]).T
        return obs
    
    def get_rewards(self, action):
        # récompense lorsque l'agent achète ou vend
        if action == 0 or action == 2:
            self.reward = (self.data[self.current_step+1] - self.data[self.current_step]) / self.data[self.current_step] - self.fee
        else:
            reward = (self.data[self.current_step+1] - self.data[self.current_step]) / self.data[self.current_step]
            

In [143]:
class EnvTest(gym.Env):
    def __init__(self, balance=100, spread=0.001, max_variation=0.05, simulation_duration="10s"):
        self.spread = spread # valeur du spread
        self.max_variation = max_variation # variation maximale de la courbe en pourcentage
        self.simulation_duration = simulation_duration # durée de la simulation demandées
        self.action_space = spaces.Discrete(3) # 0 = sell, 1 = hold, 2 = buy
        self.observation_space = spaces.Box(low=0.0,
                                           high=2.0,
                                           shape=(1,),
                                           dtype=np.float32)
        
        self.balance = balance # valeur du porte feuille
        self.curve_values = [] # historique des données
        self.current_curve_value = self.initializeCurve() # valeur actuelle de la courbe
        
        
        self.timestep = 1.0 # simulation sur un pas de 1 seconde
        self.current_time = 0.0 # temps actuel de la simulation
        self.max_time = self.timeConverter(self.simulation_duration) # temps maximum d'un épisode
        
        self.reward = 0.0
        self.done = False
        
        self.last_state = np.zeros(self.observation_space.shape[0])
        self.state = self.last_state
        
    def step(self, action):
        """effectue un 'step' dans la simulation"""
        
        if self.current_time >= self.max_time:
            self.done = True
        self.current_time += self.timestep
        
        next_state = np.zeros(3)
        info = {}
        return next_state, self.reward, self.done, info
    
    def reset(self):
        self.balance = balance
        self.curve_values = [] 
        self.current_curve_value = self.initializeCurve()
        
        self.current_time = 0.0
        self.reward = 0.0
        self.done = False
        
        self.last_state = np.zeros(1) 
        self.state = self.last_state
        
    def render(self):
        pass
        
    def initializeCurve(self):
        """Initialise une courbe qui entre les valeurs min et max"""
        minimum_value = 1.055
        maximum_value = 1.06
        value = rnd.uniform(minimum_value, maximum_value)
        self.curve_values.append(value)
        return value
    
    def generateCurveData(self):
        """génère la prochaine valeur de la courbe en fonction du taux de variation
        sauvegarde la valeur dans l'historique"""
        variation = rnd.uniform(-self.max_variation, self.max_variation)
        self.current_curve_value += variation
        saveCurveData()
    
    def saveCurveData(self):
        """Sauvegarde les données de la courbe dans une liste"""
        self.curve_values.append(self.current_curve_value)
        
    def timeConverter(self, time):
        """Convertie la durée entrée manuellement
        Renvois la durée maximale en seconde (ou en fonction du time step)"""
        number = ""
        value = ""
        for string in time:
            if string.isdigit():
                number += string
            else:
                value += string

        if not self.translate(value):
            print("Durée de simulation incorrect...")
            return None
        time_converted = int(number) * self.translate(value)
        return time_converted
    
    def translate(self, value):
        """fonction qui permet de traduire et la renvoie en seconde"""
        
        ## Constantes de temps en secondes
        SECONDE = 1.0
        MINUTE = 60.0
        HOUR = 60.0 * MINUTE
        DAY = 24.0 * HOUR
        WEEK = 7.0 * DAY
        MONTH = 30.0 * DAY
        YEAR = 365.0 * DAY
        
        d = {"seconde" : ["s", "seconde", "secondes", "sec", "secs"],
            "minute" : ["m", "min", "minute", "minutes", "mins", "minutes"],
            "hour" : ["h", "heure", "hour", "hours", "heures"],
            "day" : ["d", "j", "jour", "day", "jours"],
            "week" : ["w", "week", "semaine", "weeks", "semaines"],
            "year" : ["y", "year", "année", "annee", "an", "années", "years"]
            }

        for key in d.keys():
            if value.lower() in key:
                if key == "seconde":
                    return MINUTE
                elif key == "minute":
                    return MINUTE
                elif key == "hour":
                    return HOUR
                elif key == "day":
                    return DAY
                elif key == "week":
                    return WEEK
                elif key == "year":
                    return YEAR
                
        return False

In [144]:
env = EnvTest()
print(f"obs space : {env.observation_space.shape}")
num_action = env.observation_space.shape[0]
for i in range(15):
    obs, r, d, i = env.step(15)
    print(f"obs : {obs} - reward : {r} - done : {d}")

AttributeError: 'EnvTest' object has no attribute 'observation_spaces'

In [48]:
env.step(1)

temp data : [0. 0. 0.]
temp data : [4770. 6292. 2823.]
action : 1 - balance : 1.0 - shares : 0.0
observation : []


(array([], shape=(0, 3), dtype=float64), 0.0, False, {})

In [None]:
class Agent():
    def __init__(self):
        pass
    
    def act(self):
        pass

In [3]:


# generate sample data
data = np.random.normal(100, 10, size=(1000, 3))

# create environment
env = OrderImbalanceEnv(data, lookback=10, fee=0.01)

# reset environment and get initial observation
obs = env.reset()

# take random actions for 100 time steps
for i in range(100):
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    print(f"Step {i+1}: Action={action}, Reward={reward:.4f}, Balance={env.balance:.4f}, Shares={env.shares:.4f}")

    if done:
        print("Episode finished after {} timesteps".format(i+1))
        break

NameError: name 'reward' is not defined