In [None]:
#!pip install stable_baselines3
#!pip install tensorflow
#!pip install stable-baselines
#!pip install gym


In [10]:
import pandas as pd
import numpy as np
from gym import Env
from gym.spaces import Discrete, Box


In [11]:
class TradingEnv(Env):
    def __init__(self, df):
        self.df = df
        self.balance = 10000
        self.net_worth = []

        # Dimensions des espaces d'observations et d'actions
        self.observation_space = Box(low=0, high=np.inf, shape=(6,))
        self.action_space = Discrete(3)

        # Episode
        self._start_tick = 0
        self._end_tick = len(df) - 1
        self._done = False
        self._current_tick = 0

        self.reset()

    def reset(self):
        # Réinitialiser l'environnement
        self._current_tick = self._start_tick
        self._done = False
        self.balance = 10000
        self.net_worth = [self.balance]
        self.position = 0

        return self._get_observation()

    def step(self, action):
       # Actions: 0=Acheter, 1=Vendre, 2=Rien faire

       current_price = self._get_current_close()

       if action == 0:
           # Acheter
           qty = 1000 // current_price
           self.balance -= qty * current_price
           self.position += qty
       elif action == 1:
           # Vendre
           qty = min(abs(self.position), 1000 // current_price)
           self.balance += qty * current_price
           self.position -= qty

       # Calculer reward
       self.net_worth.append(self.balance + self.position * current_price)
       reward = self.net_worth[-1] - self.net_worth[-2]

       # MAJ état
       self._current_tick += 1
       self._done = self._current_tick == self._end_tick

       return self._get_observation(), reward, self._done, {}

    def _get_observation(self):
        obs = np.array([
            self.balance,
            self.position,
            self._get_current_open(),
            self._get_current_high(),
            self._get_current_low(),
            self._get_current_close()
        ])

        return obs

    def _get_current_close(self):
        return self.df.loc[self._current_tick, 'Close']

    def _get_current_open(self):
        return self.df.loc[self._current_tick, 'Open']

    def _get_current_high(self):
        return self.df.loc[self._current_tick, 'High']

    def _get_current_low(self):
        return self.df.loc[self._current_tick, 'Low']




In [12]:
class TradingQAgent:
    def __init__(self, env):
        self.env = env
        self.lr = 0.1
        self.gamma = 0.95
        self.eps = 1.0
        self.decay = 0.99995
        self.q_table = pd.DataFrame(columns=list(range(env.action_space.n)),
                                    dtype=np.float64)

    def train(self, episodes):
        for e in range(episodes):
            state = self.env.reset()
            done = False
            score = 0

            while not done:
                action = self.get_action(state)
                next_state, reward, done, _ = self.env.step(action)

                self.update_qtable(state, action, reward, next_state)

                state = next_state
                score += reward

            self.eps = max(0.01, self.eps*self.decay)
            print("Episode {} Score {}".format(e,score))


    def get_action(self, state):
        if np.random.random() < self.eps:
            return self.env.action_space.sample()
        else:
            return np.argmax(self.q_table.loc[tuple(state)])


    def update_qtable(self, state, action, reward, next_state):
        q_1 = self.q_table.loc[tuple(state)][action]
        q_2 = reward + self.gamma*max(self.q_table.loc[tuple(next_state)])
        self.q_table.loc[tuple(state)][action] += self.lr*(q_2 - q_1)

    def test(self, episodes):
        for e in range(episodes):
            done = False
            score = 0
            state = self.env.reset()

            while not done:
                action = np.argmax(self.q_table.loc[tuple(state)])
                state, reward, done, _ = self.env.step(action)

                score += reward

            print("Test {} Score {}".format(e, score))


In [13]:
# Charger les données
data = pd.read_csv('cleaned_data/Training.csv')
data.drop(columns={'Unnamed: 0'}, inplace=True)
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2001-01-02,1320.280029,1320.280029,1276.050049,1283.27002,1129400000
1,2001-01-03,1283.27002,1347.76001,1274.619995,1347.560059,1880700000
2,2001-01-04,1347.560059,1350.23999,1329.140015,1333.339966,2131000000
3,2001-01-05,1333.339966,1334.77002,1294.949951,1298.349976,1430800000
4,2001-01-08,1298.349976,1298.349976,1276.290039,1295.859985,1115500000


In [14]:
import pandas as pd
from env import TradingEnv
from agent import TradingQAgent
agent = TradingQAgent(env)
agent.train(100)
agent.test(10)

ImportError: cannot import name 'env' from 'env' (c:\Users\ANSD\OneDrive - Azubi Africa\master\DIT\M2\REINFORCEMENT_LEARNING\Projet_Reinforcement_Learning_Gestion_De_Stock\env.py)