In [28]:
import csv
import gym
import numpy as np
from gym import spaces

In [None]:
tickers = ["AAPL", "AMZN", "GOOGL", "MSFT", "NVDA", "TSLA"]

# Build {row_index: {ticker: price}} from the CSV, keeping only the columns
# named in `tickers` and converting each cell to float.
# The file is opened in a `with` block so the handle is closed as soon as the
# rows have been consumed; `newline=""` is what the csv module expects for
# files it parses.
with open("nasdaq_stock_prices.csv", mode="r", newline="") as csv_file:
    data = {
        i: {ticker: float(row[ticker]) for ticker in tickers}
        for i, row in enumerate(csv.DictReader(csv_file, delimiter=","))
    }

In [None]:
class Environment(gym.Env):
    """Multi-ticker trading environment trading one share per ticker per step.

    Args:
        data: Mapping from step index (0..N-1) to a ``{ticker: price}`` dict.
        tickers: Ticker symbols to trade; fixes the order of actions and
            observations.
        initial_balance: Cash available at the start of every episode.

    Attributes:
        data: The price table passed in (never overwritten).
        max_steps: Index of the last row of ``data``.
        current_step: Index of the row the next action trades at.
        current_balance: Cash currently held.
        current_shares: ``{ticker: number of shares held}``.
        current_profit: Cash plus holdings value minus ``initial_balance``.
        done: True once the episode has stepped past the last row.

    Observation: ``[price per ticker..., shares held per ticker...]``.
    Action: one float in [-1, 1] per ticker; > 0 buys one share, < 0 sells
    one share, 0 holds.
    """

    def __init__(self, data, tickers, initial_balance=10000):
        super().__init__()
        self.data = data
        self.max_steps = len(data) - 1
        self.tickers = tickers
        self.initial_balance = initial_balance

        self.action_space = spaces.Box(
            low=-1, high=1, shape=(len(tickers),), dtype=np.float16
        )
        # Observations are raw prices and share counts; neither is confined
        # to [0, 1], and share counts can go negative because selling is not
        # restricted, so the space is left unbounded.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(tickers) * 2,), dtype=np.float32
        )

        # Initialise all per-episode state in one place.
        self.reset()

    def reset(self):
        """Reset the environment for the beginning of a new episode.

        Returns:
            The observation for step 0.
        """
        self.current_step = 0
        self.current_balance = self.initial_balance
        self.current_shares = {ticker: 0 for ticker in self.tickers}
        self.current_profit = self._compute_profit()
        self.done = False

        # Cached so step() can return the last valid observation once the
        # episode has run past the end of the data.
        self._last_observation = self._get_observation()
        return self._last_observation

    def _holdings_value(self):
        """Return the value of all held shares at the current step's prices."""
        prices = self.data[self.current_step]
        return sum(self.current_shares[ticker] * prices[ticker] for ticker in self.tickers)

    def _compute_profit(self):
        """Return cash plus holdings value minus the initial balance."""
        return self.current_balance + self._holdings_value() - self.initial_balance

    def _get_observation(self):
        """Return current prices followed by current share counts, in ticker order."""
        prices = self.data[self.current_step]
        values = [prices[ticker] for ticker in self.tickers]
        values += [self.current_shares[ticker] for ticker in self.tickers]
        return np.array(values, dtype=np.float32)

    def _take_action(self, action):
        """Buy or sell one share per ticker according to the sign of ``action``.

        No funds or holdings check is applied, so the cash balance and the
        share counts may become negative.
        """
        prices = self.data[self.current_step]
        for i, ticker in enumerate(self.tickers):
            if action[i] > 0:
                self.current_balance -= prices[ticker]
                self.current_shares[ticker] += 1
            elif action[i] < 0:
                self.current_balance += prices[ticker]
                self.current_shares[ticker] -= 1
        # Profit is tracked separately; the cash balance must not absorb the
        # holdings value, otherwise it would be double counted on every step.
        self.current_profit = self._compute_profit()

    def step(self, action):
        """Apply ``action`` at the current step and advance by one row.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the
            cumulative profit valued at the prices the action traded at.
            When the episode steps past the last row, ``done`` is True and
            the last valid observation is returned again. Call ``reset()``
            before stepping a finished episode.
        """
        self._take_action(action)
        self.current_step += 1
        if self.current_step > self.max_steps:
            self.done = True
        else:
            self._last_observation = self._get_observation()
        return self._last_observation, self.current_profit, self.done, {}

In [None]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class TradingEnvironment(gym.Env):
    """Single-ticker trading environment with discrete hold/buy/sell actions.

    Args:
        csv_file: Path to a CSV file with a ``Date`` column and one price
            column per ticker.
        ticker: Name of the price column to trade.
        initial_cash: Cash available at the start of every episode.
    """

    def __init__(self, csv_file, ticker="AAPL", initial_cash=10000):
        super().__init__()

        # Load the data from the CSV file given by the caller.
        self.data = pd.read_csv(csv_file)
        self.prices = self.data[ticker].values
        self.dates = self.data["Date"].values
        self.initial_cash = initial_cash

        # Action space:
        # 0: hold, 1: buy one share, 2: sell one share
        self.action_space = spaces.Discrete(3)

        # Observations: [current_price, shares_held, available_cash]
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0]),
            high=np.array([np.inf, np.inf, np.inf]),
            dtype=np.float32
        )

        self.reset()

    def reset(self):
        """Reset the environment to its initial state and return the first observation."""
        self.current_step = 0
        self.cash = self.initial_cash
        self.stock_held = 0
        return self._get_observation()

    def step(self, action):
        """Execute one action and advance to the next row.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is the change in
            portfolio value between the current and the next price. On the
            last row the reward is 0 and the step index stays on that row,
            so the observation remains valid even if step() is called again.
        """
        current_price = self.prices[self.current_step]

        # Execute the action; a buy needs enough cash, a sell needs a share.
        if action == 1:  # Buy
            if self.cash >= current_price:
                self.stock_held += 1
                self.cash -= current_price

        elif action == 2:  # Sell
            if self.stock_held > 0:
                self.stock_held -= 1
                self.cash += current_price

        # Compute the reward (change in portfolio value).
        next_step = self.current_step + 1
        if next_step < len(self.prices):
            next_price = self.prices[next_step]
            portfolio_value = self.stock_held * next_price + self.cash
            previous_value = self.stock_held * current_price + self.cash
            reward = portfolio_value - previous_value
            self.current_step = next_step
        else:
            # Already on the last row: there is no next price to value
            # against, and advancing would index past the end of the data.
            reward = 0

        # The episode ends once the last row has been reached.
        done = self.current_step >= len(self.prices) - 1

        obs = self._get_observation()
        info = {}

        return obs, reward, done, info

    def _get_observation(self):
        """Build the current observation: [price, shares held, cash]."""
        return np.array([
            self.prices[self.current_step],
            self.stock_held,
            self.cash
        ], dtype=np.float32)

    def render(self, mode="human"):
        """Print the current state for debugging."""
        print(f"Date: {self.dates[self.current_step]} | Prix: {self.prices[self.current_step]:.2f} | Actions: {self.stock_held} | Cash: {self.cash:.2f}")