In [1]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [4]:
# Fonction pour télécharger les données via yfinance
def get_commodities_data(tickers, start="2010-01-01", end="2023-01-01", interval="1d"):
    data = {}
    for ticker in tickers:
        df = yf.download(ticker, start=start, end=end, interval=interval)
        data[ticker] = df
    return data

In [6]:
commodities = ['GC=F', 'SI=F', 'CL=F']

commodities_data = get_commodities_data(commodities)

# Définition des périodes
training_data_time_range = ('2010-01-01', '2017-12-31')
validation_data_time_range = ('2018-01-01', '2019-12-31')
test_data_time_range = ('2020-01-01', '2023-01-01')

# Création des jeux de données
training_data = {}
validation_data = {}
test_data = {}
for ticker, df in commodities_data.items():
    training_data[ticker] = df.loc[training_data_time_range[0]:training_data_time_range[1]].copy()
    validation_data[ticker] = df.loc[validation_data_time_range[0]:validation_data_time_range[1]].copy()
    test_data[ticker] = df.loc[test_data_time_range[0]:test_data_time_range[1]].copy()

print("Shapes pour GC=F (Training, Validation, Test) :",
training_data['GC=F'].shape, validation_data['GC=F'].shape, test_data['GC=F'].shape)


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Shapes pour GC=F (Training, Validation, Test) : (2011, 5) (502, 5) (756, 5)





In [7]:
# Fonction d'ajout des indicateurs techniques
def add_technical_indicators(df):
    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))
    df['EMA12'] = df['Close'].ewm(span=12, adjust=False).mean()
    df['EMA26'] = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = df['EMA12'] - df['EMA26']
    df['Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
    tp = (df['High'] + df['Low'] + df['Close']) / 3
    sma_tp = tp.rolling(window=20).mean()
    mean_dev = tp.rolling(window=20).apply(lambda x: np.mean(np.abs(x - x.mean())))
    df['CCI'] = (tp - sma_tp) / (0.015 * mean_dev)

    high_diff = df['High'].diff()
    low_diff = df['Low'].diff()
    df['+DM'] = np.where((high_diff > low_diff) & (high_diff > 0), high_diff, 0)
    df['-DM'] = np.where((low_diff > high_diff) & (low_diff > 0), low_diff, 0)
    tr = pd.concat([df['High'] - df['Low'],
    np.abs(df['High'] - df['Close'].shift(1)),
    np.abs(df['Low'] - df['Close'].shift(1))], axis=1).max(axis=1)
    atr = tr.ewm(span=14, adjust=False).mean()
    df['+DI'] = 100 * (df['+DM'].ewm(span=14, adjust=False).mean() / atr)
    df['-DI'] = 100 * (df['-DM'].ewm(span=14, adjust=False).mean() / atr)
    dx = 100 * np.abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI'])
    df['ADX'] = dx.ewm(span=14, adjust=False).mean()
    df.dropna(inplace=True)
    df = df[['Open', 'High', 'Low', 'Close', 'Volume', 'MACD', 'Signal', 'RSI', 'CCI', 'ADX']]
    return df

In [8]:
# Application des indicateurs aux 3 jeux de données
for ticker in training_data:
    training_data[ticker] = add_technical_indicators(training_data[ticker])
for ticker in validation_data:
    validation_data[ticker] = add_technical_indicators(validation_data[ticker])
for ticker in test_data:
    test_data[ticker] = add_technical_indicators(test_data[ticker])

print(training_data['GC=F'].head())

Price              Open         High          Low        Close Volume  \
Ticker             GC=F         GC=F         GC=F         GC=F   GC=F   
Date                                                                    
2010-02-01  1081.000000  1107.300049  1076.800049  1104.300049   2452   
2010-02-02  1117.400024  1118.500000  1100.199951  1117.400024   3326   
2010-02-03  1115.800049  1124.900024  1108.599976  1111.400024    853   
2010-02-04  1110.000000  1110.699951  1059.000000  1062.400024   1426   
2010-02-05  1052.199951  1068.500000  1045.199951  1052.199951   1956   

Price            MACD    Signal        RSI         CCI        ADX  
Ticker                                                             
Date                                                               
2010-02-01  -8.662289 -5.106665  34.836633  -65.366222  28.495614  
2010-02-02  -6.795084 -5.444349  46.015247  -10.964782  26.065814  
2010-02-03  -5.733371 -5.502153  41.246502   -0.115025  25.602020  
2010-02

In [20]:
# Définition de l'environnement de trading avec espace d'action discret
import gymnasium as gym
from gymnasium import spaces

class CommodityTradingEnv(gym.Env):
    metadata = {'render_modes': ['human']}
    def __init__(self, commodity_data, transaction_cost_percent=0.005, initial_balance=10000):
        super(CommodityTradingEnv, self).__init__()
        # On garde uniquement les DataFrames non vides
        self.commodity_data = {ticker: df for ticker, df in commodity_data.items() if not df.empty}
        self.tickers = list(self.commodity_data.keys())
        if not self.tickers:
            raise ValueError("Aucune donnée disponible pour les commodities.")
        sample_df = next(iter(self.commodity_data.values()))
        self.n_features = len(sample_df.columns)
        # Définition d'un espace d'actions discret :
        # Pour chaque commodity, l'action possible est dans la liste suivante :
        self.action_list = [-1, -0.75, -0.50, -0.25, 0, 0.25, 0.50, 0.75, 1]
        self.num_actions_per_commodity = len(self.action_list)
        # L'action globale est une combinaison (codée en base-9)
        self.action_space = spaces.Discrete(self.num_actions_per_commodity ** len(self.tickers))
        # L'espace d'observation reste inchangé
        self.obs_shape = self.n_features * len(self.tickers) + 2 + len(self.tickers) + 2
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.obs_shape,), dtype=np.float32)
        # Initialisation du portefeuille
        self.initial_balance = initial_balance
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.max_net_worth = self.initial_balance
        self.shares_held = {ticker: 0 for ticker in self.tickers}
        self.total_shares_sold = {ticker: 0 for ticker in self.tickers}
        self.total_sales_value = {ticker: 0 for ticker in self.tickers}
        self.current_step = 0
        self.max_steps = max(0, min(len(df) for df in self.commodity_data.values()) - 1)
        self.transaction_cost_percent = transaction_cost_percent

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.max_net_worth = self.initial_balance
        self.shares_held = {ticker: 0 for ticker in self.tickers}
        self.total_shares_sold = {ticker: 0 for ticker in self.tickers}
        self.total_sales_value = {ticker: 0 for ticker in self.tickers}
        self.current_step = 0
        return self._next_observation(), {}

    def _next_observation(self):
        frame = np.zeros(self.obs_shape)
        idx = 0
        for ticker in self.tickers:
            df = self.commodity_data[ticker]
            if self.current_step < len(df):
                frame[idx:idx+self.n_features] = df.iloc[self.current_step].values
            else:
                frame[idx:idx+self.n_features] = df.iloc[-1].values
        idx += self.n_features
        # Ajout des informations supplémentaires
        frame[-4-len(self.tickers)] = self.balance
        frame[-3-len(self.tickers):-3] = [self.shares_held[ticker] for ticker in self.tickers]
        frame[-3] = self.net_worth
        frame[-2] = self.max_net_worth
        frame[-1] = self.current_step
        return frame

    def decode_action(self, action):
        """Décode l'action discrète en un vecteur d'actions pour chaque commodity."""
        decoded_indices = []
        temp = action
        n = len(self.tickers)
        for _ in range(n):
            decoded_indices.append(temp % self.num_actions_per_commodity)
            temp //= self.num_actions_per_commodity
        decoded_indices.reverse()
        decoded_actions = [self.action_list[idx] for idx in decoded_indices]
        return decoded_actions

    def step(self, action):
        # Décodage de l'action discrète
        actions = self.decode_action(action)
        self.current_step += 1
        if self.current_step > self.max_steps:
            return self._next_observation(), 0, True, False, {}

        current_prices = {}
        for i, ticker in enumerate(self.tickers):
            # Correction FutureWarning : accès direct à la colonne 'Close' pour obtenir une valeur scalaire
            current_prices[ticker] = float(self.commodity_data[ticker]['Close'].iloc[self.current_step])
            act = actions[i]

        if act > 0: # Achat
            shares_to_buy = int(self.balance * act / current_prices[ticker])
            cost = shares_to_buy * current_prices[ticker]
            transaction_cost = cost * self.transaction_cost_percent
            self.balance -= (cost + transaction_cost)
            self.shares_held[ticker] += shares_to_buy
        elif act < 0: # Vente
            shares_to_sell = int(self.shares_held[ticker] * abs(act))
            sale = shares_to_sell * current_prices[ticker]
            transaction_cost = sale * self.transaction_cost_percent
            self.balance += (sale - transaction_cost)
            self.shares_held[ticker] -= shares_to_sell
            self.total_shares_sold[ticker] += shares_to_sell
            self.total_sales_value[ticker] += sale

        self.net_worth = self.balance + sum(self.shares_held[ticker] * current_prices[ticker] for ticker in self.tickers)
        self.max_net_worth = max(self.net_worth, self.max_net_worth)
        reward = self.net_worth - self.initial_balance
        done = self.net_worth <= 0 or self.current_step >= self.max_steps

        return self._next_observation(), reward, done, False, {}

    def render(self, mode='human'):
        profit = self.net_worth - self.initial_balance
        print(f"Step: {self.current_step}")
        print(f"Balance: {self.balance:.2f}")
        for ticker in self.tickers:
            print(f"{ticker} Shares held: {self.shares_held[ticker]}")
            print(f"Net worth: {self.net_worth:.2f} | Profit: {profit:.2f}")

    def close(self):
        pass


In [21]:
# Entraînement avec Double DQN (via DQN de Stable Baselines3) en utilisant cuda
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

# Création des environnements d'entraînement, de validation et de test
env_train = DummyVecEnv([lambda: CommodityTradingEnv(training_data, transaction_cost_percent=0.005)])
env_val = DummyVecEnv([lambda: CommodityTradingEnv(validation_data, transaction_cost_percent=0.005)])
env_test = DummyVecEnv([lambda: CommodityTradingEnv(test_data, transaction_cost_percent=0.005)])

# Création du modèle DQN avec GPU (cuda)
model = DQN("MlpPolicy", env_train, verbose=1, tensorboard_log="./dqn_tensorboard/", device="cuda")

# Boucle d'entraînement avec sauvegarde du meilleur modèle (selon la récompense cumulée sur validation)
best_val_reward = -np.inf
best_model_path = "best_dqn_model.zip"
num_epochs = 200
timesteps_per_epoch = 1000

for epoch in range(num_epochs):
    print(f"\n=== Epoch {epoch+1}/{num_epochs} ===")
    model.learn(total_timesteps=timesteps_per_epoch, reset_num_timesteps=False)
    # Évaluation sur l'environnement de validation (un épisode)
    val_obs = env_val.reset() # Modification : on récupère directement l'observation
    done = False
    cumulative_reward = 0
    while not done:
        action, _ = model.predict(val_obs)
        val_obs, reward, done, info = env_val.step(action)
        cumulative_reward += reward[0]
        print(f"Récompense cumulée sur validation : {cumulative_reward}")
        if cumulative_reward > best_val_reward:
            best_val_reward = cumulative_reward
            model.save(best_model_path)
            print(f"Nouveau meilleur modèle sauvegardé (reward = {best_val_reward})")


Using cpu device

=== Epoch 1/200 ===


ImportError: Trying to log data to tensorboard but tensorboard is not installed.

In [None]:
# Test du meilleur modèle sur l'environnement de test
best_model = DQN.load(best_model_path, env=env_test)
obs = env_test.reset() # Modification : récupération directe de l'observation
done = False
test_cumulative_reward = 0
while not done:
    action, _ = best_model.predict(obs)
    obs, reward, done, info = env_test.step(action)
    test_cumulative_reward += reward[0]
    print("\nRécompense cumulée sur test :", test_cumulative_reward)


In [None]:
# (Optionnel) Visualisation de l'évolution du Net Worth sur le set de test
def test_agent(env, model, n_tests=1000, visualize=False):
    metrics = {
    'steps': [],
    'balances': [],
    'net_worths': [],
    'shares_held': {ticker: [] for ticker in env.envs[0].commodity_data.keys()}
    }
    obs = env.reset() # Modification ici également
    for i in range(n_tests):
        metrics['steps'].append(i)
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        if visualize:
            env.envs[0].render()
            metrics['balances'].append(env.envs[0].balance)
            metrics['net_worths'].append(env.envs[0].net_worth)
        for ticker in env.envs[0].commodity_data.keys():
            metrics['shares_held'][ticker].append(env.envs[0].shares_held[ticker])
        if done:
            obs = env.reset() # Modification ici
    return metrics

In [None]:
test_metrics = test_agent(env_test, best_model, n_tests=1000, visualize=False)

plt.figure(figsize=(12,6))
plt.plot(test_metrics['steps'], test_metrics['net_worths'], label="Net Worth")
plt.title("Évolution du Net Worth sur le set de Test")
plt.xlabel("Étapes")
plt.ylabel("Net Worth")
plt.legend()
plt.show()