In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
import yfinance
import warnings
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.tree import export_graphviz
import vectorbt as vbt
import anywidget
warnings.filterwarnings('ignore')

# === Base dataset === #
# Pipe-separated file. A forward slash is used in the path because the former
# 'data\df.csv' literal contained the invalid escape sequence "\d" (Python
# emits a warning for it) and only resolved on Windows.
data = pd.read_csv('data/df.csv', sep='|')
data['date'] = pd.to_datetime(data['date'])

# === US Dollar Index (ticker DX-Y.NYB): daily close from Yahoo Finance === #
DXY = yfinance.download('DX-Y.NYB', start='1999-01-04', end='2025-09-19')
DXY = DXY.reset_index()[['Date', 'Close']]
DXY.columns = ['date', 'DXY']  # rename so 'date' can serve as the merge key
# Left join: every row of `data` is kept; dates without a quote get NaN.
data = data.merge(DXY, on='date', how='left')

# === Monthly GDP series === #
GDPM = pd.read_csv('GDP Monthly.csv', sep=',')
GDPM['date'] = pd.to_datetime(GDPM['timestamp'])
GDPM['year'] = GDPM['date'].dt.year
GDPM['month'] = GDPM['date'].dt.month
# First difference of the GDP column from one row to the next.
GDPM['CroissM'] = GDPM['EIA/GDPQXUS/USA'].diff()

# The daily rows are matched to the monthly rows through (year, month).
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month

# Both frames own a 'date' column, so the merge suffixes them as
# 'date_x' (daily date, kept) and 'date_y' (GDP row date, dropped).
data = data.merge(GDPM, on=['year', 'month'], how='left')
data = data.drop(columns=['timestamp', 'date_y', 'year', 'month'])

# Carry the last available DXY quote forward over the missing dates.
# Assigning the result back replaces the chained
# `fillna(method='ffill', inplace=True)`, whose `method=` argument is
# deprecated and whose in-place effect on a column selection is not reliable.
data['DXY'] = data['DXY'].ffill()
print(data.columns)
data = data.set_index('date_x')

  data = pd.read_csv('data\df.csv',sep='|')
[*********************100%***********************]  1 of 1 completed

Index(['date_x', 'close', 'volume', 'high', 'low', 'pe',
       'num_daily_adv_minus_decl', 'mov_avg_20d', 'best_eps', 'dvd_sh_last',
       'rsi_3d', 'rsi_9d', 'rsi_14d', 'rsi_30d', 'mov_avg_10d', 'mov_avg_30d',
       'mov_avg_50d', 'pb', 'pib_pct', 'pib', 'vix', 'i', 'i_future',
       'inflation', 'gold', 'gold_pct', 'gold_volume', 'brent', 'brent_pct',
       'brent_volume', 'cible', 'returns', 'vol', 'score', 'momentum_10d',
       'macd', 'macd_signal', 'bb_upper', 'bb_lower', 'mov_avg_100d',
       'mov_avg_200d', 'mov_avg_10_50_diff', 'mov_avg_20_50_diff',
       'close_minus_10d', 'close_minus_20d', 'close_minus_30d',
       'close_minus_50d', 'close_minus_100d', 'close_minus_200d', 'DXY',
       'EIA/GDPQXUS/USA', 'CroissM'],
      dtype='object')





In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

# === Walk-forward settings === #
window_years = 3  # years of history used to fit each model (e.g. 3 years)
trading_days_per_year = 250
window_size = trading_days_per_year * window_years  # ~750 rows
start_date, end_date = '2023-01-15', '2025-09-18'
df = data.copy()

# 'cible' is the label; every other column is used as a predictor.
target = df['cible']
features = df.drop(columns=['cible'])

# === Accumulators, one entry per evaluated date === #
predictions, true_values, prediction_dates = [], [], []
signals = []            # buy signal (True) or not (False)
portfolio_returns = []  # daily portfolio returns (not populated in this loop)

# One model is refitted from scratch for every business day of the range,
# using only the `window_size` rows that precede that day.
# NOTE(review): `last_price` below is the previous row's 'cible'. If 'cible'
# is a forward-looking label, that value (and the last training labels) may
# not be observable yet on `current_date` -- confirm the label horizon.
for current_date in pd.date_range(start=start_date, end=end_date, freq='B'):
    # Business days that are absent from the data index are skipped.
    if current_date not in df.index:
        continue

    end_train_idx = df.index.get_loc(current_date)
    start_train_idx = end_train_idx - window_size
    if start_train_idx < 0:
        continue  # not enough history

    # Training window: the rows strictly before the current date.
    train_rows = slice(start_train_idx, end_train_idx)
    X_train, y_train = features.iloc[train_rows], target.iloc[train_rows]

    # Test sample: the single row of the current date.
    X_test = features.loc[[current_date]]
    y_test = target.loc[current_date]

    # Fit, then predict the current date.
    model = RandomForestRegressor(n_estimators=250, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)[0]

    # Buy when the prediction is above the last label of the training window.
    last_price = y_train.iloc[-1]
    signal = y_pred > last_price

    # Record everything for this date.
    prediction_dates.append(current_date)
    true_values.append(y_test)
    predictions.append(y_pred)
    signals.append(signal)

    print(f"Date {current_date.date()} : entraînement modèle, prediction et backtesting...")

# Dates for the returns series: one day later than the signals, hence the
# first prediction date is dropped.
returns_dates = prediction_dates[1:]

# === Results === #
# One row per evaluated date: realised label, model output and buy signal.
result_columns = {
    'cible': true_values,
    'prediction': predictions,
    'Signal achat': signals,
}
results = pd.DataFrame(result_columns, index=prediction_dates)

# Persist the table as a pipe-separated file.
results.to_csv('rf.csv', sep='|')

# === Backtest with vectorbt === #
# Reference price for the signals: the 'cible' value of the row just before
# each prediction date. `shift(1)` moves the series down by one row, so after
# reindexing, each date carries the previous row's value -- the same value the
# training loop uses as `last_price`. Without the shift the prediction would be
# compared with the realised value of the very date it forecasts (look-ahead),
# and the backtest would disagree with the stored 'Signal achat' column.
previous_price = df['cible'].shift(1).reindex(results.index)

# Long when the model expects a rise from the reference price, short when it
# expects a fall. A comparison with NaN is False, so no signal fires on a date
# that has no reference price.
long_entries = results['prediction'] > previous_price
long_exits = results['prediction'] < previous_price

short_entries = results['prediction'] < previous_price
short_exits = results['prediction'] > previous_price

# Price series at which vectorbt simulates the orders (same dates as results).
price = df['cible'].reindex(results.index)

portfolio = vbt.Portfolio.from_signals(
    close=price,
    entries=long_entries,
    exits=long_exits,
    short_entries=short_entries,
    short_exits=short_exits,
)

portfolio.plot().show()
print(portfolio.stats())

# === Evaluation === #
# Mean absolute error between the realised label and the model output.
mae = mean_absolute_error(results['cible'], results['prediction'])
print(f"\n✅ MAE global ({start_date} ➜ {end_date}): {mae:.2f}\n")

print(results.head())

# # === Portfolio performance (disabled) === #
# portfolio_returns = np.array(portfolio_returns)
# cumulative_returns = np.cumprod(1 + portfolio_returns) - 1

# print(f"Retour cumulé du portefeuille: {cumulative_returns[-1]:.2%}")

# === Plot: realised label vs. prediction === #
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(results.index, results['cible'], label='cible', color='blue')
ax.plot(results.index, results['prediction'], label='prediction', color='red', alpha=0.7)
ax.set_title("Modèle Random Forest avec fenêtre glissante")
ax.set_xlabel("Date")
ax.set_ylabel("S&P 500 (ou cible)")
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()



Date 2023-01-17 : entraînement modèle, prediction et backtesting...
Date 2023-01-18 : entraînement modèle, prediction et backtesting...
Date 2023-01-19 : entraînement modèle, prediction et backtesting...
Date 2023-01-20 : entraînement modèle, prediction et backtesting...
Date 2023-01-23 : entraînement modèle, prediction et backtesting...
Date 2023-01-24 : entraînement modèle, prediction et backtesting...
Date 2023-01-25 : entraînement modèle, prediction et backtesting...
Date 2023-01-26 : entraînement modèle, prediction et backtesting...
Date 2023-01-27 : entraînement modèle, prediction et backtesting...
Date 2023-01-30 : entraînement modèle, prediction et backtesting...
Date 2023-01-31 : entraînement modèle, prediction et backtesting...
Date 2023-02-01 : entraînement modèle, prediction et backtesting...
Date 2023-02-02 : entraînement modèle, prediction et backtesting...
Date 2023-02-03 : entraînement modèle, prediction et backtesting...
Date 2023-02-06 : entraînement modèle, predictio

            Valeur réelle  Prédiction  Signal achat
2025-01-02        5975.38   5939.7612         False
2025-01-03        5909.03   5980.9155          True
2025-01-06        5918.25   5949.0268          True
2025-01-07        5827.04   5932.6917          True
2025-01-08        5836.22   5956.4753          True
