# 🏦 Projet Fil Rouge – GBP/USD Trading System
## Vue d'ensemble des données et résultats

Ce notebook présente :
1. **Données brutes M1** – Exploration des CSV source
2. **T02 – Agrégation M15** – Résultat de l'agrégation
3. **T04 – Analyse exploratoire** – Distribution, volatilité, ACF, ADF
4. **T06 – Baselines** – Comparaison des stratégies de référence
5. **T08 – RL** – Environnement de trading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy import stats as sp_stats
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Global plotting defaults shared by every figure in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('deep')
plt.rcParams.update({
    'figure.figsize': (14, 6),
    'figure.dpi': 120,
})

# The project root is taken to be the parent of the current working
# directory; every data path below is built from it.
PROJECT_ROOT = Path('.').resolve().parent
print(f'Projet : {PROJECT_ROOT}')

---
## 1. 📂 Données brutes M1
Exploration des fichiers CSV source (HISTDATA)

In [None]:
# Load the raw one-minute (M1) CSV export of each year and show a sample.
m1_files = {
    2022: PROJECT_ROOT / 'HISTDATA_COM_MT_GBPUSD_M12022' / 'DAT_MT_GBPUSD_M1_2022.csv',
    2023: PROJECT_ROOT / 'HISTDATA_COM_MT_GBPUSD_M12023' / 'DAT_MT_GBPUSD_M1_2023.csv',
    2024: PROJECT_ROOT / 'HISTDATA_COM_MT_GBPUSD_M12024' / 'DAT_MT_GBPUSD_M1_2024.csv',
}

# The files are read with header=None, so column names are supplied here.
cols = ['date', 'time', 'open', 'high', 'low', 'close', 'volume']

for year, path in m1_files.items():
    df = pd.read_csv(path, header=None, names=cols)
    print(f'\nüìÖ {year} : {len(df):,} lignes M1')
    display(df.head(5))
    # First and last values of the date column, in file order.
    dates = df['date']
    print(f'   P√©riode : {dates.iloc[0]} ‚Üí {dates.iloc[-1]}')

In [None]:
# Descriptive statistics of the raw M1 bars, using 2022 as the example year.
df_m1 = pd.read_csv(m1_files[2022], header=None, names=cols)
print('üìä Statistiques M1 (2022) :')
# Only the numeric price/volume columns are summarised.
m1_numeric = df_m1[['open', 'high', 'low', 'close', 'volume']]
display(m1_numeric.describe().round(6))

---
## 2. 📊 T02 – Agrégation M1 → M15
Résultat de l'agrégation en bougies 15 minutes

In [None]:
# Load the 15-minute candles, one DataFrame per year, indexed by the
# parsed `timestamp` column.
m15 = {}
for year in [2022, 2023, 2024]:
    path = PROJECT_ROOT / 'data' / 'm15' / f'GBPUSD_M15_{year}.csv'
    candles = pd.read_csv(path, parse_dates=['timestamp'], index_col='timestamp')
    m15[year] = candles
    print(f'‚úÖ {year} : {len(candles):,} bougies M15')

# Preview the 2022 frame: column names, then the first ten candles.
candles_2022 = m15[2022]
print(f'\nColonnes : {list(candles_2022.columns)}')
display(candles_2022.head(10))

In [None]:
# Descriptive statistics of the M15 candles, one table per data split.
split_by_year = {2022: 'Train', 2023: 'Validation', 2024: 'Test'}
for year in [2022, 2023, 2024]:
    label = split_by_year[year]
    print(f'\nüìä {year} ({label}) :')
    yearly_summary = m15[year].describe()
    display(yearly_summary.round(6))

In [None]:
# Plot the M15 close price, one panel per year. The x-axes are not shared,
# so each panel spans only its own year.
fig, axes = plt.subplots(3, 1, figsize=(16, 10), sharex=False)
colors = {2022: 'steelblue', 2023: 'darkorange', 2024: 'green'}
labels = {2022: 'Train', 2023: 'Validation', 2024: 'Test'}

for ax, year in zip(axes, [2022, 2023, 2024]):
    candles = m15[year]
    ax.plot(candles.index, candles['close_15m'],
            linewidth=0.5, color=colors[year])
    ax.set_title(f'{year} ({labels[year]}) ‚Äì {len(candles):,} bougies',
                 fontsize=12, fontweight='bold')
    ax.set_ylabel('GBP/USD')
    ax.grid(True, alpha=0.3)

plt.suptitle('Prix GBP/USD ‚Äì Close M15', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

---
## 3. 🔍 T04 – Analyse exploratoire
Distribution, volatilité, analyse horaire, autocorrélation, test ADF

In [None]:
# Stack the three yearly M15 frames (each tagged with its year) into one
# time-sorted frame, then derive simple and log returns from the close.
yearly_frames = [m15[y].assign(year=y) for y in [2022, 2023, 2024]]
df_all = pd.concat(yearly_frames).sort_index()

close = df_all['close_15m']
df_all['return_15m'] = close.pct_change()
df_all['log_return'] = np.log(close / close.shift(1))
print(f'Total : {len(df_all):,} bougies M15')

In [None]:
# 3.1 Distribution of returns
returns = df_all['return_15m'].dropna()
mean_ret, std_ret = returns.mean(), returns.std()

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
ax_hist, ax_year, ax_qq = axes

# Pooled histogram, overlaid with a normal density using the sample
# mean and standard deviation.
ax_hist.hist(returns, bins=150, density=True, alpha=0.7, color='steelblue', edgecolor='white', linewidth=0.3)
x = np.linspace(returns.min(), returns.max(), 200)
ax_hist.plot(x, sp_stats.norm.pdf(x, mean_ret, std_ret), 'r-', lw=2, label='Loi normale')
ax_hist.set_title('Distribution des rendements M15', fontweight='bold')
ax_hist.legend()

# One semi-transparent histogram per year on the same axes.
for year in [2022, 2023, 2024]:
    r = df_all.loc[df_all['year'] == year, 'return_15m'].dropna()
    ax_year.hist(r, bins=100, density=True, alpha=0.5, label=f'{year}')
ax_year.set_title('Distribution par ann√©e', fontweight='bold')
ax_year.legend()

# QQ plot of the returns against a normal distribution.
sp_stats.probplot(returns, dist='norm', plot=ax_qq)
ax_qq.set_title('QQ Plot vs Normale', fontweight='bold')

plt.tight_layout()
plt.show()

# Summary moments and the Jarque-Bera normality test (5% threshold).
print(f'Moyenne   : {mean_ret:.8f}')
print(f'Std       : {std_ret:.6f}')
print(f'Skewness  : {returns.skew():.4f}')
print(f'Kurtosis  : {returns.kurtosis():.4f}')
jb, pval = sp_stats.jarque_bera(returns)
print(f'Jarque-Bera : stat={jb:.2f}, p={pval:.2e} ‚Üí {"NON normal" if pval < 0.05 else "Normal"}')

In [None]:
# 3.2 Volatility over time
fig, axes = plt.subplots(2, 1, figsize=(16, 8))
ax_rolling, ax_monthly = axes
ret_15m = df_all['return_15m']

# Standard deviation of returns over a rolling window of 20 candles.
rolling_vol = ret_15m.rolling(20).std()
ax_rolling.plot(df_all.index, rolling_vol, lw=0.5, color='steelblue', alpha=0.8)
ax_rolling.fill_between(df_all.index, 0, rolling_vol, alpha=0.15, color='steelblue')
ax_rolling.set_title('Volatilit√© glissante (rolling std 20)', fontweight='bold')
ax_rolling.set_ylabel('√âcart-type')

# Standard deviation per month-end bucket; bars are coloured by year,
# with any year other than 2022/2023 drawn in green.
monthly_vol = ret_15m.resample('ME').std()
year_colors = {2022: 'steelblue', 2023: 'darkorange'}
colors_bar = [year_colors.get(d.year, 'green') for d in monthly_vol.index]
ax_monthly.bar(monthly_vol.index, monthly_vol.values, width=25, color=colors_bar, alpha=0.7)
ax_monthly.set_title('Volatilit√© mensuelle', fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# 3.3 Hour-of-day analysis
# NOTE(review): the axes are labelled UTC and the session bands use fixed
# hours, but nothing in this notebook converts time zones; confirm that the
# M15 index really is UTC.
df_all['hour'] = df_all.index.hour
returns_by_hour = df_all.groupby('hour')['return_15m']

fig, axes = plt.subplots(1, 2, figsize=(16, 5))
ax_mean, ax_std = axes

# Mean return per hour, scaled by 10,000 (basis points); positive hours
# are drawn in green, the others in red.
hourly_ret = returns_by_hour.mean()
colors_h = ['green' if hour_mean > 0 else 'red' for hour_mean in hourly_ret]
ax_mean.bar(hourly_ret.index, hourly_ret.values * 10000, color=colors_h, alpha=0.7)
ax_mean.set_title('Rendement moyen par heure (bps)', fontweight='bold')
ax_mean.set_xlabel('Heure (UTC)')
ax_mean.set_xticks(range(24))
ax_mean.axhline(y=0, color='black', lw=0.5)

# Standard deviation of returns per hour, same scaling.
hourly_vol = returns_by_hour.std()
ax_std.bar(hourly_vol.index, hourly_vol.values * 10000, color='steelblue', alpha=0.7)
ax_std.set_title('Volatilit√© par heure (bps)', fontweight='bold')
ax_std.set_xlabel('Heure (UTC)')
ax_std.set_xticks(range(24))

# Shade the two trading sessions on both panels; the legend is only shown
# on the first one.
for ax in axes:
    ax.axvspan(7, 16, alpha=0.05, color='blue', label='Londres')
    ax.axvspan(13, 21, alpha=0.05, color='red', label='New York')
ax_mean.legend(fontsize=8)

plt.tight_layout()
plt.show()

In [None]:
# 3.4 Autocorrelation
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
(ax_acf, ax_pacf), (ax_squared, ax_absolute) = axes

ret = df_all['return_15m'].dropna()

# Top row: ACF and PACF of the returns themselves.
plot_acf(ret, lags=50, ax=ax_acf, title='ACF ‚Äì Rendements')
plot_pacf(ret, lags=50, ax=ax_pacf, title='PACF ‚Äì Rendements', method='ywm')

# Bottom row: ACF of squared and absolute returns.
plot_acf(ret**2, lags=50, ax=ax_squared, title='ACF ‚Äì Rendements¬≤ (effet ARCH)')
plot_acf(ret.abs(), lags=50, ax=ax_absolute, title='ACF ‚Äì |Rendements| (persistance vol.)')

plt.suptitle('Autocorr√©lation GBP/USD M15', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()

In [None]:
# 3.5 Augmented Dickey-Fuller test on prices, returns and log-returns.
banner = '=' * 60
print(banner)
print('TEST ADF (Augmented Dickey-Fuller)')
print('H0 : Racine unitaire (non stationnaire)')
print(banner)

adf_inputs = [
    ('Prix (close)', 'close_15m'),
    ('Rendements', 'return_15m'),
    ('Log-rendements', 'log_return'),
]
for name, column in adf_inputs:
    s = df_all[column].dropna()
    # The first two items of the adfuller result are the statistic and p-value.
    adf_stat, p_value = adfuller(s, autolag='AIC')[:2]
    # A p-value below 5% is reported as stationary.
    if p_value < 0.05:
        stationary = '‚úÖ STATIONNAIRE'
    else:
        stationary = '‚ùå NON STATIONNAIRE'
    print(f'\n{name}:')
    print(f'  ADF stat : {adf_stat:.4f}')
    print(f'  p-value  : {p_value:.2e}')
    print(f'  ‚Üí {stationary}')

---
## 4. 📈 T06 – Baselines
Comparaison des stratégies de référence : Random, Buy & Hold, EMA Cross + RSI

In [None]:
import sys
# Put the project root first on the import path so the project's own
# packages can be imported; this must run before the import below.
sys.path.insert(0, str(PROJECT_ROOT))
from evaluation.backtester import Backtester

# One backtester instance, reused for every baseline and every year.
# NOTE(review): 0.0002 is presumably a proportional cost per trade (2 bps);
# confirm against the Backtester implementation.
bt = Backtester(transaction_cost=0.0002)

def add_indicators(df):
    """Return a copy of ``df`` with EMA(20), EMA(50) and RSI(14) columns.

    Reads the ``close_15m`` column and adds ``ema_20``, ``ema_50`` and
    ``rsi_14`` in that order. The input frame is not modified. The RSI
    uses plain 14-candle rolling means of gains and losses, so its first
    13 values are NaN.
    """
    enriched = df.copy()
    close = enriched['close_15m']

    # Recursive (adjust=False) exponential moving averages of the close.
    enriched['ema_20'] = close.ewm(span=20, adjust=False).mean()
    enriched['ema_50'] = close.ewm(span=50, adjust=False).mean()

    # Candle-to-candle change; non-gains (including the leading NaN) count
    # as 0.0 in the gain series, and likewise non-losses in the loss series.
    change = close.diff()
    avg_gain = change.where(change > 0, 0.0).rolling(14).mean()
    avg_loss = (-change.where(change < 0, 0.0)).rolling(14).mean()

    relative_strength = avg_gain / avg_loss
    enriched['rsi_14'] = 100 - (100 / (1 + relative_strength))
    return enriched

def strategy_ema_rsi(df):
    """Generate EMA-cross signals filtered by RSI.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``ema_20``, ``ema_50`` and ``rsi_14`` columns.

    Returns
    -------
    pandas.Series
        Integer signals with a default ``RangeIndex``: ``1`` when
        ``ema_20 > ema_50`` and ``rsi_14 < 70``, ``-1`` when
        ``ema_20 < ema_50`` and ``rsi_14 > 30``, otherwise ``0``. The first
        row and any row with a missing indicator are always ``0``.
    """
    ema_fast = df['ema_20'].to_numpy(dtype=float)
    ema_slow = df['ema_50'].to_numpy(dtype=float)
    rsi = df['rsi_14'].to_numpy(dtype=float)

    signals = np.zeros(len(df), dtype=int)
    # Every comparison involving NaN is False, so rows with a missing
    # indicator match neither mask and stay at 0.
    signals[(ema_fast > ema_slow) & (rsi < 70)] = 1
    signals[(ema_fast < ema_slow) & (rsi > 30)] = -1
    # The first candle never carries a signal.
    signals[:1] = 0
    return pd.Series(signals)

# Backtest the three baselines on each year; results are stored as
# all_results[year][strategy_name].
all_results = {}
for year in [2022, 2023, 2024]:
    df = add_indicators(m15[year])
    prices = df['close_15m']
    n = len(df)

    # The generator is re-seeded for every year, so the random baseline
    # draws from the same seed each time.
    rng = np.random.RandomState(42)
    random_signals = pd.Series(rng.choice([1,-1,0], size=n))
    # A single 1 on the first candle followed by zeros.
    hold_signals = pd.Series(np.concatenate([[1], np.zeros(n-1, dtype=int)]))

    all_results[year] = {
        'Random': bt.run(prices, random_signals),
        'Buy & Hold': bt.run(prices, hold_signals),
        'EMA + RSI': bt.run(prices, strategy_ema_rsi(df)),
    }

print('‚úÖ Baselines calcul√©es')

In [None]:
# Equity curves of every baseline, one panel per year, rebased to 100.
fig, axes = plt.subplots(1, 3, figsize=(20, 5))
colors_s = {'Random': '#e74c3c', 'Buy & Hold': '#3498db', 'EMA + RSI': '#2ecc71'}
labels_y = {2022: 'Train', 2023: 'Validation', 2024: 'Test'}

for ax, year in zip(axes, [2022, 2023, 2024]):
    for name, res in all_results[year].items():
        eq = res['equity_curve']
        # Divide by the first value so every curve starts at 100.
        rebased = eq / eq[0] * 100
        ax.plot(rebased, lw=1.2, label=name, color=colors_s[name])
    # Dashed reference line at the starting level.
    ax.axhline(y=100, color='black', lw=0.5, ls='--', alpha=0.5)
    ax.set_title(f'{year} ({labels_y[year]})', fontweight='bold')
    ax.set_ylabel('Equity (base 100)')
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)

plt.suptitle('Courbes d\'equity ‚Äì Strat√©gies Baseline', fontweight='bold', fontsize=14)
plt.tight_layout()
plt.show()

In [None]:
# Summary table: one row per (year, strategy) with rounded metrics.
def _summary_row(year, name, metrics):
    """Build one summary-table row from a backtest's metrics dict."""
    return {
        'Ann√©e': year, 'P√©riode': labels_y[year], 'Strat√©gie': name,
        'Profit (%)': round(metrics['profit_cumule_pct'], 2),
        'Max DD (%)': round(metrics['max_drawdown_pct'], 2),
        'Sharpe': round(metrics['sharpe'], 3),
        'Profit Factor': round(metrics['profit_factor'], 3),
        'Win Rate (%)': round(metrics['win_rate'], 1),
        'Trades': metrics['nb_trades']
    }

rows = [
    _summary_row(year, name, res['metrics'])
    for year in [2022, 2023, 2024]
    for name, res in all_results[year].items()
]

df_summary = pd.DataFrame(rows)
display(df_summary)

---
## 5. 🤖 T08 – Environnement RL
Test de l'environnement de trading Gymnasium

In [None]:
from training.trading_env import TradingEnv

# Build the environment on a copy of the 2022 candles, so the frame kept
# in `m15` is not the object handed to the environment.
candles_for_env = m15[2022].copy()
env = TradingEnv(candles_for_env, window_size=20)

# Report the spaces and feature set exposed by the environment.
obs_space = env.observation_space
print(f'Observation space : {obs_space.shape}')
print(f'Action space      : {env.action_space}')
print(f'Features          : {env.feature_columns}')
print(f'Nb features       : {env.n_features}')

In [None]:
# Run one full episode with uniformly random actions drawn from {0, 1, 2},
# recording the reward, equity and position after every step.
obs, info = env.reset(seed=42)
rewards, equities, positions = [], [info['equity']], [0]

rng = np.random.RandomState(42)
done = False
while not done:
    action = rng.randint(0, 3)
    obs, reward, terminated, truncated, info = env.step(action)
    rewards.append(reward)
    equities.append(info['equity'])
    positions.append(info['position'])
    # The episode ends on either termination or truncation.
    done = terminated or truncated

# Performance summary as reported by the environment itself.
perf = env.get_performance_summary()
print(f'√âpisode termin√© :')
print(f'  Profit      : {perf["profit_pct"]:+.2f}%')
print(f'  Equity      : {perf["final_equity"]:,.2f}')
print(f'  Max DD      : {perf["max_drawdown_pct"]:.2f}%')
print(f'  Nb Trades   : {perf["nb_trades"]}')

# Three stacked panels on a shared step axis: equity, cumulative reward
# and position.
fig, axes = plt.subplots(3, 1, figsize=(16, 10), sharex=True)
ax_equity, ax_reward, ax_position = axes

ax_equity.plot(equities, lw=0.8, color='steelblue')
ax_equity.set_title('Equity curve (agent al√©atoire)', fontweight='bold')
ax_equity.set_ylabel('Capital')

ax_reward.plot(np.cumsum(rewards), lw=0.8, color='darkorange')
ax_reward.set_title('Reward cumul√©e', fontweight='bold')
ax_reward.set_ylabel('Reward')

# Position trace, shaded green where positive and red where negative.
steps = range(len(positions))
is_long = [p > 0 for p in positions]
is_short = [p < 0 for p in positions]
ax_position.plot(positions, lw=0.3, color='gray', alpha=0.5)
ax_position.fill_between(steps, positions, alpha=0.3,
                         where=is_long, color='green', label='Long')
ax_position.fill_between(steps, positions, alpha=0.3,
                         where=is_short, color='red', label='Short')
ax_position.set_title('Positions', fontweight='bold')
ax_position.set_ylabel('Position')
ax_position.legend()

plt.tight_layout()
plt.show()

---
## 📋 Résumé du projet

| Tâche | Description | Status |
|-------|-------------|--------|
| T01 | Import M1 + contrôle régularité | ✅ (binôme) |
| T02 | Agrégation M1 → M15 | ✅ |
| T04 | Analyse exploratoire + ADF/ACF | ✅ |
| T06 | Baseline règles + backtest | ✅ |
| T08 | RL (env + reward + training) | ✅ |