In [1]:
import requests
import pandas as pd
import ta
import time
import numpy as np
from datetime import datetime, timedelta

# === CONFIGURATION ===
api_key = 'f5754d3325dc4149ba98663c7dd9821e'
symbol = 'XAU/USD'
interval = '1h'
start_date = '2025-01-01'
end_date = '2025-07-01'

# === T√©l√©chargement ===
def fetch_data_page(start_date, end_date):
    url = 'https://api.twelvedata.com/time_series'
    params = {
        'symbol': symbol,
        'interval': interval,
        'start_date': start_date,
        'end_date': end_date,
        'apikey': api_key,
        'format': 'JSON',
        'order': 'ASC',
        'timezone': 'UTC',
        'outputsize': 5000
    }
    response = requests.get(url, params=params)
    data = response.json()
    if 'values' in data:
        return pd.DataFrame(data['values'])
    else:
        print("Erreur:", data)
        return None

# === Boucle pour historique ===
all_data = pd.DataFrame()
current_start = datetime.strptime(start_date, "%Y-%m-%d")
final_end = datetime.strptime(end_date, "%Y-%m-%d")

print("üì• T√©l√©chargement des donn√©es en 5min par batch...")

while current_start < final_end:
    current_end = current_start + timedelta(days=20)
    if current_end > final_end:
        current_end = final_end

    print(f" - {current_start.date()} -> {current_end.date()}")
    df_page = fetch_data_page(current_start.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))
    if df_page is not None and not df_page.empty:
        all_data = pd.concat([all_data, df_page])

    current_start = current_end + timedelta(days=1)
    time.sleep(10)

# === Nettoyage ===
if all_data.empty:
    print("‚ùå ERREUR : aucune donn√©e t√©l√©charg√©e.")
    exit()

print("‚úÖ Donn√©es t√©l√©charg√©es. Nettoyage...")
all_data['datetime'] = pd.to_datetime(all_data['datetime'])
all_data = all_data.sort_values('datetime').reset_index(drop=True)

# Colonnes num√©riques
for col in ['open', 'high', 'low', 'close']:
    all_data[col] = all_data[col].astype(float)

# === Indicateurs techniques ===
print("‚úÖ Calcul des indicateurs techniques...")

close_series = all_data['close']
high_series = all_data['high']
low_series = all_data['low']

# RSI
all_data['rsi'] = ta.momentum.RSIIndicator(close_series, window=10).rsi()

# EMA9 & EMA21
all_data['ema_9'] = close_series.ewm(span=9, adjust=False).mean()
all_data['ema_21'] = close_series.ewm(span=21, adjust=False).mean()

# MACD
macd_calc = ta.trend.MACD(close_series)
all_data['macd_line'] = macd_calc.macd()

# ‚úÖ ‚ûú Distance EMA
all_data['ema_diff'] = all_data['ema_9'] - all_data['ema_21']
all_data['ema_distance'] = abs(all_data['ema_diff'])

# ‚úÖ ‚ûú ATR (Average True Range)
atr_indicator = ta.volatility.AverageTrueRange(high_series, low_series, close_series, window=14)
all_data['atr'] = atr_indicator.average_true_range()

# ‚úÖ ‚ûú Volatilit√© locale : std des closes sur 10 p√©riodes
all_data['volatility_close_std'] = close_series.rolling(window=10).std()

# ‚úÖ ‚ûú Pente EMA (slope)
all_data['ema_9_slope'] = all_data['ema_9'].diff(3)
all_data['ema_21_slope'] = all_data['ema_21'].diff(3)

# === Cr√©er la target automatique : pr√©dire la hausse/baisse dans 12 bougies (~1h)
print("‚úÖ Calcul de la target pour hausse/baisse future...")

future_horizon = 1
threshold_up = 0.002
threshold_down = -0.002

# Calcul rendement futur
all_data['future_return'] = all_data['close'].shift(-future_horizon) / all_data['close'] - 1

# Label target
conditions = [
    (all_data['future_return'] > threshold_up),
    (all_data['future_return'] < threshold_down)
]
choices = [1, -1]
all_data['target'] = np.select(conditions, choices, default=0)

# === Nettoyage final
all_data = all_data.dropna()

# ‚úÖ Retirer les cas neutres (target == 0)
all_data = all_data[all_data['target'] != 0]

print(all_data.head())

# === Sauvegarde CSV ===
all_data.to_csv('gold_features_probabilite_hausse_baisse_filtered.csv', index=False)
print("‚úÖ Donn√©es sauvegard√©es dans gold_features_probabilite_hausse_baisse_filtered.csv")


üì• T√©l√©chargement des donn√©es en 5min par batch...
 - 2025-01-01 -> 2025-01-21
 - 2025-01-22 -> 2025-02-11
 - 2025-02-12 -> 2025-03-04
 - 2025-03-05 -> 2025-03-25
 - 2025-03-26 -> 2025-04-15
 - 2025-04-16 -> 2025-05-06
 - 2025-05-07 -> 2025-05-27
 - 2025-05-28 -> 2025-06-17
 - 2025-06-18 -> 2025-07-01
‚úÖ Donn√©es t√©l√©charg√©es. Nettoyage...
‚úÖ Calcul des indicateurs techniques...
‚úÖ Calcul de la target pour hausse/baisse future...
              datetime        open         high          low       close  \
30 2025-01-03 06:00:00  2658.67993  2660.379880  2656.840090  2659.77002   
36 2025-01-03 12:00:00  2657.68994  2658.340090  2654.939940  2655.40991   
57 2025-01-06 10:00:00  2633.15991  2635.070068  2631.020020  2633.85010   
59 2025-01-06 12:00:00  2645.61011  2647.850100  2642.060059  2645.56006   
60 2025-01-06 13:00:00  2645.63989  2648.889890  2622.060059  2624.14990   

          rsi        ema_9       ema_21  macd_line  ema_diff  ema_distance  \
30  64.207460  2659.

In [2]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils import class_weight

# === 1Ô∏è‚É£ Chargement des donn√©es ===
df = pd.read_csv('gold_features_probabilite_hausse_baisse_filtered.csv')
print("‚úÖ Donn√©es charg√©es")
print(df.head())

# === 2Ô∏è‚É£ S√©lection des features ===
features = [
    'rsi',
    'ema_9', 'ema_21', 'ema_distance',
    'macd_line',
    'atr',
    'volatility_close_std',
    'ema_9_slope', 'ema_21_slope'
]

X = df[features]
y = df['target']

# ‚úÖ V√©rification des classes
print("‚úÖ Distribution des classes:", y.value_counts())

# === 3Ô∏è‚É£ Encodage √©ventuel des classes (optionnel)
# Ex: -1 ‚Üí 0, 1 ‚Üí 1
y_encoded = y.replace({-1: 0, 1: 1})

# === 4Ô∏è‚É£ Calcul des poids des classes ===
cw = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_encoded),
    y=y_encoded
)
class_weights = {cls: weight for cls, weight in zip(np.unique(y_encoded), cw)}
print("‚úÖ Class Weights:", class_weights)

# === 5Ô∏è‚É£ Split des donn√©es ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)
print("‚úÖ Split termin√©")

# === 6Ô∏è‚É£ Entra√Ænement du mod√®le ===
model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    class_weight=class_weights,
    max_depth=12,
    n_jobs=-1
)
model.fit(X_train, y_train)
print("‚úÖ Mod√®le entra√Æn√©")

# === 7Ô∏è‚É£ √âvaluation ===
y_pred = model.predict(X_test)
print("\n‚úÖ Accuracy:", accuracy_score(y_test, y_pred))
print("\n‚úÖ Classification Report:\n", classification_report(y_test, y_pred, target_names=['BAISSE (-1)', 'HAUSSE (+1)']))

# === 8Ô∏è‚É£ Sauvegarde du mod√®le ===
with open('gold_binary_hausse_baisse_model.pkl', 'wb') as f:
    pickle.dump(model, f)

print("‚úÖ Mod√®le sauvegard√© sous gold_binary_hausse_baisse_model.pkl")


‚úÖ Donn√©es charg√©es
              datetime        open         high          low       close  \
0  2025-01-03 06:00:00  2658.67993  2660.379880  2656.840090  2659.77002   
1  2025-01-03 12:00:00  2657.68994  2658.340090  2654.939940  2655.40991   
2  2025-01-06 10:00:00  2633.15991  2635.070068  2631.020020  2633.85010   
3  2025-01-06 12:00:00  2645.61011  2647.850100  2642.060059  2645.56006   
4  2025-01-06 13:00:00  2645.63989  2648.889890  2622.060059  2624.14990   

         rsi        ema_9       ema_21  macd_line  ema_diff  ema_distance  \
0  64.207460  2659.477573  2653.824426   6.799714  5.653147      5.653147   
1  50.672909  2656.722095  2654.568010   3.655618  2.154084      2.154084   
2  40.351657  2634.376041  2638.929365  -4.478728 -4.553324      4.553324   
3  60.315772  2638.416662  2640.087571  -2.432451 -1.670909      1.670909   
4  34.386427  2635.563310  2638.638692  -3.394762 -3.075383      3.075383   

        atr  volatility_close_std  ema_9_slope  ema_21_sl