In [1]:
import requests
import pandas as pd
import ta
import time
from datetime import datetime, timedelta

# === CONFIGURATION ===
api_key = 'f5754d3325dc4149ba98663c7dd9821e'
symbol = 'XAU/USD'
interval = '5min'
start_date = '2024-01-01'
end_date = '2024-06-30'
future_horizon = 1

# === T√©l√©chargement d'une page ===
def fetch_data_page(start_date, end_date):
    url = 'https://api.twelvedata.com/time_series'
    params = {
        'symbol': symbol,
        'interval': interval,
        'start_date': start_date,
        'end_date': end_date,
        'apikey': api_key,
        'format': 'JSON',
        'order': 'ASC',
        'timezone': 'UTC',
        'outputsize': 5000
    }
    response = requests.get(url, params=params)
    data = response.json()
    if 'values' in data:
        return pd.DataFrame(data['values'])
    else:
        print("Erreur:", data)
        return None

# === Boucle historique ===
all_data = pd.DataFrame()
current_start = datetime.strptime(start_date, "%Y-%m-%d")
final_end = datetime.strptime(end_date, "%Y-%m-%d")

print("üì• T√©l√©chargement des donn√©es en 5 min par batch...")

while current_start < final_end:
    current_end = current_start + timedelta(days=10)
    if current_end > final_end:
        current_end = final_end

    print(f" - {current_start.date()} -> {current_end.date()}")
    df_page = fetch_data_page(current_start.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))
    if df_page is not None and not df_page.empty:
        all_data = pd.concat([all_data, df_page])

    current_start = current_end + timedelta(days=1)
    time.sleep(10)

# === Nettoyage ===
if all_data.empty:
    print("‚ùå ERREUR : aucune donn√©e t√©l√©charg√©e.")
    exit()

print("‚úÖ Donn√©es t√©l√©charg√©es. Nettoyage...")

all_data['datetime'] = pd.to_datetime(all_data['datetime'])
all_data = all_data.sort_values('datetime').reset_index(drop=True)

for col in ['open', 'high', 'low', 'close']:
    all_data[col] = all_data[col].astype(float)

# === Indicateurs techniques ===
print("‚úÖ Calcul des indicateurs techniques...")

close_series = all_data['close']
high_series = all_data['high']
low_series = all_data['low']

all_data['rsi'] = ta.momentum.RSIIndicator(close_series, window=10).rsi()
all_data['ema_9'] = close_series.ewm(span=9, adjust=False).mean()
all_data['ema_21'] = close_series.ewm(span=21, adjust=False).mean()
all_data['ema_distance'] = abs(all_data['ema_9'] - all_data['ema_21'])

macd_calc = ta.trend.MACD(close_series)
all_data['macd_line'] = macd_calc.macd()

atr_indicator = ta.volatility.AverageTrueRange(high_series, low_series, close_series, window=14)
all_data['atr'] = atr_indicator.average_true_range()

all_data['volatility_close_std'] = close_series.rolling(window=10).std()
all_data['ema_9_slope'] = all_data['ema_9'].diff(3)
all_data['ema_21_slope'] = all_data['ema_21'].diff(3)

# ‚úÖ Nouvelle target : pr√©dire la CLOSE dans 12 heures
print(f"‚úÖ Calcul de la target future_close √† {future_horizon} bougies...")
all_data['future_close'] = all_data['close'].shift(-future_horizon)

# Nettoyage final
all_data = all_data.dropna()

print(all_data[['datetime', 'close', 'future_close']].head())

# ‚úÖ Sauvegarde CSV
all_data.to_csv('gold_features_hourly_future_close.csv', index=False)
print("‚úÖ Donn√©es sauvegard√©es dans gold_features_hourly_future_close.csv")


üì• T√©l√©chargement des donn√©es en 5 min par batch...
 - 2024-01-01 -> 2024-01-11
 - 2024-01-12 -> 2024-01-22
 - 2024-01-23 -> 2024-02-02
 - 2024-02-03 -> 2024-02-13
 - 2024-02-14 -> 2024-02-24
 - 2024-02-25 -> 2024-03-06
 - 2024-03-07 -> 2024-03-17
 - 2024-03-18 -> 2024-03-28
 - 2024-03-29 -> 2024-04-08
 - 2024-04-09 -> 2024-04-19
 - 2024-04-20 -> 2024-04-30
 - 2024-05-01 -> 2024-05-11
 - 2024-05-12 -> 2024-05-22
 - 2024-05-23 -> 2024-06-02
 - 2024-06-03 -> 2024-06-13
 - 2024-06-14 -> 2024-06-24
 - 2024-06-25 -> 2024-06-30
‚úÖ Donn√©es t√©l√©charg√©es. Nettoyage...
‚úÖ Calcul des indicateurs techniques...
‚úÖ Calcul de la target future_close √† 1 bougies...
              datetime       close  future_close
25 2024-01-02 01:05:00  2067.61011    2069.34009
26 2024-01-02 01:10:00  2069.34009    2068.34009
27 2024-01-02 01:15:00  2068.34009    2068.73999
28 2024-01-02 01:20:00  2068.73999    2068.62012
29 2024-01-02 01:25:00  2068.62012    2067.71997
‚úÖ Donn√©es sauvegard√©es dans gold

In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# === 1Ô∏è‚É£ Chargement des donn√©es ===
df = pd.read_csv('gold_features_hourly_future_close.csv')
print("‚úÖ Donn√©es charg√©es")
print(df.head())

# === 2Ô∏è‚É£ S√©lection des features ===
features = [
    'rsi',
    'ema_9', 'ema_21', 'ema_distance',
    'macd_line',
    'atr',
    'volatility_close_std',
    'ema_9_slope', 'ema_21_slope'
]

X = df[features]
y = df['future_close']

# === 3Ô∏è‚É£ Split ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)
print("‚úÖ Split termin√©")

# === 4Ô∏è‚É£ Entra√Ænement ===
model = RandomForestRegressor(
    n_estimators=200,
    max_depth=12,
    random_state=42,
    n_jobs=-1
)
model.fit(X_train, y_train)
print("‚úÖ Mod√®le entra√Æn√©")

# === 5Ô∏è‚É£ √âvaluation ===
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("\n‚úÖ Metrics:")
print(f" - MSE : {mse:.4f}")
print(f" - RMSE: {rmse:.4f}")
print(f" - R2  : {r2:.4f}")

# === 6Ô∏è‚É£ Sauvegarde du mod√®le
with open('gold_future_close_regressor.pkl', 'wb') as f:
    pickle.dump(model, f)
print("‚úÖ Mod√®le sauvegard√© sous gold_future_close_regressor.pkl")


‚úÖ Donn√©es charg√©es
              datetime        open         high         low       close  \
0  2024-01-02 01:05:00  2066.98999  2069.739990  2066.98999  2067.61011   
1  2024-01-02 01:10:00  2067.55005  2069.610110  2067.40991  2069.34009   
2  2024-01-02 01:15:00  2069.46997  2069.889890  2067.73999  2068.34009   
3  2024-01-02 01:20:00  2068.34009  2069.010010  2067.42993  2068.73999   
4  2024-01-02 01:25:00  2068.55005  2069.070068  2068.32007  2068.62012   

         rsi        ema_9       ema_21  ema_distance  macd_line       atr  \
0  74.858231  2065.766109  2065.168574      0.597536   0.382136  1.443656   
1  81.484355  2066.480906  2065.547802      0.933103   0.660521  1.497695   
2  69.688226  2066.852742  2065.801647      1.051096   0.791329  1.544281   
3  71.520167  2067.230192  2066.068769      1.161423   0.916698  1.546838   
4  70.108962  2067.508178  2066.300710      1.207468   0.994912  1.489921   

   volatility_close_std  ema_9_slope  ema_21_slope  future_clos