In [3]:

import requests
import pandas as pd
import ta
import time
from datetime import datetime, timedelta

# === CONFIGURATION ===
api_key = 'f5754d3325dc4149ba98663c7dd9821e'  # Remplace par ta clé TwelveData
symbol = 'XAU/USD'
interval = '1h'
start_date = '2024-01-01'
end_date = '2024-06-30'

# === Helper : télécharger une page de données ===
def fetch_data_page(start_date, end_date):
    url = 'https://api.twelvedata.com/time_series'
    params = {
        'symbol': symbol,
        'interval': interval,
        'start_date': start_date,
        'end_date': end_date,
        'apikey': api_key,
        'format': 'JSON',
        'order': 'ASC',
        'timezone': 'UTC',
        'outputsize': 5000
    }

    response = requests.get(url, params=params)
    data = response.json()
    
    if 'values' in data:
        return pd.DataFrame(data['values'])
    else:
        print("Erreur:", data)
        return None

# === Boucle sur l'historique ===
all_data = pd.DataFrame()
current_start = datetime.strptime(start_date, "%Y-%m-%d")
final_end = datetime.strptime(end_date, "%Y-%m-%d")

print("📥 Téléchargement des données en 1h par batch...")

while current_start < final_end:
    current_end = current_start + timedelta(days=10)
    if current_end > final_end:
        current_end = final_end

    print(f" - Téléchargement de {current_start.date()} à {current_end.date()}...")

    df_page = fetch_data_page(current_start.strftime("%Y-%m-%d"), current_end.strftime("%Y-%m-%d"))
    if df_page is not None and not df_page.empty:
        all_data = pd.concat([all_data, df_page])

    current_start = current_end + timedelta(days=1)
    time.sleep(1)  # Respecter limite API

# === Nettoyage ===
if all_data.empty:
    print("❌ ERREUR : aucune donnée téléchargée.")
    exit()

print("✅ Données téléchargées. Nettoyage...")

all_data['datetime'] = pd.to_datetime(all_data['datetime'])
all_data = all_data.sort_values('datetime')
all_data = all_data.reset_index(drop=True)

# Assurer les colonnes numériques
for col in ['open', 'high', 'low', 'close']:
    all_data[col] = all_data[col].astype(float)

# === Calcul des indicateurs ===
print("✅ Calcul des indicateurs techniques...")

close_series = all_data['close']

all_data['rsi'] = ta.momentum.RSIIndicator(close_series, window=10).rsi()
all_data['ema_9'] = close_series.ewm(span=9, adjust=False).mean()
all_data['ema_21'] = close_series.ewm(span=21, adjust=False).mean()
macd_calc = ta.trend.MACD(close_series)
all_data['macd_line'] = macd_calc.macd()

# === Créer la target ===
all_data['target'] = (all_data['close'].shift(-1) > all_data['close']).astype(int)

# Supprimer les NA
all_data = all_data.dropna()

print(all_data.head())

# === Sauvegarde CSV ===
all_data.to_csv('gold_features_hourly.csv', index=False)
print("✅ Données sauvegardées dans gold_features_hourly.csv")


📥 Téléchargement des données en 1h par batch...
 - Téléchargement de 2024-01-01 à 2024-01-11...
 - Téléchargement de 2024-01-12 à 2024-01-22...
 - Téléchargement de 2024-01-23 à 2024-02-02...
 - Téléchargement de 2024-02-03 à 2024-02-13...
 - Téléchargement de 2024-02-14 à 2024-02-24...
 - Téléchargement de 2024-02-25 à 2024-03-06...
 - Téléchargement de 2024-03-07 à 2024-03-17...
 - Téléchargement de 2024-03-18 à 2024-03-28...
 - Téléchargement de 2024-03-29 à 2024-04-08...
Erreur: {'code': 429, 'message': 'You have run out of API credits for the current minute. 9 API credits were used, with the current limit being 8. Wait for the next minute or consider switching to a higher tier plan at https://twelvedata.com/pricing', 'status': 'error'}
 - Téléchargement de 2024-04-09 à 2024-04-19...
Erreur: {'code': 429, 'message': 'You have run out of API credits for the current minute. 10 API credits were used, with the current limit being 8. Wait for the next minute or consider switching to a h

In [4]:

df=pd.read_csv("gold_features_hourly.csv", sep=",",header=0, index_col=0)
df = df.dropna()
df.head(100)

Unnamed: 0_level_0,open,high,low,close,rsi,ema_9,ema_21,macd_line,target
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-01-03 01:00:00,2060.139890,2062.61011,2059.63989,2062.46997,46.534320,2061.784961,2064.079983,-1.938432,1
2024-01-03 02:00:00,2062.639890,2065.21997,2061.65991,2064.76001,52.071399,2062.379971,2064.141804,-1.619057,1
2024-01-03 03:00:00,2064.739990,2065.65991,2063.42993,2065.15991,53.015517,2062.935959,2064.234359,-1.318482,0
2024-01-03 04:00:00,2065.070068,2065.63989,2064.51001,2064.92993,52.356498,2063.334753,2064.297593,-1.086310,0
2024-01-03 05:00:00,2064.909910,2066.01001,2063.57007,2064.13989,49.984852,2063.495780,2064.283256,-0.955052,1
...,...,...,...,...,...,...,...,...,...
2024-01-09 09:00:00,2034.319950,2037.10999,2033.20996,2035.48999,57.136717,2033.764183,2032.870686,0.173511,1
2024-01-09 10:00:00,2035.550050,2038.58997,2034.88000,2035.51001,57.183028,2034.113349,2033.110624,0.338420,0
2024-01-09 11:00:00,2035.459960,2035.73999,2033.79004,2034.93994,55.292931,2034.278667,2033.276926,0.418290,1
2024-01-09 12:00:00,2035.089966,2036.40002,2033.98999,2036.39001,59.112591,2034.700935,2033.559933,0.591774,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2323 entries, 2024-01-03 01:00:00 to 2024-06-28 20:00:00
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   open       2323 non-null   float64
 1   high       2323 non-null   float64
 2   low        2323 non-null   float64
 3   close      2323 non-null   float64
 4   rsi        2323 non-null   float64
 5   ema_9      2323 non-null   float64
 6   ema_21     2323 non-null   float64
 7   macd_line  2323 non-null   float64
 8   target     2323 non-null   int64  
dtypes: float64(8), int64(1)
memory usage: 181.5+ KB


In [7]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle

# ✅ 2. Préparer features et target
X = df[['rsi', 'ema_9', 'ema_21', 'macd_line']]
y = df['target']

# ✅ 3. Split Train/Test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ✅ 4. Entraîner RandomForest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# ✅ 5. Évaluer
y_pred = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n✅ Classification Report:\n", classification_report(y_test, y_pred))

# ✅ 6. Sauvegarder le modèle
with open('gold_signal_model.pkl', 'wb') as f:
    pickle.dump(model, f)

print("✅ Modèle sauvegardé dans gold_signal_model.pkl")

✅ Accuracy: 0.546236559139785

✅ Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.57      0.55       228
           1       0.56      0.52      0.54       237

    accuracy                           0.55       465
   macro avg       0.55      0.55      0.55       465
weighted avg       0.55      0.55      0.55       465

✅ Modèle sauvegardé dans gold_signal_model.pkl


In [8]:

# ✅ Charger le modèle
with open('gold_signal_model.pkl', 'rb') as f:
    model = pickle.load(f)

print("✅ Modèle chargé avec succès !")

# ✅ Exemples de nouvelles données (RSI, EMA9, EMA21, MACD)
nouvelle_observation = {
    'rsi': 56.92,
    'ema_9': 2411.01,
    'ema_21': 2397.23,
    'macd_line': 16.04
}

X_new = pd.DataFrame([nouvelle_observation])

# ✅ Faire la prédiction
prediction = model.predict(X_new)[0]
proba = model.predict_proba(X_new)[0][1]

# ✅ Afficher le résultat
print("✅ Prédiction (0 = baisse, 1 = hausse):", prediction)
print(f"✅ Probabilité de hausse : {proba*100:.2f}%")


✅ Modèle chargé avec succès !
✅ Prédiction (0 = baisse, 1 = hausse): 1
✅ Probabilité de hausse : 53.00%
