#### Récupération des données

In [18]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta

# ---- CONFIGURATION ----
cryptos = ['BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'LTCUSDT', 'DOGEUSDT']
interval = '1h'
start_date = '2020-02-01'
end_date = '2025-05-01'
limit = 1000  # maximum autorisé par requête

def date_to_ms(date_str):
    dt = datetime.strptime(date_str, "%Y-%m-%d")
    return int(dt.timestamp() * 1000)

def fetch_binance_ohlc(symbol, interval, start_ms, end_ms):
    url = 'https://api.binance.com/api/v3/klines'
    all_data = []
    while start_ms < end_ms:
        params = {
            'symbol': symbol,
            'interval': interval,
            'startTime': start_ms,
            'endTime': end_ms,
            'limit': limit
        }
        response = requests.get(url, params=params)
        data = response.json()
        if not data:
            break
        all_data.extend(data)
        start_ms = data[-1][0] + 1
        time.sleep(0.3)  # évite le blocage par Binance
    return all_data

def process_ohlc(data, symbol):
    df = pd.DataFrame(data, columns=[
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_volume', 'nb_trades', 'taker_buy_base',
        'taker_buy_quote', 'ignore'
    ])
    df['datetime'] = pd.to_datetime(df['timestamp'], unit='ms')
    df['close'] = df['close'].astype(float)
    return df[['datetime', 'close']].rename(columns={'close': f'{symbol}_close'})

# ---- TÉLÉCHARGEMENT ----
start_ms = date_to_ms(start_date)
end_ms = date_to_ms(end_date)

merged_df = None
for symbol in cryptos:
    print(f"Téléchargement de {symbol}...")
    raw_data = fetch_binance_ohlc(symbol, interval, start_ms, end_ms)
    df = process_ohlc(raw_data, symbol)
    if merged_df is None:
        merged_df = df
    else:
        merged_df = pd.merge(merged_df, df, on='datetime', how='outer')

# ---- EXPORT ----
merged_df.sort_values('datetime', inplace=True)
merged_df.to_csv("crypto_hourly_prices_2020_2025.csv", index=False)
print("Fichier exporté : crypto_hourly_prices_2020_2025.csv")


Téléchargement de BTCUSDT...
Téléchargement de ETHUSDT...
Téléchargement de XRPUSDT...
Téléchargement de LTCUSDT...
Téléchargement de DOGEUSDT...
Fichier exporté : crypto_hourly_prices_2020_2025.csv


In [19]:
df_crypto = pd.read_csv("crypto_hourly_prices_2020_2025.csv")

In [20]:
df_crypto.shape

(45952, 6)

In [25]:
df_crypto.head(5)

Unnamed: 0,datetime,BTCUSDT_close,ETHUSDT_close,XRPUSDT_close,LTCUSDT_close,DOGEUSDT_close
0,2020-01-31 23:00:00,9352.89,179.99,0.23946,68.0,0.002378
1,2020-02-01 00:00:00,9383.07,181.09,0.24097,68.46,0.002388
2,2020-02-01 01:00:00,9429.86,183.76,0.24265,72.01,0.0024
3,2020-02-01 02:00:00,9441.88,183.88,0.24251,71.7,0.00241
4,2020-02-01 03:00:00,9432.33,183.51,0.24175,72.74,0.002401


In [24]:
df_crypto.tail(5)

Unnamed: 0,datetime,BTCUSDT_close,ETHUSDT_close,XRPUSDT_close,LTCUSDT_close,DOGEUSDT_close
45947,2025-04-30 18:00:00,94078.24,1782.48,2.1907,83.62,0.17158
45948,2025-04-30 19:00:00,94152.01,1786.69,2.1946,83.69,0.17226
45949,2025-04-30 20:00:00,94555.99,1794.01,2.1981,84.07,0.17347
45950,2025-04-30 21:00:00,94419.0,1792.52,2.2014,83.88,0.17256
45951,2025-04-30 22:00:00,94114.05,1795.39,2.1939,83.55,0.1724


In [21]:
df_crypto.isna().sum()

datetime          0
BTCUSDT_close     0
ETHUSDT_close     0
XRPUSDT_close     0
LTCUSDT_close     0
DOGEUSDT_close    0
dtype: int64

#### Normalisation des données 

In [28]:
import pandas as pd
import numpy as np

def compute_returns(df_prices):
    """
    À partir d’un DataFrame contenant les colonnes *_close, 
    calcule log-return, centered, standardized, abs-standardized pour chaque crypto.
    """
    df = df_prices.copy()
    crypto_cols = [col for col in df.columns if col.endswith('_close')]

    for col in crypto_cols:
        base = col.replace('_close', '')
        
        # Log-returns
        df[f'{base}_log_return'] = np.log(df[col] / df[col].shift(1))
        
        # Moyenne et écart-type (en excluant le premier NaN)
        r = df[f'{base}_log_return']
        mean_r = r.mean()
        std_r = r.std()
        
        # Centrage
        df[f'{base}_log_return_centered'] = r - mean_r
        
        # Standardisation classique
        df[f'{base}_log_return_std'] = (r - mean_r) / std_r
        
        # Valeur absolue (pour NNAR ou Kalman)
        df[f'{base}_abs_std_return'] = df[f'{base}_log_return_std'].abs()

    return df


In [30]:
# Supposons que tu as déjà un DataFrame df_crypto avec les colonnes :
# datetime, BTCUSDT_close, ETHUSDT_close, etc.

df_crypto = pd.read_csv("crypto_hourly_prices_2020_2025.csv", parse_dates=['datetime'])
df_cryto_prepared = compute_returns(df_crypto)

df_cryto_prepared.head(5)

Unnamed: 0,datetime,BTCUSDT_close,ETHUSDT_close,XRPUSDT_close,LTCUSDT_close,DOGEUSDT_close,BTCUSDT_log_return,BTCUSDT_log_return_centered,BTCUSDT_log_return_std,BTCUSDT_abs_std_return,...,XRPUSDT_log_return_std,XRPUSDT_abs_std_return,LTCUSDT_log_return,LTCUSDT_log_return_centered,LTCUSDT_log_return_std,LTCUSDT_abs_std_return,DOGEUSDT_log_return,DOGEUSDT_log_return_centered,DOGEUSDT_log_return_std,DOGEUSDT_abs_std_return
0,2020-01-31 23:00:00,9352.89,179.99,0.23946,68.0,0.002378,,,,,...,,,,,,,,,,
1,2020-02-01 00:00:00,9383.07,181.09,0.24097,68.46,0.002388,0.003222,0.003171,0.453242,0.453242,...,0.540048,0.540048,0.006742,0.006737,0.647038,0.647038,0.00449,0.004397,0.306001,0.306001
2,2020-02-01 01:00:00,9429.86,183.76,0.24265,72.01,0.0024,0.004974,0.004924,0.703722,0.703722,...,0.597325,0.597325,0.050555,0.050551,4.854713,4.854713,0.004679,0.004585,0.319107,0.319107
3,2020-02-01 02:00:00,9441.88,183.88,0.24251,71.7,0.00241,0.001274,0.001224,0.174875,0.174875,...,-0.054139,0.054139,-0.004314,-0.004319,-0.414754,0.414754,0.004366,0.004273,0.29738,0.29738
4,2020-02-01 03:00:00,9432.33,183.51,0.24175,72.74,0.002401,-0.001012,-0.001062,-0.151807,0.151807,...,-0.275919,0.275919,0.014401,0.014396,1.382557,1.382557,-0.003825,-0.003918,-0.272662,0.272662
