In [12]:
%pip install ccxt pandas pandas_ta

Note: you may need to restart the kernel to use updated packages.


### Imports

In [13]:
import time
from datetime import datetime, timedelta

import ccxt
import pandas as pd
import pandas_ta as pta
import ta

### Retrieve Online Data

In [14]:
def fetch_ohlcv_between_dates(
    symbol: str,
    timeframe: str,
    start_date: str,
    end_date: str,
    exchange=None,
    limit: int = 1000,
) -> pd.DataFrame:
    """Download all OHLCV candles between two dates, paginating as needed.

    Args:
        symbol: Market symbol forwarded to ``exchange.fetch_ohlcv``
            (e.g. ``'ETH/EUR'``).
        timeframe: Candle interval forwarded to ``exchange.fetch_ohlcv``
            (e.g. ``'4h'``).
        start_date: ISO 8601 start of the range, inclusive.
        end_date: ISO 8601 end of the range, inclusive.
        exchange: Object exposing ``parse8601``, ``fetch_ohlcv`` and
            ``rateLimit``. When omitted, a ``ccxt.binance`` client with
            ``enableRateLimit`` switched on is created.
        limit: Maximum number of candles requested per call.

    Returns:
        DataFrame indexed by ``timestamp`` (timezone-naive datetimes built
        from the exchange's epoch milliseconds) with the columns ``open``,
        ``high``, ``low``, ``close`` and ``volume``, restricted to
        ``[start_date, end_date]``.

    Raises:
        ValueError: If ``exchange.parse8601`` cannot parse either date.
    """
    if exchange is None:
        exchange = ccxt.binance({
            'enableRateLimit': True
        })

    start_ms = exchange.parse8601(start_date)
    end_ms = exchange.parse8601(end_date)
    if start_ms is None or end_ms is None:
        # Fail early with a clear message instead of a TypeError on `None <= None`.
        raise ValueError(
            f"Could not parse date range: start_date={start_date!r}, end_date={end_date!r}"
        )

    since = start_ms
    all_ohlcv = []

    print("📥 Téléchargement des bougies en cours...")

    # `<=` because the end bound is inclusive: a range where start == end
    # must still request the candle sitting exactly on that instant.
    while since <= end_ms:
        batch = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=limit)
        if not batch:
            break
        all_ohlcv.extend(batch)

        last_timestamp = batch[-1][0]
        if last_timestamp <= since:
            # No forward progress (single candle at `since`, or an exchange
            # that ignored `since`): stop instead of looping forever.
            break
        since = last_timestamp + 1  # avoid requesting the same candle again

        if since <= end_ms:
            time.sleep(exchange.rateLimit / 1000)  # respect the rate limit between pages

    df = pd.DataFrame(
        all_ohlcv,
        columns=['timestamp', 'open', 'high', 'low', 'close', 'volume']
    )
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df = df.set_index('timestamp')
    # Drop candles served more than once across pages.
    df = df[~df.index.duplicated(keep='first')]

    # Build the filter bounds from the same epoch-millisecond values used for
    # fetching, so inputs carrying a UTC offset (e.g. "+02:00") are compared on
    # the same clock as the candle timestamps.
    start = pd.to_datetime(start_ms, unit='ms')
    end = pd.to_datetime(end_ms, unit='ms')
    df = df[(df.index >= start) & (df.index <= end)]

    print(f"✅ {len(df)} bougies récupérées entre {start_date} et {end_date}")
    return df


### Create DataFrame

In [17]:
# Pull ETH/EUR 4-hour candles for 2025-01-01 through 2025-05-05. No exchange
# is passed, so the function falls back to its default client.
df = fetch_ohlcv_between_dates(
    'ETH/EUR',
    '4h',
    start_date='2025-01-01T00:00:00Z',
    end_date='2025-05-05T00:00:00Z',
)

# Show the most recent rows as a quick sanity check of the download.
df.tail()

📥 Téléchargement des bougies en cours...
✅ 745 bougies récupérées entre 2025-01-01T00:00:00Z et 2025-05-05T00:00:00Z


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-05-04 08:00:00,1631.76,1631.91,1615.0,1619.93,599.9547
2025-05-04 12:00:00,1619.64,1625.74,1612.43,1623.22,509.4224
2025-05-04 16:00:00,1622.65,1625.09,1615.4,1620.79,412.3683
2025-05-04 20:00:00,1620.9,1632.77,1595.22,1599.42,1214.4687
2025-05-05 00:00:00,1599.06,1607.91,1573.07,1585.12,1402.5132


### Add technical indicators to improve training

In [19]:
# Feature engineering: append technical-indicator columns to `df`.
# `ta` is imported in the notebook's import cell. Every look-back window is
# written out explicitly so the parameters are visible to the reader; the
# values match the defaults documented by the `ta` library.

# RSI over 14 periods
df['rsi'] = ta.momentum.RSIIndicator(df['close'], window=14).rsi()

# MACD line and its signal line (26/12 EMAs, 9-period signal)
macd = ta.trend.MACD(df['close'], window_slow=26, window_fast=12, window_sign=9)
df['macd'] = macd.macd()
df['macd_signal'] = macd.macd_signal()

# EMA and SMA
df['ema_20'] = ta.trend.EMAIndicator(df['close'], window=20).ema_indicator()
df['sma_50'] = ta.trend.SMAIndicator(df['close'], window=50).sma_indicator()

# Bollinger Bands: 20-period moving average, bands at 2 standard deviations
boll = ta.volatility.BollingerBands(df['close'], window=20, window_dev=2)
df['bollinger_m'] = boll.bollinger_mavg()
df['bollinger_h'] = boll.bollinger_hband()
df['bollinger_l'] = boll.bollinger_lband()

# ADX over 14 periods
df['adx'] = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=14).adx()

# OBV
df['obv'] = ta.volume.OnBalanceVolumeIndicator(df['close'], df['volume']).on_balance_volume()

df.tail()

Unnamed: 0_level_0,open,high,low,close,volume,rsi,macd,macd_signal,ema_20,sma_50,bollinger_m,bollinger_h,bollinger_l,adx,obv
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2025-05-04 08:00:00,1631.76,1631.91,1615.0,1619.93,599.9547,52.212564,8.097296,9.775744,1619.120096,1601.5984,1624.433,1643.622518,1605.243482,11.944796,-15075.6027
2025-05-04 12:00:00,1619.64,1625.74,1612.43,1623.22,509.4224,53.461405,7.490697,9.318735,1619.510563,1602.3376,1625.5435,1641.47685,1609.61015,11.115343,-14566.1803
2025-05-04 16:00:00,1622.65,1625.09,1615.4,1620.79,412.3683,52.372741,6.736232,8.802234,1619.632414,1603.2762,1625.109,1641.063198,1609.154802,10.345137,-14978.5486
2025-05-04 20:00:00,1620.9,1632.77,1595.22,1599.42,1214.4687,43.905283,4.363631,7.914514,1617.707422,1603.5492,1622.954,1640.484451,1605.423549,10.866209,-16193.0173
2025-05-05 00:00:00,1599.06,1607.91,1573.07,1585.12,1402.5132,39.323673,1.314287,6.594468,1614.603858,1603.3046,1620.603,1644.153237,1597.052763,12.351968,-17595.5305


### Verification and filtering

In [23]:
# Inspect the frame's structure and the per-column NaN counts before cleaning.
# `display` is needed because only a cell's last expression is rendered
# automatically; a bare `df.isna().sum()` mid-cell shows nothing.
df.info()
display(df.isna().sum())

# Drop every row that still contains a NaN. Rebinding instead of
# `inplace=True` avoids mutating the frame shown by earlier cells.
df = df.dropna()
assert not df.isna().any().any(), "NaN values remain after dropna()"

df.head()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 696 entries, 2025-01-09 04:00:00 to 2025-05-05 00:00:00
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   open         696 non-null    float64
 1   high         696 non-null    float64
 2   low          696 non-null    float64
 3   close        696 non-null    float64
 4   volume       696 non-null    float64
 5   rsi          696 non-null    float64
 6   macd         696 non-null    float64
 7   macd_signal  696 non-null    float64
 8   ema_20       696 non-null    float64
 9   sma_50       696 non-null    float64
 10  bollinger_m  696 non-null    float64
 11  bollinger_h  696 non-null    float64
 12  bollinger_l  696 non-null    float64
 13  adx          696 non-null    float64
 14  obv          696 non-null    float64
dtypes: float64(15)
memory usage: 87.0 KB


Unnamed: 0_level_0,open,high,low,close,volume,rsi,macd,macd_signal,ema_20,sma_50,bollinger_m,bollinger_h,bollinger_l,adx,obv
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2025-01-09 04:00:00,3223.81,3242.34,3188.17,3199.21,208.9777,27.571689,-67.489326,-44.385096,3327.12409,3391.8488,3377.8605,3673.135531,3082.585469,49.66024,-1824.9093
2025-01-09 08:00:00,3196.94,3230.31,3174.58,3205.29,431.7211,28.870805,-68.359493,-49.179975,3315.520844,3391.278,3360.732,3654.313252,3067.150748,49.920375,-1393.1882
2025-01-09 12:00:00,3205.82,3241.44,3115.71,3225.0,686.5435,33.062381,-66.689915,-52.681963,3306.899811,3391.237,3345.2765,3633.212449,3057.340551,50.629763,-706.6447
2025-01-09 16:00:00,3225.65,3233.64,3101.83,3105.77,603.9169,23.890807,-74.133066,-56.972184,3287.744591,3388.8268,3325.149,3620.856088,3029.441912,51.387097,-1310.5616
2025-01-09 20:00:00,3104.62,3151.03,3068.78,3132.68,688.5189,28.698339,-76.973108,-60.972369,3272.976535,3386.803,3303.0005,3586.442153,3019.558847,52.31473,-622.0427


### Save to CSV

In [24]:
# Persist the feature table to df.csv in the current working directory.
# NOTE(review): index=False leaves the DataFrame index out of the file. If the
# index still holds the candle timestamps at this point, they are not saved —
# confirm that this is intended.
df.to_csv(path_or_buf='df.csv', index=False)
