# Original code

In [None]:
import ccxt
from ccxt.base.errors import RequestTimeout
import pandas as pd
from datetime import datetime
from datetime import timedelta
import time


# Shared ccxt Binance client used by to_timestamp() and download() below.
binance = ccxt.binance()


def to_timestamp(dt):
    '''
    Convert a datetime into the timestamp format used by the exchange client
    @param dt: the datetime to convert
    @return: the result of binance.parse8601 applied to the ISO-8601 form of dt
    '''
    iso_string = dt.isoformat()
    return binance.parse8601(iso_string)


def download(symbol, start, end):
    '''
    Download all the transaction for a given symbol from the start date to the end date
    @param symbol: the symbol of the coin for which download the transactions (traded against BTC)
    @param start: the start timestamp from which download the transactions (inclusive)
    @param end: the end timestamp at which the download stops (exclusive)
    @return: a DataFrame with one row per trade and the columns
             symbol, timestamp, datetime, side, price, amount, btc_volume
    @raise RequestTimeout: if the exchange keeps timing out after all the retries
    '''

    columns = ['symbol', 'timestamp', 'datetime', 'side', 'price', 'amount', 'btc_volume']
    pair = symbol + '/BTC'
    ten_minutes = 60000 * 10
    max_attempts = 5      # total tries per request before giving up
    retry_delay = 5       # seconds to wait between tries

    records = []
    seen_ids = set()      # trade ids already stored, used to drop re-fetched trades
    since = start

    print('Downloading {} from {} to {}'.format(symbol, binance.iso8601(start), binance.iso8601(end)))

    while since < end:
        #print('since: ' + binance.iso8601(since)) #uncomment this line of code for verbose download
        for attempt in range(1, max_attempts + 1):
            try:
                orders = binance.fetch_trades(pair, since)
                break
            except RequestTimeout:
                # re-raise only once the retry budget is exhausted
                if attempt == max_attempts:
                    raise
                time.sleep(retry_delay)

        if not orders:
            # nothing traded in this window: jump ahead
            since += ten_minutes
            continue

        latest_ts = orders[-1]['timestamp']

        for trade in orders:
            # the last batch can run past the requested interval
            if trade['timestamp'] >= end:
                continue

            # the next request restarts at latest_ts, so the trades sitting on
            # that boundary come back a second time; skip the ones already stored
            trade_id = trade.get('id')
            if trade_id is not None:
                if trade_id in seen_ids:
                    continue
                seen_ids.add(trade_id)

            records.append({
                'symbol': trade['symbol'],
                'timestamp': trade['timestamp'],
                'datetime': trade['datetime'],
                'side': trade['side'],
                'price': trade['price'],
                'amount': trade['amount'],
                'btc_volume': float(trade['price']) * float(trade['amount']),
            })

        if since != latest_ts:
            since = latest_ts
        else:
            # the batch did not move the cursor forward: skip ahead to avoid looping forever
            since += ten_minutes

    # passing the columns keeps the header even when no trade was found
    return pd.DataFrame.from_records(records, columns=columns)


def download_binance(days_before=7, days_after=7):
    '''
    Download all the transactions for all the pumps in binance in a given interval
    and save them as one csv file per pump inside the data directory
    @param days_before: the number of days before the pump
    @param days_after: the number of days after the pump
    '''
    import os  # imported here because the import block of this cell does not provide it

    # to_csv does not create missing directories
    os.makedirs('data', exist_ok=True)

    pumps = pd.read_csv('pump_telegram.csv')
    binance_only = pumps[pumps['exchange'] == 'binance']

    for _, pump in binance_only.iterrows():
        symbol = pump['symbol']
        date = pump['date'] + ' ' + pump['hour']
        pump_time = datetime.strptime(date, "%Y-%m-%d %H:%M")
        before = to_timestamp(pump_time - timedelta(days=days_before))
        after = to_timestamp(pump_time + timedelta(days=days_after))
        # ':' is replaced because it is not a valid file name character on every platform
        out_path = 'data/{}_{}'.format(symbol, str(date).replace(':', '.') + '.csv')

        # skip the pumps already saved, so that an interrupted run can be resumed
        if os.path.exists(out_path):
            print(symbol)
            continue

        trades = download(symbol, before, after)
        trades.to_csv(out_path, index=False)


# Script entry point: fetch trades from 12 days before to 7 days after each pump.
if __name__ == '__main__':
    download_binance(days_before=12, days_after=7)


# Using a different API: OHLCV candles (fetch_ohlcv) instead of raw trades (fetch_trades)

In [15]:
import os
import time
import ccxt
import pandas as pd
from datetime import datetime, timedelta

In [16]:
# Rate-limited ccxt Binance client plus the candle settings read by fetch_ohlcv().
binance = ccxt.binance({
    "enableRateLimit": True,           # pace requests automatically
})
TIMEFRAME   = '1m'                     # 1-minute candles
TF_MS       = 60_000                   # ms in one TF bucket
MAX_LIMIT   = 1000                     # Binance max rows / fetch_ohlcv call


In [17]:

def to_ts(dt):
    """Convert a datetime to epoch milliseconds using the client's ISO-8601 parser."""
    iso_text = dt.isoformat()
    return binance.parse8601(iso_text)

def fetch_ohlcv(symbol, start_ts, end_ts):
    """
    Pull 1-minute candles from start_ts → end_ts (exclusive).

    symbol   : market pair passed to the exchange client, e.g. "BRD/BTC"
    start_ts : first candle timestamp to request, in epoch milliseconds
    end_ts   : upper bound in epoch milliseconds; candles at or after it are dropped

    Returns a DataFrame with timestamp, o/h/l/c/volume.
    The frame is empty (but keeps its columns) when the exchange returns nothing.
    """
    cols = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    rows, since = [], start_ts
    span = end_ts - start_ts
    print(f"\n→ {symbol}: downloading OHLCV {binance.iso8601(start_ts)} → {binance.iso8601(end_ts)}")
    while since < end_ts:
        batch = binance.fetch_ohlcv(symbol, timeframe=TIMEFRAME,
                                    since=since, limit=MAX_LIMIT)
        if not batch:
            break
        # the final batch usually runs past end_ts; keep the range end-exclusive
        rows.extend(candle for candle in batch if candle[0] < end_ts)
        next_since = batch[-1][0] + TF_MS    # next minute
        if next_since <= since:
            break                            # cursor did not advance: stop instead of spinning
        since = next_since
        reached = min(since, end_ts)         # clamp so progress never reads above 100%
        pct     = 100 * (reached - start_ts) / span
        print(f"  …{pct:5.1f}% ({binance.iso8601(reached)})")
        time.sleep(binance.rateLimit / 1000) # polite; enableRateLimit mostly handles this
    return pd.DataFrame(rows, columns=cols)



In [18]:
def download_binance(days_before=7, days_after=7, single_file=False, window_ms=60_000):
    """
    Pull OHLCV around pump-times from pump_telegram.csv.

    * days_before: days of candles to fetch before each pump
    * days_after:  days of candles to fetch after each pump
    * single_file: if True, write every pump into data/all_pumps_ohlcv.csv;
                   otherwise write one CSV per pump into data/
    * window_ms: pump flag half-width (default ±1 minute)

    Each saved row carries the candle columns plus:
      symbol          – coin symbol from the pump list
      pump_time       – announced pump datetime
      is_pump         – True where the candle timestamp equals the pump timestamp
      is_pump_window  – True where the candle lies within ±window_ms of the pump
    """
    os.makedirs('data', exist_ok=True)
    pumps      = pd.read_csv("pump_telegram.csv")
    pumps      = pumps[pumps['exchange'] == 'binance']
    all_frames = []

    for _, pump in pumps.iterrows():
        coin     = pump['symbol']
        pair     = f"{coin}/BTC"             # change to /USDT if needed
        dt_str   = f"{pump['date']} {pump['hour']}"
        pump_dt  = datetime.strptime(dt_str, "%Y-%m-%d %H:%M")
        pump_ts  = to_ts(pump_dt)
        start_ts = to_ts(pump_dt - timedelta(days=days_before))
        end_ts   = to_ts(pump_dt + timedelta(days=days_after))

        df = fetch_ohlcv(pair, start_ts, end_ts)

        # add labels & metadata
        df['symbol']          = coin
        df['pump_time']       = pump_dt
        df['is_pump']         = df['timestamp'] == pump_ts
        df['is_pump_window']  = df['timestamp'].between(pump_ts - window_ms,
                                                        pump_ts + window_ms)

        if single_file:
            all_frames.append(df)
        else:
            out = f"data/{coin}_{dt_str.replace(':','.')}.csv"
            df.to_csv(out, index=False)
            print(f"✓ saved {out}")

    if single_file:
        # pd.concat raises ValueError on an empty list, e.g. when the CSV has no Binance pumps
        if not all_frames:
            print("no Binance pumps found; nothing saved")
            return
        master = pd.concat(all_frames, ignore_index=True)
        master.to_csv("data/all_pumps_ohlcv.csv", index=False)
        print("✓ saved data/all_pumps_ohlcv.csv")

In [19]:
# Run the download: 12 days before to 7 days after each pump, one CSV per pump.
download_binance(days_before=12, days_after=7, single_file=False)



→ BRD/BTC: downloading OHLCV 2018-12-10T17:00:00.000Z → 2018-12-29T17:00:00.000Z
  …  3.7% (2018-12-11T09:40:00.000Z)
  …  7.3% (2018-12-12T02:20:00.000Z)
  … 11.0% (2018-12-12T19:00:00.000Z)
  … 14.6% (2018-12-13T11:40:00.000Z)
  … 18.3% (2018-12-14T04:20:00.000Z)
  … 21.9% (2018-12-14T21:00:00.000Z)
  … 25.6% (2018-12-15T13:40:00.000Z)
  … 29.2% (2018-12-16T06:20:00.000Z)
  … 32.9% (2018-12-16T23:00:00.000Z)
  … 36.5% (2018-12-17T15:40:00.000Z)
  … 40.2% (2018-12-18T08:20:00.000Z)
  … 43.9% (2018-12-19T01:00:00.000Z)
  … 47.5% (2018-12-19T17:40:00.000Z)
  … 51.2% (2018-12-20T10:20:00.000Z)
  … 54.8% (2018-12-21T03:00:00.000Z)
  … 58.5% (2018-12-21T19:40:00.000Z)
  … 62.1% (2018-12-22T12:20:00.000Z)
  … 65.8% (2018-12-23T05:00:00.000Z)
  … 69.4% (2018-12-23T21:40:00.000Z)
  … 73.1% (2018-12-24T14:20:00.000Z)
  … 76.8% (2018-12-25T07:00:00.000Z)
  … 80.4% (2018-12-25T23:40:00.000Z)
  … 84.1% (2018-12-26T16:20:00.000Z)
  … 87.7% (2018-12-27T09:00:00.000Z)
  … 91.4% (2018-12-28T01:40:00