In [1]:
import ccxt
import datetime
import pandas as pd
from time import sleep

In [64]:
class BinanceDataPuller:

    """Binance data puller"""

    def __init__(self, 
                 symbol='BTC/USDT', 
                 timeframe='1h'):
        self.exchange = ccxt.binance()
        self.exchange.proxies = {
                            'http': 'socks5h://127.0.0.1:7890',
                            'https': 'socks5h://127.0.0.1:7890'
                        }
        self.markets = self.exchange.load_markets()
        self.symbol = symbol
        self.timeframe = timeframe


    def get_ohlcv(self, start, end):
        """Fetch OHLCV data from Binance"""
        if self.exchange.has['fetchOHLCV']:
            data = []
            limit = 1000
            start_time = int(start.timestamp() * 1000)
            end_time = int(end.timestamp() * 1000)

            while start_time < end_time:
                # 设置获取时间段的起止时间
                fetch_since = start_time
                # print(fetch_since)
                print(datetime.datetime.fromtimestamp(fetch_since / 1000).strftime('%Y-%m-%d %H:%M:%S'))
                fetch_until = min(start_time + limit * 1000 * self.exchange.parse_timeframe(self.timeframe), end_time)

                # 获取数据
                raw_data = self.exchange.fetch_ohlcv(self.symbol, self.timeframe, fetch_since, limit)

                # 处理数据
                for row in raw_data:
                    timestamp = int(row[0])
                    dt = datetime.datetime.utcfromtimestamp(timestamp / 1000)
                    data.append([dt, row[1], row[2], row[3], row[4], row[5]])

                # 更新起始时间
                start_time = fetch_until

                # 等待一段时间，避免过于频繁访问API
                sleep(self.exchange.rateLimit / 1000)

            # 将数据转换为DataFrame
            data = pd.DataFrame(data, columns=['datetime', 'open', 'high', 'low', 'close', 'volume'])
            data['datetime'] = pd.to_datetime(data['datetime'], unit='ms')
            data = data.set_index('datetime')
            
        return data
    
    def get_trades(self, start, end):
        """Fetch trades data from Binance"""
        if self.exchange.has['fetchTrades']:
            data = []
            limit = 1000
            start_time = int(start.timestamp() * 1000)
            end_time = int(end.timestamp() * 1000)

            while start_time < end_time:
                # 设置获取时间段的起止时间
                fetch_since = start_time
                # print(fetch_since)
                print(datetime.datetime.fromtimestamp(fetch_since / 1000).strftime('%Y-%m-%d %H:%M:%S'))

                # 获取数据
                raw_data = self.exchange.fetch_trades(self.symbol, fetch_since, limit)

                # 处理数据
                for row in raw_data:
                    timestamp = int(row['info']['T'])
                    dt = datetime.datetime.utcfromtimestamp(timestamp / 1000)
                    data.append([dt, row['info']['p'], row['info']['q'], row['info']['m']])

                # 更新起始时间
                start_time = int(raw_data[-1]['info']['T'])

                # 等待一段时间，避免过于频繁访问API
                sleep(self.exchange.rateLimit / 1000)

            # 将数据转换为DataFrame
            data = pd.DataFrame(data, columns=['datetime', 'price', 'amount', 'side'])
            data['datetime'] = pd.to_datetime(data['datetime'], unit='ms')
            data = data.set_index('datetime')
            
        return data
    
    def get_tickers(self):
        """Fetch tickers data from Binance"""
        print(self.symbol)
        if self.exchange.has['fetchTickers']:
            d = self.exchange.fetch_ticker(self.symbol)
            return d

In [66]:
btc = BinanceDataPuller('BTC/USDT', '1h')
d = btc.get_tickers()
print(len(d))

BTC/USDT
20


In [55]:
import datetime
btc = BinanceDataPuller('BTC/USDT', '1h')
start = datetime.datetime(2023, 5, 1, 0, 0)
end = datetime.datetime(2023, 5, 2, 0, 0)

data1 = btc.get_trades(start, end)
data1.to_csv('trades_230517.csv')


2023-05-01 00:00:00
2023-05-01 00:00:36
2023-05-01 00:01:07
2023-05-01 00:01:19
2023-05-01 00:01:41
2023-05-01 00:02:11
2023-05-01 00:02:30
2023-05-01 00:03:06
2023-05-01 00:03:15
2023-05-01 00:03:28
2023-05-01 00:03:44
2023-05-01 00:04:10
2023-05-01 00:04:29
2023-05-01 00:05:06
2023-05-01 00:05:23
2023-05-01 00:06:06
2023-05-01 00:07:13
2023-05-01 00:08:35
2023-05-01 00:09:13
2023-05-01 00:09:47
2023-05-01 00:10:35
2023-05-01 00:11:29
2023-05-01 00:12:13
2023-05-01 00:13:20
2023-05-01 00:13:52
2023-05-01 00:14:21
2023-05-01 00:14:47
2023-05-01 00:15:18
2023-05-01 00:16:15
2023-05-01 00:16:51
2023-05-01 00:17:50
2023-05-01 00:18:44
2023-05-01 00:19:52
2023-05-01 00:21:17
2023-05-01 00:22:19
2023-05-01 00:23:14
2023-05-01 00:24:07
2023-05-01 00:25:03
2023-05-01 00:25:38
2023-05-01 00:26:56
2023-05-01 00:27:37
2023-05-01 00:27:46
2023-05-01 00:28:03
2023-05-01 00:28:39
2023-05-01 00:29:12
2023-05-01 00:29:42
2023-05-01 00:30:26
2023-05-01 00:31:20
2023-05-01 00:32:17
2023-05-01 00:33:30


KeyboardInterrupt: 

In [42]:
data1.describe()

Unnamed: 0,price,amount,side
count,1059000.0,1059000.0,1059000
unique,111258.0,61085.0,1
top,28600.0,0.00064,True
freq,1788.0,58744.0,1059000
