In [1]:
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path

import pandas as pd
import requests

def get_binance_futures_klines(symbol, interval, start_time, end_time):
    """Fetch one page of klines from the Binance futures klines endpoint.

    Args:
        symbol: Trading pair symbol, e.g. "BTCUSDT".
        interval: Kline interval string, e.g. "1h".
        start_time: ``datetime`` marking the start of the requested range.
            It is converted to epoch milliseconds with ``.timestamp()``,
            so a naive value is interpreted in the local timezone.
        end_time: ``datetime`` marking the end of the requested range,
            converted the same way as ``start_time``.

    Returns:
        The decoded JSON payload: a list of kline rows. At most 1000 rows
        are requested per call.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
        ValueError: If the payload is not a list (for example an error
            object), so callers never mistake it for kline rows.
    """
    endpoint = "https://fapi.binance.com/fapi/v1/klines"

    params = {
        "symbol": symbol,
        "interval": interval,
        "startTime": int(start_time.timestamp() * 1000),
        "endTime": int(end_time.timestamp() * 1000),
        "limit": 1000 #max
    }

    # Without a timeout a stalled connection would block the download forever.
    response = requests.get(endpoint, params=params, timeout=30)

    if not response.ok:
        # Keep the response body in the message: it carries the API's own
        # explanation of what went wrong.
        raise requests.HTTPError(
            f"Binance klines request failed with status "
            f"{response.status_code}: {response.text}",
            response=response,
        )

    data = response.json()

    if not isinstance(data, list):
        raise ValueError(f"Unexpected klines payload for {symbol}: {data!r}")

    return data

def process_klines_data(klines):
    """Convert raw kline rows into a DataFrame of prices and per-candle change.

    Args:
        klines: Sequence of 12-element rows, in the column order listed
            in ``raw_fields`` below.

    Returns:
        DataFrame with columns ``date``, ``open``, ``close``, ``high``,
        ``low``, ``volume`` and ``p_n_l``, where ``p_n_l`` is the
        open-to-close change of each candle in percent.
    """
    raw_fields = [
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_volume', 'trades_count',
        'taker_buy_volume', 'taker_buy_quote_volume', 'ignore',
    ]
    frame = pd.DataFrame(klines, columns=raw_fields)

    # The first field is an epoch value in milliseconds.
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')

    # Price and volume fields are cast to float before any arithmetic.
    for field in ('open', 'high', 'low', 'close', 'volume'):
        frame[field] = frame[field].astype(float)

    opening = frame['open']
    frame['p_n_l'] = ((frame['close'] - opening) / opening) * 100

    kept = ['timestamp', 'open', 'close', 'high', 'low', 'volume', 'p_n_l']
    return frame[kept].rename(columns={'timestamp': 'date'})

def download_full_history(symbol="BTCUSDT", interval="1h"):
    """Download klines for ``symbol`` from 2021-01-01 00:00 UTC until now.

    The range is walked in windows of at most seven days, with a one
    second pause between requests. If a window returns a full page, the
    next request resumes from the newest candle received instead of
    skipping to the next window, so short intervals do not silently lose
    candles to the per-request row limit.

    Args:
        symbol: Trading pair symbol passed to the klines request.
        interval: Kline interval string passed to the klines request.

    Returns:
        The DataFrame produced by ``process_klines_data`` from all
        downloaded rows, with each candle open time kept once.

    Raises:
        RuntimeError: If a request yields anything other than a list of
            rows (for example an error object from the API).
    """
    # Must match the "limit" value sent by get_binance_futures_klines.
    page_limit = 1000

    # Timezone-aware UTC values: ``.timestamp()`` on naive datetimes would
    # use the machine's local timezone, while the downloaded candle times
    # are epoch milliseconds.
    start_date = datetime(2021, 1, 1, tzinfo=timezone.utc)
    end_date = datetime.now(timezone.utc)

    all_data = []
    seen_open_times = set()
    current_start = start_date

    while current_start < end_date:
        current_end = min(current_start + timedelta(days=7), end_date)

        print(f"Downloading data from {current_start} to {current_end}")

        klines = get_binance_futures_klines(symbol, interval, current_start, current_end)
        if not isinstance(klines, list):
            # Extending a list with a dict would silently add its keys.
            raise RuntimeError(f"Unexpected klines response for {symbol}: {klines!r}")

        # Consecutive requests share their boundary instant, so the same
        # candle can come back twice; keep each open time only once.
        for row in klines:
            open_ms = int(row[0])
            if open_ms not in seen_open_times:
                seen_open_times.add(open_ms)
                all_data.append(row)

        next_start = current_end
        if len(klines) >= page_limit:
            # A full page may have been cut off before the window's end:
            # continue from the newest candle received.
            newest_ms = max(int(row[0]) for row in klines)
            resume_at = datetime.fromtimestamp(newest_ms / 1000, tz=timezone.utc)
            # Only resume when it makes progress and stays in the window.
            if current_start < resume_at < current_end:
                next_start = resume_at

        current_start = next_start
        time.sleep(1)

    df = process_klines_data(all_data)
    return df

def clean_data(df):
    """Return a de-duplicated copy of ``df`` ordered by its ``date`` column.

    Args:
        df: DataFrame containing a ``date`` column.

    Returns:
        A new DataFrame with fully identical rows collapsed to one,
        sorted by ``date``, and carrying a fresh 0..n-1 index. The input
        frame is left untouched.
    """
    return (
        df.drop_duplicates()
        .sort_values('date')
        .reset_index(drop=True)
    )

# Download the full history, tidy it, and store it as CSV.
df = download_full_history()
df = clean_data(df)

# to_csv does not create missing directories; make sure the target folder
# exists so a long download is not lost to a failure at the final write.
output_path = Path('./binance_data/usdt_btc_1h.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)


Downloading data from 2021-01-01 00:00:00 to 2021-01-08 00:00:00
Downloading data from 2021-01-08 00:00:00 to 2021-01-15 00:00:00
Downloading data from 2021-01-15 00:00:00 to 2021-01-22 00:00:00
Downloading data from 2021-01-22 00:00:00 to 2021-01-29 00:00:00
Downloading data from 2021-01-29 00:00:00 to 2021-02-05 00:00:00
Downloading data from 2021-02-05 00:00:00 to 2021-02-12 00:00:00
Downloading data from 2021-02-12 00:00:00 to 2021-02-19 00:00:00
Downloading data from 2021-02-19 00:00:00 to 2021-02-26 00:00:00
Downloading data from 2021-02-26 00:00:00 to 2021-03-05 00:00:00
Downloading data from 2021-03-05 00:00:00 to 2021-03-12 00:00:00
Downloading data from 2021-03-12 00:00:00 to 2021-03-19 00:00:00
Downloading data from 2021-03-19 00:00:00 to 2021-03-26 00:00:00
Downloading data from 2021-03-26 00:00:00 to 2021-04-02 00:00:00
Downloading data from 2021-04-02 00:00:00 to 2021-04-09 00:00:00
Downloading data from 2021-04-09 00:00:00 to 2021-04-16 00:00:00
Downloading data from 202

KeyboardInterrupt: 