# Binance market data collection

### Library imports

In [46]:
#Used to connect to the binance API
from binance.client import Client

#Used to save the data collected into a python object
import pandas as pd

#Used to pause the script regularly when collecting binance data to deal with the call limits
import time

### Parameters

In [47]:
# Credentials passed to the Binance client
api_key = 'API_KEY'
private_key = 'PRIVATE_KEY'

# Base assets to collect, each one quoted against `quote_crypto`
base_crypto = ["APE", "BTC", "ETH"]
quote_crypto = 'BUSD'

# Bounds of the collection period (format: "YYYY-MM-DD")
start_date = "2019-03-25"
end_date = "2022-10-22"

# Column labels applied, in order, to the fields of each kline row
column_names = [
    'Date',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'Kline_Close_time',
    'Quote_asset_volume',
    'number_of_trade',
    'Taker_buy_base_asset_volume',
    'Taker_buy_quote_asset_volume',
    'Unused field',
]

# Target dtype of every column except 'Date' (converted separately):
# the two listed columns are integers, all the others are floats.
column_types = {
    name: 'int' if name in ('Kline_Close_time', 'number_of_trade') else 'float'
    for name in column_names[1:]
}

# Output location: files are written as BASE_URL + <pair> + BASE_FILENAME
BASE_URL = "data/"
BASE_FILENAME = "_historical_klines.csv"

### Functions to build the crypto pairs and the list of dates

In [48]:
#Generate the crypto pairs
def crytoPairs(base_crypto, quote_crypto):
    """Return the pair symbols obtained by suffixing each base with the quote.

    Args:
        base_crypto: iterable of base asset symbols (e.g. ["BTC", "ETH"]).
        quote_crypto: quote asset symbol appended to every base.

    Returns:
        A new list with one concatenated symbol per base, in input order.
    """
    return [base_symbol + quote_crypto for base_symbol in base_crypto]

#Generate all the dates between the start date and the end date to facilitate the calls to the Binance API
def dateList(start_date, end_date):
    """Split a period into chronological boundaries, cut at each 1 January.

    The result starts with the start date, contains one '1 jan, YYYY' entry
    for every year that begins inside the period, and ends with the end
    date. Consecutive entries delimit the windows requested from the API.

    Args:
        start_date: first day of the period, formatted "YYYY-MM-DD".
        end_date: last day of the period, formatted "YYYY-MM-DD".

    Returns:
        A list of strings formatted "<day> <mon>, <year>" in chronological
        order, or an empty list when the end year precedes the start year.
    """
    months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    start_year = int(start_date[0:4])
    end_year = int(end_date[0:4])
    start_month = int(start_date[5:7])
    end_month = int(end_date[5:7])
    start_day = start_date[8:10]
    end_day = end_date[8:10]

    # Same result as the original loop, which never ran in this case
    if end_year < start_year:
        return []

    dateArr = [start_day + " " + months[start_month-1] + ", " + str(start_year)]

    # Year boundaries only exist for the years AFTER the start year; adding
    # '1 jan, <start_year>' would place a date earlier than the start date
    # right after it and produce an inverted first window.
    for year in range(start_year + 1, end_year + 1):
        dateArr.append('1 jan, ' + str(year))

    # When the period ends exactly on a 1 January that was just added as a
    # year boundary, the end date is already the last entry: skip it to avoid
    # a zero-length final window.
    ends_on_added_boundary = (
        end_year > start_year and end_month == 1 and int(end_day) == 1
    )
    if not ends_on_added_boundary:
        dateArr.append(end_day + " " + months[end_month-1] + ", " + str(end_year))

    return dateArr

### Build the crypto pairs and the list of dates

In [49]:
#Split the requested period into the boundary dates used for the API calls
dates = dateList(start_date=start_date, end_date=end_date)

#Combine every base asset with the quote asset to get the pair symbols
crypto_pairs = crytoPairs(base_crypto=base_crypto, quote_crypto=quote_crypto)

### Authentication

In [50]:
# Create the Binance client from the configured keys; this module-level
# `client` is the object getHistoricalKlinesData calls to fetch the klines.
client = Client(api_key, private_key)

### Function to get the historical Klines

In [51]:
#Function to collect the klines data from all the important coins with the coin that I want to study
def getHistoricalKlinesData(crypto_pairs, dates, column_names, column_types):
    """Download the daily klines of every pair over all the date windows.

    Each pair of consecutive entries in `dates` is one request window. The
    candles of all the windows are accumulated, so the returned dataframe
    covers the whole period rather than only the last window.

    Uses the module-level `client` to query the API.

    Args:
        crypto_pairs: iterable of pair symbols (e.g. "BTCBUSD").
        dates: sequence of boundary dates; window i spans dates[i]..dates[i+1].
        column_names: labels applied, in order, to the fields of a kline row.
        column_types: mapping column name -> dtype passed to DataFrame.astype.

    Returns:
        A dict mapping each pair symbol to its dataframe. The dataframe is
        empty when `dates` defines no window (fewer than two entries).
    """
    pairs_data = {}

    # Collect the data from the API
    for pairs in crypto_pairs:

        # Candles of every window for this pair, in request order
        candles = []
        for window_start, window_end in zip(dates, dates[1:]):
            # Collect the data of the current window
            candles.extend(client.get_historical_klines(symbol=pairs,
                                                        interval=Client.KLINE_INTERVAL_1DAY,
                                                        start_str=window_start,
                                                        end_str=window_end))

            # Pause for 2 seconds between calls to stay under the Binance request limits
            time.sleep(2)

        # Load the data into a dataframe
        df = pd.DataFrame(candles, columns=column_names)

        # Convert each column to the right format
        df['Date'] = pd.to_datetime(df['Date'], unit='ms')
        df = df.astype(column_types)

        # Adjacent windows share their boundary date, so the candle of that
        # day may be returned by both requests: keep a single row per date.
        df = df.drop_duplicates(subset='Date').reset_index(drop=True)

        pairs_data[pairs] = df
    return pairs_data

### Collect the raw data into a dictionary of dataframes

In [52]:
# Run the collection for every pair; the returned dict is keyed by pair symbol
# and holds one dataframe per pair.
dict_df = getHistoricalKlinesData(crypto_pairs, dates, column_names, column_types)

### Save the multiple dataframes to multiple csv files

In [53]:
# Write one CSV per pair, named BASE_URL + <pair symbol> + BASE_FILENAME
for coins in crypto_pairs:
    dict_df[coins].to_csv(f"{BASE_URL}{coins}{BASE_FILENAME}", encoding="utf-8")