In [1]:
import os
import time
import json

import pandas as pd
import ccxt

import yahoo_fin.stock_info as yahoo
from progressbar import progressbar

In [2]:
class GetPairs:
    ''' This class collects all names of the top trading pairs'''
    
    def __init__(self):
        self.path = "data/market_cap"

    def _clean_list(self, dirty_list):
        ''' Remove items in list that contain special characters or lowercase letters'''

        clean_list = dirty_list.copy()
    #     clean_list = [str(i) for i in clean_list]
        dirty_items = [".", ",", '/', "$", "%", "!", "@"]

        for item in dirty_list:
            for dirty_item in dirty_items:
                try:
                    if dirty_item in item:
                        clean_list.remove(item)
                except Exception as e:
                    pass

            if item[1].islower() or item[1].isnumeric():
                try:
                    clean_list.remove(item)
                except Exception as e:
                    pass

        return clean_list

    def get_historic_top_crypto(self):
        ''' Returns top crypto currencies by market cap since 2016 (dependent on the data in path)
            
            The historic crypto data is a bit dirty and has to be cleaned with the _clean_list
            function. 
            
            returns:
                pair_list_clean: list - List of historical trading pairs
        '''

        pair_set = set()

        for filename in os.listdir(self.path):
            try:
                df_temp = pd.read_csv(f"data/market_cap/{filename}")
                pair_set = set.union(pair_set, set(df_temp['Ticker']))
            except KeyError:
                pass

        pair_list_dirty = list(pair_set)
        pair_list_dirty = [str(i) for i in pair_list_dirty]
        pair_list_clean = self._clean_list(pair_list_dirty)

        return pair_list_clean
    
    def get_current_top_crypto(self):
        ''' Get current 100 crypto trading pairs'''
        # Automatische check toevoegen om te zien of de ticker al in de lijst staat.
        top100 = yahoo.get_top_crypto()

        return list(top100['Symbol'])
    

In [3]:
GetPairs().get_historic_top_crypto()


['MOF-USD',
 'SALT-USD',
 'ETC-USD',
 'SUB-USD',
 'BSV-USD',
 'BEAM-USD',
 'MATIC-USD',
 'ADK-USD',
 'GRC-USD',
 'ETHOS-USD',
 'ODE-USD',
 'BTT-USD',
 'KNC-USD',
 'KBC-USD',
 'OBITS-USD',
 'TRIG-USD',
 'XCP-USD',
 'MANA-USD',
 'KLAY-USD',
 'QORA-USD',
 'DBIC-USD',
 'UBT-USD',
 'DRGN-USD',
 'QRK-USD',
 'HNT-USD',
 'RDD-USD',
 'FC2-USD',
 'NEXO-USD',
 'TUSD-USD',
 'UBQ-USD',
 'EKT-USD',
 'WIN-USD',
 'AAVE-USD',
 'BTCD-USD',
 'MOON-USD',
 'LUNA-USD',
 'HEAT-USD',
 'ATOM-USD',
 'ONT-USD',
 'XRB-USD',
 'TIPS-USD',
 'NAS-USD',
 'LOOM-USD',
 'QASH-USD',
 '1ST-USD',
 'YFII-USD',
 'MLN-USD',
 'ARDR-USD',
 'SOAR-USD',
 'LRC-USD',
 'FIL-USD',
 'CAKE-USD',
 'EOS-USD',
 'QRL-USD',
 '3-USD',
 'WDC-USD',
 'GTO-USD',
 'AUDIO-USD',
 'WAXP-USD',
 'BRX-USD',
 'EDG-USD',
 'AMP-USD',
 'ALPHA-USD',
 'FET-USD',
 'DNT-USD',
 'RADS-USD',
 'XPA-USD',
 'ELA-USD',
 'AE-USD',
 'VLX-USD',
 'SAI-USD',
 'SLR-USD',
 'SWT-USD',
 'CRPT-USD',
 'ENRG-USD',
 'TIME-USD',
 'ABBC-USD',
 'STEEM-USD',
 'SAR-USD',
 'EXP-USD',
 '

In [4]:
timeframe = '1h'
exchange = 'kraken'
pair_name = 'BTC/USD'
timestamp_name = '1645450000'
save_directory = (f'data/request_{timeframe}'
                               f'/{exchange}_{pair_name}_{timestamp_name}.csv')
save_directory


'data/request_1h/kraken_BTC/USD_1645450000.csv'

In [5]:
def download_binance_data(pair, timeframe='1h', since=None):
    
    binance = ccxt.binance()
#     pair = 'BTC/USDT'
    data = binance.fetch_ohlcv(pair, timeframe='1h', since=since)
    columns = ["Date", "Open", "High", "Low", "Close", "Volume"]

    df = pd.DataFrame.from_records(data, columns=columns)
    
#     pair_name = pair.replace("/", "-")
#     since_name = int(since/10000)
#     df.to_csv(f'data/request_{timeframe}/{pair_name}_{since_name}.csv', index=False)
    
    return df
    
pair = "BTC/USDT"
timeframe = "1h"
since = "1634774400000"

download_binance_data(pair, timeframe, since=None)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1638626400000,47387.01,47872.47,47251.02,47828.29,3265.95713
1,1638630000000,47828.28,48573.78,47752.40,48203.73,4313.45686
2,1638633600000,48203.74,48480.51,47742.09,47842.97,2980.03624
3,1638637200000,47842.98,48388.00,47779.71,48241.05,1840.32245
4,1638640800000,48241.04,48877.00,48154.71,48866.12,2315.71546
...,...,...,...,...,...,...
495,1640408400000,50886.42,51010.42,50858.56,50886.99,484.58350
496,1640412000000,50886.99,50986.23,50656.43,50965.88,640.70759
497,1640415600000,50965.89,51084.13,50822.17,50919.17,782.58942
498,1640419200000,50919.18,50968.57,50789.00,50886.35,643.88073


In [6]:
1640188800000 - 1638392400000


1796400000

In [7]:
class FetchCryptoData:
    ''' This class requests data from the API of a specific exchange. 
        The API call will return the OHLCV data for a specified crypto trading pair (e.g. BTC/USD)
        
        params:
            pair: list - List trading pairs that need to be looked up
            timeframe: string - Time interval for data (e.g. 1m, 5m, 15m, 1h, 1d, 1wk, 1mo)
            start: int - Timestamp for the first record in the requested data
            req_number: int - Number of requests
            exchange: string - Name of the exchange (in lowercase)
            
    '''
    
    
    def __init__(self, pairs, timeframe, start, reduction, req_number, exchange):
        self.pairs = pairs
        self.timeframe = timeframe
        self.start = start
        self.reduction = reduction
        self.req_number = req_number
        self.exchange = exchange
        
        self.usdt_exchange = ['binance', 'huobi', 'kucoin', 'bitstamp'
                              'bittrex_usdt', 'gateio', 'okex'
                             ]
        
        self.columns = ["Date", "Open", "High", "Low", "Close", "Volume"]
#         self.check_df = pd.DataFrame(columns=self.columns)

        
    def __get_exchange(self):
        ''' Get exchange API function'''
        exchanges = {
            "binance": ccxt.binance(),
            "ftx": ccxt.ftx(),
            "kraken": ccxt.kraken(),
            "bitfinex": ccxt.bitfinex(), # max 100 rows
            "huobi": ccxt.huobi(), # 
            "gateio": ccxt.gateio(), # max 100 rows
            "kucoin": ccxt.kucoin(),
            "bittrex": ccxt.bittrex(),
            "bittrex_usdt": ccxt.bittrex(), #USD en USDT
            "coinbase": ccxt.coinbase(),
            "coinbasepro": ccxt.coinbasepro(),
            "bybit": ccxt.bybit(),
            "bitstamp": ccxt.bitstamp(),
            "gemini": ccxt.gemini(),
            "okex": ccxt.okex(),
            "hitbtc", ccxt.hitbtc()
                    }
        return exchanges[self.exchange]
    
    def __change_pairs(self):
        ''' Changes trading pair name from */USD to */USDT'''
        # Moet logica inbouwen om USDTT te voorkomen
        return [pair.replace("USD", "USDT") for pair in self.pairs]
        
    def _download_data(self, pair, timestamp):
        ''' Downloads and saves trading pair data'''
        exchange = self.__get_exchange()
        
        data = exchange.fetch_ohlcv(pair, self.timeframe, timestamp)
        df = pd.DataFrame.from_records(data, columns=self.columns)
        
        pair_name = pair.replace("/", "-")
        timestamp_name = int(timestamp/10000)
        self.save_directory = (f'data/request_{self.timeframe}'
                               f'/{self.exchange}_{pair_name}_{timestamp_name}.csv')

        return df
    
    def request_data(self):
        ''' Calls the _download_data function for a specified pair and timestamp. 
            
            If the request fails, the name of the failed pair is returned as the "failed" list. 
            This list can later be used to try and get the data from another exchange
            
            returns:
                success: list - A list of all the trading pairs that are downloaded
                failed: list - A list of all the trading pairs that failed to download
                timestamp_dict: dict - A dictionary of the earliest succesfully downloaded trading pair
        '''
        
        # Some exchanges only have */USDT trading pairs
        if self.exchange in self.usdt_exchange: 
            self.pairs = self.__change_pairs()
            
        pairs_slash = [x.replace("-", "/") for x in self.pairs]
        timestamp = self.start

        success = []
        failed = []
        timestamp_dict = {}
        error_dict = {}

        for pair in pairs_slash:
            try:
                for req in range(self.req_number):
                    time.sleep(10)
                    df = self._download_data(pair, timestamp)
                    print(pair)
                    print(df.head(5))
#                     if df.shape[0]<1:
#                         error = f"Maximum amount of avalaible {pair} data retrieved from {self.exchange}"
#                         error_dict[pair] = error
#                         failed.append(pair)
#                         break

                    df.to_csv(self.save_directory, index=False)

                    timestamp_dict[pair] = timestamp
                    timestamp -= self.reduction                    

    #             if df.values.all() != self.check_df.values.all(): # beetje omslagtig. Alternatief is om een self variable aan te maken
                success.append(pair)
                    
            # Error handling verbeteren
            except ccxt.BadSymbol as bs:
                print(f"Could not download {pair}")
                print(bs)
                error_dict[pair] = str(bs)
                failed.append(pair)
                pass
            except ccxt.RateLimitExceeded as re:
                print(f"Too Many Requests {pair}")
                print(re)
                error_dict[pair] = str(re)
                pass
            except Exception as e:
                print(f"Error: {pair}")
                print(e)
                error_dict[pair] = str(e)
                pass
            
        return success, failed, timestamp_dict, error_dict

    # Logica inbouwen om duplicates te voorkomen

SyntaxError: invalid syntax (<ipython-input-7-f3bfcced91fd>, line 49)

In [None]:
# path = 'data/market_cap'
# pairs = ['ETH/USD']


# crypto = FetchCryptoData(pairs, timeframe, start, reduction, req_number, exchange)
# success, failed, final_timestamp = crypto.request_data()


def search_exchanges(exchanges, timeframe, start, reduction, req_number):
    ''' Loops through list of exchanges to find data for all pairs'''
    
    pairs = GetPairs().get_current_top_crypto()
    logs = {}
    
    for exchange in progressbar(exchanges):
        crypto = FetchCryptoData(pairs, timeframe, start, reduction, req_number, exchange)
        success, failed, final_timestamp, error_dict = crypto.request_data()
        pairs = failed
        logs[exchange] = {
            "success": success, 
            "failed": failed, 
            "final_timestamp": final_timestamp,
            "error_dict": error_dict
                         }
        
        # Hier kan nog ingebouwd worden dat er een request wordt gemaakt
        # voor onvolledige data van een bepaalde ticker dmv een dubbele for loop
    
    return logs


In [None]:
timeframe = "1h"
start = 1640332800000 #1640188800000
reduction = 356400000 #1796400000
req_number = 5


exchanges = ["binance","ftx","kraken","bitfinex","huobi",  
            "gateio", "kucoin","bittrex","bittrex_usdt", 
            "coinbase","coinbasepro", "bybit","bitstamp",
            "gemini","okex","hitbtc"]

logs = search_exchanges(exchanges, timeframe, start, reduction, req_number)

In [None]:
logs

In [None]:
import json

with open("data/request_1h/logs.json", "w") as fp:
    json.dump(logs, fp, indent=4)
    
# for element in logs:
#     textfile.write(element + "\n")
# textfile.close()

In [None]:
def save_json(path, filename, logs):
    with open(f"{path}/{filename}.json", "w") as fp:
        json.dump(logs, fp, indent=4)

In [None]:
def download_general_data(pair, timeframe='1h', since=None):
    
    ftx = ccxt.gateio()
#     pair = 'BTC/USDT'
    data = ftx.fetch_ohlcv(pair, timeframe='1h', since=since)
    columns = ["Date", "Open", "High", "Low", "Close", "Volume"]

    df = pd.DataFrame.from_records(data, columns=columns)
    
    return df

timeframe = "1h"
start = 1634774400000
reduction = 5400000000
req_number = 11

download_general_data('LTC/USD', timeframe='1h', since=None)

In [None]:
1640332800000 - 1639976400000

In [None]:
def download_ftx_data(pair, timeframe='1h', since=None):
    
    ftx = ccxt.ftx()
#     pair = 'BTC/USDT'
    data = ftx.fetch_ohlcv(pair, timeframe='1h', since=since)
    columns = ["Date", "Open", "High", "Low", "Close", "Volume"]

    df = pd.DataFrame.from_records(data, columns=columns)
    
    pair_name = pair.replace("/", "-")
    since_name = int(since/10000)
    df.to_csv(f'data/request_{timeframe}/{pair_name}_{since_name}.csv', index=False)
    
def request_data(pairs, timeframe, start, reduction, req_number):
    
    pairs_slash = [x.replace("-", "/") for x in pairs]
    since = start
    
    success = []
    failed = []
    
    for pair in progressbar(pairs_slash):
        try:
            for req in range(req_number):

                download_ftx_data(pair, timeframe, since)
                since -= reduction
            success.append(pair)
        except Exception as e:
            print(f"Could not download {pair}")
            print(e)
            failed.append(pair)
    
    return success, failed



path = 'data/market_cap'
pairs = get_historic_top_crypto(path)
timeframe = "1h"
start = 1634774400000
reduction = 5400000000
req_number = 11

success, fail = request_data(pairs, timeframe, start, reduction, req_number)

# Refactor functions into class
# Create function to merge all files together
# Store names of pairs that could not be downloaded
# Downloaded van weekelijkse top 100. Mogelijk verkrijgbaar uit maandelijkse marketcap data.


            

In [None]:
current_top_100 = set(get_top_crypto()['Symbol'])
fail_list = [x.replace("/","-") for x in fail]
missing_crypto = list(current_top_100.intersection(set(fail_list)))
len(missing_crypto)

In [None]:
import ccxt

binance = ccxt.ftx()
binance.fetch_currencies()
# binance.fetch_trading_fees()


In [None]:
# path = 'data/market_cap'
# pairs = get_top_crypto(path)
# pairs

In [None]:
# import requests
# import pandas as pd
# import ftplib
# import io
# import re
# import json
# import datetime

# try:
#     from requests_html import HTMLSession
# except Exception:
#     print("""Warning - Certain functionality 
#              requires requests_html, which is not installed.
             
#              Install using: 
#              pip install requests_html
             
#              After installation, you may have to restart your Python session.""")

# def get_top_crypto_yahoo():
    
#     '''Gets the top 100 Cryptocurrencies by Market Cap'''      

#     session = HTMLSession()
    
#     resp = session.get("https://finance.yahoo.com/cryptocurrencies?offset=0&count=100")
    
#     tables = pd.read_html(resp.html.raw_html)               
                    
#     df = tables[0].copy()

    
#     df["% Change"] = df["% Change"].map(lambda x: float(str(x).strip("%").\
#                                                                strip("+").\
#                                                                replace(",", "")))
#     del df["52 Week Range"]
#     del df["1 Day Chart"]
    
#     fields_to_change = [x for x in df.columns.tolist() if "Volume" in x \
#                         or x == "Market Cap" or x == "Circulating Supply"]
    
#     for field in fields_to_change:
        
#         if type(df[field][0]) == str:
#             df[field] = df[field].map(lambda x: _convert_to_numeric(str(x)))
            
            
#     session.close()        
                
#     return df

# def force_float(elt):
    
#     try:
#         return float(elt)
#     except:
#         return elt

# def _convert_to_numeric(s):

#     if "M" in s:
#         s = s.strip("M")
#         return force_float(s) * 1_000_000
    
#     if "B" in s:
#         s = s.strip("B")
#         return force_float(s) * 1_000_000_000
    
#     return force_float(s)

In [None]:
# def get_tickers():
    
#     # Automatische check toevoegen om te zien of de ticker al in de lijst staat.
#     top100 = get_top_crypto()
    
#     return list(top100['Symbol'])
# get_tickers()

In [None]:
# kucoin = ccxt.kucoin()

# data = kucoin.fetch_ohlcv('BTC/USDT', timeframe='1h', since=1634608800000)
# columns = ["Date", "Open", "High", "Low", "Close", "Volume"]

# df = pd.DataFrame.from_records(data, columns=columns)
# df
# df['Date'] = pd.to_datetime(df['Date'], unit='ms')

In [None]:
# df = pd.DataFrame.from_records(data, columns=columns)
# df['Date'] = pd.to_datetime(df['Date'], unit='ms')
# df

In [None]:
# import time
# from progressbar import progressbar

# def download_data(pair, timeframe='1h', since=1634608800000):
    
#     kucoin = ccxt.kucoin()

#     data = kucoin.fetch_ohlcv(pair, timeframe=timeframe, since=since)
#     columns = ["Date", "Open", "High", "Low", "Close", "Volume"]
    
#     df = pd.DataFrame.from_records(data, columns=columns)
    
#     timestamp = since - 5396400000
    
#     for i in progressbar(range(100)):
        
#         time.sleep(5)
#         temp_data = kucoin.fetch_ohlcv(pair, timeframe=timeframe, since=timestamp)
#         df_temp = pd.DataFrame.from_records(temp_data, columns=columns)
#         df = pd.concat([df_temp, df])
#         timestamp = timestamp - 5396400000
        
#     df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    
#     return df
        
# df_test = download_data("BTC/USDT", timeframe='1h', since=1634608800000) 
# df_test

In [None]:
# 1640174400000 - 1634774400000

In [None]:
# 1634774400000 - 5400000000

In [None]:
# test = 100
# for i in range(5):
#     print(test)
#     test -= 1

In [None]:
# df['Date'] = pd.to_datetime(df['Date'], unit='ms')
# df

In [None]:
# import time

# # def download_data(ticker, timeframe='1h', since=None):
    
# kraken = ccxt.kraken()
# pair = 'BTC/USDT'
# data = kraken.fetch_ohlcv(pair, timeframe='1h', since=None)
# columns = ["Date", "Open", "High", "Low", "Close", "Volume"]

# df = pd.DataFrame.from_records(data, columns=columns)
# df
      

In [None]:
# df['Date'] = pd.to_datetime(df['Date'], unit='ms')
# df

In [None]:
# import time

# def download_data(ticker, timeframe='1h', since=None):
    
#     kraken = ccxt.kraken()

#     data = kraken.fetch_ohlcv(pair, timeframe=timeframe, since=since)
#     columns = ["Date", "Open", "High", "Low", "Close", "Volume"]
    
#     df = pd.DataFrame.from_records(data, columns=columns)
# #     df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    
    
#     latest_datetime = df.Date.iloc[0]
#     increment = 5396400000
#     error =  False
#     timestamp = None
#     i = 0 
    
#     while error == False:
#         time.sleep(5)
# #         try:

#         temp_data = kraken.fetch_ohlcv(pair, timeframe=timeframe, since=timestamp)
#         df_temp = pd.DataFrame.from_records(temp_data, columns=columns)
#         print('Raar')
#         print("timestamp:", timestamp)
#         print(temp_data)
# #         print(df_temp.Date.values)
# #         print(df_temp.Date.values[0])
#         timestamp = df_temp.Date.values[0] - increment
#         df = pd.concat([df, df_temp])
#         print(i)
#         i += 1
            
# #         except Exception as e:
# #             error=True
# #             print(e)
        
#     df['Date'] = pd.to_datetime(df['Date'], unit='ms')
        
#     return df

In [None]:
# 1634616000000 - 5396400000

In [None]:
# kucoin.fetch_ohlcv('BTC/USDT', timeframe='1h', since=1629219600000)

In [None]:
# pair = 'BTC/USDT'
# download_data(pair, timeframe='1h', since=None)