# Initial Test

* Aluno: Artur Chiaperini Grover
* Exchange: CEX ([CEX.IO](https://cex.io))

In [1]:
import datetime
import io
import json
import pickle
import sqlite3
import time

import ccxt
import numpy as np
import pandas as pd
import requests
import sqlalchemy
from IPython.display import clear_output

----
## Function definition

In [2]:
def create_date_list(start, end):
    '''
    create_date_list(start, end):
    returns every calendar day from start to end (both included) as a list of
    'YYYYMMDD' strings. The list is empty when start is later than end.

    parameters:
    start -> First day of the range, format 'YYYY-MM-DD' (string).
    end   -> Last day of the range, format 'YYYY-MM-DD' (string).
    '''
    first_day = datetime.datetime.strptime(start, '%Y-%m-%d').date()
    last_day = datetime.datetime.strptime(end, '%Y-%m-%d').date()
    # Number of day steps between both ends; negative when the range is inverted,
    # in which case range() below is empty.
    span_days = (last_day - first_day).days

    return [
        (first_day + datetime.timedelta(days=offset)).isoformat().replace('-', '')
        for offset in range(span_days + 1)
    ]

In [3]:
def create_url_list(symbol1, symbol2, start, end):
    '''
    create_url_list(symbol1, symbol2, start, end):
    returns one cex.io historical OHLCV url per day, from start to end date,
    for the symbol1/symbol2 pair.

    parameters:
    symbol1 -> Cryptocurrency acronym (string).
    symbol2 -> Currency acronym (string).
    start   -> Starting date with format 'YYYY-MM-DD' (string).
    end     -> Ending date with format 'YYYY-MM-DD' (string).
    '''
    # Placeholders are filled in the order: day ('YYYYMMDD'), symbol1, symbol2.
    url_template = 'http://cex.io/api/ohlcv/hd/{}/{}/{}'

    return [
        url_template.format(day, symbol1, symbol2)
        for day in create_date_list(start, end)
    ]

In [11]:
def get_ohlcv(symbol1, symbol2, start, end, data_rate):
    '''
    get_ohlcv(symbol1, symbol2, start, end, data_rate):
    returns a JSON string with all the data fetched from cex.io for the given pair
    symbol1/symbol2 between the starting and ending dates (both included) and with
    the given data rate (1m, 1h or 1d).

    One request is made per day of the range. The function sleeps after every request
    to avoid exceeding the rate limit of the exchange (cex.io).
    Cex rate limit is 600 requests per 10 minutes.

    parameters:
    symbol1   -> Cryptocurrency acronym (string).
    symbol2   -> Currency acronym (string).
    start     -> Starting date with format 'YYYY-MM-DD' (string).
    end       -> Ending date with format 'YYYY-MM-DD' (string).
    data_rate -> Data rate, cex.io gives the following options: 'data1m', 'data1h' or 'data1d'.

    returns:
    A string holding a single JSON array with the rows of every day concatenated in
    date order. It can be handed to json.loads or pandas.read_json.

    raises:
    requests.HTTPError -> if the exchange answers with an HTTP error status.
    KeyError           -> if a response has no data_rate field.
    '''

    url_list = create_url_list(symbol1, symbol2, start, end)
    # time_sleep: pause after each request, so no more than 1 request is done per second.
    time_sleep = 1.15
    # request_timeout: seconds to wait for the exchange before giving up on a request,
    # so a stalled connection cannot hang the whole download.
    request_timeout = 30
    print('Number of urls = {}'.format(len(url_list)))
    print('Time sleep = {}'.format(time_sleep))
    count_input = 0
    data_ohlcv_list = []
    for url in url_list:
        print('Reading {} -----'.format(url))
        response = requests.get(url, timeout=request_timeout)
        response.raise_for_status()
        payload = response.json()[data_rate]
        # The field is parsed as JSON and never passed to eval(): it is text coming from
        # the network, and eval() would execute whatever code it contained.
        ohlcv = json.loads(payload) if isinstance(payload, str) else payload
        data_ohlcv_list.extend(ohlcv)
        print('Fetch input data size {}'.format(len(ohlcv)))
        count_input += len(ohlcv)
        print('Finished -----')
        time.sleep(time_sleep)

    print('Total number of inputs {}'.format(count_input))
    # json.dumps (rather than str) keeps the result valid JSON even when rows contain
    # strings or nulls; for purely numeric rows both give the same text.
    return json.dumps(data_ohlcv_list)

----
## Fetching data from Exchange and Saving Pickle File

In [5]:
%%time
# cex_eth_usd = get_ohlcv('ETH', 'USD', '2017-01-22', '2018-01-22', 'data1m')

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs


In [6]:
# file_ohlcv_eth_usd = "ohlcv_eth_usd.pkl"
# pickle.dump(cex_eth_usd, open(file_ohlcv_eth_usd, "wb"))

In [None]:
%%time
# Download one year of 1-minute ETH/EUR candles (one request per day of the range).
cex_eth_eur = get_ohlcv(
    symbol1='ETH',
    symbol2='EUR',
    start='2017-01-22',
    end='2018-01-22',
    data_rate='data1m',
)

Number of urls = 366
Time sleep = 1.1
Reading http://cex.io/api/ohlcv/hd/20170122/ETH/EUR -----
Fetch input data size 11
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170123/ETH/EUR -----
Fetch input data size 6
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170124/ETH/EUR -----
Fetch input data size 7
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170125/ETH/EUR -----
Fetch input data size 8
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170126/ETH/EUR -----
Fetch input data size 7
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170127/ETH/EUR -----
Fetch input data size 12
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170128/ETH/EUR -----
Fetch input data size 14
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170129/ETH/EUR -----
Fetch input data size 11
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170130/ETH/EUR -----
Fetch input data size 9
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170131/ETH/EUR -----
Fetch input data size 10
Fi

Fetch input data size 38
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170416/ETH/EUR -----
Fetch input data size 36
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170417/ETH/EUR -----
Fetch input data size 32
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170418/ETH/EUR -----
Fetch input data size 51
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170419/ETH/EUR -----
Fetch input data size 53
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170420/ETH/EUR -----
Fetch input data size 35
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170421/ETH/EUR -----
Fetch input data size 62
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170422/ETH/EUR -----
Fetch input data size 34
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170423/ETH/EUR -----
Fetch input data size 43
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170424/ETH/EUR -----
Fetch input data size 54
Finished -----
Reading http://cex.io/api/ohlcv/hd/20170425/ETH/EUR -----
Fetch input data siz

In [8]:
# Persist the fetched ETH/EUR data so later sessions do not need to download it again.
file_ohlcv_eth_eur = 'ohlcv_eth_eur.pkl'
# The with-block closes (and flushes) the file even if pickling fails.
with open(file_ohlcv_eth_eur, 'wb') as pickle_file:
    pickle.dump(cex_eth_eur, pickle_file)

NameError: name 'cex_eth_eur' is not defined

----
## Renaming and Correcting Datetime

In [None]:
# Load the previously saved ETH/USD data.
# NOTE: pickle.load can execute arbitrary code from the file; only load pickle files
# that were written by this notebook.
with open('ohlcv_eth_usd.pkl', 'rb') as pickle_file:
    loaded_ohlcv_eth_usd = pickle.load(pickle_file)

In [None]:
# loaded_ohlcv_eth_usd is a JSON string, not a path: wrap it in StringIO because passing
# literal JSON text directly to pd.read_json is deprecated in recent pandas versions.
df = pd.read_json(io.StringIO(loaded_ohlcv_eth_usd))
# Rows come without a header; name the six positional columns.
df.columns = ['date', 'open', 'high', 'low', 'close', 'volume']

In [None]:
def format_date(x):
    '''
    format_date(x):
    returns x, a Unix timestamp in seconds (scalar or pandas Series), converted to
    timezone-naive datetime values expressed in UTC.

    UTC is used instead of datetime.fromtimestamp, which converts to the local timezone
    of the machine and therefore gives different results on different computers.
    '''
    return pd.to_datetime(x, unit='s')


# Vectorised conversion of the whole column (no per-row apply needed).
df['date'] = format_date(df['date'])

In [None]:
# Use the timestamps as the index. The guard makes the cell safe to re-run: once 'date'
# is the index it is no longer a column, and set_index would raise a KeyError.
if 'date' in df.columns:
    df = df.set_index('date')

In [None]:
# Show the resulting OHLCV frame as the cell output.
df