## Config

In [114]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from common import *
import qgrid

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [117]:
df = pd.read_csv('polienix_btc_usd.csv')
epochsec = [datetime.datetime.fromtimestamp(t) for t in df['date'].values]
df = df.assign(time_utc = epochsec)
df.sort_values(by='time_utc', ascending=True, inplace=True)

In [118]:
df

Unnamed: 0,date,high,low,open,close,volume,quoteVolume,weightedAverage,time_utc
0,1424372400,0.330000,225.000000,0.330000,225.000000,9.999990e-01,0.004444,225.000000,2015-02-19 11:00:00
1,1424374200,225.000000,225.000000,225.000000,225.000000,0.000000e+00,0.000000,225.000000,2015-02-19 11:30:00
2,1424376000,240.000000,225.000000,225.000000,240.000000,3.062465e+01,0.128625,238.092076,2015-02-19 12:00:00
3,1424377800,244.000000,244.000000,244.000000,244.000000,1.465123e+01,0.060046,244.000000,2015-02-19 12:30:00
4,1424379600,244.000000,244.000000,244.000000,244.000000,0.000000e+00,0.000000,244.000000,2015-02-19 13:00:00
5,1424381400,244.000000,244.000000,244.000000,244.000000,0.000000e+00,0.000000,244.000000,2015-02-19 13:30:00
6,1424383200,244.000000,244.000000,244.000000,244.000000,0.000000e+00,0.000000,244.000000,2015-02-19 14:00:00
7,1424385000,244.000000,244.000000,244.000000,244.000000,4.367600e-04,0.000002,244.000000,2015-02-19 14:30:00
8,1424386800,244.000000,244.000000,244.000000,244.000000,0.000000e+00,0.000000,244.000000,2015-02-19 15:00:00
9,1424388600,244.000000,244.000000,244.000000,244.000000,0.000000e+00,0.000000,244.000000,2015-02-19 15:30:00


## GDAX

In [None]:
# https://github.com/danpaquin/gdax-python
# https://docs.gdax.com

# Use the sandbox API (requires a different set of API access credentials)
gdax_client = gdax.AuthenticatedClient(cfg.GDAX_API_KEY, cfg.GDAX_API_SECRET_KEY, 
                                       cfg.GDAX_PASSPHRASE, api_url=cfg.GDAX_ENDPOINT)
gdax_client.get_time()

### Historical Prices

In [None]:
# Default request is 400 minutes (~7 hours), 1 minute gap
# ~4 requests to get 24 hours of data
# 1460 requests for 1 year, ~3000 requests for 2 years
def write_to_df(data, fpath):
    if os.path.exists(fpath):
        df = pd.read_csv(fpath)
        data = pd.DataFrame(data, columns=PRICE_COLUMNS)
        df.set_index('time')
        df.sort_values(by='time', ascending=True, inplace=True)
        df = pd.concat([df, data]).drop_duplicates().reset_index(drop=True)
    else:
        df = pd.DataFrame(data, columns=PRICE_COLUMNS)
        df.set_index('time')
        df.sort_values(by='time', ascending=True, inplace=True)
    df.to_csv(fpath, index=False)
    return df

def get_data(currency_pair, start_time, end_time, timestep_sec):
#     print("Start", start_utc.isoformat())
#     print("End", end_utc.isoformat())
    delta = end_time - start_time
#     print("Years", round(delta.days/365,2))
#     print("Days", delta.days)
#     print("Hours", delta.days*24)
#     print("Minutes", delta.days*24*60)
#     print("Seconds", round(delta.total_seconds()))
    data = gdax_client.get_product_historic_rates(
        currency_pair, start=start_time.isoformat(), 
        end=end_time.isoformat(), granularity=timestep_sec)
    return data

def get_all_data(pair, start_utc, end_utc, timesteps_per_request, timestep_sec, outfpath):
    start_time = datetime.datetime.strptime(
        start_utc, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
    end_time = datetime.datetime.strptime(
        end_utc, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
    cur_time = start_time
    time_delta = datetime.timedelta(
        seconds=timesteps_per_request*timestep_sec)
    n_records = 0
    retry = 0
    while cur_time < end_time and retry < 10:
        try:
            data = get_data(pair, cur_time, cur_time+time_delta, timestep_sec)
            data = np.array(data)
            last_time = datetime.datetime.fromtimestamp(np.max(data[:,0]))
            if last_time < cur_time:
                break
            print("Records", n_records, "Start:", cur_time, "End:", last_time)
            cur_time = datetime.datetime.fromtimestamp(
                data[0][0] + timestep_sec)
            df = write_to_df(data, outfpath)
            n_records += len(data)
            retry = 0
        except Exception as e:
            retry += 1
            print("Error! Retrying!", e)
            traceback.print_exc()
        finally:
            time.sleep(1)
    return df

In [None]:
START_UTC = '2015-02-01T00:00:00Z'
END_UTC = '2017-12-31T00:00:00Z'
PRICE_COLUMNS = ['time', 'low', 'high', 'open', 'close', 'volume']
TIMESTEP_INTERVAL = 1800
TIMESTEPS_PER_REQUEST = 100
PRODUCT = c.BTC_USD
EXCHANGE = c.GDAX
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
PRICE_FPATH

In [113]:
# Test
# Older time periods may not work (looks like they keep 2 years of data?)
START_UTC = '2017-01-01T00:00:00Z'
END_UTC = '2017-12-31T00:00:00Z'
start_time = datetime.datetime.strptime(
    START_UTC, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
time_delta = datetime.timedelta(
    seconds=TIMESTEPS_PER_REQUEST*TIMESTEP_INTERVAL)
end_time = start_time + time_delta
start_time, end_time
PRODUCT = c.LTC_USD
gdax_client.get_product_historic_rates(
    PRODUCT, start=start_time.isoformat(), end=end_time.isoformat(),
    granularity=TIMESTEP_INTERVAL)

[[1483407000, 4.61, 4.61, 4.61, 4.61, 1.1817330000000001],
 [1483405200, 4.58, 4.58, 4.58, 4.58, 1],
 [1483403400, 4.56, 4.56, 4.56, 4.56, 1.40379],
 [1483401600, 4.55, 4.56, 4.56, 4.55, 3.57027],
 [1483399800, 4.56, 4.62, 4.56, 4.62, 24.97],
 [1483398000, 4.56, 4.56, 4.56, 4.56, 9.799999999999999],
 [1483396200, 4.59, 4.65, 4.65, 4.59, 1.939751],
 [1483394400, 4.54, 4.65, 4.54, 4.65, 1.73237],
 [1483392600, 4.57, 4.57, 4.57, 4.57, 79.87560599999999],
 [1483390800, 4.51, 4.51, 4.51, 4.51, 10],
 [1483389000, 4.49, 4.85, 4.8, 4.85, 74.132073],
 [1483387200, 4.45, 4.57, 4.51, 4.57, 1018.9581440000001],
 [1483385400, 4.53, 4.56, 4.56, 4.53, 110.818348],
 [1483383600, 4.65, 4.65, 4.65, 4.65, 0.044575],
 [1483381800, 4.64, 4.73, 4.67, 4.64, 163.98004027],
 [1483380000, 4.65, 4.69, 4.65, 4.69, 2.455591],
 [1483378200, 4.64, 4.7, 4.69, 4.64, 159.170762],
 [1483376400, 4.69, 4.74, 4.72, 4.69, 537.94368584],
 [1483374600, 4.75, 4.79, 4.79, 4.75, 145.795263],
 [1483372800, 4.67, 4.74, 4.67, 4.74,

In [None]:
# Fetch
df = get_all_data(PRODUCT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
PRODUCT = c.ETH_USD
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(PRODUCT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
PRODUCT = c.LTC_USD
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, PRODUCT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(PRODUCT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
start_utc = '2017-12-23T00:00:00Z'
end_utc = '2017-12-31T00:00:00Z'
df = get_all_data(PRODUCT, start_utc, end_utc, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
s = datetime.datetime.fromtimestamp(1514641260)
next_ = datetime.datetime.fromtimestamp(1514641320)
e = datetime.datetime.fromtimestamp(1514665260)
s,next_,e,len(prices),(e-s).total_seconds()/3600
#datetime.datetime.timestamp(s)

### Load Prices

In [None]:
# https://github.com/bfortuner/computer-vision/blob/master/applied/libraries/PandasQuickstart.ipynb
df = pd.read_csv(PRICE_FPATH)
epochsec = [datetime.datetime.fromtimestamp(t) for t in df['time'].values]
df = df.assign(time_utc = epochsec)
df.sort_values(by='time_utc', ascending=True, inplace=True)

In [None]:
# Check for NULL
df.isnull().sum()

In [None]:
last_time = df.iloc[-1]['time']
last_record_utc = datetime.datetime.fromtimestamp(last_time)
print(last_record_utc.isoformat())
df.tail()

In [None]:
len(df)

In [None]:
# Query Date Range
start = datetime.datetime(2017, 4, 15, 12, 0)
end = datetime.datetime(2017, 4, 15, 16, 10)
results = df[ (df['time_utc'] >= start) & (df['time_utc'] < end) ]

In [None]:
# Check for missing timesteps
last_time = df.iloc[0]['time_utc']
n_missing = 0
for idx,row in df[1:].iterrows():
    cur_time = row['time_utc']
#     print(cur_time, last_time + datetime.timedelta(seconds=60))
    if cur_time != last_time + datetime.timedelta(seconds=3600):
        n_missing += 1 #(cur_time - last_time).seconds//60
    last_time = cur_time
n_missing

### Plot

In [None]:
# https://github.com/quantopian/qgrid
# https://hub.mybinder.org/user/quantopian-qgrid-notebooks-bu5joi0d/notebooks/index.ipynb
# https://ipywidgets.readthedocs.io/en/stable/examples/Widget%20Basics.html

qgrid_widget = qgrid.QgridWidget(df=df, show_toolbar=True)
qgrid_widget
#qgrid_widget.get_changed_df()

In [None]:
def plot_prices(time, close):
    fig, ax = plt.subplots()
    ax.plot(time, close)

    years = mdates.YearLocator()   # every year
    months = mdates.MonthLocator()  # every month
    yearsFmt = mdates.DateFormatter('%Y')
    monthsFmt = mdates.DateFormatter('%m')
    ax.xaxis.set_major_locator(years)
    ax.xaxis.set_major_formatter(yearsFmt)
    ax.xaxis.set_minor_locator(months)
    ax.xaxis.set_minor_formatter(monthsFmt)

    # datemin = datetime.date(r.date.min().year, 1, 1)
    # datemax = datetime.date(r.date.max().year + 1, 1, 1)
    # ax.set_xlim(datemin, datemax)

    # # format the coords message box
    # def price(x):
    #     return '$%1.2f' % x
    # ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
    # ax.format_ydata = price
    ax.grid(True)

    # rotates and right aligns the x labels, and moves the bottom of the
    # axes up to make room for them
    fig.autofmt_xdate(rotation=30)
    fig.set_size_inches(12,6)
    plt.show()

def plot_range(df, start, end):
    df = df[ (df['time_utc'] >= start) & (df['time_utc'] < end) ]
    vals = df[['time_utc','close']].values
    plot_prices(vals[:,0], vals[:,1])

vals = df[['time_utc','close']].values
plot_prices(vals[:,0], vals[:,1])
# start = datetime.datetime(2017, 4, 15, 12, 0)
# end = datetime.datetime(2017, 4, 15, 16, 10)
# results = df[ (df['time_utc'] >= start) & (df['time_utc'] < end) ]
# results = results.drop(684239)
# plot_range(results, start, end)

### Exchange Metadata

In [None]:
gdax_client.get_products()
gdax_client.get_currencies()
gdax_client.get_time()

### Current Prices

In [None]:
# Get the order book at the default level.
gdax_client.get_product_order_book('BTC-USD')
# Get the order book at a specific level.
gdax_client.get_product_order_book('BTC-USD', level=1)

In [None]:
# Get the product ticker for a specific product.
gdax_client.get_product_ticker(product_id='ETH-USD')

In [None]:
# Get the product trades for a specific product.
gdax_client.get_product_trades(product_id='ETH-USD')

In [None]:
gdax_client.get_product_24hr_stats('ETH-USD')

### Streaming

In [None]:
class myWebsocketClient(gdax.WebsocketClient):
    def on_open(self):
        self.url = cfg.GDAX_WEBSOCKET
        self.products = [c.BTC_USD]
        self.message_count = 0
        print("Lets count the messages!")
    
    def on_message(self, msg):
        self.message_count += 1
        if 'price' in msg and 'type' in msg:
            print ("Message type:", msg["type"],
                   "\t@ {:.3f}".format(float(msg["price"])))
    
    def on_close(self):
        print("-- Goodbye! --")

        
wsClient = myWebsocketClient()
wsClient.start()
print(wsClient.url, wsClient.products)
while (wsClient.message_count < 500):
    print ("\nmessage_count =", "{} \n".format(wsClient.message_count))
    time.sleep(1)
wsClient.close()

In [None]:
wsClient.close()

## Mashape (Brave New Coin)

### 5 minute-ticks

In [156]:
# https://market.mashape.com/BraveNewCoin/digital-currency-ex-rates#mwa-historic-exchange-rates
# https://bravenewcoin.com/api/digital-currency-exchange-rates/
# http://docs.python-requests.org/en/master/user/quickstart/ 

# Coins
# BTC start = 1396328100 - datetime.datetime(2014, 3, 31, 21, 55)
# ETH start = 1439011500 - datetime.datetime(2015, 8, 7, 22, 25)
# LTC start = 1396344000 - datetime.datetime(2014, 4, 1, 2, 20)
# XRP start = 1397010900 - datetime.datetime(2014, 4, 8, 19, 35)

# Meta
# columns = ['timestamp', 'index', 'volume', 'index_usd', 'volume_usd']
# Returns 1000 rows? Multiple days worth of data...
# 230 requests for 2 years of 5-min data?
# Looks like they fill in missing data with previous known price
# They have 0 volume days

In [192]:
def write_to_df(data, fpath, columns):
    if os.path.exists(fpath):
        df = pd.read_csv(fpath)
        data = pd.DataFrame(data, columns=columns)
        df.set_index(columns[0])
        df = pd.concat([df, data]).drop_duplicates().reset_index(drop=True)
    else:
        df = pd.DataFrame(data, columns=columns)
        df.set_index(columns[0])
    df.to_csv(fpath, index=False)
    return df

def get_bnc_data(coin, fiat, start_time, end_time):
    params = {
        'coin': coin,
        'market': fiat,
        'from': round(start_time.timestamp()),
        'to': round(end_time.timestamp())
    }
    headers = {
        "X-Mashape-Key": cfg.BNC_API_KEY,
        "Accept": "application/json"
    }
    r = requests.get(url, headers=headers, params=params)
    data = r.json()['data']
    data = np.array(data).astype(float)
    return data

def get_all_data(coin, fiat, start_utc, end_utc, timesteps_per_request, timestep_sec, outfpath):
    start_time = datetime.datetime.strptime(
        start_utc, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
    end_time = datetime.datetime.strptime(
        end_utc, '%Y-%m-%dT%H:%M:%SZ')#.astimezone(datetime.timezone.utc)
    cur_time = start_time
    timerange_delta = datetime.timedelta(
        seconds=timesteps_per_request*timestep_sec)
    timestep_delta = datetime.timedelta(seconds=timestep_sec)
    n_records = 0
    retry = 0
    while cur_time < end_time and retry < 10:
        try:
            data = get_bnc_data(coin, fiat, cur_time, cur_time+timerange_delta)
            last_time = datetime.datetime.fromtimestamp(np.max(data[:,0]))
            if last_time < cur_time:
                break
            print("Records", n_records, "Start:", cur_time, "End:", last_time)
            cur_time = last_time + timestep_delta
            df = write_to_df(data, outfpath, BNC_PRICE_COLUMNS)
            n_records += len(data)
            retry = 0
        except Exception as e:
            retry += 1
            print("Error! Retrying!", e)
            traceback.print_exc()
        finally:
            time.sleep(1)
    return df

In [193]:
BNC_PRICE_COLUMNS = ['timestamp', 'price_coin', 'volume_coin', 'price_fiat', 'volume_fiat']
BNC_EXCHANGE_RATE_ENDPOINT = 'https://bravenewcoin-mwa-historic-v1.p.mashape.com/mwa-historic'
COIN = c.BTC
FIAT = c.USD
START_UTC = '2015-08-08T00:00:00Z'
END_UTC = '2017-12-31T00:00:00Z'
TIMESTEP_INTERVAL = 300 # 5 minutes
TIMESTEPS_PER_REQUEST = 1000 
EXCHANGE = c.BNC
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, COIN+'-'+FIAT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)

In [None]:
get_bnc_data(c.BTC, c.USD, 
             datetime.datetime.fromtimestamp(1396328100), 
             datetime.datetime.fromtimestamp(1396329100))

In [None]:
df = get_all_data(COIN, FIAT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

Records 0 Start: 2015-08-08 00:00:00 End: 2015-08-11 11:15:00
Records 1000 Start: 2015-08-11 11:20:00 End: 2015-08-14 22:35:00
Records 2000 Start: 2015-08-14 22:40:00 End: 2015-08-18 09:55:00
Records 2993 Start: 2015-08-18 10:00:00 End: 2015-08-21 21:15:00
Records 3993 Start: 2015-08-21 21:20:00 End: 2015-08-25 08:35:00
Records 4993 Start: 2015-08-25 08:40:00 End: 2015-08-28 19:55:00
Records 5993 Start: 2015-08-28 20:00:00 End: 2015-09-01 07:15:00
Records 6992 Start: 2015-09-01 07:20:00 End: 2015-09-04 18:35:00
Records 7992 Start: 2015-09-04 18:40:00 End: 2015-09-08 05:55:00
Records 8992 Start: 2015-09-08 06:00:00 End: 2015-09-11 17:15:00
Records 9992 Start: 2015-09-11 17:20:00 End: 2015-09-15 04:35:00
Records 10992 Start: 2015-09-15 04:40:00 End: 2015-09-18 15:55:00
Records 11992 Start: 2015-09-18 16:00:00 End: 2015-09-22 03:15:00
Records 12986 Start: 2015-09-22 03:20:00 End: 2015-09-25 14:35:00
Records 13984 Start: 2015-09-25 14:40:00 End: 2015-09-29 01:55:00
Records 14984 Start: 201

Records 123912 Start: 2016-10-11 12:20:00 End: 2016-10-14 23:35:00
Records 124912 Start: 2016-10-14 23:40:00 End: 2016-10-18 10:55:00
Records 125912 Start: 2016-10-18 11:00:00 End: 2016-10-21 22:15:00
Records 126912 Start: 2016-10-21 22:20:00 End: 2016-10-25 09:35:00
Records 127912 Start: 2016-10-25 09:40:00 End: 2016-10-28 20:55:00
Records 128912 Start: 2016-10-28 21:00:00 End: 2016-11-01 08:15:00
Records 129912 Start: 2016-11-01 08:20:00 End: 2016-11-04 19:35:00
Records 130911 Start: 2016-11-04 19:40:00 End: 2016-11-08 05:55:00
Records 131911 Start: 2016-11-08 06:00:00 End: 2016-11-11 17:15:00
Records 132911 Start: 2016-11-11 17:20:00 End: 2016-11-15 04:35:00
Records 133911 Start: 2016-11-15 04:40:00 End: 2016-11-18 15:55:00
Records 134911 Start: 2016-11-18 16:00:00 End: 2016-11-22 03:15:00
Records 135911 Start: 2016-11-22 03:20:00 End: 2016-11-25 14:35:00
Records 136910 Start: 2016-11-25 14:40:00 End: 2016-11-29 01:55:00
Records 137910 Start: 2016-11-29 02:00:00 End: 2016-12-02 13:1

In [None]:
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, c.ETH+'-'+FIAT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(c.ETH, FIAT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, c.LTC+'-'+FIAT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(c.LTC, FIAT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)

In [None]:
PRICE_FNAME = '{:s}_{:s}_{:d}.csv'.format(EXCHANGE, c.XRP+'-'+FIAT, TIMESTEP_INTERVAL)
PRICE_FPATH = os.path.join(cfg.DATA_DIR, PRICE_FNAME)
df = get_all_data(c.XRP, FIAT, START_UTC, END_UTC, TIMESTEPS_PER_REQUEST, TIMESTEP_INTERVAL, PRICE_FPATH)