##### Import libraries

In [1]:
import requests
import os
import gzip
import numpy as np
import pandas as pd

##### Set variables

In [2]:
# Base URL of the CSV archive; "<symbol>.csv.gz" is appended for each download.
bitcoin_url = "http://api.bitcoincharts.com/v1/csv/"

# Folder where the downloaded archives are stored. Set the BITCOIN_DATA_DIR
# environment variable to use a different folder; without it the original
# location is used. os.path.join(..., "") guarantees a trailing separator,
# which matters because the cells below build file paths by plain string
# concatenation of data_location and the file name.
data_location = os.path.join(
    os.environ.get(
        "BITCOIN_DATA_DIR",
        "/Users/minathaniel/Documents/Sydney University/Master of Data Science/COMP5703/Data/",
    ),
    "",
)

# Symbols of the archive files to download and import.
bitcoin_files = [
    "anxhkAUD",
    "bcmBMAUD",
    "bitmarketAUD",
    "btcmarketsAUD",
    "cryptoxAUD",
    "localbtcAUD",
    "mtgoxAUD",
    "ruxumAUD",
    "thAUD",
    "wbxAUD",
    "weexAUD",
]

##### Download and save data

In [3]:
# Download the gzipped archive of every symbol in bitcoin_files and save it
# under data_location as "<symbol>.csv.gz".
for x in bitcoin_files:
    print("collect bitcoin exchange: {}".format(x))
    # The timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get("{}{}.csv.gz".format(bitcoin_url, x), allow_redirects=True, timeout=60)
    if not r.ok:
        # Make HTTP errors visible. The payload is still written below so the
        # import step finds a file and can report the failed import itself.
        print("download failed for {}: HTTP {}".format(x, r.status_code))
    # The context manager closes the file handle even if the write fails.
    with open("{}{}.csv.gz".format(data_location, x), "wb") as f:
        f.write(r.content)
    del r, f

del x

collect bitcoin exchange: anxhkAUD
collect bitcoin exchange: bcmBMAUD
collect bitcoin exchange: bitmarketAUD
collect bitcoin exchange: btcmarketsAUD
collect bitcoin exchange: cryptoxAUD
collect bitcoin exchange: localbtcAUD
collect bitcoin exchange: mtgoxAUD
collect bitcoin exchange: ruxumAUD
collect bitcoin exchange: thAUD
collect bitcoin exchange: wbxAUD
collect bitcoin exchange: weexAUD


##### Decompress and import the .csv.gz files

In [4]:
# Read every downloaded archive into a DataFrame, tag it with its source
# symbol, and stack all successfully imported frames into `bitcoin`.
bitcoin_list = []

for x in bitcoin_files:
    print("compiling file source: {}".format(x))
    # The try block also covers opening the archive, so a missing or corrupt
    # file is reported and skipped instead of aborting the whole loop.
    try:
        with gzip.open("{}{}.csv.gz".format(data_location, x), "rb") as f:
            bitcoin_temp = pd.read_csv(f, sep=",", header=None)
        del f
        bitcoin_temp.columns = ["Datetime","Price","Volume"]
        # The first column holds epoch seconds.
        bitcoin_temp["Datetime"] = pd.to_datetime(bitcoin_temp["Datetime"], unit="s")
        # Midnight of the same day, kept as datetime64 (no detour via date objects).
        bitcoin_temp["Date"] = bitcoin_temp["Datetime"].dt.normalize()
        bitcoin_temp["Source"] = x
        bitcoin_temp = bitcoin_temp[["Source","Date","Datetime","Price","Volume"]]
        bitcoin_list.append(bitcoin_temp)
        del bitcoin_temp
        print("successful import: {}".format(x))
    except Exception as e:
        # Catch Exception rather than everything, so Ctrl-C still interrupts
        # the cell, and show why the import failed.
        print("unsuccessful import: {} ({}: {})".format(x, type(e).__name__, e))

del x

bitcoin = pd.concat(bitcoin_list)
del bitcoin_list

compiling file source: anxhkAUD
successful import: anxhkAUD
compiling file source: bcmBMAUD
unsuccessful import: bcmBMAUD
compiling file source: bitmarketAUD
successful import: bitmarketAUD
compiling file source: btcmarketsAUD
successful import: btcmarketsAUD
compiling file source: cryptoxAUD
successful import: cryptoxAUD
compiling file source: localbtcAUD
successful import: localbtcAUD
compiling file source: mtgoxAUD
successful import: mtgoxAUD
compiling file source: ruxumAUD
successful import: ruxumAUD
compiling file source: thAUD
successful import: thAUD
compiling file source: wbxAUD
successful import: wbxAUD
compiling file source: weexAUD
successful import: weexAUD


In [5]:
# Count how many rows each source contributed to the combined frame.
bitcoin["Source"].value_counts()

btcmarketsAUD    1441927
anxhkAUD          634515
localbtcAUD       307745
mtgoxAUD          133394
cryptoxAUD          6809
thAUD               2136
wbxAUD              1800
weexAUD              639
bitmarketAUD         116
ruxumAUD              60
Name: Source, dtype: int64

##### Feature extraction

Create a dataset of daily price statistics (minimum, median, mean and maximum) together with the eight most recent row-to-row changes in the daily mean price.

In [6]:
# Daily price summary: keep rows dated 2011-06-01 or later and aggregate the
# price per date into min / median / mean / max.
bitcoin_daily = (
    bitcoin.loc[bitcoin["Date"] >= "2011-06-01", ["Date", "Price"]]
    .groupby("Date")["Price"]
    .agg(["min", "median", "mean", "max"])
    .reset_index()
)
bitcoin_daily.columns = ["Date","Min_Price","Median_Price","Mean_Price","Max_Price"]

# Day_k_Difference is the change in Mean_Price between the rows k-1 and k
# positions back, i.e. the row-to-row difference shifted down by k-1 rows.
# Lags count rows, not calendar days: a date with no trades has no row.
# The leading rows without enough history are NaN, and frames with fewer
# than eight rows are handled as well.
mean_price_change = bitcoin_daily["Mean_Price"].diff()
for lag in range(1, 9):
    bitcoin_daily["Day_{}_Difference".format(lag)] = mean_price_change.shift(lag - 1)
del mean_price_change, lag

In [7]:
# Show the ten most recent daily rows (all rows when fewer than ten exist).
bitcoin_daily.tail(10)

Unnamed: 0,Date,Min_Price,Median_Price,Mean_Price,Max_Price,Day_1_Difference,Day_2_Difference,Day_3_Difference,Day_4_Difference,Day_5_Difference,Day_6_Difference,Day_7_Difference,Day_8_Difference
2619,2018-08-25,8441.29,9162.77,9303.437235,36902.62,161.48162,199.373391,-166.25427,245.037275,-153.600706,-15.931937,18.975124,-20.319537
2620,2018-08-26,7972.23,9127.59,9274.096156,18294.18,-29.341079,161.48162,199.373391,-166.25427,245.037275,-153.600706,-15.931937,18.975124
2621,2018-08-27,8243.0,9150.0,9258.331553,21913.77,-15.764603,-29.341079,161.48162,199.373391,-166.25427,245.037275,-153.600706,-15.931937
2622,2018-08-28,8180.0,9494.05,9607.204378,88999.64,348.872825,-15.764603,-29.341079,161.48162,199.373391,-166.25427,245.037275,-153.600706
2623,2018-08-29,8264.33,9607.45,9852.168227,88999.64,244.963849,348.872825,-15.764603,-29.341079,161.48162,199.373391,-166.25427,245.037275
2624,2018-08-30,8985.0,9526.59,9600.403873,15000.0,-251.764355,244.963849,348.872825,-15.764603,-29.341079,161.48162,199.373391,-166.25427
2625,2018-08-31,8838.93,9590.0,9708.136845,19345.73,107.732973,-251.764355,244.963849,348.872825,-15.764603,-29.341079,161.48162,199.373391
2626,2018-09-01,8700.16,9827.12,9989.413003,28642.49,281.276158,107.732973,-251.764355,244.963849,348.872825,-15.764603,-29.341079,161.48162
2627,2018-09-02,8180.0,9995.0,10078.806014,19945.07,89.393011,281.276158,107.732973,-251.764355,244.963849,348.872825,-15.764603,-29.341079
2628,2018-09-03,9102.0,10000.93,10134.166185,22445.66,55.360171,89.393011,281.276158,107.732973,-251.764355,244.963849,348.872825,-15.764603
