# 目的
毎日のOHLCVをダウンロードする。   
CryptoCompare(https://min-api.cryptocompare.com/) では七日間だけminute OHLCV が残されているらしい。   
デイリーの分も残すが、minute がデイリーに及ぼす影響も見ておきたい。


In [1]:
import urllib.request
import urllib.parse
import json

def getRateViaCrypto(histoticks, params):
    """getRateViaCrypto(histoticks, params)
    Get historical rates via CryptoCompare (https://min-api.cryptocompare.com/).

    Sends a GET request to ``.../data/histo<histoticks>`` with ``params``
    URL-encoded as the query string and returns the decoded JSON body.

    <params>
    histoticks: suffix of the "histo" endpoint: day, hour, minute or some
                other tick (see the site)
    params: dict of str items sent to the site as the query string
        fsym : currency symbol of interest (required)
        tsym : currency symbol to convert into (required)
        limit: limit of retrieved data (max: 2000)
        e    : the exchange to obtain data from (CCCAGG - by default)
        toTs : last unix timestamp to return data for
    </params>

    <return>
    retrieved data (the JSON response parsed by json.loads)
    </return>

    <raise>
    Errors from urllib.request.urlopen (network/HTTP failures) and from
    json.loads (body is not valid JSON) are not caught here.
    </raise>
    """

    url = "https://min-api.cryptocompare.com/data/histo{}?{}".format(
        histoticks, urllib.parse.urlencode(params)
    )
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, even if decoding/parsing fails.
    with urllib.request.urlopen(url) as res:
        return json.loads(res.read().decode('utf-8'))

In [2]:
import numpy as np
import pandas as pd
import copy
import datetime

datetimeFmt = "%Y-%m-%dT%H:%M:%S.%f"

def toDataFrame(data, is_datetime=False):
    """toDataFrame(data, is_datetime=False)
    Convert data obtained from CryptoCompare to a DataFrame object.

    Each element of data["Data"] becomes one row; the columns are the keys
    of the first element, in their original order. Values are stored as-is
    (object dtype), so integers stay integers in both modes.
    If 'is_datetime' == True, the values in the 'time' column are converted
    from unix timestamps to strings formatted with `datetimeFmt`
    (datetime.fromtimestamp is called without a tz, i.e. local time).
    The input dict is never modified.

    <params>
    data       : data obtained from CryptoCompare (dict object with a
                 "Data" list of per-tick dicts)
    is_datetime: boolean
    </params>

    <return>
    a DataFrame object (an empty DataFrame when data["Data"] is empty)
    </return>

    <raise>
    TypeError: 'data' is not a dict
    KeyError : "Data" is missing, or a row lacks one of the columns
    </raise>
    """

    if not isinstance(data, dict):
        raise TypeError(
            "data must be a dict, got {}".format(type(data).__name__)
        )

    records = data["Data"]
    if not records:
        # Nothing to infer columns from; avoid IndexError on records[0].
        return pd.DataFrame()

    keys = list(records[0].keys())

    output = np.empty((len(records), len(keys)), dtype=object)
    for ii, record in enumerate(records):
        for jj, key in enumerate(keys):
            value = record[key]
            if is_datetime and key == "time":
                stamp = datetime.datetime.fromtimestamp(value)
                value = stamp.strftime(datetimeFmt)
            # Assign cell by cell: going through np.array(row) would upcast
            # a mixed int/float row to float (or to str with a time string).
            output[ii, jj] = value

    return pd.DataFrame(output, columns=keys)

## minute OHLCV の取得テスト
`histoticks` に `minute` を指定する。   
分足は一度に最大300まで取得できる。

In [5]:
# Fetch a handful of one-minute bars for BTC/JPY on bitFlyer and dump the
# response: metadata as "key value" lines, the "Data" rows one per line.
histoticks = "minute"
# NOTE(review): the original comment on "limit" said "one hour's worth",
# but a limit of 10 looks like roughly ten one-minute bars - confirm.
params = dict(fsym="BTC", tsym="JPY", limit="10", e="bitFlyer")
result = getRateViaCrypto(histoticks, params)
for key, value in result.items():
    if key == "Data":
        print(key)
        for data in value:
            print(data)
    else:
        print(key, value)

Response Success
Type 100
Aggregated False
Data
{'time': 1546168440, 'close': 423403, 'high': 423950, 'low': 423403, 'open': 423950, 'volumefrom': 2.53, 'volumeto': 1071110.62}
{'time': 1546168500, 'close': 423402, 'high': 423403, 'low': 423402, 'open': 423403, 'volumefrom': 1.02, 'volumeto': 431870.23}
{'time': 1546168560, 'close': 423490, 'high': 423490, 'low': 423384, 'open': 423402, 'volumefrom': 1.2, 'volumeto': 508558.22}
{'time': 1546168620, 'close': 423502, 'high': 423765, 'low': 423490, 'open': 423490, 'volumefrom': 0.231, 'volumeto': 97855.13}
{'time': 1546168680, 'close': 423381, 'high': 423502, 'low': 423381, 'open': 423502, 'volumefrom': 0.933, 'volumeto': 395124.84}
{'time': 1546168740, 'close': 423291, 'high': 423381, 'low': 423291, 'open': 423381, 'volumefrom': 0.17, 'volumeto': 71959.88}
{'time': 1546168800, 'close': 423051, 'high': 423518, 'low': 423051, 'open': 423291, 'volumefrom': 0.7424, 'volumeto': 314364.8}
{'time': 1546168860, 'close': 423076, 'high': 423076, '

In [6]:
# Same smoke test as for bitFlyer, but against the "bitFlyerfx" exchange:
# print the response metadata, then each "Data" row on its own line.
histoticks = "minute"
# NOTE(review): the original comment on "limit" said "one hour's worth",
# but a limit of 10 looks like roughly ten one-minute bars - confirm.
params = dict(fsym="BTC", tsym="JPY", limit="10", e="bitFlyerfx")
result = getRateViaCrypto(histoticks, params)
for key, value in result.items():
    if key == "Data":
        print(key)
        for data in value:
            print(data)
    else:
        print(key, value)

Response Success
Type 100
Aggregated False
Data
{'time': 1546168500, 'close': 422359, 'high': 422600, 'low': 422350, 'open': 422409, 'volumefrom': 135.32, 'volumeto': 57171080.97}
{'time': 1546168560, 'close': 422532, 'high': 422571, 'low': 422190, 'open': 422359, 'volumefrom': 153.37, 'volumeto': 64776950.97}
{'time': 1546168620, 'close': 422467, 'high': 422748, 'low': 422277, 'open': 422532, 'volumefrom': 220.04, 'volumeto': 92972468.2}
{'time': 1546168680, 'close': 422277, 'high': 422506, 'low': 422169, 'open': 422467, 'volumefrom': 117.4, 'volumeto': 49577556.21}
{'time': 1546168740, 'close': 422231, 'high': 422378, 'low': 422024, 'open': 422277, 'volumefrom': 96.89, 'volumeto': 40904810.29}
{'time': 1546168800, 'close': 422355, 'high': 422446, 'low': 422231, 'open': 422231, 'volumefrom': 111.25, 'volumeto': 46986049.58}
{'time': 1546168860, 'close': 422012, 'high': 422407, 'low': 422012, 'open': 422355, 'volumefrom': 140.79, 'volumeto': 59439721.44}
{'time': 1546168920, 'close': 4

### ローソク足チャート

In [4]:
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick_ohlc
%matplotlib inline

df = toDataFrame(result, False)
timestamps = df["time"].as_matrix()


# candle stick
fig, axes = plt.subplots(figsize=(8, 8))
_ = candlestick_ohlc(axes, df[["time", "open", "high", "low", "close"]].values, width=np.diff(timestamps)[0]*0.8)
plt.ylabel("JPY/BTC")
plt.xlabel("time")

ModuleNotFoundError: No module named 'matplotlib.finance'

In [16]:
import time

datetimeFmt = "%Y-%m-%dT%H:%M:%S.%f"

# Download minute OHLCV for BTC/JPY (bitFlyerfx) walking backwards in time
# from toTs_last until toTs_end is reached, one request per `hours` hours.
# The datetimes are naive, so .timestamp() interprets them as local time.
toTs_end = datetime.datetime(2018, 12, 31, 12, 1, 0).timestamp()
toTs_last = datetime.datetime(2019, 1, 3, 12, 0, 0).timestamp()
histoticks = "minute"
hours = 4 # a day is fetched in six requests (24 / 4)
# In the recorded run each request returned limit + 1 rows
# (18 requests -> 4320 rows), hence the "- 1".
limit = int(hours*60 - 1)

frames = []  # one DataFrame per request, newest chunk first
for ii in range(3*365*24): # safety cap on the number of requests; the loop normally ends at the toTs_end check
    toTs = toTs_last - ii * hours * 3600
    datetime1 = datetime.datetime.fromtimestamp(toTs)
    print(ii, datetime1.strftime(datetimeFmt))
    if toTs <= toTs_end:
        break
    params = {
        "fsym": "BTC",
        "tsym": "JPY",
        "limit": str(limit),
        "e": "bitFlyerfx",
        "toTs":str(toTs)
    }
    result = getRateViaCrypto(histoticks, params)
    frames.append(toDataFrame(result, True))
    time.sleep(5)  # pause between requests

# Concatenate once, oldest chunk first, instead of rebuilding the whole
# frame on every iteration. `df` is left untouched if nothing was fetched.
if frames:
    df = pd.concat(frames[::-1], ignore_index=True)

0 2019-01-03T12:00:00.000000
1 2019-01-03T08:00:00.000000
2 2019-01-03T04:00:00.000000
3 2019-01-03T00:00:00.000000
4 2019-01-02T20:00:00.000000
5 2019-01-02T16:00:00.000000
6 2019-01-02T12:00:00.000000
7 2019-01-02T08:00:00.000000
8 2019-01-02T04:00:00.000000
9 2019-01-02T00:00:00.000000
10 2019-01-01T20:00:00.000000
11 2019-01-01T16:00:00.000000
12 2019-01-01T12:00:00.000000
13 2019-01-01T08:00:00.000000
14 2019-01-01T04:00:00.000000
15 2019-01-01T00:00:00.000000
16 2018-12-31T20:00:00.000000
17 2018-12-31T16:00:00.000000
18 2018-12-31T12:00:00.000000


In [17]:
# Show the first rows of the downloaded frame (oldest bars).
df.head()

Unnamed: 0,time,close,high,low,open,volumefrom,volumeto
0,2018-12-31T12:01:00.000000,417259,417391,416886,417162,281.92,117615000.0
1,2018-12-31T12:02:00.000000,417063,417594,417060,417259,307.51,128337000.0
2,2018-12-31T12:03:00.000000,416882,417168,416634,417063,245.22,102225000.0
3,2018-12-31T12:04:00.000000,417088,417277,416523,416882,367.12,152997000.0
4,2018-12-31T12:05:00.000000,417279,417450,416964,417088,328.85,137207000.0


In [19]:
# Show the last rows of the downloaded frame (newest bars).
df.tail()

Unnamed: 0,time,close,high,low,open,volumefrom,volumeto
4315,2019-01-03T11:56:00.000000,415411,415448,415209,415260,127.42,52926900.0
4316,2019-01-03T11:57:00.000000,415511,415618,415350,415411,150.98,62730500.0
4317,2019-01-03T11:58:00.000000,415525,415650,415474,415511,110.99,46118700.0
4318,2019-01-03T11:59:00.000000,415561,415648,415483,415525,90.12,37449700.0
4319,2019-01-03T12:00:00.000000,415487,415652,415474,415561,119.46,49644100.0


In [18]:
# Number of downloaded rows.
len(df)

4320

In [21]:
# Save the downloaded minute bars as CSV; the file name carries the covered
# range (201812311201 to 201901031200). to_csv is called with its defaults,
# so the row index is written as the first, unnamed column.
df.to_csv("../data/ohlcv/OHLCV_201812311201_to_201901031200.csv")

In [None]:
# Download a file to the local machine through the google.colab helper.
from google.colab import files

# NOTE(review): "./test.csv" is not written anywhere in the visible part of
# this notebook - confirm which file is meant to be downloaded.
files.download("./test.csv")