# Upbit API를 이용한 데이터 수집
3초마다 200개의 자료를 수집하여 저장

In [None]:
from datetime import datetime
import pandas as pd
import json

# json to DataFrame
def resToDF(json_text):
    """Turn a JSON candle payload into a time-indexed DataFrame.

    Args:
        json_text: JSON text decoding to a collection of candle records.

    Returns:
        A DataFrame holding the price, volume and value fields listed
        below, indexed by ``candle_date_time_kst`` converted to datetimes.
    """
    # The first entry becomes the index; the rest stay as data columns.
    wanted = [
        'candle_date_time_kst', 'opening_price', 'high_price', 'low_price',
        'trade_price', 'candle_acc_trade_volume', 'candle_acc_trade_price',
    ]
    records = json.loads(json_text)
    frame = pd.DataFrame(records, columns=wanted).set_index('candle_date_time_kst')
    frame.index = pd.to_datetime(frame.index)
    return frame

def get_line_time(file_name, line_number):
    """Return the timestamp held in the first CSV field of one line of a file.

    Args:
        file_name: Path of the CSV file to read.
        line_number: Index of the line to read. Negative values count from
            the end, as with list indexing (``-1`` is the last line).

    Returns:
        The first comma-separated field of the selected line, parsed with
        the format ``%Y-%m-%d %H:%M:%S``. When the file has no such line or
        the field is not a timestamp (for example a header-only file),
        a notice is printed and ``datetime.now()`` is returned instead.
    """
    try:
        with open(file_name, "r") as f:
            first_field = f.readlines()[line_number].strip().split(",")[0]
        return datetime.strptime(first_field, '%Y-%m-%d %H:%M:%S')
    except (ValueError, IndexError):
        # ValueError: the field is not a timestamp (e.g. only the header row).
        # IndexError: the file has fewer lines than requested (zero-byte file).
        print("empty file:", file_name)
        return datetime.now()

In [None]:
from modules.local_values import tickers, raw_folder
import os
# Make sure the raw-data folder exists before any file is created in it.
if not os.path.exists(raw_folder):
    os.makedirs(raw_folder)

# Seed one CSV per ticker with just the header row; existing files are left alone.
for tick in tickers:
    file_name = raw_folder + tick + '.csv'
    if os.path.exists(file_name):
        continue
    with open(file_name, 'w') as file:
        file.write(",open,high,low,close,volume,value\n")

In [None]:
from datetime import timedelta
import requests
import time

# Number of candles requested per API call; also the size of a "full" batch.
count=200
headers = {"accept": "application/json"}
for tick in tickers:
    file_name=raw_folder+tick+'.csv'
    url=f'https://api.upbit.com/v1/candles/minutes/1?market={tick}&count={count}&to='
    tick_len=0
    # Resume from the timestamp on the last line of the ticker's CSV
    # (get_line_time falls back to the current time for a header-only file).
    to_time = get_line_time(file_name,-1)

    while True:
        # Stored timestamps come from candle_date_time_kst; shift back 9 hours
        # for the `to` query parameter.
        # NOTE(review): assumes the API reads a zone-less `to` as UTC -- confirm.
        to_time=(to_time-timedelta(hours=9)).strftime('%Y-%m-%dT%H:%M:%S')
        # A timeout keeps a stalled connection from hanging the cell forever.
        response=requests.get(url+to_time, headers=headers, timeout=10)
        contents=resToDF(response.text)
        # Pause between requests (one request every 3 seconds).
        time.sleep(3)
        if contents.shape[0] != count:
            # A short batch is the last one: keep whatever rows it holds
            # instead of discarding them, then stop paging for this ticker.
            if not contents.empty:
                contents.to_csv(file_name,mode='a',header=False)
                tick_len += contents.shape[0]
            print(f'{tick} updates ~ {to_time} = {tick_len} length')
            break
        # The last row of the batch becomes the upper bound of the next request.
        to_time=contents.index[-1]
        contents.to_csv(file_name,mode='a',header=False)
        tick_len += contents.shape[0]

### Data 파악
|index|open|high|low|close|volume|value|
|-----|----|----|---|-----|------|-----|
|KST 기준|시가|고가|저가|종가|누적 거래량|누적 거래금액|

In [None]:
from modules.local_values import tickers, raw_datas
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the first raw CSV, parsing column 0 as dates and using it as the index.
df = pd.read_csv(raw_datas[0],parse_dates=[0],index_col=[0])
# Summary statistics of the columns; kept as the last expression so the
# notebook cell displays the result.
df.describe()

In [None]:
# Plot every ticker's weekly close on a shared 0-1 scale so that the shapes
# of the curves can be compared regardless of absolute price.
for file_name, tick in zip(raw_datas, tickers):
    # Keep the first row of each week.
    df = pd.read_csv(file_name, parse_dates=[0], index_col=[0]).resample(rule='W').first()
    # Min-max scaling: shift so the minimum is 0, then divide by the new maximum.
    shifted = df['close'] - df['close'].min()
    df_min_max = shifted / shifted.max()
    plt.plot(df_min_max, label=tick)

# The scaled values carry no unit, so the y tick marks are hidden.
plt.yticks([], [])
plt.legend()
plt.show()

### 결측 데이터 확인
NaN 값 확인

In [None]:
# Report, per ticker, how many one-minute slots have no 'open' value.
for file_name,tick in zip(raw_datas,tickers):
    df=pd.read_csv(file_name,parse_dates=[0],index_col=[0])
    # Resampling onto a one-minute grid leaves NaN in every slot that has no
    # row in the file. 'min' is the minute alias; the older spelling 'T' is
    # deprecated in pandas 2.2+.
    df=df.resample(rule='min').first()
    msg='Data{:>9}\tTotal:{:>8}\t NaN:{:7}' \
        .format(tick,len(df),df['open'].isnull().sum())
    print(msg)