In [None]:
# 바이낸스 API
from binance.client import Client
from binance.exceptions import BinanceAPIException
from binance.enums import *

# Time 동기화
import time
import win32api

# 보조지표 계산/출력 라이브러리
import talib
import math
import matplotlib.pyplot as plt

# Numpy / pandas
import numpy as np
import pandas as pd
import pytz

# CSV파일
import os
import csv

# Dict 깔끔한 출력
import pprint

# Path of the text file holding the API credentials; create_client() passes it
# to read_api_keys(), which reads the key and the secret from its first two lines.
api_key_file_path = "../api.txt"

In [None]:
# Module-level Binance client handle; stays None until create_client() assigns it.
_client = None

In [None]:
# Synchronize the system clock
def set_system_time(serv_time):
    """Set the Windows system clock from a server-time response.

    Args:
        serv_time: Mapping whose ``"serverTime"`` entry is treated as epoch
            milliseconds.

    The timestamp is reduced to whole seconds and broken down with
    ``time.gmtime`` (UTC). The day-of-week and millisecond arguments handed
    to ``win32api.SetSystemTime`` are both fixed at 0.
    """
    epoch_seconds = int(serv_time["serverTime"] / 1000)
    year, month, day, hour, minute, second = time.gmtime(epoch_seconds)[:6]
    day_of_week = 0
    milliseconds = 0
    win32api.SetSystemTime(
        year, month, day_of_week, day, hour, minute, second, milliseconds
    )


In [None]:
# Read the API credentials from a file
def read_api_keys(file_path):
    """Return ``(api_key, api_secret)`` from the first two lines of a file.

    Args:
        file_path: Path of a text file whose first line is the API key and
            whose second line is the API secret.

    Returns:
        Tuple of the two lines with surrounding whitespace stripped. A line
        that is missing from the file comes back as an empty string.
    """
    with open(file_path, "r") as handle:
        key_line = handle.readline()
        secret_line = handle.readline()
    return key_line.strip(), secret_line.strip()

In [None]:
def create_client():
    """Connect to Binance and sync the local clock to the server time.

    Reads the credentials from ``api_key_file_path``, stores a new ``Client``
    in the module-level ``_client``, then fetches the server time and passes
    it to ``set_system_time``. A ``BinanceAPIException`` raised while doing so
    is printed and followed by ``exit()``.
    """
    global _client
    # Connect the account
    access_key, secret_key = read_api_keys(api_key_file_path)
    try:
        _client = Client(access_key, secret_key)
        set_system_time(_client.get_server_time())
    except BinanceAPIException as err:
        print(err)
        exit()

# Look up (and optionally print) the USDT balance
def get_usdt_balance(client, isprint):
    """Return the USDT balance reported by ``client.futures_account_balance()``.

    Args:
        client: Object exposing ``futures_account_balance()``, which yields
            entries with ``'asset'`` and ``'balance'`` keys.
        isprint: When truthy, print the balance that was found.

    Returns:
        The first USDT entry's balance as a float, or ``None`` (after
        printing a not-found message) when there is no USDT entry.
    """
    usdt_entry = next(
        (entry for entry in client.futures_account_balance()
         if entry['asset'] == "USDT"),
        None,
    )
    if usdt_entry is None:
        print("USDT 잔고를 찾을 수 없습니다.")
        return None

    usdt_balance = float(usdt_entry['balance'])
    if isprint:
        print(f"USDT 잔고: {usdt_balance}")
    return usdt_balance

In [None]:
### Initiation
# Cap DataFrame display at 20 rows (longer frames are shown truncated)
pd.set_option('display.max_rows', 20)
# Show every column without truncation
pd.set_option('display.max_columns', None)
# Number of candles to request per call
# NOTE(review): the original note said at most 500 can be requested - confirm against the current API limit
limit = 500
# Symbol whose candle data is fetched
symbol = "BTCUSDT"
# Connect the account (sets the module-level _client)
create_client()
# Print the USDT balance; as the last expression of the cell its return value is shown as well
get_usdt_balance(_client, True)

In [None]:
# Directory that holds the candle CSV files; create it when it is missing so
# the open() below cannot fail on a fresh checkout.
data_dir = '../candle_data'
os.makedirs(data_dir, exist_ok=True)

# Target CSV file
filename = "candle_data_1d.csv"
filepath = os.path.join(data_dir, filename)

# Download BEFORE opening the file: mode 'w' truncates immediately, so a failed
# request must not wipe a previously saved CSV.
klines = _client.get_historical_klines("BTCUSDT", Client.KLINE_INTERVAL_1DAY, "1 Jan, 2021", "30 Jun, 2023")
print("Get Candles OK")

with open(filepath, 'w', newline='') as csvfile:
    print("Open Ok")

    writer = csv.writer(csvfile)
    writer.writerow(['time', 'open', 'high', 'low', 'close', 'volume'])
    # The first six fields of each kline are open time, open, high, low,
    # close and volume - exactly the columns named in the header.
    writer.writerows(k[:6] for k in klines)

print("Data fetching and saving completed.")

### Functions that fetch current data

In [None]:
def get_klines(client, symbol, limit, interval):
    """Fetch candles for one interval and return them as a DataFrame.

    Args:
        client: Object exposing ``get_klines(symbol=, interval=, limit=)``.
        symbol: Trading pair to query.
        limit: Number of candles to request.
        interval: Candle interval identifier forwarded to the client.

    Returns:
        DataFrame with one row per kline and twelve named columns; values are
        kept exactly as the client returned them (no type conversion).
    """
    # Layout of one raw kline row (the response is a 2-D list):
    # 0=open time (ms), 1=open, 2=high, 3=low, 4=close, 5=volume,
    # 6=close time, 7=quote asset volume, 8=number of trades,
    # 9=taker buy base asset volume, 10=taker buy quote asset volume
    columns = [
        'time', 'open', 'high', 'low', 'close', 'volume',
        'close time', 'quote', 'trade_num',
        'taker_buy_base', 'taker_buy_quote', 'ignored',
    ]
    raw_klines = client.get_klines(symbol=symbol, interval=interval, limit=limit)
    return pd.DataFrame(raw_klines, columns=columns)

def get_klines_by_date(client, symbol, limit, interval, start_time, end_time):
    """Fetch candles inside a time window and return them as a DataFrame.

    Args:
        client: Object exposing ``get_klines`` with ``symbol``, ``interval``,
            ``limit``, ``startTime`` and ``endTime`` keyword arguments.
        symbol: Trading pair to query.
        limit: Number of candles to request.
        interval: Candle interval identifier forwarded to the client.
        start_time: Object with a ``timestamp()`` method (e.g. ``datetime``)
            marking the start of the window.
        end_time: Same, marking the end of the window.

    Returns:
        DataFrame with one row per kline and twelve named columns.
    """
    request = {
        'symbol': symbol,
        'interval': interval,
        'limit': limit,
        # The client is given epoch milliseconds, timestamp() yields seconds.
        'startTime': int(start_time.timestamp() * 1000),
        'endTime': int(end_time.timestamp() * 1000),
    }
    raw_klines = client.get_klines(**request)
    columns = [
        'time', 'open', 'high', 'low', 'close', 'volume',
        'close time', 'quote', 'trade_num',
        'taker_buy_base', 'taker_buy_quote', 'ignored',
    ]
    return pd.DataFrame(raw_klines, columns=columns)

In [None]:
def get_candles(client, sym, limit):
    """Fetch candles for seven standard intervals.

    Args:
        client: Client object forwarded to ``get_klines``.
        sym: Trading pair to query.
        limit: Number of candles to request per interval.

    Returns:
        Tuple of seven DataFrames ordered 1m, 5m, 15m, 1h, 4h, 1d, 1w.
    """
    intervals = (
        Client.KLINE_INTERVAL_1MINUTE,
        Client.KLINE_INTERVAL_5MINUTE,
        Client.KLINE_INTERVAL_15MINUTE,
        Client.KLINE_INTERVAL_1HOUR,
        Client.KLINE_INTERVAL_4HOUR,
        Client.KLINE_INTERVAL_1DAY,
        Client.KLINE_INTERVAL_1WEEK,
    )
    # One request per interval, issued in the order listed above.
    return tuple(get_klines(client, sym, limit, interval) for interval in intervals)

### 추가 지표

In [None]:
def get_candle_subdatas(candles):
    """Append moving-average, RSI, volume-SMA and Bollinger-band columns.

    Args:
        candles: DataFrame with at least ``close`` and ``volume`` columns;
            the values may be numbers or numeric strings.

    Returns:
        A new DataFrame: ``candles`` followed by the columns ``sma7``,
        ``sma20``, ``sma60``, ``sma120``, ``rsi``, ``vol_sma``, ``upperband``
        and ``lowerband``. ``candles`` itself is not modified.
    """
    ### Data analysis
    # Convert strings to numbers once, vectorized. The cast to float64 is
    # there because TA-Lib only accepts double-precision input.
    close_values = pd.to_numeric(candles['close']).astype('float64').to_numpy()
    volume_values = pd.to_numeric(candles['volume']).astype('float64').to_numpy()

    def as_column(values, name):
        # Re-attach the candles index so pd.concat lines the indicator up row
        # by row even when candles does not use a default RangeIndex.
        return pd.Series(values, index=candles.index, name=name)

    sma_columns = [
        as_column(talib.SMA(close_values, timeperiod=period), f"sma{period}")
        for period in (7, 20, 60, 120)
    ]
    rsi = as_column(talib.RSI(close_values, timeperiod=14), "rsi")
    volume_sma = as_column(talib.SMA(volume_values, timeperiod=20), "vol_sma")

    ### Disabled: convert 'time' to a datetime in Korean time
    # korea_tz = pytz.timezone('Asia/Seoul')
    # datetime = pd.to_datetime(candles['time'], unit='ms')
    # candles['time'] = datetime.dt.tz_localize(pytz.utc).dt.tz_convert(korea_tz)

    # Bollinger bands, computed from the converted prices like every other
    # indicator (the raw column may still hold strings). The middle band is
    # not emitted as a column.
    upper, _middle, lower = talib.BBANDS(close_values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    upperband = as_column(upper, "upperband")
    lowerband = as_column(lower, "lowerband")

    # Trend (disabled)
    # inclination = calculate_trends(candles, 0)

    # Join everything column-wise
    return pd.concat(
        [candles, *sma_columns, rsi, volume_sma, upperband, lowerband],
        axis=1,
    )


### CSV 데이터 가져오기

In [None]:
def read_csv_data(time):
    """Load the saved candle CSV for one interval.

    Args:
        time: Interval suffix used in the file name (e.g. ``"1d"`` selects
            ``candle_data_1d.csv``). Note that inside this function the name
            shadows the ``time`` module.

    Returns:
        DataFrame parsed from ``../candle_data/candle_data_<time>.csv``.
    """
    csv_path = f"../candle_data/candle_data_{time}.csv"
    return pd.read_csv(csv_path)

In [None]:
# Load the saved daily candles from CSV and append the indicator columns
# produced by get_candle_subdatas().
candles_history_1d = read_csv_data("1d")
candles_history_info_1d = get_candle_subdatas(candles_history_1d)

In [None]:
# Print the column names, non-null counts and dtypes of the enriched frame
candles_history_info_1d.info()

In [None]:
def compute_atr(candles):
    """Return the 14-period ATR of *candles*, standardized.

    Args:
        candles: Object exposing ``high``, ``low`` and ``close`` attributes
            (e.g. DataFrame columns) that are passed to ``talib.ATR``.

    Returns:
        The ATR values with their mean subtracted, divided by their standard
        deviation.
    """
    atr = talib.ATR(candles.high, candles.low, candles.close, timeperiod=14)
    centered = atr.sub(atr.mean())
    return centered.div(atr.std())

def compute_macd(close):
    """Return the standardized MACD line of *close*.

    Args:
        close: Closing prices passed to ``talib.MACD`` with its default
            periods.

    Returns:
        The first output of ``talib.MACD`` with ``np.mean`` of it subtracted,
        divided by ``np.std`` of it.
    """
    # Only the first of the values returned by talib.MACD is used.
    macd_line = talib.MACD(close)[0]
    centered = macd_line - np.mean(macd_line)
    return centered / np.std(macd_line)

In [None]:
# Add the standardized ATR and MACD as feature columns
candles_history_info_1d['atr'] = compute_atr(candles_history_info_1d)
candles_history_info_1d['macd'] = compute_macd(candles_history_info_1d.close)

In [None]:
# Horizons, in rows of the daily frame, used for the return features in the following cells
lags = [1, 5, 10, 21, 42, 63]

returns = candles_history_info_1d.close.pct_change() # pct_change() = relative change versus the previous row (the previous day)

# Extreme lower-tail percentiles plus their upper-tail mirror images (1 - p)
percentiles=[.0001, .001, .01]
percentiles+= [1-p for p in percentiles]
# Summary of the return distribution formatted as percentages;
# iloc[2:] skips the first two rows of describe() (count and mean)
returns.describe(percentiles=percentiles).iloc[2:].to_frame('percentiles').style.format(lambda x: f'{x:,.2%}')

In [None]:
# Tail quantile used to clip extreme returns
q = 0.0001
# For every horizon, store the lag-period return converted to a per-period
# (geometric average) rate in the column 'return_{lag}d'.
for lag in lags:
    candles_history_info_1d[f'return_{lag}d'] = (candles_history_info_1d.close
                                .pct_change(lag)
                                .pipe(lambda x: x.clip(lower=x.quantile(q),
                                                       upper=x.quantile(1 - q))) # clip outliers to the q and 1-q quantiles (values are capped, not dropped)
                                .add(1) # turn the return into a growth factor before taking the root
                                .pow(1 / lag)
                                .sub(1) # back from growth factor to a return
                                )

In [None]:
# Original author's note: "why is this actually done?"
# Each 'return_{lag}d_lag{t}' column is the 'return_{lag}d' column shifted down
# by t * lag rows, i.e. the same return measured t whole periods earlier.
# NOTE(review): presumably these serve as lagged features for a model - confirm.
for t in [1, 2, 3, 4, 5]:
    for lag in [1, 5, 10, 21]:
        candles_history_info_1d[f'return_{lag}d_lag{t}'] = (candles_history_info_1d
                                           [f'return_{lag}d'].shift(t * lag))

In [None]:
# 'target_{t}d' is 'return_{t}d' shifted up by t rows, so each row holds the
# return realized over the following t rows (the last t rows become NaN).
for t in [1, 5, 10, 21]:
    candles_history_info_1d[f'target_{t}d'] = candles_history_info_1d[f'return_{t}d'].shift(-t)

In [None]:
# Re-inspect the frame now that the return, lag and target columns were added
candles_history_info_1d.info()

In [None]:
# TODO: consider adding Nasdaq prices as an additional feature

In [None]:
# Save the enriched frame; the row index is written as the first (unnamed)
# column because index=False is not passed.
candles_history_info_1d.to_csv("../candle_data/candles_history_info_1d.csv")