# CP1 프로젝트

## moldule

In [None]:
#pip install ccxt

In [57]:
from datetime import datetime
import time
import matplotlib.pyplot as plt
import requests
import numpy as np 
import pandas as pd 
import os
import pprint

# sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# tensorflow
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM
from tensorflow.keras.preprocessing.text import text_to_word_sequence

# Natural Language toolkit
import nltk
from nltk.corpus import stopwords

# tweeter module
import tweepy

# binance module
import ccxt

# pickle
import pickle

## 비트코인 주가 예측

In [None]:
# start time ~ end time 까지의 data를 DataFrame화
COLUMNS = ['Open_time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close_time', 'quote_av', 'trades', 
                   'tb_base_av', 'tb_quote_av', 'ignore']
URL = 'https://api.binance.com/api/v3/klines'
def get_data(start_date, end_date, symbol):
    data = []
    
    start = int(time.mktime(datetime.strptime(start_date + ' 00:00', '%Y-%m-%d %H:%M').timetuple())) * 1000
    end = int(time.mktime(datetime.strptime(end_date +' 23:59', '%Y-%m-%d %H:%M').timetuple())) * 1000
    params = {
        'symbol': symbol,
        'interval': '1h',
        'limit': 1000,
        'startTime': start,
        'endTime': end
    }
    
    while start < end:
        print(datetime.fromtimestamp(start // 1000))
        params['startTime'] = start
        result = requests.get(URL, params = params)
        js = result.json()
        if not js:
            break
        data.extend(js)  # result에 저장
        start = js[-1][0] + 60000  # 다음 step으로
    # 전처리
    if not data:  # 해당 기간에 데이터가 없는 경우
        print('해당 기간에 일치하는 데이터가 없습니다.')
        return -1
    df = pd.DataFrame(data)
    df.columns = COLUMNS
    df['Open_time'] = df.apply(lambda x:datetime.fromtimestamp(x['Open_time'] // 1000), axis=1)
    df = df.drop(columns = ['Close_time', 'ignore'])
    df['Symbol'] = symbol
    df.loc[:, 'Open':'tb_quote_av'] = df.loc[:, 'Open':'tb_quote_av'].astype(float)  # string to float
    df['trades'] = df['trades'].astype(int)
    return df

start_date = '2021-12-21'
end_date = '2021-12-22'
symbol = 'BTCUSDT'
btcdata = get_data(start_date, end_date, symbol)

2021-12-21 00:00:00
2021-12-21 17:01:00


In [None]:
# get_data 함수를 통해 2021-12-20까지의 data를 csv로 저장해둠. 
# 불러와서 사용
filepath = '/content/drive/MyDrive/AI_bootcamp/CP1/BTCDATA_20211220.csv'
df = pd.read_csv(filepath)

# BTCDATA_20211220 + btcdata 
df_concat = pd.concat([df,btcdata])
df_concat.reset_index(drop=True, inplace=True)

Unnamed: 0,Open_time,Open,High,Low,Close,Volume,quote_av,trades,tb_base_av,tb_quote_av,Symbol
37597,2021-12-20 19:00:00,46268.58,46690.0,46240.01,46585.55,1717.22917,79833810.0,52616,912.50628,42414810.0,BTCUSDT
37598,2021-12-20 20:00:00,46585.56,47246.0,46585.55,47021.02,2240.77932,105080800.0,60166,1120.62133,52556140.0,BTCUSDT
37599,2021-12-20 21:00:00,47020.99,47537.57,46956.89,46961.23,2309.70816,109077900.0,61232,1229.39207,58083460.0,BTCUSDT
37600,2021-12-20 22:00:00,46962.64,47108.92,46717.55,46757.69,727.72223,34146350.0,31339,342.63711,16079150.0,BTCUSDT
37601,2021-12-20 23:00:00,46757.7,47096.96,46747.84,46914.16,1028.84754,48258590.0,28018,531.17144,24910960.0,BTCUSDT


In [None]:
df = df_concat.copy()

# 필요한 columns만 추출 및 columns rename
df = df.loc[:,['Open_time','Open','High','Low','Close','Volume']]
df.columns = ['date','open','high','low','close','volume']
df.tail()

Unnamed: 0,date,open,high,low,close,volume
37615,2021-12-21 13:00:00,48735.49,48970.88,48575.09,48909.49,1564.00435
37616,2021-12-21 14:00:00,48909.49,48966.2,48520.0,48624.01,1453.20324
37617,2021-12-21 15:00:00,48624.01,48788.87,48265.24,48418.49,2209.08606
37618,2021-12-21 16:00:00,48418.48,48746.97,48381.57,48685.19,1137.16935
37619,2021-12-21 17:00:00,48685.18,48876.94,48570.32,48616.92,908.48504


In [None]:
# scaler
scaler = MinMaxScaler()
# volume과 price를 나누어 scaler 
# -> volume이 너무 크기 때문에 volume의 영향을 크게 받을 수 있기 때문
scale_cols_exvol = ['open', 'high', 'low', 'close'] 
scale_cols_vol = ['volume']

df_exvol_scaled = pd.DataFrame(scaler.fit_transform(df[scale_cols_exvol]))
df_vol_scaled = pd.DataFrame(scaler.fit_transform(df[scale_cols_vol]))

df_scaled = pd.concat([df_exvol_scaled,df_vol_scaled], axis=1)
df_scaled.columns = scale_cols_exvol + scale_cols_vol

df_scaled.head()

Unnamed: 0,open,high,low,close,volume
0,0.027659,0.027182,0.028535,0.027422,0.000332
1,0.02827,0.027509,0.028706,0.027482,0.000595
2,0.028436,0.027509,0.02904,0.027649,0.00032
3,0.028832,0.027509,0.028898,0.027326,0.000353
4,0.028037,0.027025,0.028263,0.026676,0.000864


In [None]:
# test size = 0.2 
TEST_SIZE = int(len(df)*0.2)
WINDOW_SIZE = 20

train = df_scaled[:-TEST_SIZE]
test = df_scaled[-TEST_SIZE:]

In [None]:
# window size 만큼의 데이터로 다음의 close를 예측하기 위해
# 20개 간격의 dataset을 만듦
def make_dataset(data, label, window_size=20):
    feature_list = []
    label_list = []
    for i in range(len(data) - window_size):
        feature_list.append(np.array(data.iloc[i:i+window_size]))
        label_list.append(np.array(label.iloc[i+window_size]))
    return np.array(feature_list), np.array(label_list)

In [None]:
# target 선정 및 make train, valid, test dataset 
feature_cols = ['open','high','low','volume']
label_cols = ['close']

# train dataset
train_feature = train[feature_cols]
train_label = train[label_cols]
train_feature, train_label = make_dataset(train_feature, train_label, 20)

# train, valid dataset
x_train, x_valid, y_train, y_valid = train_test_split(train_feature, train_label, test_size=0.2)

# test dataset
test_feature = test[feature_cols]
test_label = test[label_cols]
test_feature, test_label = make_dataset(test_feature, test_label, 20)

In [None]:
# make model
model = Sequential()
model.add(LSTM(16, 
               input_shape=(train_feature.shape[1], train_feature.shape[2]), 
               activation='relu', 
               return_sequences=False)
          )

model.add(Dense(1))

In [None]:
# loss = MSE , optimizer = adam, early_stop 적용
model.compile(loss='mean_squared_error', optimizer='adam')
early_stop = EarlyStopping(monitor='val_loss', patience=5)

# checkpoint
model_path = 'model'
filename = os.path.join(model_path, 'tmp_checkpoint.h5')
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

# model train
history = model.fit(x_train, y_train, 
                                    epochs=5, 
                                    batch_size=10,
                                    validation_data=(x_valid, y_valid), 
                                    callbacks=[early_stop, checkpoint])

Epoch 1/5
Epoch 00001: val_loss improved from inf to 0.00002, saving model to model/tmp_checkpoint.h5
Epoch 2/5
Epoch 00002: val_loss improved from 0.00002 to 0.00001, saving model to model/tmp_checkpoint.h5
Epoch 3/5
Epoch 00003: val_loss did not improve from 0.00001
Epoch 4/5
Epoch 00004: val_loss did not improve from 0.00001
Epoch 5/5
Epoch 00005: val_loss improved from 0.00001 to 0.00001, saving model to model/tmp_checkpoint.h5


In [None]:
# save model weights
model.load_weights(filename)

# predict price
pred = model.predict(test_feature)

(7504, 1)

In [None]:
# predict graph 
"""
plt.figure(figsize=(20, 12))
plt.plot(test_label[5000:], label = 'actual')
plt.plot(pred[5000:], label = 'prediction')
plt.legend()
plt.show()
"""

"\nplt.figure(figsize=(20, 12))\nplt.plot(test_label[5000:], label = 'actual')\nplt.plot(pred[5000:], label = 'prediction')\nplt.legend()\nplt.show()\n"

In [None]:
# 종가 예측값
def reverse_min_max_scaling(org_x, x): 
    org_x_np = np.asarray(org_x) 
    x_np = np.asarray(x)
    return (x_np * (org_x_np.max() - org_x_np.min() + 1e-7)) + org_x_np.min()

price = reverse_min_max_scaling(df[scale_cols_exvol], pred[-1])

In [None]:
# 4h 전값과 비교하여 예측 변동률을 얻음
pre_price = reverse_min_max_scaling(df[scale_cols_exvol], pred[-5])
price_change = (pre_price[0]-price[0])/pre_price[0]*100
print(price_change)

array([49592.15], dtype=float32)

## 트윗 

In [None]:
# model, tokenzier load
def load_tokenizer(path):
    with open(path, 'rb') as f:
        tokenizer = pickle.load(f)
    return tokenizer

model_name = 'keras_tweet_timeline_trained_model.h5'
tokenizer_name = 'keras_tweet_timeline_tokenizer.pickle'
model_path = os.path.join('/content/drive/MyDrive/AI_bootcamp/CP1/', model_name)
tokenizer_path = os.path.join('/content/drive/MyDrive/AI_bootcamp/CP1/', tokenizer_name)

model = load_model(model_path)
tokenizer = load_tokenizer(tokenizer_path)

In [None]:
# 불용어 다운로드
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# list 최대 길이
max_len=100

# 불용어 set
stop_words = set(stopwords.words('english'))

# tweet timeline sentiment analysis
def predict_sentiment_tweet(text, model):
    tokens = []

    text_split = text.split(" ")

    for x in text_split:
      if x not in stop_words:
        if "@" in x or "https://" in x or "#" in x:
          pass
        else:
          tokens.append(x)

    if len(tokens) == 0:
      return 0.5
    else:
      tokens = tokenizer.texts_to_sequences(tokens)
      x_test = pad_sequences(tokens, maxlen=max_len)
      predict = model.predict(x_test)
      if np.mean(predict) > 0.5: # 감정 분석 결과 총점 0.5 이상이면 Good으로 분류
          return 1 # Good
      else:
          return 0 # Bad

In [None]:
# twitter key, token
twitter_api_key = 'ssecret'
twitter_api_secret_key = 'secret'
twitter_access_token = 'secret'
twitter_access_token_seret = 'secret'

auth = tweepy.OAuthHandler(twitter_api_key, twitter_api_secret_key)
auth.set_access_token(twitter_access_token,twitter_access_token_seret)

api = tweepy.API(auth)

# bitcoin influence twitter id 
# 베리실버트, 일론머스크, planB, 마이크로스트래지 ceo, techdev
influencer_list = ['@BarrySilbert','@elonmusk','@100trillionUSD','@saylor','@TechDev_52']

review_text = []

# all influence's timeline list
for tweet_id in influencer_list:
  timeline_list = api.user_timeline(tweet_id, tweet_mode = 'extended')
  for tweet in timeline_list:
      review_text.append(tweet.full_text)

score = []

# 감정 분석 결과
for sentence in review_text:
  result = predict_sentiment_tweet(sentence, model)
  score.append(result)

total_result = np.mean(score)

# 0.25 이상이면 good
print(total_result)

## 바이낸스 주문

In [None]:
# binance Key
binance = ccxt.binance(config={
    'apiKey': 'secret',
    'secret': 'secret',
    'enableRateLimit': True,
    'options': {
        'defaultType': 'future'
    }
})

# ticker 및 leverage 설정
markets = binance.load_markets()
symbol = "BTC/BUSD"
market = binance.market(symbol)
leverage = 3

resp = binance.fapiPrivate_post_leverage({
    'symbol': market['id'],
    'leverage': leverage
})

# 현재 BTC/BUSD 가격
btc_live = binance.fetch_ticker("BTC/BUSD")

In [None]:
# 무 포지션으로 시작
position = { "type": None,
             "amount": 0
           }
# 선물 계좌 현황
balance = binance.fetch_balance(params={"type": "future"})

# price change에 따라 long, short 주문
def make_order(price_change):
  # short, long position 결정 
  if price_change >= 0 :
    pred_input = "buy"
  else:
    pred_input = "sell"

  # 주문
  if pred_input == "buy":
    position['type'] = "long"
    order = binance.create_market_buy_order(
        symbol="BTC/BUSD",
        amount=0.005,)
    pprint.pprint(order['info'])
    return "매수 주문이 완료되었습니다."

  elif pred_input == "sell":
    position['type'] = "short"
    order = binance.create_market_sell_order(
        symbol="BTC/BUSD",
        amount=0.005,
        )
    pprint.pprint(order['info'])
    return "매도 주문이 완료되었습니다."


# 결과에 따른 주문 
print(f"계좌 상황 : {balance['BUSD']}")
print("예상 가격 등락률 : ", round(price_change,4))
print(make_order(price_change))

In [None]:
# 등락 상, 하한 %
price_low_limit = -3
price_high_limit = 5

# 트위터 감정 분석을 등락 상, 하한%에 가중치로 줌
if total_result >= 0.25:
  price_low_limit = price_low_limit*total_result
  price_high_limit = price_high_limit*total_result
else : 
  price_low_limit = price_low_limit-(price_low_limit*total_result)
  price_high_limit = price_high_limit-(price_high_limit*total_result)

# 각 포지션의 등락 상, 하한%
if position['type'] == "short":
  print("position : ", position['type'])
  print("price_low_limit : ", -price_low_limit )
  print("price_high_limit : ", -price_high_limit)
  print("price_change : ", price_change)
elif position['type'] == "long":
  print("position : ", position['type'])
  print("price_high_limit : ", price_high_limit)
  print("price_low_limit : ", price_low_limit  )
  print("price_change : ", price_change) 

# 포지션 종료 
def end_position(price_change):
  if position['type'] == "long": # Long position 일 때
    position['type'] = None
    order = binance.create_market_sell_order(
      symbol="BTC/BUSD",
      amount=0.005,
      )
    pprint.pprint(order['info'])
    return "Long 포지션이 종료되었습니다."
  elif position['type'] == "short": # Short position 일 때
    position['type'] = None
    order = binance.create_market_buy_order(
      symbol="BTC/BUSD",
      amount=0.005,)
    pprint.pprint(order['info'])
    return "Short 포지션이 종료되었습니다."

In [None]:
# 현재 short 포지션을 가지고 있을 때
if position['type'] == "short":
  price_high_limit, price_low_limit = price_low_limit, price_high_limit
  if (price_change < price_high_limit) or (price_change > price_low_limit):
    end_position(price_change)
    position['type'] == None
    print("숏 포지션을 정리합니다.")
  else : 
    print(f"큰 변동이 없어 현재 포지션 {position['type']}을 유지합니다.") # 예측 등락률이 등락 상, 하한% 이내에 존재할 때

# 현재 long 포지션을 가지고 있을 때
elif position['type'] == "long":
  if (price_change > price_high_limit) or (price_change < price_low_limit):
    end_position(price_change)
    position['type'] == None
    print("롱 포지션을 정리합니다.")
  else : 
    print(f"큰 변동이 없어 현재 포지션 {position['type']}을 유지합니다.")

else: 
  print("포지션이 없습니다.")

# 현재 선물 계좌 상태
print(f"계좌 상황 : {balance['BUSD']}")