<a href="https://colab.research.google.com/github/Lydia-HMLin118/chihlee_110_IMS/blob/main/DQN_trader_stage3_0_%E8%A8%93%E7%B7%B4share_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 盤中程式自動交易DQN_訓練

交易標的為台指期，模型為 DQN，訓練及測試資料為證交所每五秒一筆的交易紀錄資料。目標是讓 DQN 模型在盤中依照即時指數變化自動交易（hold、buy、sell），交易後（buy/sell）以 Line 通知交易價格及數量，並每日彙整交易報表。程式分為訓練及測試兩部分。

In [1]:
#!pip install pandas-datareader

In [2]:
# Mount Google Drive at /content/gdrive; later cells read and write files
# under '/content/gdrive/My Drive/'.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader
import xgboost as xgb
from tqdm import tqdm_notebook, tqdm
from collections import deque
import pickle
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from functools import lru_cache

from datetime import timedelta, date, datetime
import time
import requests
from io import StringIO
from keras.models import load_model
import pickle
import math
import os

In [4]:
# Downloader for the TWSE intraday index file.
# Reference URL: https://www.twse.com.tw/exchangeReport/MI_5MINS_INDEX?response=csv&date=20201019
def TWSE_INDEX_Crawler(date):
    """Download one day of 5-second Taiwan stock index data from TWSE.

    Args:
        date: Day to fetch as a ``YYYYMMDD`` string. It is interpolated into
            the request URL and prefixed to every time-of-day value.

    Returns:
        A DataFrame with the columns ``時間`` (parsed to datetime) and
        ``發行量加權股價指數`` (index values as parsed from the CSV). An empty
        DataFrame is returned when the response carries no data or when the
        download/parsing fails; the failure is printed, not raised, so a
        multi-day crawl keeps going.
    """
    df = pd.DataFrame()
    try:
        url = f'https://www.twse.com.tw/exchangeReport/MI_5MINS_INDEX?response=csv&date={date}'
        # A timeout keeps one stalled connection from hanging the whole crawl.
        res = requests.get(url, timeout=30)
        res.raise_for_status()
        if res.text != '\r\n':  # a bare CRLF body means "no data for this day"
            # '=' characters are stripped before parsing; the real header is
            # on the second line of the file.
            df = pd.read_csv(StringIO(res.text.replace('=', '')), header=1)
            # Keep rows labelled 0..3240 (inclusive) and the two needed columns.
            df = df.loc[0:3240, ['時間', '發行量加權股價指數']]
            df['時間'] = pd.to_datetime(date + ' ' + df['時間'])
    except Exception as err:
        # Best effort: report and fall back to an empty frame. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        print(f'TWSE_INDEX_Crawler: no data for {date} ({err!r})')
        df = pd.DataFrame()
    time.sleep(5)  # throttle consecutive requests to the TWSE site
    return df

In [5]:
# Crawl window as ISO date strings (both ends are crawled).
start_date = '2020-05-20'
end_date = '2020-10-26'
# Parse both bounds into datetime objects so they can be compared and stepped.
st_date, ed_date = (
    datetime.strptime(bound, "%Y-%m-%d") for bound in (start_date, end_date)
)
print(st_date, ed_date)

2020-05-20 00:00:00 2020-10-26 00:00:00


In [None]:
# Crawl the index for every weekday from st_date to ed_date (inclusive).
def _combine_and_save(frames):
    """Concatenate the daily frames, write them to the CSV on Drive and return them."""
    if frames:
        combined = pd.concat(frames, ignore_index=True)
    else:
        combined = pd.DataFrame(columns=['時間', '發行量加權股價指數'])
    combined.to_csv('/content/gdrive/My Drive/twse_index.csv')
    return combined


daily_frames = []  # one DataFrame per successfully crawled day
crawl_day = st_date  # separate cursor, so st_date stays intact and the cell can be re-run
while crawl_day <= ed_date:
    if crawl_day.weekday() < 5:  # Monday..Friday only
        temp_df = TWSE_INDEX_Crawler(crawl_day.strftime("%Y%m%d"))
        if not temp_df.empty:
            daily_frames.append(temp_df)
        if crawl_day.weekday() == 4:
            # Weekly checkpoint, so a failure does not force a full re-crawl.
            df_crawlered = _combine_and_save(daily_frames)
    crawl_day += timedelta(days=1)

# Final write: without it, days after the last Friday would never reach the CSV.
df_crawlered = _combine_and_save(daily_frames)




In [None]:
# Reload the crawled data from the CSV on Drive: the first CSV column becomes
# the index, '時間' is parsed as datetime, and the index values stay as strings.
df_crawlered = pd.read_csv(
    '/content/gdrive/My Drive/twse_index.csv',
    index_col=0,
    parse_dates=['時間'],
    dtype={'時間': 'str', '發行量加權股價指數': 'str'},
)

In [None]:
# Remove the ',' thousands separators and convert the index values from str to float.
# The vectorised string accessor keeps missing values as NaN instead of raising,
# so the null check in the next cell can actually report them.
df_crawlered['發行量加權股價指數'] = (
    df_crawlered['發行量加權股價指數'].str.replace(',', '', regex=False).astype(float)
)

In [None]:
# Count the missing values in each column.
df_crawlered.isnull().sum()

In [None]:
#@lru_cache(maxsize=1024)  # optional: memoise repeated inputs to speed things up
def sigmoid(x):
    """Squash ``x`` into the range 0..1 with the logistic function.

    This damps the influence of very large or very small inputs; an input of
    0 maps to 0.5. Two mathematically identical forms are used, so that
    ``math.exp`` only ever receives a non-positive argument and cannot raise
    ``OverflowError`` for large negative ``x``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)



def stocks_price_format(n):
    """Format a price or profit as a dollar string with two decimal places.

    Negative values are shown as ``"- $ <abs value>"``, others as
    ``"$ <value>"``.
    """
    sign = "- " if n < 0 else ""
    # '.2f' gives two decimals; the former '2f' meant "minimum width 2" and
    # printed six decimals.
    return f"{sign}$ {abs(n):.2f}"

In [None]:
df_crawlered

In [None]:
#df_crawlered_10sec =df_crawlered[df_crawlered.index%2 ==0].copy().reset_index(drop=True) #想調整資料口徑可參考此範例:由每5秒一筆轉為每10秒一筆

In [None]:
# Prepare the model input (index changes) and the raw index values.
sigmoid_df = df_crawlered['發行量加權股價指數'].diff()  # change versus the previous row
# The first row has no predecessor (NaN after diff); define its change as 0.
# Positional .iloc is required here: indexing with [0, 0] does not address the first element.
sigmoid_df.iloc[0] = 0
sigmoid_df = sigmoid_df.map(sigmoid)  # squash the changes into 0-1; a change of 0 maps to 0.5

actual_price_df = df_crawlered['發行量加權股價指數'].values  # raw index values as a NumPy array

In [None]:
print(f'max: {sigmoid_df.max()}, min: {sigmoid_df.min()}')

In [None]:
sigmoid_df

In [None]:
actual_price_df

In [None]:
len(actual_price_df)

In [None]:
# Tunable parameters
window_size = 360 # number of most recent samples that make up one state (also the DNN input dimension)
episodes = 10 # number of full passes over the training data
batch_size = 64 # number of most recent memory entries requested for each DNN training call
max_inventory = 30 # maximum inventory the automated trader is allowed to hold


In [None]:
# Train split: the leading 80% of the rows are used for training.
train_ratio = 0.8
sep_idx = int(len(df_crawlered) * train_ratio)  # first row index NOT in the training set
sep_idx


In [None]:
# Training portion: an independent copy of the sigmoid-transformed changes before sep_idx.
# The original index labels are kept (the reset_index call is left disabled).
df_train = sigmoid_df[:sep_idx].copy()#.reset_index(drop=True)

In [None]:
len(df_train)

In [None]:
class DQN_trader():
    """Deep Q-Network agent that picks one of three actions: hold, buy, sell.

    The agent keeps a bounded replay memory of
    ``(curr_state, action, reward, next_state, done)`` tuples and a small
    fully-connected network that maps a state of ``state_size`` values to one
    Q-value per action.
    """

    def __init__(self, state_size, action_num=3, model_name="DQN_trader"):
        """Create the agent and build its network.

        Args:
            state_size: Number of values in one state (the look-back window
                length); used as the input dimension of the network.
            action_num: Number of actions; 3 means hold, buy, sell.
            model_name: Free-form name stored on the instance.
        """
        self.state_size = state_size
        self.action_num = action_num  # 3 = (hold, buy, sell)
        self.memory = deque(maxlen=2000)  # replay memory, at most 2000 entries
        self.inventory = []  # filled by the caller's trading loop
        self.model_name = model_name

        # Discount factor applied to the best predicted value of the next state.
        self.gamma = 0.618
        # Probability of choosing a random action instead of the model's choice.
        self.epsilon = 1.0
        self.epsilon_final = 0.01  # decay stops once epsilon is at or below this
        self.epsilon_decay = 0.995  # multiplier applied after each batch_train call

        self.model = self.model_dnn()

    def model_dnn(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras ``Sequential`` model: ReLU Dense layers of
            128, 128, 64, 64 and 32 units, then a linear Dense layer with
            ``action_num`` outputs, trained with MSE loss and Adam (1e-3).
        """
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(units=128, activation='relu', input_dim=self.state_size))
        model.add(tf.keras.layers.Dense(units=128, activation='relu'))
        model.add(tf.keras.layers.Dense(units=64, activation='relu'))
        model.add(tf.keras.layers.Dense(units=64, activation='relu'))
        model.add(tf.keras.layers.Dense(units=32, activation='relu'))
        # One linear output per action; the action is chosen with argmax.
        model.add(tf.keras.layers.Dense(units=self.action_num, activation='linear'))
        # 'learning_rate' replaces the deprecated 'lr' keyword.
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3))
        return model

    def trade(self, state):
        """Choose an action for ``state`` (epsilon-greedy).

        With probability ``epsilon`` a random action index is returned;
        otherwise the index of the largest predicted value is returned
        (0 = hold, 1 = buy, 2 = sell).
        """
        if random.random() <= self.epsilon:
            return random.randrange(self.action_num)

        actions = self.model.predict(state, verbose=0)
        return np.argmax(actions[0])

    def batch_train(self, batch_size):
        """Fit the network on the most recent ``batch_size`` memory entries.

        For each entry, the target for the action taken is the stored reward
        plus, unless the entry is terminal, ``gamma`` times the largest
        predicted value of the next state. After the batch, ``epsilon`` is
        multiplied by ``epsilon_decay`` while it is above ``epsilon_final``.

        This method must not be memoised: it depends on mutable state
        (memory, model weights), and a cache keyed on ``(self, batch_size)``
        would turn every call after the first into a no-op.
        """
        # Newest batch_size entries (fewer if the memory is shorter).
        batch = list(self.memory)[-batch_size:] if batch_size > 0 else []

        for curr_state, action, reward, next_state, done in batch:
            target_q = reward
            if not done:
                # Bootstrap: reward now plus discounted best predicted value next.
                target_q = reward + self.gamma * np.amax(
                    self.model.predict(next_state, verbose=0)[0]
                )
            target = self.model.predict(curr_state, verbose=0)
            target[0][action] = target_q  # only the taken action's target changes

            self.model.fit(curr_state, target, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_final:
            self.epsilon *= self.epsilon_decay

In [None]:
trader = DQN_trader(window_size) # window_size sets the input dimension of the DNN

In [None]:
trader.model.summary() # the network outputs 3 linear values (their training targets are built from the environment reward)

In [None]:
# Record the per-episode history of total profit and trade counts
history_total_profit=pd.DataFrame(columns = ['episode', 'total_profit', 'buy', 'sell','unsold'])

In [None]:
# training
now_start = datetime.now()

data = df_train.values

save_dir = "/content/gdrive/My Drive/DQN股價預測"
os.makedirs(save_dir, exist_ok=True)  # fail fast, not after a full episode of training

for episode in range(1, episodes + 1):

    print("Episode: {}/{}".format(episode, episodes))

    # Initial state: window_size copies of the first sample.
    state = deque([data[0]] * window_size, maxlen=window_size)
    total_profit = 0
    trader.inventory = []
    buy_count, sell_count = 0, 0

    # One pass over the training data; tqdm shows the progress.
    for timestep in tqdm(range(1, len(data))):
        # Reset each step, so a sale's reward is not also credited to the
        # hold/buy transitions that follow it.
        reward = 0
        curr_state = np.array(state).reshape(-1, window_size)
        action = trader.trade(curr_state)  # 0, 1 or 2; random or model-predicted
        state.append(data[timestep])  # push the new sample, dropping the oldest
        next_state = np.array(state).reshape(-1, window_size)
        price = actual_price_df[timestep]

        if action == 1 and len(trader.inventory) < max_inventory:  # buying
            trader.inventory.append(price)
            buy_count += 1

        elif action == 2 and len(trader.inventory) > 0:  # selling (first in, first out)
            buy_price = trader.inventory.pop(0)
            sell_count += 1

            reward = max(price - buy_price, 0)  # environment reward, floored at 0
            total_profit += price - buy_price  # actual profit or loss

        done = timestep == len(data) - 1  # True on the last sample of the episode

        trader.memory.append((curr_state, action, reward, next_state, done))

        if done:
            print("########################")
            print(f"Episode {episode}, Total Profit: {total_profit}, Buy: {buy_count}, Sell: {sell_count}, UNSOLD_inventory: {len(trader.inventory)}")
            print("########################")
            history_total_profit.loc[len(history_total_profit)] = [episode, total_profit, buy_count, sell_count, len(trader.inventory)]

        if len(trader.memory) > batch_size:  # train once more than batch_size entries exist
            trader.batch_train(batch_size)

    # After every episode: save the model, the profit history and the replay memory.
    trader.model.save(os.path.join(save_dir, "ai_trader_{}.h5".format(episode)))
    history_total_profit.to_csv(os.path.join(save_dir, 'history_total_profit.csv'), encoding='utf-8', index=False)
    learned_memory = trader.memory
    with open(os.path.join(save_dir, "learned_memory.pkl"), 'wb') as f:
        pickle.dump(learned_memory, f)
    


In [None]:
history_total_profit

In [None]:
#trader.memory

In [None]:
 
# Save the replay memory; the context manager closes the file so the pickle
# is fully flushed to Drive.
learned_memory = trader.memory
with open("/content/gdrive/My Drive/DQN股價預測/learned_memory.pkl", 'wb') as f:
    pickle.dump(learned_memory, f)

# Report the minutes elapsed since now_start was recorded.
now_end = datetime.now()
nowsince = now_end - now_start
minutessince = int(nowsince.total_seconds() / 60)
print(f'total mins used is :{minutessince}')

In [None]:
# Reload the saved replay memory. NOTE: unpickling can execute arbitrary code,
# so only load pickle files from a trusted source.
with open('/content/gdrive/My Drive/DQN股價預測/learned_memory.pkl', 'rb') as f:
    lm = pickle.load(f)

In [None]:
#len(lm[0][0][0])

In [None]:
lm