In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm, trange
import tensorflow as tf 
from keras.layers.core import Dense, Dropout, Activation
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.models import Sequential
from keras.layers import LSTM,Bidirectional,GRU
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [2]:
def root_mean_squared_error(y_true, y_pred):
        return np.sqrt(np.mean(np.square(y_pred - y_true)))

In [3]:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, single_step=False):
  data = []
  labels = []
  
  start_index = start_index + history_size
  if end_index is None:
    end_index = len(dataset) - target_size
  
  for i in range(start_index, end_index):
    indices = range(i-history_size, i)
    data.append(dataset[indices])
    
    if single_step:
      labels.append(target[i+target_size])
    else:
      labels.append(target[i:i+target_size])
  
  return np.array(data), np.array(labels)


In [4]:
##GRU
def gru_model(input_length, input_dim):#input_length所選取天數;input_dim所選取特徵數

    d=0.3
    #return_sequences : True 為維持 (batch ,time_steps ,seq_len) ，連接下一層LSTM 設置 False 將會變成一維
    model= Sequential()
    model.add(GRU(256,input_shape=(input_length, input_dim),return_sequences=True))
    model.add(Dropout(d))
    model.add(Dropout(d))
    model.add(GRU(256,input_shape=(input_length, input_dim),return_sequences=True))
    model.add(Dropout(d))
    model.add(GRU(256,input_shape=(input_length, input_dim),return_sequences=False))
    model.add(Dropout(d))
    
 
    model.add(Dense(1,activation='linear',kernel_initializer="uniform"))#linear / softmax(多分類) / sigmoid(二分法)

    # optimizer = tf.keras.optimizers.Adam(lr=0.00005)
    model.compile(loss='mse',optimizer='adam', metrics=['mean_squared_error'])#loss=mse/categorical_crossentropy
    return model 

In [5]:
#coid='2317'##可改成比特幣的收盤價 其他做為X
#start='2018-01-01'
#end='2022-5-22'
#opts={'columns': ['open_d' ,'high_d','low_d','mdate', 'volume','close_d']}
#tw=tejapi.get('TWN/EWPRCD',coid=coid,
                #mdate={'gt':start,'lt':end},
                #paginate=True,
                #chinese_column_name=True,
                #opts=opts
                #)
#tw.set_index("日期",drop=True,inplace=True)
#tw.sort_index(inplace=True)
# 資料權限
from google.colab import drive
drive.mount('/content/gdrive/',force_remount=True)
#%cd /content/drive/My-Drive/
# 在線GPU性能检验
!nvidia-smi 
raw = pd.read_csv('/content/gdrive/MyDrive/科技部計畫/variables/各區間資料/DayPredictMonth_Lasso.csv',index_col=[0],parse_dates=["Date"])

#read the data
stock = raw 
amount = len(stock) 
stock = stock['2017-01-01':]

Mounted at /content/gdrive/
NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [6]:
# lcd[~(lcd['收盤價-除權息'])]
y = stock["Close"]
x = stock.drop(['Close'], axis=1)
x.shape

(2069, 59)

In [7]:
#將資料正規化，讓資料變成0~1之間，讓資料訓練速度更快，且更容易收斂

y= y.values.reshape(-1,1) 
scaler=MinMaxScaler(feature_range=(0,1))
scaler1=MinMaxScaler(feature_range=(0,1))
y=scaler.fit_transform(y)
x=scaler1.fit_transform(x)



In [8]:
#並將資料分成訓練組50，驗證組25，測試組25
#def multivariate_data(dataset, target, start_index, end_index, history_size,
#                      target_size, single_step=False):
x,y=multivariate_data( x ,y , 0 ,None, 10 , 1 ,single_step=True)

split =0.75

x_,y_  = x[0:int(split*len(x))] , y[0:int(split*len(x))]
x_test ,y_test   = x[int(split*len(x)):] , y[int(split*len(x)):]

split= 2/3
x_train,y_train  =x_[0:int(split*len(x_))] , y_[0:int(split*len(x_))]
x_vaild,y_vaild  =x_[int(split*len(x_)):] , y_[int(split*len(x_)):]


In [9]:
trainindex= stock.index[10:len(x_train)+10]
valindex = stock.index[len(x_train)+10:len(x_train)+10+len(x_vaild)]

In [10]:
print(x_train.shape,y_train.shape,x_vaild.shape,y_vaild.shape,x_test.shape ,y_test.shape )

(1028, 10, 59) (1028, 1) (515, 10, 59) (515, 1) (515, 10, 59) (515, 1)


In [11]:
##LSTM
def lstm_model(input_length, input_dim):

    d=0.3
    model = Sequential()
    model.add(LSTM(units=256, activation="relu", return_sequences=True, input_shape=(input_length, input_dim)))
    model.add(Dropout(d)) 
    model.add(LSTM(units=256, activation="relu", return_sequences=False, input_shape=(input_length, input_dim)))
    model.add(Dropout(d)) 
    model.add(Dense(units=1))

    # optimizer = tf.keras.optimizers.Adam(lr=0.00005)
    model.compile(loss='mse',optimizer=tf.keras.optimizers.Adam(lr=0.01), metrics=['mean_squared_error'])#loss=mse/categorical_crossentropy
    return model 

In [None]:
##build LSTM model
my_callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=100, monitor = 'val_loss')
    ]######## 在訓練組訓練，使用驗證組選取
#EarlyStopping : 在訓練組訓練參數，以驗證組最低為選擇標準，如果300個epochs，沒有改善即停止訓練
filepath="lstm.best.hdf5" #模型儲存路徑
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, mode='min',save_best_only=True)
#選擇val_loss最低的當作最後的模型
call_backlist = [ my_callbacks,checkpoint]
callbacks=call_backlist
lstm = lstm_model(10,59)
historylstm = lstm.fit( x_train, y_train, batch_size=20,shuffle=False , epochs=100,validation_data=(x_vaild,y_vaild),callbacks=call_backlist)

#batch 是每次訓練抽取樣本數;epochs 訓練次數;shuffle True 隨機打亂 False 不打亂排序
lstm.summary()

print(lstm.summary())


Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 1: val_loss improved from inf to 0.08484, saving model to lstm.best.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 0.08484 to 0.07325, saving model to lstm.best.hdf5
Epoch 3/100
Epoch 3: val_loss did not improve from 0.07325
Epoch 4/100
Epoch 4: val_loss did not improve from 0.07325
Epoch 5/100
Epoch 5: val_loss did not improve from 0.07325
Epoch 6/100
Epoch 6: val_loss did not improve from 0.07325
Epoch 7/100
Epoch 7: val_loss did not improve from 0.07325
Epoch 8/100
Epoch 8: val_loss did not improve from 0.07325
Epoch 9/100
Epoch 9: val_loss did not improve from 0.07325
Epoch 10/100
Epoch 10: val_loss did not improve from 0.07325
Epoch 11/100
Epoch 11: val_loss did not improve from 0.07325
Epoch 12/100
Epoch 12: val_loss did not improve from 0.07325
Epoch 13/100
Epoch 13: val_loss did not improve from 0.07325
Epoch 14/100
Epoch 14: val_loss did not improve from 0.07325
Epoch 15/100
Epoch 15: val_loss did not improve from 0.07325
Epoch 16/100
Epoch 16: val_loss did not improve

In [None]:
##LSTM model predict performace
lstm_train  = lstm.predict(x_train)
lstm_val = lstm.predict(x_vaild)
lstm_pre = lstm.predict(x_test)
plt.plot(historylstm.history['loss'])
plt.plot(historylstm.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

pre = lstm.predict(x_train)
pre1=lstm.predict(x_vaild)
fc=np.concatenate((pre,pre1))
yreal=np.concatenate((y_train,y_vaild))
plt.figure(facecolor='white')
pd.Series(fc.reshape(-1)).plot(color='red', label='Predict_LSTM')
pd.Series(yreal.reshape(-1)).plot(color='blue', label='Original_LSTM')

plt.legend()
plt.show()

lstm_pre= scaler.inverse_transform(lstm_pre)
y_test = scaler.inverse_transform(y_test.reshape(-1,1))




plt.figure()
plt.plot(lstm_pre)
plt.plot(y_test)
plt.title('pre')
plt.ylabel('股價')
plt.xlabel('day')
plt.legend(['pre', 'Test'], loc='upper left')
plt.show()
root_mean_squared_error(lstm_pre,y_test) 
from sklearn.metrics import r2_score
r2_score(y_test,lstm_pre)

In [None]:
##build GRU model
my_callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=300, monitor = 'val_loss')
    ]######## 在訓練組訓練，使用驗證組選取
#EarlyStopping : 在訓練組訓練參數，以驗證組最低為選擇標準，如果300個epochs，沒有改善即停止訓練
filepath="gru.best.hdf5" #模型儲存路徑
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, mode='min',save_best_only=True)
#選擇val_loss最低的當作最後的模型
call_backlist = [ my_callbacks,checkpoint]
callbacks=call_backlist
gru = gru_model(5,124)
historygru = lstm.fit( x_train, y_train, batch_size=0,shuffle=False , epochs=1000,validation_data=(x_vaild,y_vaild),callbacks=call_backlist)
#batch 是每次訓練抽取樣本數;epochs 訓練次數;shuffle True 隨機打亂 False 不打亂排序
gru.summary()

In [None]:
##GRU model predict performace
gru_train  = gru.predict(x_train)
gru_val = gru.predict(x_vaild)
gru_pre = gru.predict(x_test)
plt.plot(historygru.history['loss'])
plt.plot(historygru.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

pre = gru.predict(x_train)
pre1=gru.predict(x_vaild)
fc=np.concatenate((pre,pre1))
yreal=np.concatenate((y_train,y_vaild))
plt.figure(facecolor='white')
pd.Series(fc.reshape(-1)).plot(color='red', label='Predict_GRU')
pd.Series(yreal.reshape(-1)).plot(color='blue', label='Original_GRU')

plt.legend()
plt.show()

gru_pre= scaler.inverse_transform(gru_pre)
y_test = scaler.inverse_transform(y_test.reshape(-1,1))


plt.figure()
plt.plot(gru_pre)
plt.plot(y_test)
plt.title('pre')
plt.ylabel('股價')
plt.xlabel('day')
plt.legend(['pre', 'Test'], loc='upper left')
plt.show()
root_mean_squared_error(gru_pre,y_test) 