<a href="https://colab.research.google.com/github/31Wilson13/Stock_Prediction_LSTM/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from talib import abstract
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
import pickle
from keras.models import load_model

## 匯入資料並分割資料集

In [None]:
def stock_fun(x, train_end=1476, valid_end=1963):
    """Load a price CSV, add technical indicators and split it chronologically.

    Args:
        x: Path of the CSV file to read. The file is expected to carry the
            Chinese column headers renamed below (date, open, high, low,
            close, volume).
        train_end: Row position at which the training split ends (exclusive).
        valid_end: Row position at which the validation split ends and the
            test split begins.

    Returns:
        A tuple ``(stock, dataset_train, dataset_valid, dataset_test)`` where
        ``stock`` is the full frame including the indicator columns and the
        other three are consecutive row slices of it.
    """
    stock = pd.read_csv(x)
    stock = stock.rename(columns={
        '日期': 'Date',
        '開盤價': 'open',
        '最高價': 'high',
        '最低價': 'low',
        '收盤價': 'close',
        '成交量': 'volume',
    })

    # The TA-Lib abstract functions are handed the whole frame; they rely on
    # the lower-case price column names assigned by the rename above.
    stock['RSI'] = abstract.RSI(stock)
    stock[['upperband', 'middleband', 'lowerband']] = abstract.BBANDS(stock)
    stock[['K(9)', 'D(9)']] = abstract.STOCH(stock)

    # Positional, order-preserving split: no shuffling of the time series.
    dataset_train = stock.iloc[:train_end, :]
    dataset_valid = stock.iloc[train_end:valid_end, :]
    dataset_test = stock.iloc[valid_end:, :]
    return stock, dataset_train, dataset_valid, dataset_test


# Pass the path of the CSV file to load as the argument x.
stock, dataset_train, dataset_valid, dataset_test = stock_fun('/Users/wilson/Python//0050/0050.csv')

## 資料正規化

In [None]:
# sc = pickle.load(open('scaler_final.pkl', 'rb'))

def normalization():
    """Return a new, unfitted MinMaxScaler configured for the range (0, 1)."""
    return MinMaxScaler(feature_range=(0, 1))


# Shared scaler instance used by the cells below.
sc = normalization()


## 將資料轉換為模型用的格式

In [None]:
def _make_windows(scaled, day, target_col=3):
    """Cut a scaled 2-D array into look-back windows and next-row targets.

    Args:
        scaled: Array of shape ``(rows, features)``.
        day: Number of consecutive rows in each window.
        target_col: Column of ``scaled`` used as the prediction target.

    Returns:
        ``(windows, targets)``. ``windows[k]`` holds rows ``k .. k+day-1`` and
        ``targets[k]`` is row ``k+day`` of ``target_col``. When there are not
        enough rows, ``windows`` has shape ``(0, day, features)``.
    """
    n_rows, n_features = scaled.shape
    steps = range(day, n_rows)
    windows = np.array([scaled[i - day:i, :] for i in steps])
    if len(steps) == 0:
        # np.array([]) is 1-D; keep the 3-D layout downstream code expects.
        windows = windows.reshape(0, day, n_features)
    targets = scaled[day:, target_col]
    return windows, targets


def data(train, valid, test, sc, day, full_data=None):
    """Scale the splits and turn them into windowed model inputs.

    Args:
        train: Training frame; columns at positions 1-4 are used as features.
        valid: Validation frame with the same column layout.
        test: Test frame; only its length is used, to locate the tail of the
            full frame that feeds the test windows.
        sc: Scaler exposing ``fit_transform`` and ``transform``. It is fitted
            on the training features only and then reused for the other data.
        day: Look-back window length in rows.
        full_data: Full, unsplit frame. Defaults to the module-level ``stock``
            so existing five-argument calls keep working.

    Returns:
        ``(x_train, y_train, x_valid, y_valid, x_test, real_stock_price,
        training_set_scaled)``. The ``x_*`` arrays have shape
        ``(samples, day, 4)``; the ``y_*`` arrays hold the scaled value of
        feature index 3 for the row right after each window.
    """
    source = stock if full_data is None else full_data

    # Training set: the scaler is fitted here and only here.
    training_set_scaled = sc.fit_transform(train.iloc[:, 1:5].values)
    x_train, y_train = _make_windows(training_set_scaled, day)

    # Validation set: reuse the statistics learned from the training set.
    validing_set_scaled = sc.transform(valid.iloc[:, 1:5].values)
    x_valid, y_valid = _make_windows(validing_set_scaled, day)

    # NOTE(review): this returns the column at position 2, while the targets
    # above come from feature index 3 of columns 1-4 (i.e. position 4).
    # Confirm which column the plots are meant to compare against.
    real_stock_price = source.iloc[:, 2:3].values

    # Test inputs: take `day` extra rows before the test span so the first
    # test row already has a complete look-back window.
    db_all = source.iloc[:, 1:5].values
    inputs = sc.transform(db_all[len(source) - len(test) - day:])
    x_test, _ = _make_windows(inputs, day)

    return x_train, y_train, x_valid, y_valid, x_test, real_stock_price, training_set_scaled

# Build the windowed train/validation/test arrays with a look-back of 5 rows.
x_train,y_train,x_valid,y_valid,x_test,real_stock_price,training_set_scaled = data(dataset_train,dataset_valid,dataset_test,sc,5)

## LSTM模型訓練

In [None]:
# Build and train the stock-price prediction model.
def model_training(x_train, y_train, validation_data=None):
    """Build a two-layer LSTM regressor and fit it with early stopping.

    Args:
        x_train: Training inputs shaped ``(samples, timesteps, features)``.
        y_train: Training targets, one value per sample.
        validation_data: Optional ``(x_valid, y_valid)`` tuple. Defaults to
            the module-level ``x_valid`` / ``y_valid`` so existing
            two-argument calls keep working.

    Returns:
        ``(regressor, history)``: the fitted model and the object returned by
        ``fit``.
    """
    if validation_data is None:
        validation_data = (x_valid, y_valid)

    regressor = Sequential()
    # LSTM input is [samples, timesteps, features]; take timesteps and
    # feature count from the data instead of hard-coding them.
    regressor.add(LSTM(units=64, return_sequences=True, input_shape=x_train.shape[1:]))

    regressor.add(LSTM(units=64))
    regressor.add(Dropout(0.2))

    # Fully connected output layer producing a single value per sample.
    regressor.add(Dense(units=1))

    regressor.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

    # Stop after 50 epochs without improvement and keep the best weights.
    early_stopping = EarlyStopping(patience=50, restore_best_weights=True)

    history = regressor.fit(
        x_train,
        y_train,
        epochs=250,
        validation_data=validation_data,
        batch_size=32,
        verbose=2,
        callbacks=[early_stopping],
    )

    return regressor, history


regressor, history = model_training(x_train, y_train)

## 讀入模型

In [None]:
# Restore a previously saved model from disk.
# NOTE(review): `model` is not referenced by the later cells visible here,
# which keep using `regressor`.
model = load_model('my_model_final.h5')
# The .npy file is presumably a dict saved through np.save (stored as a
# pickled 0-d object array), hence allow_pickle=True and .item() to unwrap it.
my_loss_final = np.load('my_loss_final.npy', allow_pickle=True).item()

## 繪製loss圖

In [None]:
def plot_metric(history, metric):
    """Plot the training and validation curves of one metric over epochs.

    Args:
        history: Either an object with a ``history`` attribute holding the
            per-epoch metric dict (as returned by ``fit``), or that dict
            itself (e.g. one reloaded from disk).
        metric: Key of the training metric; the validation series is read
            from ``'val_' + metric``.
    """
    # Accept both the History-like object and a bare dict of metric lists.
    records = getattr(history, 'history', history)
    train_metrics = records[metric]
    val_metrics = records['val_' + metric]

    epochs = range(1, len(train_metrics) + 1)
    plt.plot(epochs, train_metrics)
    plt.plot(epochs, val_metrics)
    plt.title('Training and validation ' + metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_" + metric, 'val_' + metric])
    plt.show()


# To plot the history dict reloaded from disk instead:
# plot_metric(my_loss_final, 'loss')
plot_metric(history, 'loss')

## 將測試集資料放入模型進行預測

In [None]:
def inverse_predictions(predictions, scaler, prediction_index):
    """Map scaled predictions of one feature back to its original units.

    Only the target column is inverted, so this works even though the scaler
    was fitted on more features than the model outputs.

    Args:
        predictions: Scaled values (array-like supporting arithmetic).
        scaler: Fitted min-max scaler exposing ``data_min_`` and
            ``data_max_``. Its ``feature_range`` is honoured when present;
            otherwise the range (0, 1) is assumed.
        prediction_index: Position of the target feature among the columns
            the scaler was fitted on.

    Returns:
        The predictions expressed in the original units of that feature.
    """
    data_min = scaler.data_min_[prediction_index]
    data_max = scaler.data_max_[prediction_index]
    range_min, range_max = getattr(scaler, 'feature_range', (0, 1))

    # Undo the feature_range mapping first, then the min-max scaling.
    unit_values = (predictions - range_min) / (range_max - range_min)
    return unit_values * (data_max - data_min) + data_min

In [None]:
def predict(test):
    """Run the trained regressor on `test` and rescale its output.

    Uses the module-level `regressor` and `sc`; the output is inverted with
    feature index 3 of the scaler.
    """
    scaled_output = regressor.predict(test)
    return inverse_predictions(scaled_output, sc, 3)


predicted_stock_price = predict(x_test)

## 繪製預測圖

In [None]:
def main(predicted_stock_price):
    """Plot the predicted prices against the matching tail of the real series.

    Args:
        predicted_stock_price: Predictions for the test span, in price units.
            The module-level ``real_stock_price`` supplies the actual values.
    """
    # Align the actual series with the forecast by length rather than by a
    # hard-coded split offset, so the plot stays correct if the split moves.
    start = max(len(real_stock_price) - len(predicted_stock_price), 0)
    plt.plot(real_stock_price[start:], color='black', label='Stock Price')
    # Model output for the same span.
    plt.plot(predicted_stock_price, color='green', label='Predicted Stock Price')
    plt.title('Stock Price Prediction')
    plt.xlabel('Time')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main(predicted_stock_price)

In [None]:
# Persist the trained model, its per-epoch metric history and the scaler.
regressor.save('my_model_5d2ay.h5')
np.save('my_loss_5d2ay.npy', history.history)
# Use a context manager so the file handle is flushed and closed.
with open('scaler_5d2ay.pkl', 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)