In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from plotly.graph_objs import Scatter, Layout
from plotly.offline import plot

Using TensorFlow backend.


In [2]:
def load_data(df, sequence_length=10, split=0.8):
    data_all = np.array(df).astype(float)    # 轉為浮點型別矩陣
    #print(data_all.shape) # (241,1)
    scaler = MinMaxScaler()
    data_all = scaler.fit_transform(data_all)  # 將數據縮放為 0~1 之間
    data = []
    # data 資料共有 (241-10-1)=230 筆
    for i in range(len(data_all) - sequence_length - 1):
        # 每筆 data 資料有 11 欄
        data.append(data_all[i: i + sequence_length + 1])
    reshaped_data = np.array(data).astype('float64')

    x = reshaped_data[:, :-1] # 第 1至第10個欄位為 特徵
    y = reshaped_data[:, -1]  # 第 11個欄位為 label
    #print(x.shape,y.shape) # (230,10,1) (230,1)
    split_boundary = int(reshaped_data.shape[0] * split)
    train_x = x[: split_boundary] # 前 80% 為 train 的特徵
    test_x = x[split_boundary:]   # 最後 20% 為 test 的特徵
 
    train_y = y[: split_boundary] # 前 80% 為 train 的 label
    test_y = y[split_boundary:]   # 最後 20% 為 test 的 label

    return train_x, train_y, test_x, test_y, scaler

def build_model():
    model = Sequential()     
    # 隱藏層：256 個神經元，input_shape：(10,1)
    # TIME_STEPS=10,INPUT_SIZE=1
    model.add(LSTM(input_shape=(10,1),units=256,unroll=False))
    model.add(Dense(units=1)) # 輸出層：1 個神經元
    #compile:loss, optimizer, metrics
    model.compile(loss="mse", optimizer="adam", metrics=['accuracy'])
    return model

def train_model(train_x, train_y, test_x, test_y):
    model = build_model()
    try:
        model.fit(train_x, train_y, batch_size=10, epochs=1, validation_split=0.1)
        predict = model.predict(test_x)
        predict = np.reshape(predict, (predict.size, )) #轉換為1維矩陣
    except KeyboardInterrupt:
        print(predict)
        print(test_y)
    return predict, test_y #傳回 預測值和真實值

In [3]:
pd.options.mode.chained_assignment = None  #取消顯示pandas資料重設警告
filename = './data/Train_Y.csv'
df = pd.read_csv(filename,encoding='cp950')  #以pandas讀取檔案
ddprice=pd.DataFrame(df['Target_Y'])

In [5]:
train_x, train_y, test_x, test_y, scaler =load_data(ddprice, sequence_length=10, split=0.8)
# train_x 共 230*0.8=184 筆, test_x 共 230*0.2=46 筆
#print(train_x.shape,train_y.shape) # (184,10,1) (184,1)
#print(test_x.shape,test_y.shape)   # (46,10,1)  (46,1)
predict_y, test_y = train_model(train_x, train_y, test_x, test_y)
predict_y = scaler.inverse_transform([[i] for i in predict_y]) # 還原
test_y = scaler.inverse_transform(test_y)  # 還原

Train on 162 samples, validate on 18 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300

UnboundLocalError: local variable 'predict' referenced before assignment

In [7]:
train_x, train_y, test_x, test_y

(array([[[0.26644644],
         [0.25409867],
         [0.27368954],
         ...,
         [0.15182917],
         [0.        ],
         [0.03566364]],
 
        [[0.25409867],
         [0.27368954],
         [0.25708789],
         ...,
         [0.        ],
         [0.03566364],
         [0.09417563]],
 
        [[0.27368954],
         [0.25708789],
         [0.27655612],
         ...,
         [0.03566364],
         [0.09417563],
         [0.09770137]],
 
        ...,
 
        [[0.52939779],
         [0.53802819],
         [0.34254114],
         ...,
         [0.30555917],
         [0.29904422],
         [0.37123761]],
 
        [[0.53802819],
         [0.34254114],
         [0.38622968],
         ...,
         [0.29904422],
         [0.37123761],
         [0.35762518]],
 
        [[0.34254114],
         [0.38622968],
         [0.29971104],
         ...,
         [0.37123761],
         [0.35762518],
         [0.29242962]]]), array([[0.09417563],
        [0.09770137],
        [0.1

In [6]:
predict_y

array([[8718.45121648],
       [8728.62451018],
       [8782.36954953],
       [8844.87775924],
       [8914.42414313],
       [8978.40756596],
       [9018.62605963],
       [9028.66536323],
       [9002.55657935],
       [8969.40993236],
       [8977.85465266],
       [9007.13261738],
       [9009.16307678],
       [9006.13161878],
       [9003.94562822],
       [8979.31057992],
       [8941.30195086],
       [8932.19474555],
       [8972.16438935],
       [9032.85560619],
       [9093.6736587 ],
       [9134.99914545],
       [9155.25225861],
       [9170.37424302],
       [9186.0360761 ],
       [9181.06631092],
       [9143.82888315],
       [9133.94394424],
       [9159.37367903],
       [9192.42949582],
       [9212.89623105],
       [9203.75449782],
       [9155.03078223],
       [9093.35349779],
       [9048.30894974],
       [9016.53113256],
       [8991.17255328],
       [8945.09061234],
       [8909.11633096],
       [8919.51825547],
       [8977.59273817],
       [9046.597

In [8]:
test_y

array([[0.39018464],
       [0.48992481],
       [0.47796028],
       [0.54440518],
       [0.5674758 ],
       [0.55545762],
       [0.53508496],
       [0.48022136],
       [0.49677701],
       [0.60205873],
       [0.58991791],
       [0.50162874],
       [0.548935  ],
       [0.55062889],
       [0.47513969],
       [0.45573278],
       [0.5355755 ],
       [0.62420192],
       [0.63390537],
       [0.65751251],
       [0.65284474],
       [0.6575585 ],
       [0.6904552 ],
       [0.70719481],
       [0.65397144],
       [0.59041611],
       [0.69808154],
       [0.73628985],
       [0.72207957],
       [0.70730212],
       [0.66254819],
       [0.58840031],
       [0.57376848],
       [0.58469828],
       [0.55273667],
       [0.52612498],
       [0.43219462],
       [0.47072485],
       [0.56338287],
       [0.63656501],
       [0.63864213],
       [0.70645134],
       [0.68538887],
       [0.72782807],
       [0.73122351],
       [0.78317455]])

In [10]:
predict_y/test_y

array([[22344.42437707],
       [17816.25330442],
       [18374.68485314],
       [16246.8654104 ],
       [15708.90628501],
       [16163.98312024],
       [16854.56831739],
       [18801.04927498],
       [18121.92665708],
       [14897.89872012],
       [15218.82024112],
       [17955.77467961],
       [16412.0763689 ],
       [16356.08276964],
       [18950.10214656],
       [19703.0167351 ],
       [16694.75534127],
       [14309.78420974],
       [14153.79137554],
       [13737.921992  ],
       [13929.30530403],
       [13892.29876686],
       [13259.73387797],
       [12967.25326619],
       [14046.54009836],
       [15550.16214926],
       [13098.51128216],
       [12405.364526  ],
       [12684.71509654],
       [12996.46822087],
       [13905.24696754],
       [15641.99465371],
       [15955.96669909],
       [15552.21521272],
       [16370.01761581],
       [17137.62219504],
       [20803.52720178],
       [19002.80106003],
       [15813.60876392],
       [14011.95187681],
