In [None]:
import pandas as pd
from keras.layers.core import Dense, Dropout
from keras.layers.recurrent import GRU
from keras.models import Sequential, load_model
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


# Load the raw price table; the 'date' column becomes the row index.
prices = pd.read_csv('prices_stock.csv', index_col=['date'])

# Input features: the four daily price columns of the YHOO rows only.
# Selecting the wanted columns by name already leaves 'symbol' and 'volume'
# behind, so separate drop() calls are unnecessary. The explicit copy gives
# `yahoo` its own data, so modifying it later cannot affect `prices`.
yahoo = prices.loc[
    prices['symbol'] == 'YHOO',
    ['open', 'low', 'high', 'close'],
].copy()

print(yahoo)

                 open        low       high      close
date                                                  
2010-01-04  16.940001  16.879999  17.200001  17.100000
2010-01-05  17.219999  17.000000  17.230000  17.230000
2010-01-06  17.170000  17.070000  17.299999  17.170000
2010-01-07  16.809999  16.570000  16.900000  16.700001
2010-01-08  16.680000  16.620001  16.760000  16.700001
...               ...        ...        ...        ...
2016-12-23  38.459999  38.369999  38.810001  38.660000
2016-12-27  38.590000  38.500000  39.070000  38.919998
2016-12-28  39.119999  38.709999  39.220001  38.730000
2016-12-29  38.759998  38.480000  38.930000  38.639999
2016-12-30  38.720001  38.430000  39.000000  38.669998

[1762 rows x 4 columns]


In [None]:
# preparing label data
# shift(-1) moves every row up by one position, so the row for a given date
# holds the prices of the next row in the table. Pairing a day's features
# with that shifted 'close' means the model is trained to predict the
# following day's closing price from the current day's prices.
# The last row has nothing after it, so its label is NaN.
yahoo_shift = yahoo.shift(-1)
label = yahoo_shift['close']

print(label)

date
2010-01-04    17.230000
2010-01-05    17.170000
2010-01-06    16.700001
2010-01-07    16.700001
2010-01-08    16.740000
                ...    
2016-12-23    38.919998
2016-12-27    38.730000
2016-12-28    38.639999
2016-12-29    38.669998
2016-12-30          NaN
Name: close, Length: 1762, dtype: float64


In [None]:
# adjusting the shape of both
# The final row has no next-day close (its label is NaN after the shift), so
# it is removed from the features and from the labels.
# Rows are kept with a positional boolean mask instead of
# drop(index[len - 1], inplace=True): drop() works by label, so it would
# delete every row sharing the last index value, and it trimmed one more row
# each time the cell was re-run. Filtering on "label is present" gives the
# same result however many times the cell is executed.
has_label = label.notna().values
yahoo = yahoo[has_label]
label = label[has_label]

print(yahoo)
print(label)


                 open        low       high      close
date                                                  
2010-01-04  16.940001  16.879999  17.200001  17.100000
2010-01-05  17.219999  17.000000  17.230000  17.230000
2010-01-06  17.170000  17.070000  17.299999  17.170000
2010-01-07  16.809999  16.570000  16.900000  16.700001
2010-01-08  16.680000  16.620001  16.760000  16.700001
...               ...        ...        ...        ...
2016-12-22  38.689999  38.259998  38.790001  38.500000
2016-12-23  38.459999  38.369999  38.810001  38.660000
2016-12-27  38.590000  38.500000  39.070000  38.919998
2016-12-28  39.119999  38.709999  39.220001  38.730000
2016-12-29  38.759998  38.480000  38.930000  38.639999

[1761 rows x 4 columns]
date
2010-01-04    17.230000
2010-01-05    17.170000
2010-01-06    16.700001
2010-01-07    16.700001
2010-01-08    16.740000
                ...    
2016-12-22    38.660000
2016-12-23    38.919998
2016-12-27    38.730000
2016-12-28    38.639999
2016-12-29    3

In [None]:
# Restrict both the features and the labels to rows whose index value sorts
# before "2010-01-08".
# NOTE(review): the displayed result has only four rows, so this looks like a
# debugging subset rather than the intended training data; confirm before
# re-enabling model training.
yahoo = yahoo.loc[yahoo.index < "2010-01-08"]
label = label.loc[label.index < "2010-01-08"]
yahoo

Unnamed: 0_level_0,open,low,high,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,16.940001,16.879999,17.200001,17.1
2010-01-05,17.219999,17.0,17.23,17.23
2010-01-06,17.17,17.07,17.299999,17.17
2010-01-07,16.809999,16.57,16.9,16.700001


In [None]:
# conversion to numpy array
# x: feature matrix taken from the frame, y: label vector taken from the series.
x = yahoo.values
y = label.values

print(x)
print(y)

[[16.940001 16.879999 17.200001 17.1     ]
 [17.219999 17.       17.23     17.23    ]
 [17.17     17.07     17.299999 17.17    ]
 [16.809999 16.57     16.9      16.700001]]
[17.23     17.17     16.700001 16.700001]


In [None]:
# scaling values for model
# Features and labels are scaled by two separate MinMaxScaler instances.
# NOTE(review): both scalers are fitted on every row of x / y; if a held-out
# test set is carved out afterwards, its value range has already influenced
# the scaling. Confirm this is acceptable.
x_scale, y_scale = MinMaxScaler(), MinMaxScaler()

X = x_scale.fit_transform(x)
# Besides min-max scaling, y is reshaped into a single column (n, 1),
# because the scaler works on 2-D input.
Y = y_scale.fit_transform(y.reshape(-1, 1))

In [None]:
# splitting train and test
# The rows are consecutive trading days, so the split is chronological
# (shuffle=False): the earliest rows form the training set and the last ~33%
# form the test set. A shuffled split would place later days in the training
# set while earlier days are tested, and without a fixed seed it would also
# yield different sets on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, shuffle=False
)
X_train

array([[1.        , 0.86      , 0.82500206, 1.        ],
       [0.        , 0.        , 0.        , 0.        ]])

In [None]:
# Reshape to 3-D (samples, 1, features): each sample becomes a sequence of
# length one. The feature count is read from the array's last axis instead of
# being hard-coded to 4, so the cell keeps working if the feature columns
# change; re-running it on already reshaped arrays leaves them unchanged.
X_train = X_train.reshape((-1, 1, X_train.shape[-1]))
X_test = X_test.reshape((-1, 1, X_test.shape[-1]))
X_train

array([[[1.        , 0.63158089, 0.299996  , 1.        ]],

       [[0.        , 0.        , 0.        , 0.        ]]])

In [None]:


# Report the shapes of the arrays produced by the split / reshape steps.
for caption, array in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test shape:", X_test),
):
    print(caption, array.shape)

# # creating model using Keras
# # tf.reset_default_graph()

# model_name = 'stock_price_GRU'

# model = Sequential()
# model.add(GRU(units=512,
#               return_sequences=True,
#               input_shape=(1, 4)))
# model.add(Dropout(0.2))
# model.add(GRU(units=256))
# model.add(Dropout(0.2))
# model.add(Dense(1, activation='sigmoid'))
# model.compile(loss='mse', optimizer='adam')

# # model = load_model("{}.h5".format(model_name))
# # print("MODEL-LOADED")

# model.fit(X_train,y_train,batch_size=250, epochs=500, validation_split=0.1, verbose=1)
# model.save("{}.h5".format(model_name))
# print('MODEL-SAVED')

# score = model.evaluate(X_test, y_test)
# print('Score: {}'.format(score))
# yhat = model.predict(X_test)
# yhat = y_scale.inverse_transform(yhat)
# y_test = y_scale.inverse_transform(y_test)
# plt.plot(yhat[-100:], label='Predicted')
# plt.plot(y_test[-100:], label='Ground Truth')
# plt.legend()
# plt.show()

X_train shape: (2, 1, 4)
y_train shape: (2, 1)
X_test shape: (1, 1, 4)
