# Part 1 - Data Preprocessing

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Load data 

In [3]:
dataset = pd.read_csv('DJI.csv', header=0)
dataset = dataset.dropna()
dataset

Unnamed: 0,Date,Open,High,Low,Close,Adj Close
0,2004/1/2,10452.74023,10527.03027,10384.29981,10409.84961,10409.84961
1,2004/1/5,10411.84961,10544.07031,10411.84961,10544.07031,10544.07031
2,2004/1/6,10543.84961,10549.17969,10499.84961,10538.66016,10538.66016
3,2004/1/7,10535.45996,10539.45996,10466.29004,10529.03027,10529.03027
4,2004/1/8,10530.07031,10592.58984,10530.07031,10592.44043,10592.44043
5,2004/1/9,10589.25000,10589.25000,10448.66992,10458.88965,10458.88965
6,2004/1/12,10461.54981,10491.62988,10444.15039,10485.17969,10485.17969
7,2004/1/13,10485.17969,10509.84961,10367.41016,10427.17969,10427.17969
8,2004/1/14,10428.66992,10548.50977,10428.66992,10538.37012,10538.37012
9,2004/1/15,10534.51953,10592.74023,10477.17969,10553.84961,10553.84961


Technical indicator (KD)

In [4]:
# RSV
RSV = 100* ((dataset['Adj Close'] - dataset['Low'].rolling(window=9).min() ) /
            (dataset['High'].rolling(window=9).max()-dataset['Low'].rolling(window=9).min()))
RSV.fillna(method='bfill', inplace=True)
dataset['rsv']=round(RSV,3)

# K-value
dataset['k'] = 0
# print(file.head())
for idx, row in dataset.iterrows(): 
    # idx是在row裡  row在file裡 用file.interrows() 拆成一列一列
 iRSV = row['rsv']
 if idx == 0:
  K=(RSV[0]*1/3)+(50*2/3)     #將第一天的k值預設為50
 else:
  bk = dataset.loc[idx - 1, 'k'] #取前一個k值 #取某一欄或某一列
  K=(iRSV*1/3)+(bk*2/3)
 dataset.loc[idx, 'k'] = round(K,3)

# D-value
dataset['d'] = 0
K = dataset['k']
for idx, row in dataset.iterrows():
 iK = row['k']
 if idx == 0:
  D=(K[0]*1/3)+(50*2/3)
 else:
  bd = dataset.loc[idx - 1, 'd']
  D=(iK*1/3)+(bd*2/3)
 dataset.loc[idx, 'd'] = round(D,3)

# Golden cross or Death cross
dataset['gx'] = np.where(dataset['k'] > dataset['d'], 1, 0)
dataset

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,rsv,k,d,gx
0,2004/1/2,10452.74023,10527.03027,10384.29981,10409.84961,10409.84961,75.922,58.641,52.880,1
1,2004/1/5,10411.84961,10544.07031,10411.84961,10544.07031,10544.07031,75.922,64.401,56.720,1
2,2004/1/6,10543.84961,10549.17969,10499.84961,10538.66016,10538.66016,75.922,68.241,60.560,1
3,2004/1/7,10535.45996,10539.45996,10466.29004,10529.03027,10529.03027,75.922,70.801,63.974,1
4,2004/1/8,10530.07031,10592.58984,10530.07031,10592.44043,10592.44043,75.922,72.508,66.819,1
5,2004/1/9,10589.25000,10589.25000,10448.66992,10458.88965,10458.88965,75.922,73.646,69.095,1
6,2004/1/12,10461.54981,10491.62988,10444.15039,10485.17969,10485.17969,75.922,74.405,70.865,1
7,2004/1/13,10485.17969,10509.84961,10367.41016,10427.17969,10427.17969,75.922,74.911,72.214,1
8,2004/1/14,10428.66992,10548.50977,10428.66992,10538.37012,10538.37012,75.922,75.248,73.225,1
9,2004/1/15,10534.51953,10592.74023,10477.17969,10553.84961,10553.84961,82.741,77.746,74.732,1


In [5]:
X_train = dataset.iloc[:3776, 5:-1].values
y_train = dataset.iloc[:3776, -1].values
X_train, y_train = np.array(X_train),  np.array(y_train)

Split dataset into training and test set

In [414]:
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

Feature scaling

In [6]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
#X_test = sc.transform(X_test)

In [7]:
pd.DataFrame(y_train)

Unnamed: 0,0
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,1
8,1
9,1


Creating a data structure with timesteps and one output

In [417]:
#X_train = []
# 60 previous stock prices
#y_train = []
#for i in range(60, 4810):
#    X_train.append(dataset_train_scaled[i-60:i, 0]) 
    # get the 60 previous stock prices
#    y_train.append(dataset_train_scaled[i, 0])
#X_train, y_train = np.array(X_train),  np.array(y_train)

Reshaping - 3 dimensions

In [418]:
X_train = np.reshape(X_train, (X_train.shape[0], 
                               X_train.shape[1], 
                               1))

#X_test = np.reshape(X_test,   (X_test.shape[0], 
#                               X_test.shape[1], 
#                               1))

# Part 2 - Building the RNN

In [419]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

Initializing the RNN

In [420]:
regressor = Sequential()

Adding the first LSTM layer and also some Dropout regularisation

In [421]:
regressor.add(LSTM(units = 50, 
                   return_sequences = True, 
                   input_shape = (X_train.shape[1], 1)))
regressor.add(Dropout(0.2))

Adding the second LSTM layer and also some Dropout regularisation

In [422]:
regressor.add(LSTM(units = 50, 
                   return_sequences = True))
regressor.add(Dropout(0.2))

Adding the third LSTM layer and also some Dropout regularisation

In [423]:
regressor.add(LSTM(units = 50, 
                   return_sequences = True))
regressor.add(Dropout(0.2))

Adding the fourth LSTM layer and also some Dropout regularisation

In [424]:
regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.2))

Adding the output layer

In [425]:
regressor.add(Dense(units = 1))

Compiling the RNN

In [426]:
regressor.compile(optimizer = 'adam',
                  loss = 'mean_squared_error')

Fitting the RNN to the training set

In [427]:
regressor.fit(X_train, y_train, epochs = 100, batch_size = 32, verbose =1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1a5af6e1d0>

Evaluate the regressor

In [None]:
#X_test = np.reshape(X_test,   (X_test.shape[0], 
#                               X_test.shape[1],
#                               1))

In [436]:
#loss, accuracy = regressor.evaluate(X_test, y_test, verbose=0)

# Part 3 - Making the predictions 

In [430]:
X_test = dataset.iloc[3776:, 5:-1].values
y_test = dataset.iloc[3776:, -1].values

X_test = np.reshape(X_test,   (X_test.shape[0], 
                               X_test.shape[1],
                               1))

In [433]:
y_pred = regressor.predict(X_test)
pd.DataFrame(y_pred)

Unnamed: 0,0
0,-0.000891
1,-0.001046
2,-0.001146
3,-0.001252
4,-0.001362
5,-0.00142
6,-0.001417
7,-0.001412
8,-0.001413
9,-0.00141


In [434]:
y_pred = (y_pred > 1)
pd.DataFrame(y_pred)

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
5,False
6,False
7,False
8,False
9,False
