In [23]:
import pandas_datareader as pdr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [2]:
rawData = pdr.DataReader("SI=F", 
                       start='2000-1-1', 
                       end='2020-4-30', 
                       data_source='yahoo')

rawData.describe()

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close
count,5048.0,5048.0,5048.0,5048.0,5048.0,5048.0
mean,15.133639,14.924146,15.035435,15.029906,150599.1,15.029906
std,8.557144,8.358698,8.464873,8.457472,2274977.0,8.457472
min,4.026,4.026,4.026,4.026,0.0,4.026
25%,7.18375,7.13625,7.1615,7.16475,3.0,7.16475
50%,15.256,15.0875,15.185,15.191,31.0,15.191
75%,18.13925,18.00525,18.09025,18.07075,134.0,18.07075
max,49.52,47.540001,48.459999,48.584,69801560.0,48.584


In [3]:
rawData['O-C'] = rawData['Open'] - rawData['Close']

trend = []
for i in range(len(rawData)):
    if rawData['O-C'][i] >= 0:
        trend.append(1)
    else:
        trend.append(0)
rawData['Trend'] = trend
rawData.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,O-C,Trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-02-28,5.095,5.02,5.045,5.048,14203.0,5.048,-0.003,0
2000-02-29,5.09,5.035,5.065,5.048,2830.0,5.048,0.017,1
2000-03-01,5.13,5.05,5.07,5.073,511.0,5.073,-0.003,0
2000-03-02,5.105,5.0,5.105,5.008,645.0,5.008,0.097,1
2000-03-03,5.11,5.0,5.02,5.099,307.0,5.099,-0.079,0


In [10]:
inputs = rawData[['High', 'Low', 'Open', 'Close', 'Volume']][ 0 : len(rawData) - 1 ].to_numpy()
targets = rawData[['Trend']][ 1 : len(rawData) ].to_numpy()

scaler = StandardScaler()
scaler.fit(inputs)
scaled_inputs = scaler.transform(inputs)
scaled_inputs

array([[-1.17311205e+00, -1.18489275e+00, -1.18021151e+00,
        -1.18024045e+00, -5.99626200e-02],
       [-1.17369632e+00, -1.18309823e+00, -1.17784880e+00,
        -1.18024045e+00, -6.49617926e-02],
       [-1.16902186e+00, -1.18130365e+00, -1.17725811e+00,
        -1.17728447e+00, -6.59811438e-02],
       ...,
       [ 3.64047027e-02,  2.16347859e-02,  4.42608575e-02,
         2.66215143e-02,  6.31480089e+00],
       [ 2.29656542e-02,  1.89487455e-03,  7.04828735e-03,
         2.01183522e-02,  5.96140470e+00],
       [ 5.51025286e-02,  3.71874606e-02,  3.48100425e-02,
         3.52529305e-02, -4.84288744e-02]])

In [12]:
sum_uptrend = int(np.sum(targets))

sum_downtrend = 0

unnecessary_indices = []

for i in range(len(targets)):
    if targets[i] == 0:
        sum_downtrend += 1
        if sum_downtrend > sum_uptrend:
            unnecessary_indices.append(i)

extracted_inputs = np.delete( scaled_inputs, unnecessary_indices, axis = 0 )
extracted_targets = np.delete( targets, unnecessary_indices, axis = 0 )
print(len(extracted_inputs), len(extracted_targets), unnecessary_indices)

5047 5047 []


In [17]:
shuffle_value = np.arange(len(extracted_inputs))
np.random.shuffle(shuffle_value)

shuffled_inputs = extracted_inputs[shuffle_value]
shuffled_targets = extracted_targets[shuffle_value]
shuffled_inputs

array([[-0.16284419, -0.19670062, -0.1530251 , -0.19353891, -0.06572971],
       [-0.88072448, -0.87659114, -0.87873022, -0.87885009, -0.06618818],
       [-1.23960623, -1.24399285, -1.24152374, -1.24196101, -0.06620444],
       ...,
       [-1.22394681, -1.23513982, -1.22687495, -1.22895474, -0.06292924],
       [-1.19882157, -1.2043934 , -1.20029452, -1.20282403, -0.06620356],
       [ 0.28882558,  0.28064652,  0.27521536,  0.26865603, -0.0660207 ]])

In [18]:
total_samples = len(shuffled_inputs)

train_samples = int( 0.8 * total_samples )
validation_samples = int( 0.1 * total_samples )
test_samples = total_samples - ( train_samples + validation_samples )

train_inputs = shuffled_inputs[ :  train_samples]
train_targets = shuffled_targets[ : train_samples]

validation_inputs = shuffled_inputs[ train_samples : train_samples + validation_samples ]
validation_targets = shuffled_targets[ train_samples : train_samples + validation_samples ]

test_inputs = shuffled_inputs[ train_samples + validation_samples : ]
test_targets = shuffled_targets[ train_samples + validation_samples : ]

print( np.sum(train_targets), train_samples, np.sum(train_targets) /  train_samples )
print( np.sum(validation_targets), validation_samples, np.sum(validation_targets) /  validation_samples )
print( np.sum(test_targets), test_samples, np.sum(test_targets) /  test_samples )

2703 4037 0.6695566014367105
350 504 0.6944444444444444
336 506 0.6640316205533597


In [19]:
np.savez('silver_train', inputs=train_inputs, targets=train_targets)
np.savez('silver_validation', inputs=validation_inputs, targets=validation_targets)
np.savez('silver_test', inputs=test_inputs, targets=test_targets)

In [20]:
npz_trn = np.load('silver_train.npz')
trn_inputs, trn_targets = npz_trn['inputs'].astype(np.float), npz_trn['targets'].astype(np.int)

npz_val = np.load('silver_validation.npz')
val_inputs, val_targets = npz_val['inputs'].astype(np.float), npz_val['targets'].astype(np.int)

npz_tst = np.load('silver_test.npz')
tst_inputs, tst_targets = npz_tst['inputs'].astype(np.float), npz_tst['targets'].astype(np.int)

In [21]:
trn_inputs

array([[-0.16284419, -0.19670062, -0.1530251 , -0.19353891, -0.06572971],
       [-0.88072448, -0.87659114, -0.87873022, -0.87885009, -0.06618818],
       [-1.23960623, -1.24399285, -1.24152374, -1.24196101, -0.06620444],
       ...,
       [ 0.53306618,  0.56262827,  0.54243753,  0.54356099, -0.06619389],
       [ 0.77905963,  0.80465146,  0.78142511,  0.7892608 , -0.06619126],
       [ 0.24231483,  0.19151771,  0.17598169,  0.25742344, -0.06619345]])

In [31]:
input_size = 5
output_size = 2

hidden_layer_size = 100

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), 
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), 
    tf.keras.layers.Dense(output_size, activation='softmax') 
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 100

epochs = 100

early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)


model.fit(trn_inputs,
          trn_targets,
          batch_size=batch_size, 
          epochs=epochs,
          callbacks = [early_stopping],
          validation_data=(val_inputs, val_targets),
          verbose = 2 
          )  

Train on 4037 samples, validate on 504 samples
Epoch 1/100
4037/4037 - 1s - loss: 0.6487 - accuracy: 0.6544 - val_loss: 0.6184 - val_accuracy: 0.6944
Epoch 2/100
4037/4037 - 0s - loss: 0.6322 - accuracy: 0.6678 - val_loss: 0.6036 - val_accuracy: 0.6944
Epoch 3/100
4037/4037 - 0s - loss: 0.6269 - accuracy: 0.6661 - val_loss: 0.5994 - val_accuracy: 0.6984
Epoch 4/100
4037/4037 - 0s - loss: 0.6251 - accuracy: 0.6681 - val_loss: 0.5999 - val_accuracy: 0.6944
Epoch 5/100
4037/4037 - 0s - loss: 0.6251 - accuracy: 0.6693 - val_loss: 0.5992 - val_accuracy: 0.7004
Epoch 6/100
4037/4037 - 0s - loss: 0.6262 - accuracy: 0.6686 - val_loss: 0.6012 - val_accuracy: 0.6925
Epoch 7/100
4037/4037 - 0s - loss: 0.6252 - accuracy: 0.6710 - val_loss: 0.5990 - val_accuracy: 0.7044
Epoch 8/100
4037/4037 - 0s - loss: 0.6268 - accuracy: 0.6646 - val_loss: 0.6010 - val_accuracy: 0.6984
Epoch 9/100
4037/4037 - 0s - loss: 0.6270 - accuracy: 0.6661 - val_loss: 0.6022 - val_accuracy: 0.6964


<tensorflow.python.keras.callbacks.History at 0x21be9a93408>

In [32]:
test_loss, test_accuracy = model.evaluate(tst_inputs, tst_targets)

