In [1]:
import pandas_datareader as pdr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [2]:
rawData = pdr.DataReader("SI=F", 
                       start='2000-1-1', 
                       end='2020-5-10', 
                       data_source='yahoo')

rawData.describe()

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close
count,5056.0,5056.0,5056.0,5056.0,5056.0,5056.0
mean,15.134165,14.924315,15.035751,15.030289,166987.4,15.030289
std,8.550389,8.352095,8.458185,8.450795,2351958.0,8.450795
min,4.026,4.026,4.026,4.026,0.0,4.026
25%,7.1865,7.14,7.16975,7.1705,3.0,7.1705
50%,15.256,15.0775,15.1825,15.1855,31.0,15.1855
75%,18.13225,18.001,18.08775,18.068001,135.0,18.068001
max,49.52,47.540001,48.459999,48.584,69801560.0,48.584


In [3]:
rawData['O-C'] = rawData['Open'] - rawData['Close']

trend = []
for i in range(len(rawData)):
    if rawData['O-C'][i] >= 0:
        trend.append(1)
    else:
        trend.append(0)
rawData['Trend'] = trend

rawData['0.5(O+C)'] = ( rawData['Open'] + rawData['Close'] ) * 0.5
rawData['0.5(H+L)'] = ( rawData['High'] + rawData['Low'] ) * 0.5
rawData['AvOC-HL'] = rawData['0.5(O+C)'] + rawData['0.5(H+L)'] * 0.5
rawData.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,O-C,Trend,0.5(O+C),0.5(H+L),AvOC-HL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-02-28,5.095,5.02,5.045,5.048,14203.0,5.048,-0.003,0,5.0465,5.0575,7.57525
2000-02-29,5.09,5.035,5.065,5.048,2830.0,5.048,0.017,1,5.0565,5.0625,7.58775
2000-03-01,5.13,5.05,5.07,5.073,511.0,5.073,-0.003,0,5.0715,5.09,7.6165
2000-03-02,5.105,5.0,5.105,5.008,645.0,5.008,0.097,1,5.0565,5.0525,7.58275
2000-03-03,5.11,5.0,5.02,5.099,307.0,5.099,-0.079,0,5.0595,5.055,7.587


In [46]:
rawData[1:6].mean().to_numpy()

array([5.10500011e+00, 5.02700005e+00, 5.07000008e+00, 5.05619993e+00,
       8.86600000e+02, 5.05619993e+00, 1.38001442e-02, 6.00000000e-01,
       5.06310000e+00, 5.06600008e+00, 7.59610004e+00])

In [47]:
top5 = []
p = 0
h = 0
for i in range(len(rawData)):
    p += 1
    for x in range(5):
        h = x+p
    top5.append(rawData[i:h].mean().to_numpy())

top5[1]

array([5.10500011e+00, 5.02700005e+00, 5.07000008e+00, 5.05619993e+00,
       8.86600000e+02, 5.05619993e+00, 1.38001442e-02, 6.00000000e-01,
       5.06310000e+00, 5.06600008e+00, 7.59610004e+00])

In [4]:
inputs = rawData[['High', 'Low', 'Open', 'Close']][ 0 : len(rawData) - 1 ].to_numpy()
inputs2 = rawData[['High', 'Low','Volume']][ 0 : len(rawData) - 1 ].to_numpy()
inputs3 = rawData['Volume'][ 0 : len(rawData) - 1 ].to_numpy()

targets = rawData[['Trend']][ 1 : len(rawData) ].to_numpy()

scaler = StandardScaler()
scaler.fit(inputs)
# scaler.fit(inputs2)
# scaler.fit(inputs3.reshape(-1,1))

scaled_inputs = scaler.transform(inputs)
# scaled_inputs2 = scaler.transform(inputs2)
# scaled_inputs3 = scaler.transform(inputs3.reshape(-1,1))

scaled_inputs2

array([[-1.17410196, -1.18582945, -1.18117726, -1.18120675],
       [-1.17468668, -1.18403351, -1.17881269, -1.18120675],
       [-1.17000853, -1.18223751, -1.17822153, -1.17824843],
       ...,
       [ 0.02993774, -0.01007535,  0.02416488, -0.0029733 ],
       [ 0.06560859,  0.01207474,  0.0052483 ,  0.06388433],
       [ 0.09894041,  0.06954547,  0.08327926,  0.08814242]])

In [5]:
sum_uptrend = int(np.sum(targets))

sum_downtrend = 0

unnecessary_indices = []

for i in range(len(targets)):
    if targets[i] == 0:
        sum_downtrend += 1
        if sum_downtrend > sum_uptrend:
            unnecessary_indices.append(i)

extracted_inputs = np.delete( scaled_inputs, unnecessary_indices, axis = 0 )
extracted_targets = np.delete( targets, unnecessary_indices, axis = 0 )
print(len(extracted_inputs), len(extracted_targets), unnecessary_indices)

5055 5055 []


In [6]:
shuffle_value = np.arange(len(extracted_inputs))
np.random.shuffle(shuffle_value)

shuffled_inputs = extracted_inputs[shuffle_value]
shuffled_targets = extracted_targets[shuffle_value]
shuffled_inputs

array([[-0.12502616, -0.11579748, -0.12243875, -0.12698545],
       [-1.25433226, -1.25898483, -1.25637077, -1.256821  ],
       [-1.22497686, -1.23910957, -1.23615365, -1.2274746 ],
       ...,
       [-1.18684989, -1.18990028, -1.18815276, -1.18854332],
       [ 0.22583542,  0.21202478,  0.19618771,  0.22327778],
       [-1.04018983, -1.05652046, -1.0422585 , -1.04749138]])

In [7]:
total_samples = len(shuffled_inputs)

train_samples = int( 0.8 * total_samples )
validation_samples = int( 0.1 * total_samples )
test_samples = total_samples - ( train_samples + validation_samples )

train_inputs = shuffled_inputs[ :  train_samples]
train_targets = shuffled_targets[ : train_samples]

validation_inputs = shuffled_inputs[ train_samples : train_samples + validation_samples ]
validation_targets = shuffled_targets[ train_samples : train_samples + validation_samples ]

test_inputs = shuffled_inputs[ train_samples + validation_samples : ]
test_targets = shuffled_targets[ train_samples + validation_samples : ]

print( np.sum(train_targets), train_samples, np.sum(train_targets) /  train_samples )
print( np.sum(validation_targets), validation_samples, np.sum(validation_targets) /  validation_samples )
print( np.sum(test_targets), test_samples, np.sum(test_targets) /  test_samples )

2699 4044 0.667408506429278
338 505 0.6693069306930693
356 506 0.7035573122529645


In [8]:
np.savez('silver_train', inputs=train_inputs, targets=train_targets)
np.savez('silver_validation', inputs=validation_inputs, targets=validation_targets)
np.savez('silver_test', inputs=test_inputs, targets=test_targets)

In [9]:
npz_trn = np.load('silver_train.npz')
trn_inputs, trn_targets = npz_trn['inputs'].astype(np.float), npz_trn['targets'].astype(np.int)

npz_val = np.load('silver_validation.npz')
val_inputs, val_targets = npz_val['inputs'].astype(np.float), npz_val['targets'].astype(np.int)

npz_tst = np.load('silver_test.npz')
tst_inputs, tst_targets = npz_tst['inputs'].astype(np.float), npz_tst['targets'].astype(np.int)

In [10]:
train_inputs

array([[-0.12502616, -0.11579748, -0.12243875, -0.12698545],
       [-1.25433226, -1.25898483, -1.25637077, -1.256821  ],
       [-1.22497686, -1.23910957, -1.23615365, -1.2274746 ],
       ...,
       [ 0.12350083,  0.13659463,  0.12170369,  0.13322703],
       [ 0.07554967,  0.1012741 ,  0.0880084 ,  0.08329087],
       [ 1.3667199 ,  1.33449876,  1.33413949,  1.38612798]])

In [11]:
input_size = 5
output_size = 2

hidden_layer_size = 100

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), 
    tf.keras.layers.Dense(output_size, activation='softmax') 
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 100

epochs = 50

model.fit(train_inputs,
          trn_targets,
          batch_size=batch_size, 
          epochs=epochs,
          validation_data=(val_inputs, val_targets),
          verbose = 2 
          )  

Train on 4044 samples, validate on 505 samples
Epoch 1/50
4044/4044 - 1s - loss: 0.6783 - accuracy: 0.6098 - val_loss: 0.6346 - val_accuracy: 0.6693
Epoch 2/50
4044/4044 - 0s - loss: 0.6331 - accuracy: 0.6664 - val_loss: 0.6285 - val_accuracy: 0.6693
Epoch 3/50
4044/4044 - 0s - loss: 0.6291 - accuracy: 0.6682 - val_loss: 0.6263 - val_accuracy: 0.6673
Epoch 4/50
4044/4044 - 0s - loss: 0.6270 - accuracy: 0.6696 - val_loss: 0.6248 - val_accuracy: 0.6713
Epoch 5/50
4044/4044 - 0s - loss: 0.6269 - accuracy: 0.6686 - val_loss: 0.6248 - val_accuracy: 0.6693
Epoch 6/50
4044/4044 - 0s - loss: 0.6262 - accuracy: 0.6674 - val_loss: 0.6250 - val_accuracy: 0.6693
Epoch 7/50
4044/4044 - 0s - loss: 0.6265 - accuracy: 0.6644 - val_loss: 0.6247 - val_accuracy: 0.6614
Epoch 8/50
4044/4044 - 0s - loss: 0.6259 - accuracy: 0.6667 - val_loss: 0.6246 - val_accuracy: 0.6673
Epoch 9/50
4044/4044 - 0s - loss: 0.6255 - accuracy: 0.6686 - val_loss: 0.6241 - val_accuracy: 0.6673
Epoch 10/50
4044/4044 - 0s - loss: 

<tensorflow.python.keras.callbacks.History at 0x2a25a677ec8>

In [12]:
test_loss, test_accuracy = model.evaluate(tst_inputs, tst_targets)

