In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from sklearn import preprocessing
from keras.layers.core import Dense, Activation, Dropout
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error


In [2]:
data = pd.read_csv("../input/batcad/BAT-CAD.csv").dropna().reset_index(drop=True)
data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')

print(data.shape)

(1157, 7)


In [3]:
print(data)

            Date      Open      High       Low     Close  Adj Close     Volume
0     2017-06-01  0.226909  0.236655  0.197074  0.231172   0.231172    5911817
1     2017-06-02  0.231375  0.231291  0.207033  0.211127   0.211127    4209596
2     2017-06-03  0.211070  0.249598  0.208587  0.223900   0.223900    6388732
3     2017-06-04  0.223807  0.475217  0.222293  0.385425   0.385425   37225189
4     2017-06-05  0.384724  0.445181  0.363572  0.393013   0.393013   19265746
...          ...       ...       ...       ...       ...        ...        ...
1152  2020-07-27  0.340725  0.343681  0.310723  0.330144   0.330144  203058147
1153  2020-07-28  0.329991  0.340953  0.325217  0.336396   0.336396  169884317
1154  2020-07-29  0.336272  0.341429  0.328961  0.330046   0.330046  185519642
1155  2020-07-30  0.330116  0.337525  0.329529  0.331649   0.331649  204093541
1156  2020-07-31  0.332148  0.337688  0.330745  0.336589   0.336589  195371776

[1157 rows x 7 columns]


In [4]:
def transf_list(lst):
    return [[el] for el in lst]

X = transf_list(data.iloc[:, 1])
y = data.iloc[:, [5]].values.tolist()

min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(X)
y_scaled = min_max_scaler.fit_transform(y)


train_sliced_day = round(len(X) * 0.6)
valid_sliced_day = round(train_sliced_day + len(X) * 0.2)
test_sliced_day = round(valid_sliced_day + len(X) * 0.2)

X_train = x_scaled[0:train_sliced_day]
X_valid = x_scaled[train_sliced_day:valid_sliced_day]
X_test = x_scaled[valid_sliced_day:-1]

y_train = y_scaled[0:train_sliced_day]
y_valid = y_scaled[train_sliced_day:valid_sliced_day]
y_test = y_scaled[valid_sliced_day:-1]

X_train = X_train.reshape(X_train.shape[0],  X_train.shape[1], 1)
X_valid = X_valid.reshape(X_valid.shape[0],  X_valid.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

print(X_train.shape)
print(X_valid.shape)
print(X_test.shape)

(694, 1, 1)
(231, 1, 1)
(231, 1, 1)


In [5]:
def model(X_train, y_train, X_test, y_test,
          unit= 50, dropout_prob=0.2, opt='adam', epochs=10, batch_size=32):

    model = Sequential()

    model.add(LSTM(units = unit, return_sequences = True, input_shape = (X_train.shape[1], 1)))
    model.add(Dropout(dropout_prob))
    model.add(LSTM(units = unit, return_sequences = True))
    model.add(Dropout(dropout_prob))
    model.add(LSTM(units = unit, return_sequences = True))
    model.add(Dropout(dropout_prob))
    model.add(LSTM(units = unit))
    model.add(Dropout(dropout_prob))
    model.add(Dense(units = 1))

    model.compile(optimizer = opt, loss = 'mean_squared_error')

    model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size, verbose=0)

    y_pred = model.predict(X_test)

    accuracy = round(r2_score(y_test,y_pred)*100, 3)
    mse = mean_squared_error(y_test, y_pred)
    
    return accuracy, mse, y_pred

In [6]:
(r2, mse, y_pred) = model(X_train, y_train, X_test, y_test)
print(r2, mse)

89.656 0.00029949344333697777


In [7]:
tuning_units = [10, 50, 100, 150, 200]
tuning_dropout = [0.1, 0.3, 0.5, 0.7, 0.9]


In [8]:
tuning_param = {'Units': [], 'Dropout': [], 'mse': [], 'r2_score': []}

for i in range(len(tuning_units)):
    for j in range(len(tuning_dropout)):
        (r2, mse, _) = model(X_train, y_train, X_valid, y_valid, unit= tuning_units[i], dropout_prob=tuning_dropout[j])
        tuning_param['Units'].append(tuning_units[i])
        tuning_param['Dropout'].append(tuning_dropout[j])
        
        tuning_param['mse'].append(mse)
        tuning_param['r2_score'].append(r2)


In [9]:
tuning_param_df = pd.DataFrame(tuning_param)

In [10]:
print("Best Mean Square Error")
best_mse = tuning_param_df['mse'].argmin()
print("Row ", best_mse)

print("\nBest Accuracy")
best_acc = tuning_param_df['r2_score'].argmax()
print("Row ", best_acc)

print("-----------")

print(tuning_param_df.iloc[[best_mse, best_acc], :])


Best Mean Square Error
Row  16

Best Accuracy
Row  16
-----------
    Units  Dropout       mse  r2_score
16    150      0.3  0.000276    96.652
16    150      0.3  0.000276    96.652


In [11]:
tuning_param_df.iloc[16, :]

Units       150.000000
Dropout       0.300000
mse           0.000276
r2_score     96.652000
Name: 16, dtype: float64

## Best Param : 150 Units, 0.3 Dropout

In [12]:
tuning_epochs = [1, 5, 10, 15, 20]
tuning_batch_size = [1, 16, 32, 64, 128]


In [13]:
tuning_param2 = {'Epochs': [], 'Batch_Size': [], 'mse': [], 'r2_score': []}

for i in range(len(tuning_epochs)):
    for j in range(len(tuning_batch_size)):
        (r2, mse, _) = model(X_train, y_train, X_valid, y_valid, unit=150, dropout_prob=0.3, epochs=tuning_epochs[i], batch_size=tuning_batch_size[j] )
        tuning_param2['Epochs'].append(tuning_epochs[i])
        tuning_param2['Batch_Size'].append(tuning_batch_size[j])

        tuning_param2['mse'].append(mse)
        tuning_param2['r2_score'].append(r2)

        print(i*j)

0
0
0
0
0
0
1
2
3
4
0
2
4
6
8
0
3
6
9
12
0
4
8
12
16


In [14]:
tuning_param_df2 = pd.DataFrame(tuning_param2)

In [15]:
print("Best Mean Square Error")
best_mse2 = tuning_param_df2['mse'].argmin()
print("Row ", best_mse2)

print("\nBest Accuracy")
best_acc2 = tuning_param_df2['r2_score'].argmax()
print("Row ", best_acc2)

print("-----------")

tuning_param_df2.iloc[[best_mse2, best_acc2], :]


Best Mean Square Error
Row  24

Best Accuracy
Row  24
-----------


Unnamed: 0,Epochs,Batch_Size,mse,r2_score
24,20,128,0.000281,96.589
24,20,128,0.000281,96.589


In [17]:
tuning_param_df2.iloc[24, :]


Epochs         20.000000
Batch_Size    128.000000
mse             0.000281
r2_score       96.589000
Name: 24, dtype: float64

## Best param : 20 Epochs, 128 Batch Size

In [18]:
tuning_opt = ['adam', 'sgd', 'rmsprop', 'adagrad', 'adamax']

In [19]:
tuning_param3 = {'Optimizer': [], 'mse': [], 'r2_score': []}

for i in range(len(tuning_opt)):
    (r2, mse, _) = model(X_train, y_train, X_valid, y_valid, unit=150, dropout_prob=0.3, opt=tuning_opt[i], epochs=20, batch_size=128)
    tuning_param3['Optimizer'].append(tuning_opt[i])

    tuning_param3['mse'].append(mse)
    tuning_param3['r2_score'].append(r2)
    print(i)

0
1
2
3
4


In [20]:
tuning_param_df3 = pd.DataFrame(tuning_param3)

In [21]:
print("Best Mean Square Error")
best_mse3 = tuning_param_df3['mse'].argmin()
print("Row ", best_mse3)
tuning_param_df3.iloc[best_mse3, :]

print("\nBest Accuracy")
best_acc3 = tuning_param_df3['r2_score'].argmax()
print("Row ", best_acc3)

print("-----------")

tuning_param_df3.iloc[[best_mse3, best_acc3], :]

Best Mean Square Error
Row  0

Best Accuracy
Row  0
-----------


Unnamed: 0,Optimizer,mse,r2_score
0,adam,0.000324,96.077
0,adam,0.000324,96.077


## Best Param : Adam Optimizer

In [None]:
(_, _, y_pred) = model(X_train, y_train, X_test, y_test, unit=150, dropout_prob=0.3, opt='adam', epochs=20, batch_size=128)

In [None]:
unscaled_y_pred = min_max_scaler.inverse_transform(y_pred)
unscaled_y_test = min_max_scaler.inverse_transform(y_test)

In [None]:
round(r2_score(unscaled_y_test,unscaled_y_pred)*100, 3)

In [None]:
plt.gcf().set_size_inches(20, 10, forward=True)

real_close = plt.plot(unscaled_y_test[:,0], label='Real Close Value')
pred_close = plt.plot(unscaled_y_pred[:,0], label='Predicted Close Value')

plt.legend(['Real Close Value', 'Predicted Close Value'])

plt.show()