In [30]:
# Import modules and packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

%matplotlib inline

In [31]:
# Importing Training Set
dataset_train = pd.read_csv('TCS.NS.csv')

# Select features (columns) to be involved intro training and predictions
cols = list(dataset_train)[1:6]

# Extract dates (will be used in visualization)
datelist_train = list(dataset_train['Date'])
datelist_train = [dt.datetime.strptime(date, '%Y-%m-%d').date() for date in datelist_train]

print('Training set shape == {}'.format(dataset_train.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Featured selected: {}'.format(cols))

Training set shape == (1515, 7)
All timestamps == 1515
Featured selected: ['Open', 'High', 'Low', 'Close', 'Adj Close']


In [32]:
dataset_train = dataset_train[cols].astype(str)
for i in cols:
    for j in range(0, len(dataset_train)):
        dataset_train[i][j] = dataset_train[i][j].replace(',', '')

dataset_train = dataset_train.astype(float)

# Using multiple features (predictors)
training_set = dataset_train.values

print('Shape of training set == {}.'.format(training_set.shape))
training_set

Shape of training set == (1515, 5).


array([[1219.5     , 1219.5     , 1206.125   , 1208.199951, 1075.699341],
       [1205.074951, 1207.      , 1183.025024, 1184.800049, 1054.865723],
       [1192.5     , 1193.300049, 1170.5     , 1174.474976, 1045.672974],
       ...,
       [3786.      , 3835.      , 3748.      , 3817.800049, 3817.800049],
       [3844.      , 3854.100098, 3806.      , 3813.100098, 3813.100098],
       [3825.      , 3835.      , 3779.      , 3784.199951, 3784.199951]])

In [33]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)

sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:, 0:1])

array([[-1.02533526],
       [-1.04349259],
       [-1.05932114],
       ...,
       [ 2.20521124],
       [ 2.27821794],
       [ 2.25430195]])

In [34]:
# Creating a data structure with 90 timestamps and 1 output
X_train = []
y_train = []

n_future = 60   # Number of days we want top predict into the future
n_past = 90     # Number of past days we want to use to predict the future

for i in range(n_past, len(training_set_scaled) - n_future +1):
    X_train.append(training_set_scaled[i - n_past:i, 0:dataset_train.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)

print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))

X_train shape == (1366, 90, 4).
y_train shape == (1366, 1).


<h2>PART 2. Create a model. Training</h2>

<h3>Step #3. Building the LSTM based Neural Network</h3>

In [35]:
# Import Libraries and packages from Keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam

In [36]:
# Initializing the Neural Network based on LSTM
model = Sequential()

# Adding 1st LSTM layer
model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, dataset_train.shape[1]-1)))

# Adding 2nd LSTM layer
model.add(LSTM(units=10, return_sequences=False))

# Adding Dropout
model.add(Dropout(0.25))

# Output layer
model.add(Dense(units=1, activation='linear'))

# Compiling the Neural Network
model.compile(optimizer = Adam(learning_rate=0.01), loss='mean_squared_error')

<h3>Step #4. Start training</h3>

In [37]:
%%time
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

tb = TensorBoard('logs')

history = model.fit(X_train, y_train, shuffle=True, epochs=30, callbacks=[es, rlr, mcp, tb], validation_split=0.2, verbose=1, batch_size=256)

Epoch 1/30

Epoch 00001: val_loss improved from inf to 0.59595, saving model to weights.h5
Epoch 2/30

Epoch 00002: val_loss did not improve from 0.59595
Epoch 3/30

Epoch 00003: val_loss improved from 0.59595 to 0.21331, saving model to weights.h5
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.21331
Epoch 5/30

Epoch 00005: val_loss improved from 0.21331 to 0.07936, saving model to weights.h5
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.07936
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.07936
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.07936
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.07936
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.07936
Epoch 11/30

Epoch 00011: val_loss did not improve from 0.07936
Epoch 12/30

Epoch 00012: val_loss did not improve from 0.07936
Epoch 13/30

Epoch 00013: val_loss did not improve from 0.07936
Epoch 14/30

Epoch 00014: val_loss did not improve from 0.07936
Epoch 15/30

Epoch 00

# Hypertuning

In [38]:
#pip install -q -U keras-tuner

In [39]:
import keras_tuner as kt
import tensorflow

In [40]:
def model_builder(hp):
    model = Sequential()
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, dataset_train.shape[1]-1)))
    model.add(LSTM(units=10, return_sequences=False))
    model.add(Dropout(0.25))
    model.add(Dense(units=1, activation='linear'))
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer = Adam(learning_rate=hp_learning_rate), loss='mean_squared_error',metrics=tensorflow.keras.metrics.RootMeanSquaredError())
    
    return model

In [41]:
tuner1 = kt.Hyperband(hypermodel=model_builder,
                     objective='val_loss',
                     max_epochs=100,
                     factor=3,
                     hyperband_iterations=10,
                     directory='abc',
                     overwrite=True)

In [42]:
stop_early = tensorflow.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

In [48]:
tuner1.search(X_train,y_train, epochs=5,validation_split=0.2)

Trial 16 Complete [00h 00m 20s]
val_loss: 0.10821513831615448

Best val_loss So Far: 0.07485309988260269
Total elapsed time: 00h 18m 34s
INFO:tensorflow:Oracle triggered exit


In [56]:
tuner1.search_space_summary()

Search space summary
Default search space size: 1
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}


In [57]:
best=tuner1.get_best_hyperparameters(num_trials=3)[0]

In [58]:
best.get('units')

96

In [59]:
best.get('learning_rate')

KeyError: 'learning_rate does not exist.'

In [54]:
# Get the optimal hyperparameters
best_hps=tuner1.get_best_hyperparameters(num_trials=10)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is .
""")


The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 96 and the optimal learning rate for the optimizer
is .



<p>
Notes:<br>
<ul>
<li><b>EarlyStopping</b> - Stop training when a monitored metric has stopped improving.</li>
<li><code>monitor</code> - quantity to be monitored.</li>
<li><code>min_delta</code> - minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than <code>min_delta</code>, will count as no improvement.</li>
<li><code>patience</code> - number of epochs with no improvement after which training will be stopped.</li>
</ul>

<ul>
<li><b>ReduceLROnPlateau</b> - Reduce learning rate when a metric has stopped improving.</li>
<li><code>factor</code> - factor by which the learning rate will be reduced. <code>new_lr = lr * factor</code>.</li>
</ul>
</p>

<hr>

<p>
The last date for our training set is <code>30-Dec-2016</code>.<br>
</p>

<p>
We will perform predictions for the next <b>20</b> days, since <b>2017-01-01</b> to <b>2017-01-20</b>.
</p>

<h2>PART 3. Make future predictions</h2>

In [None]:
# Generate list of sequence of days for predictions
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1d').tolist()

'''
Remeber, we have datelist_train from begining.
'''

# Convert Pandas Timestamp to Datetime object (for transformation) --> FUTURE
datelist_future_ = []
for this_timestamp in datelist_future:
    datelist_future_.append(this_timestamp.date())

<h3>Step #5. Make predictions for future dates</h3>

In [None]:
# Perform predictions
predictions_future = model.predict(X_train[-n_future:])

predictions_train = model.predict(X_train[n_past:])

In [None]:
# Inverse the predictions to original measurements

# ---> Special function: convert <datetime.date> to <Timestamp>
def datetime_to_timestamp(x):
    '''
        x : a given datetime value (datetime.date)
    '''
    return datetime.strptime(x.strftime('%Y%m%d'), '%Y%m%d')


y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['Open']).set_index(pd.Series(datelist_future))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['Open']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))

# Convert <datetime.date> to <Timestamp> for PREDCITION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)

PREDICTION_TRAIN.head(3)

In [None]:
PREDICTION_TRAIN

In [None]:
PREDICTIONS_FUTURE

In [None]:
dataset_train=pd.DataFrame(dataset_train,columns=cols)
dataset_train.index=datelist_train
dataset_train.index=pd.to_datetime(dataset_train.index)

In [None]:
dataset_train

In [None]:
dataset_train[START_DATE_FOR_PLOTTING:]['Open']

<h3>Step #6. Visualize the Predictions</h3>

In [None]:
# Set plot size 
from pylab import rcParams
rcParams['figure.figsize'] = 14, 5

# Plot parameters
START_DATE_FOR_PLOTTING = '2016-12-22'

plt.plot(PREDICTIONS_FUTURE['Open'], color='r', label='Predicted Stock Price')
plt.plot(PREDICTION_TRAIN.loc[:]['Open'], color='orange', label='Training predictions')
plt.plot(dataset_train.loc[:].index, dataset_train.loc[:]['Open'], color='b', label='Actual Stock Price')

plt.axvline(x = min(PREDICTIONS_FUTURE.index), color='green', linewidth=2, linestyle='--')

plt.grid(which='major', color='#cccccc', alpha=0.5)

plt.legend(shadow=True)
plt.title('Predcitions and Acutal Stock Prices', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Stock Price Value', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error
MSE = np.sqrt(mean_squared_error(dataset_train[START_DATE_FOR_PLOTTING:]['Open'],PREDICTION_TRAIN))
print(MSE)