In [1]:
# Import modules and packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

%matplotlib inline

In [4]:
# Importing Training Set
dataset_train = pd.read_csv('dataset-2-date.csv')

print(dataset_train)

            date        u1        u2  ...        u6        u7         y
0     01/01/2000  0.268900  0.650894  ...  0.843079  0.822079  0.180295
1     02/01/2000  0.268483  0.650140  ...  0.838605  0.822079  0.177124
2     03/01/2000  0.267967  0.659657  ...  0.807879  0.786246  0.173618
3     04/01/2000  0.267451  0.668338  ...  0.799606  0.786246  0.171640
4     05/01/2000  0.266935  0.647191  ...  0.773122  0.746142  0.166972
...          ...       ...       ...  ...       ...       ...       ...
2389  17/07/2006  0.284859  0.645894  ...  0.608848  0.500866  0.188976
2390  18/07/2006  0.266139  0.665503  ...  0.582074  0.508999  0.178992
2391  19/07/2006  0.247419  0.665706  ...  0.602186  0.523348  0.169742
2392  20/07/2006  0.228699  0.672975  ...  0.636970  0.537697  0.159023
2393  21/07/2006  0.215821  0.668945  ...  0.684994  0.569601  0.149922

[2394 rows x 9 columns]


In [7]:
# Select features (columns) to be involved intro training and predictions
cols = list(dataset_train)[1:9]

# Extract dates (will be used in visualization)
datelist_train = list(dataset_train['date'])
datelist_train = [dt.datetime.strptime(date, '%d/%m/%Y').date() for date in datelist_train]

print('Training set shape == {}'.format(dataset_train.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Featured selected: {}'.format(cols))

Training set shape == (2394, 9)
All timestamps == 2394
Featured selected: ['u1', 'u2', 'u3', 'u4', 'u5', 'u6', 'u7', 'y']


In [8]:
dataset_train = dataset_train[cols].astype(str)
for i in cols:
    for j in range(0, len(dataset_train)):
        dataset_train[i][j] = dataset_train[i][j].replace(',', '')

dataset_train = dataset_train.astype(float)

training_set = dataset_train.to_numpy()
print('Shape of training set == {}.'.format(training_set.shape))
training_set


Shape of training set == (2394, 8).


array([[0.26890035, 0.65089438, 0.83274183, ..., 0.84307935, 0.8220792 ,
        0.18029533],
       [0.2684825 , 0.65013964, 0.85215319, ..., 0.83860539, 0.8220792 ,
        0.17712402],
       [0.26796671, 0.65965652, 0.823618  , ..., 0.80787913, 0.78624615,
        0.17361771],
       ...,
       [0.24741915, 0.66570628, 0.66474048, ..., 0.60218613, 0.52334833,
        0.16974206],
       [0.22869943, 0.67297514, 0.67729976, ..., 0.6369701 , 0.53769717,
        0.15902309],
       [0.21582127, 0.66894491, 0.67787719, ..., 0.68499444, 0.56960066,
        0.14992179]])

In [9]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)

sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:, 0:1])

array([[-0.14539862],
       [-0.14945065],
       [-0.15445243],
       ...,
       [-0.3537087 ],
       [-0.53523982],
       [-0.66012347]])

In [10]:
# Creating a data structure with 90 timestamps and 1 output
X_train = []
y_train = []

n_future = 60   # Number of days we want top predict into the future
n_past = 90     # Number of past days we want to use to predict the future

for i in range(n_past, len(training_set_scaled) - n_future +1):
    X_train.append(training_set_scaled[i - n_past:i, 0:dataset_train.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)

print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))

X_train shape == (2245, 90, 7).
y_train shape == (2245, 1).


In [11]:
# Import Libraries and packages from Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam

In [12]:

# Initializing the Neural Network based on LSTM
model = Sequential()

# Adding 1st LSTM layer
model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, dataset_train.shape[1]-1)))

# Adding 2nd LSTM layer
model.add(LSTM(units=10, return_sequences=False))

# Adding Dropout
model.add(Dropout(0.25))

# Output layer
model.add(Dense(units=1, activation='linear'))

# Compiling the Neural Network
model.compile(optimizer = Adam(learning_rate=0.01), loss='mean_squared_error')

In [13]:
%%time
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

tb = TensorBoard('logs')

history = model.fit(X_train, y_train, shuffle=True, epochs=30, callbacks=[es, rlr, mcp, tb], validation_split=0.2, verbose=1, batch_size=256)

Epoch 1/30

Epoch 00001: val_loss improved from inf to 1.06428, saving model to weights.h5
Epoch 2/30

Epoch 00002: val_loss improved from 1.06428 to 1.00949, saving model to weights.h5
Epoch 3/30

Epoch 00003: val_loss improved from 1.00949 to 0.91071, saving model to weights.h5
Epoch 4/30

Epoch 00004: val_loss did not improve from 0.91071
Epoch 5/30

Epoch 00005: val_loss improved from 0.91071 to 0.87919, saving model to weights.h5
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.87919
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.87919
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.87919
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.87919
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.87919
Epoch 11/30

Epoch 00011: val_loss did not improve from 0.87919
Epoch 12/30

Epoch 00012: val_loss did not improve from 0.87919
Epoch 13/30

Epoch 00013: val_loss did not improve from 0.87919
Epoch 14/30

Epoch 00014: val_loss did not improve fr

In [15]:
# Generate list of sequence of days for predictions
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1d').tolist()

'''
Remeber, we have datelist_train from begining.
'''

# Convert Pandas Timestamp to Datetime object (for transformation) --> FUTURE
datelist_future_ = []
for this_timestamp in datelist_future:
    datelist_future_.append(this_timestamp.date())

In [16]:
# Perform predictions
predictions_future = model.predict(X_train[-n_future:])

predictions_train = model.predict(X_train[n_past:])


In [17]:
# Inverse the predictions to original measurements

# ---> Special function: convert <datetime.date> to <Timestamp>
def datetime_to_timestamp(x):
    '''
        x : a given datetime value (datetime.date)
    '''
    return datetime.strptime(x.strftime('%d%m%Y'), '%d%m%Y')


y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['y']).set_index(pd.Series(datelist_future))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['y']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))

# Convert <datetime.date> to <Timestamp> for PREDCITION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)

PREDICTION_TRAIN.head(15)

Unnamed: 0,y
2000-08-27,0.229506
2000-08-28,0.230181
2000-08-29,0.231587
2000-08-30,0.233678
2000-08-31,0.234821
2000-09-01,0.235272
2000-09-02,0.236426
2000-09-03,0.23958
2000-09-04,0.240791
2000-09-05,0.24232


In [19]:

# Set plot size 
from pylab import rcParams
rcParams['figure.figsize'] = 14, 5

# Plot parameters
START_DATE_FOR_PLOTTING = '2012-06-01'

plt.plot(PREDICTIONS_FUTURE.index, PREDICTIONS_FUTURE['y'], color='r', label='Predicted Stock Price')
plt.plot(PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:].index, PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:]['y'], color='orange', label='Training predictions')
plt.plot(dataset_train.loc[START_DATE_FOR_PLOTTING:].index, dataset_train.loc[START_DATE_FOR_PLOTTING:]['y'], color='b', label='Actual Stock Price')

plt.axvline(x = min(PREDICTIONS_FUTURE.index), color='green', linewidth=2, linestyle='--')

plt.grid(which='major', color='#cccccc', alpha=0.5)

plt.legend(shadow=True)
plt.title('Predcitions and Acutal Stock Prices', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Stock Price Value', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.show()

ValueError: ignored

findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.


Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x7fd44368f4d0> (for post_execute):


ValueError: ignored

ValueError: ignored