In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, SimpleRNN
from keras.models import Sequential
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

In [None]:
path = './istanbul_stock_exchange_data.xlsx'

# Read the seven index columns (Excel columns D:J) from the first 530 data
# rows and drop any row that contains a missing value.
df = pd.read_excel(path, sheet_name='original data', usecols='D:J', header=0, skiprows=1).iloc[0:530]
df.dropna(inplace=True)

# Give every column the same label 'stocks'. Later cells select
# 'stocks(t-1)' / 'stocks(t)' and rely on the duplicate labels to pull out all
# seven series at once.
df.rename(columns=lambda x: 'stocks', inplace=True)
print(df.head(5))

# Read the results column (Excel column C); it is only used for the overview plot.
y_df = pd.read_excel(path, sheet_name='original data', usecols='C', header=0, skiprows=1).iloc[0:530]
y_df.dropna(inplace=True)
y = y_df.to_numpy()
print(y_df.head(5))

# Scale every series to [0, 1].
# The scaled values are written back into `df` (same index, same column labels)
# because `df` is what the windowing step consumes. Previously only a throwaway
# array `X` was scaled, so the network was trained on raw values while the
# predictions were still passed through scaler.inverse_transform afterwards.
# A plain ndarray is fed to the scaler so the duplicate column labels never
# reach scikit-learn's feature-name handling.
# NOTE(review): the scaler is fitted on every row, including rows that a later
# chronological split holds out for testing - consider fitting on the training
# rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(df.to_numpy())
df = pd.DataFrame(X, columns=df.columns, index=df.index)

In [None]:
# Overview of the raw target series: first column of y, drawn in cyan.
fig, ax = plt.subplots()
ax.plot(y[:, 0], color='c')
ax.set_title('Original Dataset')
plt.show()

In [None]:
# Reframe a time series as a supervised-learning table of lagged inputs and
# current/future outputs.
def timeseries_to_supervised(df, n_in, n_out):
   """Build a table of shifted copies of ``df`` for sequence prediction.

   Parameters
   ----------
   df : pandas.DataFrame
      One row per time step. The input frame is not modified.
   n_in : int
      Number of previous time steps to expose as inputs. Produces the column
      groups ``<col>(t-n_in)`` ... ``<col>(t-1)``, oldest first.
   n_out : int
      Number of time steps to predict. Produces ``<col>(t)`` followed by
      ``<col>(t+1)`` ... ``<col>(t+n_out-1)``.

   Returns
   -------
   pandas.DataFrame
      All shifted copies placed side by side, with every row that contains a
      NaN removed (this drops the first ``n_in`` and last ``n_out - 1`` rows
      that shifting leaves incomplete, plus any row that was already missing
      data). An empty frame is returned when no shifted copy is requested.
   """
   # Past observations: shift down by i rows so row t holds the value at t-i.
   lagged = [df.shift(i).add_suffix(f'(t-{i})') for i in range(n_in, 0, -1)]

   # Current and future observations: shift up by i rows.
   leads = [
      df.shift(-i).add_suffix('(t)' if i == 0 else f'(t+{i})')
      for i in range(0, n_out)
   ]

   frames = lagged + leads
   if not frames:
      # pd.concat refuses an empty list; keep returning an empty frame instead.
      return pd.DataFrame()

   # Concatenate once instead of growing a frame inside the loops.
   return pd.concat(frames, axis=1).dropna()

# Window sizes: how many past steps feed the model and how many steps it predicts.
n_in = 1
n_out = 1
shifted_df = timeseries_to_supervised(df, n_in, n_out)
print(shifted_df.columns)
print(shifted_df.head(5))

# Inputs are all lagged column groups, oldest lag first. The lag list follows
# n_in instead of a hard-coded range(1, 0, -1), so changing n_in above actually
# changes the features. Every column of df carries the label 'stocks', so each
# label below selects all series for that lag.
lag_columns = [f'stocks(t-{i})' for i in range(n_in, 0, -1)]
X = shifted_df[lag_columns].values

# Targets are the values at the current step t.
y = shifted_df['stocks(t)'].values
print(X.shape, y.shape)

In [None]:
# Chronological 80/20 split: the first 80% of the rows train the model and the
# remaining rows are held out for testing (no shuffling).
data_len = X.shape[0]
train_size = int(data_len * .8)
test_size = data_len - train_size

# Both X and y are indexed as 2-D arrays (rows, features).
x_train, x_test = X[:train_size, :], X[train_size:, :]
y_train, y_test = y[:train_size, :], y[train_size:, :]

print(f'\nTrain shape: { x_train.shape, y_train.shape }')
print(f'\nTest shape: { x_test.shape, y_test.shape }')

In [None]:
# Reshape the 2-D (rows, features) arrays into the 3-D
# (samples, steps, features) layout the recurrent layer is fed with.
samples = train_size
steps = 1
# Take the feature counts from the data instead of hard-coding 7. Reading the
# trailing dimension gives the same number before and after the reshape, so the
# cell can be re-run safely.
features_in = x_train.shape[-1]
features_out = y_train.shape[-1]

x_train = np.reshape(x_train, (samples, steps, features_in))
y_train = np.reshape(y_train, (samples, steps, features_out))
print(f'\nTrain shape: { x_train.shape, y_train.shape }')

x_test = np.reshape(x_test, (data_len - train_size, steps, features_in))
y_test = np.reshape(y_test, (data_len - train_size, steps, features_out))
print(f'\nTest shape: { x_test.shape, y_test.shape }')

In [None]:
batch_size = 1

# Build the network: one recurrent layer followed by two dense layers.
# return_sequences=True keeps the step axis, so the output is
# (batch, steps, features) and lines up with the 3-D y_train.
model = Sequential()
model.add(SimpleRNN(units=50, input_shape=(x_train.shape[1], x_train.shape[2]), activation='relu', return_sequences=True))
model.add(Dense(25, activation='relu'))
# One output unit per target series, taken from the data rather than hard-coded.
model.add(Dense(y_train.shape[2]))

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that printed a stray "None").
model.summary()

# compile the model
model.compile(loss='mean_squared_error', optimizer='adam')

# fit the model
model.fit(x_train, y_train, epochs=25, batch_size=batch_size, verbose=1)

# predict on both partitions so they can be compared against the targets
train_pred = model.predict(x_train, batch_size=batch_size)
test_pred = model.predict(x_test, batch_size=batch_size)

In [None]:
### re-scale the prediction
# Flatten the (samples, steps, features) arrays back to 2-D and map them through
# the scaler's inverse transform.
# NOTE(review): inverse_transform only restores original units when its input is
# in the scaler's 0-1 space - confirm the arrays fed to the model were derived
# from scaled data.
# NOTE(review): this cell overwrites train_pred/test_pred/y_train/y_test, so
# running it twice applies the inverse transform twice.
n_train_rows = samples * steps
n_test_rows = data_len - train_size

y_train_2d = y_train.reshape((n_train_rows, features_out))
y_test_2d = y_test.reshape((n_test_rows, features_out))

train_pred = scaler.inverse_transform(train_pred.reshape((n_train_rows, features_out)))
y_train = scaler.inverse_transform(y_train_2d)
test_pred = scaler.inverse_transform(test_pred.reshape((n_test_rows, features_out)))
y_test = scaler.inverse_transform(y_test_2d)

In [None]:
### Metrics
# Error and goodness-of-fit on the held-out test partition only.
test_mse = mean_squared_error(y_test, test_pred)
test_r2 = r2_score(y_test, test_pred)
print("Test MSE: ", test_mse)
print("Test R2: ", test_r2)

In [None]:
# Finally, we plot the results. A vertical line in the plot
# identifies a splitting point between the training and the test data.
original = np.concatenate((y_train, y_test), axis=0)
predicted = np.concatenate((train_pred, test_pred), axis=0)  # was computed twice before

# The x axis is the sample position (0 .. n-1), not a DataFrame label.
index = range(0, original.shape[0])
plt.plot(index, original, 'g')   # actual values
plt.plot(index, predicted, 'r')  # model output
# Mark the first test sample by position. df.index[train_size] is a label from
# a different frame and only matches this axis when that index happens to be a
# gap-free 0..n-1 range.
plt.axvline(train_size, c="b")
plt.show()