<a href="https://colab.research.google.com/github/Derun-Chai/Assignment1/blob/https%2Fgithub.com%2FAlocinYerv%2FTulip.git/Assignment3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Author:** Derun Chai

**Description:**

This notebook demonstrates future stock-price prediction using recurrent neural networks in TensorFlow. Networks with basic (SimpleRNN), LSTM, and GRU cells are implemented.

**Outline:**

1. [Libraries and settings](#1-bullet)
2. [Analyze data](#2-bullet)
3. [Manipulate data](#3-bullet)
4. [Model and validate data](#4-bullet)
5. [Predictions](#5-bullet)

**Reference:**  

[LSTM_Stock_prediction-20170507 by BenF](https://www.kaggle.com/benjibb/lstm-stock-prediction-20170507/notebook)

# 1. Libraries and settings <a class="anchor" id="1-bullet"></a>

In [1]:
pip install --upgrade tensorflow


Collecting tensorflow
  Downloading tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.16,>=2.15 (from tensorflow)
  Downloading tensorboard-2.15.1-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m104.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.16,>=2.15.0 (from tensorflow)
  Downloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl (441 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m442.0/442.0 kB[0m [31m43.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras<2.16,>=2.15.0 (from tensorflow)
  Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages:

In [2]:
import numpy as np
import pandas as pd
import math
import sklearn
import sklearn.preprocessing
import datetime
import os
import matplotlib.pyplot as plt
import tensorflow as tf
print(tf.__version__)
# Eager execution is the default mode in TensorFlow 2.x; the explicit call is
# kept so behaviour does not depend on that default.
tf.compat.v1.enable_eager_execution()

# Seed NumPy and TensorFlow so shuffling and weight initialisation are
# repeatable after "Restart & Run All".
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# split data in 80%/10%/10% train/validation/test sets
valid_set_size_percentage = 10
test_set_size_percentage = 10

# display parent directory and working directory
working_dir = os.getcwd()
parent_dir = os.path.dirname(working_dir)
print(parent_dir + ':', os.listdir(parent_dir))
print(working_dir + ':', os.listdir(working_dir))


2.15.0
/: ['sys', 'usr', 'etc', 'libx32', 'opt', 'lib64', 'media', 'sbin', 'root', 'boot', 'mnt', 'var', 'bin', 'home', 'dev', 'run', 'lib32', 'proc', 'srv', 'tmp', 'lib', '.dockerenv', 'tools', 'datalab', 'content', 'python-apt', 'NGC-DL-CONTAINER-LICENSE', 'cuda-keyring_1.0-1_all.deb']
/content: ['.config', 'sample_data']


# 2. Analyze data <a class="anchor" id="2-bullet"></a>
- load stock prices from prices-split-adjusted.csv
- analyze data

In [3]:
# import all stock prices (the first CSV column becomes the index)
DATA_PATH = "prices-split-adjusted.csv"
if not os.path.exists(DATA_PATH):
    # Fail with an actionable message instead of a bare pandas traceback.
    raise FileNotFoundError(
        f"{DATA_PATH} was not found in {os.getcwd()}; "
        "copy or upload the file into the working directory before running this cell."
    )
df = pd.read_csv(DATA_PATH, index_col=0)
df.info()
# A bare `df.head()` in the middle of a cell is evaluated and thrown away,
# so print it to actually show the first rows.
print(df.head())

# number of different stocks
# `unique()` keeps order of first appearance, so the sample below is the same
# on every run (iterating a set of strings is not).
symbols = df['symbol'].unique()
print('\nnumber of different stocks: ', len(symbols))
print(list(symbols[:10]))

FileNotFoundError: ignored

In [None]:
# Show the last rows of the loaded price table.
df.tail()

In [None]:
# Summary statistics of the loaded price table.
df.describe()

In [None]:
# Repeats the DataFrame summary that was already printed right after loading.
df.info()

In [None]:
# Select the EQIX rows once and reuse them for both panels.
eqix = df[df.symbol == 'EQIX']

fig, (ax_price, ax_volume) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: the four daily price series.
price_styles = (('open', 'red'), ('close', 'green'), ('low', 'blue'), ('high', 'black'))
for column, colour in price_styles:
    ax_price.plot(eqix[column].values, color=colour, label=column)
ax_price.set_title('stock price')
ax_price.set_xlabel('time [days]')
ax_price.set_ylabel('price')
ax_price.legend(loc='best')

# Right panel: traded volume.
ax_volume.plot(eqix['volume'].values, color='black', label='volume')
ax_volume.set_title('stock volume')
ax_volume.set_xlabel('time [days]')
ax_volume.set_ylabel('volume')
ax_volume.legend(loc='best');

# 3. Manipulate data <a class="anchor" id="3-bullet"></a>
- choose a specific stock
- drop feature: volume
- normalize stock data
- create train, validation and test data sets

In [None]:
# function for min-max normalization of stock
def normalize_data(df):
    """Min-max scale the ``open``, ``high``, ``low`` and ``close`` columns.

    Each column is fitted and transformed on its own with a
    ``MinMaxScaler``. The frame passed in is modified in place and is also
    returned, so callers that need the raw prices should pass a copy.
    """
    scaler = sklearn.preprocessing.MinMaxScaler()
    for column in ('open', 'high', 'low', 'close'):
        # The scaler expects a 2-D array: one sample per row, one feature.
        column_values = df[column].values.reshape(-1, 1)
        df[column] = scaler.fit_transform(column_values)
    return df

# function to create train, validation, test data given stock data and sequence length
def load_data(stock, seq_len, valid_pct=None, test_pct=None):
    """Cut ``stock`` into sliding windows and split them chronologically.

    Every window holds ``seq_len`` consecutive rows; the first
    ``seq_len - 1`` rows are the model input and the last row is the target.

    Parameters
    ----------
    stock : object exposing ``.values`` as a 2-D array (rows = time steps)
    seq_len : int
        Window length, must be at least 2.
    valid_pct, test_pct : float, optional
        Percentage of windows used for validation / test. When omitted, the
        module-level ``valid_set_size_percentage`` and
        ``test_set_size_percentage`` are used.

    Returns
    -------
    list
        ``[x_train, y_train, x_valid, y_valid, x_test, y_test]``

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 2 or ``stock`` has fewer than
        ``seq_len`` rows.
    """
    if valid_pct is None:
        valid_pct = valid_set_size_percentage
    if test_pct is None:
        test_pct = test_set_size_percentage

    data_raw = stock.values  # convert to numpy array

    # A series of length n contains n - seq_len + 1 windows; the previous
    # range(n - seq_len) silently dropped the most recent one.
    n_windows = len(data_raw) - seq_len + 1
    if seq_len < 2 or n_windows < 1:
        raise ValueError(
            f"need seq_len >= 2 and at least seq_len rows, "
            f"got seq_len={seq_len} and {len(data_raw)} rows"
        )

    data = np.stack([data_raw[start:start + seq_len] for start in range(n_windows)])

    valid_set_size = int(np.round(valid_pct / 100 * data.shape[0]))
    test_set_size = int(np.round(test_pct / 100 * data.shape[0]))
    train_set_size = data.shape[0] - (valid_set_size + test_set_size)
    valid_end = train_set_size + valid_set_size

    x_train = data[:train_set_size, :-1, :]
    y_train = data[:train_set_size, -1, :]

    x_valid = data[train_set_size:valid_end, :-1, :]
    y_valid = data[train_set_size:valid_end, -1, :]

    x_test = data[valid_end:, :-1, :]
    y_test = data[valid_end:, -1, :]

    return [x_train, y_train, x_valid, y_valid, x_test, y_test]

# choose one stock
df_stock = df[df.symbol == 'EQIX'].copy()
df_stock.drop(['symbol'],1,inplace=True)
df_stock.drop(['volume'],1,inplace=True)

cols = list(df_stock.columns.values)
print('df_stock.columns.values = ', cols)

# normalize stock
df_stock_norm = df_stock.copy()
df_stock_norm = normalize_data(df_stock_norm)

# create train, test data
seq_len = 20 # choose sequence length
x_train, y_train, x_valid, y_valid, x_test, y_test = load_data(df_stock_norm, seq_len)
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ', y_train.shape)
print('x_valid.shape = ',x_valid.shape)
print('y_valid.shape = ', y_valid.shape)
print('x_test.shape = ', x_test.shape)
print('y_test.shape = ',y_test.shape)


In [None]:
# Plot the four normalised price series of the selected stock.
plt.figure(figsize=(15, 5));
for column, colour in (('open', 'red'), ('close', 'green'), ('low', 'blue'), ('high', 'black')):
    # Label each line with its own column name (the close series used to be
    # labelled 'low', which produced two 'low' legend entries).
    plt.plot(df_stock_norm[column].values, color=colour, label=column)
plt.title('stock')
plt.xlabel('time [days]')
# The volume column was dropped before normalisation, so only prices are shown.
plt.ylabel('normalized price')
plt.legend(loc='best')
plt.show()

# 4. Model and validate data <a class="anchor" id="4-bullet"></a>
- RNNs with basic, LSTM, GRU cells


In [None]:
## Basic Cell RNN in tensorflow

# Function to calculate moving average
def calculate_moving_average(data, window_size=5):
    """Column-wise simple moving average of a 2-D array.

    Only complete windows are kept, so the result has
    ``data.shape[0] - window_size + 1`` rows and the same number of columns
    as ``data``. Row ``i`` is the mean of input rows ``i`` to
    ``i + window_size - 1``.
    """
    n_rows_out = data.shape[0] - window_size + 1
    averaged = np.empty((n_rows_out, data.shape[1]))
    kernel = np.ones(window_size) / window_size  # uniform weights
    for col_idx, column in enumerate(data.T):
        averaged[:, col_idx] = np.convolve(column, kernel, mode='valid')
    return averaged

# Calculate moving averages for y_train, y_valid, y_test
window_size = 5
y_train_ma = calculate_moving_average(y_train, window_size)
y_valid_ma = calculate_moving_average(y_valid, window_size)
y_test_ma = calculate_moving_average(y_test, window_size)

# Row i of each *_ma array averages targets i .. i + window_size - 1, so it is
# paired with target i + window_size - 1: drop the first window_size - 1 rows.
# NOTE(review): the moving average therefore already contains the value being
# predicted, so "moving average + predicted residual" is not a pure forecast;
# confirm this is intended.
y_train_adjusted = y_train[window_size - 1:]
y_valid_adjusted = y_valid[window_size - 1:]
y_test_adjusted = y_test[window_size - 1:]

# Calculate residuals (target minus moving average)
y_train_residual = y_train_adjusted - y_train_ma
y_valid_residual = y_valid_adjusted - y_valid_ma
y_test_residual = y_test_adjusted - y_test_ma

# Trim the inputs the same way as the targets. Slicing from the front mirrors
# the target slicing and, unlike `x[-n:]`, cannot return the whole array when
# n happens to be 0.
x_train_adjusted = x_train[window_size - 1:, :, :]
x_valid_adjusted = x_valid[window_size - 1:, :, :]
x_test_adjusted = x_test[window_size - 1:, :, :]

assert x_train_adjusted.shape[0] == y_train_residual.shape[0]
assert x_valid_adjusted.shape[0] == y_valid_residual.shape[0]
assert x_test_adjusted.shape[0] == y_test_residual.shape[0]

print("Adjusted Shapes:")
print(f"x_train_adjusted: {x_train_adjusted.shape}, y_train_residual: {y_train_residual.shape}")
print(f"x_valid_adjusted: {x_valid_adjusted.shape}, y_valid_residual: {y_valid_residual.shape}")
print(f"x_test_adjusted: {x_test_adjusted.shape}, y_test_residual: {y_test_residual.shape}")

# Batch cursor and sample permutation used by get_next_batch. The permutation
# must index the trimmed training set, not the longer untrimmed x_train.
index_in_epoch = 0
perm_array = np.arange(x_train_adjusted.shape[0])
np.random.shuffle(perm_array)

# Function to get the next batch
def get_next_batch(batch_size, x_data, y_data, perm_array):
    """Return the next mini-batch of ``(x_data, y_data)`` in permuted order.

    The position inside the current pass is kept in the module-level
    ``index_in_epoch``, which this function reads and advances. When the
    requested batch would run past the end of ``x_data``, ``perm_array`` is
    reshuffled in place and the cursor restarts at the beginning.

    Parameters
    ----------
    batch_size : int
    x_data, y_data : arrays indexed along their first axis
    perm_array : 1-D integer array of sample indices (shuffled in place)

    Returns
    -------
    tuple
        ``(x_batch, y_batch)`` selected with the same indices.
    """
    global index_in_epoch
    start = index_in_epoch
    index_in_epoch += batch_size
    # Strictly greater: a batch that ends exactly on the last sample is still
    # a valid batch. With `>=` that batch was discarded and the data was
    # reshuffled before the pass had covered every sample.
    if index_in_epoch > x_data.shape[0]:
        np.random.shuffle(perm_array)  # new sample order for the next pass
        start = 0
        index_in_epoch = batch_size
    end = min(index_in_epoch, x_data.shape[0])  # never slice past the data
    batch_indices = perm_array[start:end]
    return x_data[batch_indices], y_data[batch_indices]

# Model parameters
n_steps = seq_len - 1  # every input window holds all but the last row of a sequence
n_inputs = 4
n_neurons = 300
n_outputs = 4
n_layers = 2
learning_rate = 0.0025
batch_size = 100
n_epochs = 70

# Build the model: stacked SimpleRNN layers followed by a linear read-out.
model = tf.keras.Sequential()
for layer_idx in range(n_layers):
    # Only the last recurrent layer collapses the sequence to one vector.
    is_last_layer = layer_idx == n_layers - 1
    model.add(tf.keras.layers.SimpleRNN(n_neurons, return_sequences=not is_last_layer))
model.add(tf.keras.layers.Dense(n_outputs))

# Compile the model
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate), loss='mean_squared_error')

# Train the model once, on the residual targets. The predictions below are
# added to the moving average, so the network has to learn the residual
# (target minus moving average), not the raw normalised price. Fitting on raw
# prices first and then continuing on residuals trained two different
# objectives into the same weights and left `history` describing the wrong one.
history = model.fit(
    x_train_adjusted,
    y_train_residual,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_data=(x_valid_adjusted, y_valid_residual),
)

# Predictions
# Residuals predicted by the network for each split, in train/valid/test order
y_train_pred_residual, y_valid_pred_residual, y_test_pred_residual = [
    model.predict(split_inputs)
    for split_inputs in (x_train_adjusted, x_valid_adjusted, x_test_adjusted)
]

# Final predictions = moving average + predicted residual.
# The residual arrays are cut to the moving-average length before adding.
n_train_ma = y_train_ma.shape[0]
n_valid_ma = y_valid_ma.shape[0]
n_test_ma = y_test_ma.shape[0]
y_train_final_pred = y_train_ma + y_train_pred_residual[:n_train_ma, :]
y_valid_final_pred = y_valid_ma + y_valid_pred_residual[:n_valid_ma, :]
y_test_final_pred = y_test_ma + y_test_pred_residual[:n_test_ma, :]



# use Basic LSTM Cell
#layers = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons, activation=tf.nn.elu)
#          for layer in range(n_layers)]

# use LSTM Cell with peephole connections
#layers = [tf.contrib.rnn.LSTMCell(num_units=n_neurons,
#                                  activation=tf.nn.leaky_relu, use_peepholes = True)
#          for layer in range(n_layers)]

# use GRU cell
#layers = [tf.contrib.rnn.GRUCell(num_units=n_neurons, activation=tf.nn.leaky_relu)
#          for layer in range(n_layers)]


# run graph
# Extract the history of loss and validation loss
# Per-epoch losses recorded by `model.fit`
training_log = history.history
loss, val_loss = training_log['loss'], training_log['val_loss']
epochs = range(1, len(loss) + 1)

# Training loss as dots, validation loss as a line
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# Build the ARIMA model

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings("ignore")  # Ignore convergence warnings for clarity

warnings.filterwarnings("ignore")
def check_stationarity(ts_data, title):
    """Plot rolling statistics of ``ts_data`` and print an ``adfuller`` test.

    The figure shows the series together with its 30-period rolling mean and
    5-period rolling standard deviation. The printed report lists the test
    statistic, p-value, number of lags, number of observations and the
    critical values returned by ``adfuller``.

    Parameters
    ----------
    ts_data : pandas Series (must support ``.rolling``)
    title : str
        Used as the figure title and in the report header.
    """
    rolling_mean = ts_data.rolling(30).mean()
    rolling_std = ts_data.rolling(5).std()

    # One figure with the raw series and both rolling statistics
    plt.figure(figsize=(12, 6))
    curves = (
        (ts_data, 'black', 'Original Data'),
        (rolling_mean, 'red', 'Rolling Mean (30 days)'),
        (rolling_std, 'blue', 'Rolling Std Dev (5 days)'),
    )
    for series, colour, label in curves:
        plt.plot(series, color=colour, label=label)
    plt.legend()
    plt.title(title)
    plt.show()

    # Dickey-Fuller report
    print(f'Dickey-Fuller Test: {title}\n')
    adf_output = adfuller(ts_data, autolag='AIC')
    field_names = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    report = pd.Series(adf_output[0:4], index=field_names)
    print(report)
    for level, critical_value in adf_output[4].items():
        print(f'Critical value at {level}: {critical_value:.5f}')
stocks = ['RIG', 'KMB', 'SWK', 'CTXS', 'GPN']

for stock in stocks:
    print(f"\nAnalyzing Stock: {stock}\n{'='*30}")
    stock_data = df[df['symbol'] == stock]['close'].dropna()

    # Checking stationarity of the raw closing prices
    check_stationarity(stock_data, f'Stationarity Check for {stock}')

    # ... and of the log returns (first difference of the log price)
    stock_data_log = np.log(stock_data)
    stock_data_log_diff = (stock_data_log - stock_data_log.shift()).dropna()
    check_stationarity(stock_data_log_diff, f'Log Differenced {stock}')

    # Fit ARIMA Model (example with ARIMA(1,1,0))
    # Stored as `arima_model` so the Keras network kept in `model` by the
    # RNN cell is not overwritten when this cell runs.
    arima_model = ARIMA(stock_data, order=(1, 1, 0))
    model_fit = arima_model.fit()
    print(model_fit.summary())

    # Forecasting (example): next 5 steps
    forecast = model_fit.forecast(steps=5)
    print(f"\nForecast for {stock}: {forecast}\n")


# Build the model using LSTM layers

In [None]:

# Build the model using LSTM layers
# Stacked LSTM network: every recurrent layer except the last returns the
# full sequence, and a Dense layer maps the final state to the outputs.
lstm_options = dict(activation='tanh', recurrent_activation='sigmoid')
lstm_layers = [
    tf.keras.layers.LSTM(n_neurons, return_sequences=True, **lstm_options)
    for _ in range(n_layers - 1)
]
lstm_layers.append(tf.keras.layers.LSTM(n_neurons, return_sequences=False, **lstm_options))
lstm_layers.append(tf.keras.layers.Dense(n_outputs))
model_LSTM = tf.keras.Sequential(lstm_layers)

# Adam optimiser with the shared learning rate, mean-squared-error loss
model_LSTM.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='mean_squared_error',
)

# Fit on the residual targets, monitoring the validation residuals
history_LSTM = model_LSTM.fit(
    x_train_adjusted,
    y_train_residual,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_data=(x_valid_adjusted, y_valid_residual),
)

# Predicted residuals for each split
y_train_pred_residual_LSTM = model_LSTM.predict(x_train_adjusted)
y_valid_pred_residual_LSTM = model_LSTM.predict(x_valid_adjusted)
y_test_pred_residual_LSTM = model_LSTM.predict(x_test_adjusted)

# Final predictions = moving average + predicted residual
# (residuals are cut to the moving-average length before adding)
lstm_train_len = y_train_ma.shape[0]
lstm_valid_len = y_valid_ma.shape[0]
lstm_test_len = y_test_ma.shape[0]
y_train_final_pred_LSTM = y_train_ma + y_train_pred_residual_LSTM[:lstm_train_len, :]
y_valid_final_pred_LSTM = y_valid_ma + y_valid_pred_residual_LSTM[:lstm_valid_len, :]
y_test_final_pred_LSTM = y_test_ma + y_test_pred_residual_LSTM[:lstm_test_len, :]

# Extract the history of loss and validation loss
# Per-epoch losses recorded while fitting model_LSTM
lstm_log = history_LSTM.history
loss, val_loss = lstm_log['loss'], lstm_log['val_loss']
epochs = range(1, len(loss) + 1)

# Training loss as dots, validation loss as a line
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()


In [None]:
model_LSTM1 = tf.keras.Sequential([
    # The first layer declares the input shape. Use n_steps (= seq_len - 1)
    # rather than a literal 19 so the model keeps working if seq_len changes.
    tf.keras.layers.LSTM(n_neurons, return_sequences=True, input_shape=(n_steps, n_inputs)),
    tf.keras.layers.LSTM(n_neurons, return_sequences=True),
    # Only the last LSTM layer collapses the sequence (return_sequences=False)
    tf.keras.layers.LSTM(n_neurons, return_sequences=False),
    # Output layer
    tf.keras.layers.Dense(n_outputs)
])

# Compile the model
model_LSTM1.compile(optimizer='adam', loss='mean_squared_error')

# Train the model on the residuals with adjusted datasets
history_LSTM1 = model_LSTM1.fit(
    x_train_adjusted,
    y_train_residual,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_data=(x_valid_adjusted, y_valid_residual),
)

# Make predictions (these are residual predictions) on the untrimmed inputs
y_train_pred_residual_LSTM1 = model_LSTM1.predict(x_train)
y_valid_pred_residual_LSTM1 = model_LSTM1.predict(x_valid)
y_test_pred_residual_LSTM1 = model_LSTM1.predict(x_test)

# Keep only the trailing rows so each array matches its moving average
y_train_pred_residual_adjusted = y_train_pred_residual_LSTM1[-y_train_ma.shape[0]:]
y_valid_pred_residual_adjusted = y_valid_pred_residual_LSTM1[-y_valid_ma.shape[0]:]
y_test_pred_residual_adjusted = y_test_pred_residual_LSTM1[-y_test_ma.shape[0]:]

# Calculate final predictions (moving average + predicted residual)
y_train_final_pred_LSTM1 = y_train_ma + y_train_pred_residual_adjusted
y_valid_final_pred_LSTM1 = y_valid_ma + y_valid_pred_residual_adjusted
y_test_final_pred_LSTM1 = y_test_ma + y_test_pred_residual_adjusted


# Extract the history of loss and validation loss
# Per-epoch losses recorded while fitting model_LSTM1
lstm1_log = history_LSTM1.history
loss, val_loss = lstm1_log['loss'], lstm1_log['val_loss']
epochs = range(1, len(loss) + 1)

# Training loss as dots, validation loss as a line
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# Use GRU cell

In [None]:
# Stacked GRU network: all recurrent layers but the last return sequences,
# and a Dense layer produces the outputs.
gru_layers = [
    tf.keras.layers.GRU(n_neurons, return_sequences=True, activation='tanh')
    for _ in range(n_layers - 1)
]
gru_layers.append(tf.keras.layers.GRU(n_neurons, activation='tanh'))
gru_layers.append(tf.keras.layers.Dense(n_outputs))
model_GRU = tf.keras.Sequential(gru_layers)

model_GRU.compile(optimizer='adam', loss='mean_squared_error')

# Keep the trailing rows of x_train / x_valid so they pair with the residuals
n_train_residual = y_train_residual.shape[0]
n_valid_residual = y_valid_residual.shape[0]
x_train_adjusted = x_train[-n_train_residual:]
x_valid_adjusted = x_valid[-n_valid_residual:]

# Inputs and residual targets must have the same number of samples
assert x_train_adjusted.shape[0] == y_train_residual.shape[0]
assert x_valid_adjusted.shape[0] == y_valid_residual.shape[0]

# Train the model on residuals
history_GRU = model_GRU.fit(
    x_train_adjusted,
    y_train_residual,
    epochs=n_epochs,
    batch_size=batch_size,
    validation_data=(x_valid_adjusted, y_valid_residual),
)

# Predict residuals.
# NOTE(review): the `_GPU` suffix on these names looks like a typo for `_GRU`;
# the names are left unchanged here.
y_train_pred_residual_GPU = model_GRU.predict(x_train_adjusted)
y_valid_pred_residual_GPU = model_GRU.predict(x_valid_adjusted)
# The test inputs are passed untrimmed; the predictions are trimmed afterwards.
y_test_pred_residual_GPU = model_GRU.predict(x_test)
n_test_ma = y_test_ma.shape[0]
y_test_pred_residual_adjusted_GPU = y_test_pred_residual_GPU[-n_test_ma:]

# Final predictions = moving average + predicted residual
y_train_final_pred_GPU = y_train_ma + y_train_pred_residual_GPU
y_valid_final_pred_GPU = y_valid_ma + y_valid_pred_residual_GPU
y_test_final_pred_GPU = y_test_ma + y_test_pred_residual_adjusted_GPU



# run graph
# Extract the history of loss and validation loss
# Per-epoch losses recorded while fitting model_GRU
gru_log = history_GRU.history
loss, val_loss = gru_log['loss'], gru_log['val_loss']
epochs = range(1, len(loss) + 1)

# Training loss as dots, validation loss as a line
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# 5. Predictions <a class="anchor" id="5-bullet"></a>

In [None]:
# Shape of the training targets returned by load_data.
y_train.shape

In [None]:
ft = 0 # 0 = open, 1 = close, 2 = highest, 3 = lowest
# NOTE(review): `ft` indexes the columns in the order printed as
# `df_stock.columns.values` when the stock was selected; confirm the 2/3
# labels above against that output.

# Position of every split on a common time axis.
n_train, n_valid, n_test = y_train.shape[0], y_valid.shape[0], y_test.shape[0]
valid_start = n_train
test_start = n_train + n_valid
total_len = test_start + n_test  # previously built from y_test twice instead of y_valid + y_test

train_x = np.arange(0, n_train)
valid_x = np.arange(valid_start, test_start)
test_x = np.arange(test_start, total_len)

# Each *_final_pred array is shorter than its target array because the first
# rows of every split were dropped when the residual targets were built, so a
# prediction curve lines up with the LAST rows of its split.
train_pred_x = np.arange(n_train - y_train_final_pred.shape[0], n_train)
valid_pred_x = np.arange(test_start - y_valid_final_pred.shape[0], test_start)
test_pred_x = np.arange(total_len - y_test_final_pred.shape[0], total_len)

## show predictions
fig, (ax_all, ax_test) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: targets and predictions of all three splits
ax_all.plot(train_x, y_train[:, ft], color='blue', label='train target')
ax_all.plot(valid_x, y_valid[:, ft], color='gray', label='valid target')
ax_all.plot(test_x, y_test[:, ft], color='black', label='test target')
ax_all.plot(train_pred_x, y_train_final_pred[:, ft], color='red', label='train prediction')
ax_all.plot(valid_pred_x, y_valid_final_pred[:, ft], color='orange', label='valid prediction')
ax_all.plot(test_pred_x, y_test_final_pred[:, ft], color='green', label='test prediction')
ax_all.set_title('past and future stock prices')
ax_all.set_xlabel('time [days]')
ax_all.set_ylabel('normalized price')
ax_all.legend(loc='best')

# Right panel: test split only, on the same time axis as the left panel
ax_test.plot(test_x, y_test[:, ft], color='black', label='test target')
ax_test.plot(test_pred_x, y_test_final_pred[:, ft], color='green', label='test prediction')
ax_test.set_title('future stock prices')
ax_test.set_xlabel('time [days]')
ax_test.set_ylabel('normalized price')
ax_test.legend(loc='best');
def _sign_agreement(actual, predicted):
    """Fraction of rows where column 1 minus column 0 has the same sign in both arrays."""
    actual_direction = np.sign(actual[:, 1] - actual[:, 0])
    predicted_direction = np.sign(predicted[:, 1] - predicted[:, 0])
    matches = np.equal(actual_direction, predicted_direction).astype(int)
    return np.sum(matches) / actual.shape[0]

# Keep the trailing target rows so each split matches its prediction length
y_train_adjusted = y_train[-y_train_final_pred.shape[0]:]
y_valid_adjusted = y_valid[-y_valid_final_pred.shape[0]:]
y_test_adjusted = y_test[-y_test_final_pred.shape[0]:]

corr_price_development_train = _sign_agreement(y_train_adjusted, y_train_final_pred)
corr_price_development_valid = _sign_agreement(y_valid_adjusted, y_valid_final_pred)
corr_price_development_test = _sign_agreement(y_test_adjusted, y_test_final_pred)

accuracy_report = 'correct sign prediction for close - open price for train/valid/test: %.2f/%.2f/%.2f'
print(accuracy_report % (
    corr_price_development_train, corr_price_development_valid, corr_price_development_test))
