In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras

import datetime
import math

%matplotlib inline
%reload_ext tensorboard

In [2]:
def create_split(df, pct_train, pct_val, batch_size, window_size):
    """Split ``df`` chronologically into train, validation and test parts.

    The combined train+validation length and the train length are both
    rounded down to a multiple of ``batch_size``. The validation and test
    parts are each extended backwards by ``window_size`` rows so their first
    target still has a full look-back window.

    Args:
        df: Row-sliceable table exposing ``shape`` (e.g. a DataFrame).
        pct_train: Fraction of all rows given to train + validation.
        pct_val: Fraction of the train + validation rows given to validation.
        batch_size: Positive batch size the split lengths are aligned to.
        window_size: Number of look-back rows prepended to val and test.

    Returns:
        Tuple ``(df_train, df_val, df_test)``.
    """
    def _floor_to_batch(n):
        # Largest multiple of batch_size that does not exceed n.
        return n - n % batch_size

    length = df.shape[0]
    train_val_end = _floor_to_batch(math.floor(pct_train * length))
    train_end = _floor_to_batch(math.floor((1 - pct_val) * train_val_end))

    # Absolute, clamped bounds: negative offsets silently misbehave when an
    # offset is 0 (``df[-0:]`` is the whole frame, ``df[a:-0]`` is empty).
    val_start = max(train_end - window_size, 0)
    test_start = max(train_val_end - window_size, 0)

    df_train = df[:train_end]
    df_val = df[val_start:train_val_end]
    df_test = df[test_start:]
    return df_train, df_val, df_test
    
def find_batch_gcd(length, batch_size):
    """Round ``length`` down to the nearest multiple of ``batch_size``.

    Despite the name this is not a greatest common divisor: it returns the
    largest value not exceeding ``length`` that ``batch_size`` divides evenly.
    """
    remainder = length % abs(batch_size)
    return length - remainder

def create_dataset(df, window_size, horizon=10):
    """Build sliding-window samples and multi-step ``Close`` targets.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of every column as
    input and the next ``horizon`` values of the ``"Close"`` column as target.

    Args:
        df: DataFrame containing a ``"Close"`` column.
        window_size: Number of consecutive rows per input window.
        horizon: Number of future ``Close`` values per target (default 10,
            the value that was previously hard-coded).

    Returns:
        ``(X, y)`` with shapes ``(n, window_size, n_columns)`` and
        ``(n, horizon)``, where ``n = len(df) - window_size - horizon + 1``.
        When ``df`` is too short, ``n`` is 0 and the arrays keep those shapes.

    Raises:
        ValueError: If ``horizon`` is smaller than 1.
    """
    if horizon < 1:
        raise ValueError("horizon must be at least 1")

    # Convert once instead of calling .iloc on every iteration.
    features = df.values
    close = df["Close"].values

    n_samples = len(df) - window_size - horizon + 1
    if n_samples <= 0:
        # Keep the trailing dimensions so callers can still read X.shape[1:].
        return (np.empty((0, window_size, features.shape[1]), dtype=features.dtype),
                np.empty((0, horizon), dtype=close.dtype))

    X = np.array([features[i:i + window_size] for i in range(n_samples)])
    y = np.array([close[i + window_size:i + window_size + horizon]
                  for i in range(n_samples)])
    return X, y

def create_model(nodes, optimizer, dropout, X_train):
    """Assemble and compile the two-layer LSTM regressor.

    Args:
        nodes: Sequence whose first two entries are the LSTM layer widths.
        optimizer: Optimizer passed to ``model.compile``.
        dropout: Rate for the Dropout layer placed after the second LSTM.
        X_train: Array whose ``shape[1:]`` fixes the model's input shape.

    Returns:
        A compiled ``keras.Sequential`` model with a 10-unit Dense output and
        mean-squared-error loss.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]
    stack = [
        keras.layers.LSTM(nodes[0],
                          input_shape=(timesteps, n_features),
                          return_sequences=True),
        keras.layers.LSTM(nodes[1]),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(10),
    ]
    model = keras.Sequential(stack)
    model.compile(optimizer=optimizer, loss="mse")
    return model

def _scale_split(split, close_scaler, feature_scaler, fit=False):
    """Return a scaled copy of ``split``; fit the scalers first when ``fit``."""
    scaled = split.copy()
    is_feature = ~scaled.columns.isin(['Close'])
    close_fn = close_scaler.fit_transform if fit else close_scaler.transform
    feature_fn = feature_scaler.fit_transform if fit else feature_scaler.transform
    scaled.loc[:, ['Close']] = close_fn(scaled[['Close']])
    scaled.loc[:, is_feature] = feature_fn(scaled.loc[:, is_feature])
    return scaled


def train_model(pair, batch_size, window_size, time, nodes_arr, optimizer, dropout, epochs=10):
    """Load one currency pair, train the LSTM on it and evaluate on the test split.

    Args:
        pair: Pair name used to build the processed CSV path.
        batch_size: Training batch size; also used to align the split sizes.
        window_size: Number of look-back rows per input sample.
        time: Unused; kept so existing callers keep working.
        nodes_arr: LSTM layer widths forwarded to ``create_model``.
        optimizer: Optimizer forwarded to ``create_model``.
        dropout: Dropout rate forwarded to ``create_model``.
        epochs: Number of training epochs (default 10, the former constant).

    Returns:
        Whatever ``test_model`` returns for the trained model.
    """
    series = pd.read_csv("../data/processed/{}_processed.csv".format(pair))

    # Drop leading rows so the row count is a multiple of batch_size.
    series = series[series.shape[0] % batch_size:]
    close = series[['Real Close']]

    # Select with a list, not a set: a set has no stable order, so the feature
    # order could change between runs, and pandas 2.x rejects set indexers.
    # Selecting these columns also leaves out 'Time' and 'Real Close'.
    feature_columns = ['Close', 'EMA_10', 'EMA_100', 'RSI']
    series = series[feature_columns]

    df_train, df_val, df_test = create_split(series, 0.8, 0.2, batch_size, window_size)
    print(f'df_train.shape {df_train.shape}, df_validation.shape {df_val.shape}, df_test.shape {df_test.shape}')

    # Fit the scalers on the training split only, then reuse them for the
    # validation and test splits so no statistics leak from held-out data.
    closeScaler = MinMaxScaler()
    featureScaler = MinMaxScaler()
    df_train = _scale_split(df_train, closeScaler, featureScaler, fit=True)
    df_val = _scale_split(df_val, closeScaler, featureScaler)
    df_test = _scale_split(df_test, closeScaler, featureScaler)

    X_train, y_train = create_dataset(df_train, window_size)
    X_val, y_val = create_dataset(df_val, window_size)
    X_test, y_test = create_dataset(df_test, window_size)

    model = create_model(nodes_arr, optimizer, dropout, X_train)

    log_dir = "logs/tuning/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, update_freq='epoch', profile_batch=0, histogram_freq=1)

    # shuffle=False keeps the samples in chronological order during training.
    model.fit(X_train, y_train,
              validation_data=(X_val, y_val),
              epochs=epochs,
              batch_size=batch_size,
              shuffle=False,
              callbacks=[tensorboard_callback])

    return test_model(X_test, y_test, close, model, closeScaler, window_size)

def test_model(X_test, y_test, close_df, model, closeScaler, window_size):
    y_pred = model.predict(X_test)
    mse = model.evaluate(X_test, y_test)
    print("Test Mean Squared Error:", mse)

    #print(pd.DataFrame(list(zip(y_pred[:5], y_test[:5])), columns=["Prediction", 'Actual']))
    print(y_pred[:1])
    print(y_test[:1])

    #index = [i for i in range(y_pred.shape[0])]
    #df_predicted = pd.DataFrame(closeScaler.inverse_transform(y_pred), columns=['Close'], index=index)
    #df_actual = pd.DataFrame(closeScaler.inverse_transform(y_test.reshape(-1, 1)), columns=['Close'], index=index)

    #df = pd.DataFrame(close_df['Real Close'][-y_pred.shape[0] - window_size:-window_size])
    #df.reset_index(inplace=True, drop=True)

    #df_predicted['Close'] = df['Real Close'].mul(np.exp(df_predicted['Close'].shift(-1))).shift(1)
    #df_actual['Close'] = df['Real Close'].mul(np.exp(df_actual['Close'].shift(-1))).shift(1)

In [3]:
# Seed TensorFlow so weight initialisation and dropout masks are comparable
# between tuning runs.
SEED = 42
tf.random.set_seed(SEED)

# Hyper-parameters for this tuning run.
batch_size = 32
window_size = 1
time = 15  # forwarded to train_model, which does not use it
nodes = [80, 64]
dropout = 0.2
optimizer = keras.optimizers.Adam(learning_rate=0.0005)

train_model("EURUSD", batch_size, window_size, time, nodes, optimizer, dropout)

df_train.shape (47776, 4), df_validation.shape (11969, 4), df_test.shape (14945, 4)
Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

In [None]:
tensorboard --logdir logs/tuning

In [None]:
# Load the processed EURUSD table for the step-by-step walkthrough below.
series = pd.read_csv("../data/processed/EURUSD_processed.csv")

In [None]:
# Row and column counts of the loaded table.
series.shape

In [None]:
# "Close" column plotted against the frame's row index.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_xlabel("Date")
ax.set_ylabel("Closing Price")
ax.plot(series.index, series["Close"])

In [None]:
# "USD_GDP" column plotted against the frame's row index.
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_xlabel("Date")
ax.set_ylabel("USD")
ax.plot(series.index, series["USD_GDP"])

In [None]:
batch_size = 32

# NOTE: this cell overwrites `series`; re-run the load cell before re-running it.
# Drop leading rows so the row count is a multiple of batch_size.
series = series[series.shape[0] % batch_size:]
close = series[['Real Close']]

# Select with a list, not a set: a set has no stable order, so the column
# order could differ between runs, and pandas 2.x rejects set indexers.
# Selecting these columns also leaves out 'Time' and 'Real Close'.
feature_columns = [
    'Close', 'EMA_10', 'EMA_100', 'RSI', 'A/D Index',
    'USD_PPI', 'EUR_PPI', 'USD Interest Rate', 'EUR Interest Rate',
    'EUR Twitter Sentiment', 'USD Twitter Sentiment', 'USD_CPI', 'EUR_CPI',
]
series = series[feature_columns]
series.head()

In [None]:
window_size = batch_size * 15
# create_split requires batch_size and window_size; calling it with only the
# two fractions raises a TypeError.
df_train, df_val, df_test = create_split(series, 0.8, 0.2, batch_size, window_size)
print(f'df_train.shape {df_train.shape}, df_validation.shape {df_val.shape}, df_test.shape {df_test.shape}')

In [None]:
# Inspect the validation split (still unscaled at this point).
df_val

In [None]:
# Separate scalers for the target column and for the remaining features.
closeScaler = MinMaxScaler()
featureScaler = MinMaxScaler()

# Work on copies so the slices of `series` are not modified.
df_train, df_val, df_test = df_train.copy(), df_val.copy(), df_test.copy()
feature_cols = ~df_train.columns.isin(['Close'])

# Fit on the training split only ...
df_train.loc[:, ['Close']] = closeScaler.fit_transform(df_train[['Close']])
df_train.loc[:, feature_cols] = featureScaler.fit_transform(df_train.loc[:, feature_cols])

# ... and apply the fitted scalers to the validation and test splits.
for split in (df_val, df_test):
    split.loc[:, ['Close']] = closeScaler.transform(split[['Close']])
    split.loc[:, feature_cols] = featureScaler.transform(split.loc[:, feature_cols])

In [None]:
# Inspect the training split after scaling.
df_train

In [None]:
# First 1000 rows of Close and the two EMAs, one panel each.
fig, (ax1, ax2, ax3) = plt.subplots(3, figsize=(24, 18))
ax1.set_xlabel('Time')
ax1.set_ylabel('Close')
ax2.set_xlabel('Time')
ax2.set_ylabel('EMA_100')
ax3.set_xlabel('Time')
ax3.set_ylabel('EMA_10')
ax1.plot(series['Close'][:1000])
ax2.plot(series['EMA_100'][:1000])
ax3.plot(series['EMA_10'][:1000])

In [None]:
# Turn each scaled split into (input window, target) arrays.
# NOTE(review): create_dataset emits 10 future Close values per sample, while
# the model built in a later cell ends in Dense(1) — confirm the intended
# target width.
X_train, y_train = create_dataset(df_train, window_size)
X_val, y_val = create_dataset(df_val, window_size)
X_test, y_test = create_dataset(df_test, window_size)

In [None]:
# Shape of the training inputs produced by create_dataset.
X_train.shape

In [None]:
# Two stacked ReLU LSTMs, dropout, and a single-unit regression head.
# (A Bidirectional LSTM(64) first layer was tried here and left disabled.)
input_shape = (X_train.shape[1], X_train.shape[2])
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', input_shape=input_shape, return_sequences=True),
    keras.layers.LSTM(32, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1),
])
model.compile(optimizer='Adam', loss="mse", metrics=['mae'])

# Each run logs to its own timestamped TensorBoard directory.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, update_freq='epoch', profile_batch=0)



In [None]:
# Train for 5 epochs; shuffle=False keeps the samples in their original order.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    shuffle=False,
    validation_data=(X_val, y_val),
    callbacks=[tensorboard_callback],
)

In [None]:
# Training vs. validation loss per epoch.
fig, ax1 = plt.subplots(figsize=(16, 10))
ax1.set_title('Model Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
for key, label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    ax1.plot(history.history[key], label=label)
ax1.legend()

In [None]:
# Predictions of the trained model on the test inputs (still in scaled units).
y_pred = model.predict(X_test)

In [None]:
# Actual vs. predicted values over the whole test split.
fig, ax1 = plt.subplots(figsize=(16, 10))
ax1.set_title('Predicted Closing Price')
ax1.set_xlabel('Time')
ax1.set_ylabel('Close')
for values, label in ((y_test, 'Actual'), (y_pred, 'Prediction')):
    ax1.plot(values, label=label)
ax1.legend()

In [None]:
# Same comparison restricted to the samples from position 7400 onwards.
tail = slice(7400, None)
fig, ax1 = plt.subplots(figsize=(16, 10))
ax1.set_title('Predicted Closing Price')
ax1.set_xlabel('Time')
ax1.set_ylabel('Close')
for values, label in ((y_pred, 'Prediction'), (y_test, 'Actual')):
    ax1.plot(values[tail], label=label)
ax1.legend()

In [None]:
# Actual (top) and predicted (bottom) values for samples 6400-6599.
segment = slice(6400, 6600)
fig, (ax1, ax2) = plt.subplots(2, figsize=(16, 10))
ax1.set_title('Closing Price')
ax2.set_title('Predicted Closing Price')
ax2.plot(y_pred[segment])
ax1.plot(y_test[segment])

In [None]:
# Recompute the test predictions (same call as in the earlier prediction cell).
y_pred = model.predict(X_test)

In [None]:
# The model is compiled with loss="mse" and metrics=['mae'], so evaluate()
# returns [loss, mae]; unpack it so each number is printed under its own label.
mse, mae = model.evaluate(X_test, y_test)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

In [None]:
# Shape of the prediction array returned by model.predict.
y_pred.shape

In [None]:
# Map predictions and targets back to the unscaled Close range.
# NOTE(review): reshape(-1, 1) assumes one target per sample; if y_test holds
# several targets per row the frame length will not match `index` — confirm.
index = list(range(y_pred.shape[0]))
pred_unscaled = closeScaler.inverse_transform(y_pred)
actual_unscaled = closeScaler.inverse_transform(y_test.reshape(-1, 1))
df_predicted = pd.DataFrame(pred_unscaled, columns=['Close'], index=index)
df_actual = pd.DataFrame(actual_unscaled, columns=['Close'], index=index)
print(df_predicted)
print(df_actual)

In [None]:
# Actual vs. predicted Close after undoing the scaling.
fig, ax1 = plt.subplots(figsize=(16, 10))
ax1.set_title('Predicted Closing Price')
ax1.set_xlabel('Time')
ax1.set_ylabel('Close')
for frame, label in ((df_actual, 'Actual'), (df_predicted, 'Prediction')):
    ax1.plot(frame['Close'], label=label)
ax1.legend()

In [None]:
# Real closing prices for a slice of y_pred.shape[0] rows ending window_size
# rows before the end, re-indexed from 0.
start = -y_pred.shape[0] - window_size
stop = -window_size
df = pd.DataFrame(close['Real Close'][start:stop]).reset_index(drop=True)
df

In [None]:
# Total number of windowed samples across the three splits.
X_test.shape[0] + X_train.shape[0] + X_val.shape[0]

In [None]:
# Multiply each real close by exp(next Close value), then shift the result one
# row forward. Presumably 'Close' is a log return here — TODO confirm.
# NOTE: overwrites the 'Close' columns, so re-running this cell compounds the
# transformation.
pred_factor = np.exp(df_predicted['Close'].shift(-1))
actual_factor = np.exp(df_actual['Close'].shift(-1))
df_predicted['Close'] = (df['Real Close'] * pred_factor).shift(1)
df_actual['Close'] = (df['Real Close'] * actual_factor).shift(1)

In [None]:
# Print the rebuilt actual and predicted Close series for a quick comparison.
print(df_actual['Close'])
print(df_predicted['Close'])

In [None]:
# First ten rebuilt prices: prediction against actual.
fig, ax1 = plt.subplots(figsize=(16, 10))
ax1.set_title('Predicted Closing Price')
ax1.set_xlabel('Time')
ax1.set_ylabel('Close')
for frame, label in ((df_predicted, 'Prediction'), (df_actual, 'Actual')):
    ax1.plot(frame['Close'][:10], label=label)
ax1.legend()