# Gold Predictor

In [1]:
# Initial imports
import os
import numpy as np
import pandas as pd
import alpaca_trade_api as tradeapi
from pathlib import Path
from dotenv import load_dotenv
%matplotlib inline

In [2]:
# Set the random seed for reproducibility
# Note: This is used for model prototyping, but it is good practice to comment this out and run multiple experiments to evaluate your model.
from numpy.random import seed

seed(1)
from tensorflow import random

random.set_seed(2)

### Data Loading

In this activity, we will use closing prices from different stocks to make predictions of future closing prices based on the temporal data of each stock.

In [3]:
alpaca_api_key = 'PK55DVY40BM8OTB4HSVX'
alpaca_secret_key = 'VDBV4ac8Cu1MiLfxYgKSh7zJ1H7u4ifXXtKNylW6'

api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version = "v2"
)

In [4]:
print(alpaca_api_key)

PK55DVY40BM8OTB4HSVX


In [5]:
# Set timeframe to '1D'
timeframe = "1D"

# Set start and end datetimes between now and 3 years ago.
start_date = pd.Timestamp("2018-09-11", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2021-09-11", tz="America/New_York").isoformat()

# Set the ticker information
tickers = ["BAR", "AAAU", "SGOL" , "TIP"]

# Get 3 year's worth of historical price data for Microsoft and Coca-Cola
raw_commod = api.get_barset(
    tickers,
    timeframe,
    start=start_date,
    end=end_date,
    limit=1000,
).df

# Display sample data
raw_commod.head()

Unnamed: 0_level_0,AAAU,AAAU,AAAU,AAAU,AAAU,BAR,BAR,BAR,BAR,BAR,SGOL,SGOL,SGOL,SGOL,SGOL,TIP,TIP,TIP,TIP,TIP
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2018-09-11 00:00:00-04:00,11.9,11.9579,11.8765,11.955,7772.0,118.79,119.5066,118.61,119.4,52674.0,114.92,115.6301,114.7046,115.5,31833,111.08,111.13,110.97,111.025,1329289.0
2018-09-12 00:00:00-04:00,12.03,12.073,12.02,12.073,1554.0,119.29,120.59,119.29,120.32,36876.0,115.39,116.67,115.33,116.4,59121,111.19,111.235,111.11,111.13,826503.0
2018-09-13 00:00:00-04:00,12.12,12.12,12.01,12.01,7313.0,120.94,120.94,119.7933,119.94,20380.0,116.99,117.04,115.9,116.04,17783,111.12,111.12,110.96,110.96,951833.0
2018-09-14 00:00:00-04:00,12.01,12.01,11.93,11.93,14957.0,119.95,120.03,119.1,119.15,27025.0,116.06,116.12,115.28,115.2828,34874,110.78,110.91,110.72,110.78,1029880.0
2018-09-17 00:00:00-04:00,12.01,12.04,12.0,12.007,13757.0,119.82,120.23,119.7699,119.7699,19442.0,115.88,116.2865,115.84,115.88,39543,110.71,110.84,110.69,110.79,1040555.0


In [6]:
commod_data = pd.DataFrame()

for c in raw_commod.columns:
    if c[1] in "close":
        commod_data[c[0]] = raw_commod[c[0]][c[1]]

In [7]:
commod_data.index= commod_data.index.date
commod_data= commod_data.dropna()
commod_data

Unnamed: 0,AAAU,BAR,SGOL,TIP
2018-09-11,11.955,119.4000,115.5000,111.025
2018-09-12,12.073,120.3200,116.4000,111.130
2018-09-13,12.010,119.9400,116.0400,110.960
2018-09-14,11.930,119.1500,115.2828,110.780
2018-09-17,12.007,119.7699,115.8800,110.790
...,...,...,...,...
2021-09-03,18.190,18.1500,17.5600,128.620
2021-09-07,17.830,17.8000,17.2100,128.330
2021-09-08,17.780,17.7650,17.1750,128.830
2021-09-09,17.860,17.8300,17.2410,129.510


In [8]:
def window_data(commod_data, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(commod_data) - window):
        features = commod_data.iloc[i : (i + window), feature_col_number]
        target = commod_data.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [9]:
# Creating the features (X) and target (y) data using the window_data() function.
window_size = 5

feature_column = 2
target_column = 2
X, y = window_data(commod_data, window_size, feature_column, target_column)
print (f"X sample values:\n{X[:5]} \n")
print (f"y sample values:\n{y[:5]}")

X sample values:
[[115.5    116.4    116.04   115.2828 115.88  ]
 [116.4    116.04   115.2828 115.88   115.6868]
 [116.04   115.2828 115.88   115.6868 116.1368]
 [115.2828 115.88   115.6868 116.1368 116.57  ]
 [115.88   115.6868 116.1368 116.57   115.76  ]] 

y sample values:
[[115.6868]
 [116.1368]
 [116.57  ]
 [115.76  ]
 [115.74  ]]


### Training

In [10]:
# Use 70% of the data for training and the remainder for testing
split = int(0.7 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
X_train

array([[115.5   , 116.4   , 116.04  , 115.2828, 115.88  ],
       [116.4   , 116.04  , 115.2828, 115.88  , 115.6868],
       [116.04  , 115.2828, 115.88  , 115.6868, 116.1368],
       ...,
       [ 18.22  ,  18.55  ,  18.495 ,  18.205 ,  18.26  ],
       [ 18.55  ,  18.495 ,  18.205 ,  18.26  ,  18.33  ],
       [ 18.495 ,  18.205 ,  18.26  ,  18.33  ,  18.265 ]])

In [11]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
X_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

# Fit the MinMaxScaler object with the training feature data X_train
X_scaler.fit(X_train)

# Scale the features training and testing sets
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

# Fit the MinMaxScaler object with the training target data y_train
y_scaler.fit(y_train)

# Scale the target training and testing sets
y_train = y_scaler.transform(y_train)
y_test = y_scaler.transform(y_test)

In [12]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
print (f"X_train sample values:\n{X_train[:5]} \n")
print (f"X_test sample values:\n{X_test[:5]}")

X_train sample values:
[[[0.7472576 ]
  [0.75388353]
  [0.75123316]
  [0.74565854]
  [0.75005522]]

 [[0.75388353]
  [0.75123316]
  [0.74565854]
  [0.75005522]
  [0.74863285]]

 [[0.75123316]
  [0.74565854]
  [0.75005522]
  [0.74863285]
  [0.75194581]]

 [[0.74565854]
  [0.75005522]
  [0.74863285]
  [0.75194581]
  [0.7551351 ]]

 [[0.75005522]
  [0.74863285]
  [0.75194581]
  [0.7551351 ]
  [0.74917176]]] 

X_test sample values:
[[[0.03095781]
  [0.03136273]
  [0.03187808]
  [0.03139954]
  [0.03147316]]

 [[0.03136273]
  [0.03187808]
  [0.03139954]
  [0.03147316]
  [0.03209895]]

 [[0.03187808]
  [0.03139954]
  [0.03147316]
  [0.03209895]
  [0.03318854]]

 [[0.03139954]
  [0.03147316]
  [0.03209895]
  [0.03318854]
  [0.03180446]]

 [[0.03147316]
  [0.03209895]
  [0.03318854]
  [0.03180446]
  [0.03165722]]]


### Build and Train the LSTM RNN

In [13]:
# Import required Keras modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [14]:
# Define the LSTM RNN model.
model = Sequential()

number_units = 5
dropout_fraction = 0.2

# Layer 1
model.add(LSTM(
    units=number_units,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1))
    )
model.add(Dropout(dropout_fraction))
# Layer 2
model.add(LSTM(units=number_units, return_sequences=True))
model.add(Dropout(dropout_fraction))
# Layer 3
model.add(LSTM(units=number_units))
model.add(Dropout(dropout_fraction))
# Output layer
model.add(Dense(1))

### Compiling the LSTM RNN Model

In [15]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error")

In [16]:
# Summarize the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 5, 5)              140       
_________________________________________________________________
dropout (Dropout)            (None, 5, 5)              0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 5, 5)              220       
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5)              0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 5)                 220       
_________________________________________________________________
dropout_2 (Dropout)          (None, 5)                 0         
_________________________________________________________________
dense (Dense)                (None, 1)                 6

### Training the Model

In [17]:
# Train the model
model.fit(X_train, y_train, epochs=10, shuffle=False, batch_size=1, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2319e785708>

### Model Performance

In [18]:
# Evaluate the model
model.evaluate(X_test, y_test)



0.0007592019974254072

### Making Predictions

In [19]:
# Make some predictions
predicted = model.predict(X_test)

In [20]:
# Recover the original prices instead of the scaled version
predicted_prices = y_scaler.inverse_transform(predicted)
real_prices = y_scaler.inverse_transform(y_test.reshape(-1, 1))

In [21]:
commod = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
    }, index = commod_data.index[-len(real_prices): ])
commod.head()

Unnamed: 0,Real,Predicted
2020-10-20,18.36,21.756998
2020-10-21,18.508,21.781689
2020-10-22,18.32,21.799635
2020-10-23,18.3,21.800856
2020-10-26,18.29,21.827835


### Prepare data for forcasting

In [22]:
commod[-10:]["Real"]

2021-08-27    17.470
2021-08-30    17.385
2021-08-31    17.420
2021-09-01    17.415
2021-09-02    17.380
2021-09-03    17.560
2021-09-07    17.210
2021-09-08    17.175
2021-09-09    17.241
2021-09-10    17.160
Name: Real, dtype: float64

In [23]:
x_future, _ = window_data(pd.DataFrame(commod[-10:]["Real"]), 5, 0, 0)
x_future = X_scaler.transform(x_future)
x_future = x_future.reshape((x_future.shape[0],x_future.shape[1], 1))

### Forcast commodity movement

In [24]:
last = model.predict(x_future)[-1:]
last = y_scaler.inverse_transform(last)

In [25]:
commod[-9:]["Real"].append(pd.Series(last[0]))

2021-08-30    17.385000
2021-08-31    17.420000
2021-09-01    17.415000
2021-09-02    17.380000
2021-09-03    17.560000
2021-09-07    17.210000
2021-09-08    17.175000
2021-09-09    17.241000
2021-09-10    17.160000
0             21.113001
dtype: float64

In [26]:
from datetime import timedelta

def forecast_lstm(model, 
                  data,  
                  x_scaler, 
                  y_scaler, 
                  num_of_obs = 10 ,
                  steps_ahead = 15, 
                  window_size = 5,
                  target_col="Real"
                 ):
    """
    This function requires window_data to be defined
    """
    for i in range(0, steps_ahead):
        x_future, _ = window_data(pd.DataFrame(data[-num_of_obs:][target_col]), window_size, 0, 0)
        x_future = X_scaler.transform(x_future)
        x_future = x_future.reshape((x_future.shape[0],x_future.shape[1], 1))
        last = model.predict(x_future)[-1:]
        last = y_scaler.inverse_transform(last)
        #print(last)
        new_data = data[-(num_of_obs-1):].copy()
        
        last_date = new_data.iloc[[-1]].index
        last_date = last_date + timedelta(days=1)
        last_date = pd.to_datetime(last_date[0])
        
        new_data = new_data.append(pd.DataFrame(index=[last_date]))
        new_data.iloc[-1][target_col] = last
        data = new_data
        #print(data)
        #break
    return data

In [27]:
x_future, _ = window_data(pd.DataFrame(commod[-10:]["Real"]), 5, 0, 0)

In [28]:
forecast_lstm(model, pd.DataFrame(commod["Real"]), X_scaler, y_scaler)

Unnamed: 0,Real
2021-09-16,22.749533
2021-09-17,23.956741
2021-09-18,24.057383
2021-09-19,24.301756
2021-09-20,24.736547
2021-09-21,25.291792
2021-09-22,25.875269
2021-09-23,26.365055
2021-09-24,26.586864
2021-09-25,26.918497


In [29]:
first15= forecast_lstm(model, pd.DataFrame(commod["Real"]), X_scaler, y_scaler)
