In [None]:
import math
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Download ten years of gold-futures (GC=F) quotes from Yahoo Finance.
commodity_data = yf.download('GC=F', start='2012-10-10', end='2022-10-10')

# Fail loudly here rather than with an obscure scaler/shape error several cells later.
if commodity_data.empty:
    raise RuntimeError(
        "yfinance returned no rows for 'GC=F'; check the network connection and the date range"
    )

# Some yfinance releases return (field, ticker) MultiIndex columns even for a single
# ticker. Keep only the field level so that commodity_data['Close'] is a plain Series
# and column filters by name keep working.
if isinstance(commodity_data.columns, pd.MultiIndex):
    commodity_data.columns = commodity_data.columns.get_level_values(0)

commodity_data.head()

In [None]:
# Report the number of missing values per column in our dataset ...
null_counts = commodity_data.isna().sum()
print(null_counts)

# ... then discard every row that has at least one missing value.
commodity_data.dropna(axis=0, how='any', inplace=True)

In [None]:
# Line chart of the closing price over the whole downloaded period.
fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title('Gold Price History (10-years)')
ax.plot(commodity_data['Close'])
ax.set_xlabel('Date')
ax.set_ylabel('Prices ($)')

In [None]:
close_prices = commodity_data['Close']
values = close_prices.values

# The first 80% of the observations (rounded up) form the training span.
training_data_len = math.ceil(len(values) * 0.8)

# Learn the min/max from the training span ONLY, then apply that mapping to the
# whole series. Fitting on all rows would leak the test period's price range into
# training. Training values land in [0, 1]; later values may fall slightly outside
# that interval, which is expected.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[:training_data_len].reshape(-1, 1))
scaled_data = scaler.transform(values.reshape(-1, 1))

train_data = scaled_data[0:training_data_len, :]

In [None]:
# x_train = [] #will be the inputs for training set
# y_train = [] #is the output for training set
# for i in range(60, len(train_data)):
#     x_train.append(train_data[i-60:i, 0])
#     y_train.append(train_data[i, 0])

# # x_train is a list of windows; each window holds 60 consecutive scaled closing prices.
# # y_train holds, for each window in x_train, the scaled closing price of the day right after it.
# x_train, y_train = np.array(x_train), np.array(y_train)
# x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# test_data = scaled_data[training_data_len-60: , : ]
# x_test = []
# y_test = values[training_data_len:]

# for i in range(60, len(test_data)):
#     x_test.append(test_data[i-60:i, 0])

# x_test = np.array(x_test)
# x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [None]:
# Each sample is `window_size` (14) consecutive scaled closes; its target is the
# scaled close that immediately follows the window.
window_size = 14

n_windows = len(scaled_data) - window_size
# Number of leading windows whose target index (start + window_size) still lies
# inside the training span; everything after that goes to the test set.
n_train_windows = max(training_data_len - window_size, 0)

windows = [scaled_data[start:start + window_size, 0] for start in range(n_windows)]
targets = [scaled_data[start + window_size, 0] for start in range(n_windows)]

# Chronological split, converted to numpy arrays.
x_train = np.array(windows[:n_train_windows])
y_train = np.array(targets[:n_train_windows])
x_test = np.array(windows[n_train_windows:])
y_test = np.array(targets[n_train_windows:])

# The LSTM expects (samples, timesteps, features); there is a single feature here.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
# Show the shape of every array produced by the windowing step.
for label, array in (
    ("x_train", x_train),
    ("y_train", y_train),
    ("x_test", x_test),
    ("y_test", y_test),
):
    print(f"{label} size and shape: {array.shape}")

In [None]:
# Seed the Python, NumPy and backend random generators so that weight
# initialisation and dropout start from the same state on every fresh run.
keras.utils.set_random_seed(42)

# Two stacked 100-unit LSTM layers, each followed by 20% dropout, then a small
# dense head that outputs a single value (the next scaled closing price).
model = keras.Sequential([
    # Explicit input layer: (timesteps, features) = (window length, 1).
    keras.Input(shape=(x_train.shape[1], 1)),
    layers.LSTM(100, return_sequences=True),   # emits the full sequence for the next LSTM
    layers.Dropout(0.2),
    layers.LSTM(100, return_sequences=False),  # emits only the final state
    layers.Dropout(0.2),
    layers.Dense(25),
    layers.Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')  # optimizer and loss
model.summary()

In [None]:
# Train for 10 epochs on the training windows, in mini-batches of 32 samples.
model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=32,
)

In [None]:
# Predict on the test windows and map the outputs back to price units.
predictions = scaler.inverse_transform(model.predict(x_test))

# Ground-truth prices for the test span, taken straight from the unscaled series.
# The test targets are exactly the observations from index `training_data_len`
# onward, so this equals inverse-scaling y_test. Unlike
# `y_test = scaler.inverse_transform(y_test)`, it does not feed y_test back into
# itself, so re-running this cell cannot rescale the targets a second time.
y_test = values[training_data_len:].reshape(-1, 1)

assert len(y_test) == len(predictions), (
    "test targets and predictions differ in length; "
    "re-run the windowing cell before this one"
)

In [None]:
# Scratch arrays: random 0/1 integers and random floats in [0, 1), both of shape (2, 3).
# No other cell in the visible notebook reads them.
sample_shape = (2, 3)
y_true = np.random.randint(low=0, high=2, size=sample_shape)
y_pred = np.random.random(size=sample_shape)
#print(np.array_equal(loss.numpy(), np.mean(np.square(y_test - predictions), axis=-1)))


In [None]:
# Split the closing-price frame at the train/test boundary and attach the model
# output to the test part.
data = commodity_data.filter(['Close'])
train = data.iloc[:training_data_len]
validation = data.iloc[training_data_len:].copy()
validation.loc[:, 'Predictions'] = predictions

# Training prices, actual test prices and predicted test prices on one chart.
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('Model')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price USD ($)')
ax.plot(train)
ax.plot(validation[['Close', 'Predictions']])
ax.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

In [None]:
# Import exactly the metric helpers used below instead of `import *`, so it is
# clear where each name comes from and nothing else in the notebook gets shadowed.
from metrics import MAE, MAPE, MSE, MSPE, R2, RMSE, RSE

# Make sure y_test is a numpy column vector of shape (n, 1), matching `predictions`.
y_test = np.asarray(y_test).reshape(-1, 1)

# NOTE(review): every helper is called as (predictions, y_test); confirm that this
# matches the parameter order defined in the local `metrics` module.
print(f'r2  : {R2(predictions, y_test):.3F}')
print(f'rse : {RSE(predictions, y_test):.3F}')
print(f'mae : {MAE(predictions, y_test):.3F}')
print(f'mse : {MSE(predictions, y_test):.3F}')
print(f'rmse: {RMSE(predictions, y_test):.3F}')
print(f'mape: {MAPE(predictions, y_test):.3F}')
print(f'mspe: {MSPE(predictions, y_test):.7F}')