In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import tensorflow as tf

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from keras.models import Sequential
from keras.layers import LSTM,Dense,Dropout
from keras.optimizers import Adam
from keras.losses import MSE

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score, mean_squared_error

In [2]:
df = yf.download("GC=F")
df['Different'] = df['Adj Close'].shift(1) - df['Open']
df = df.fillna(0)
df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Different
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-08-30,273.899994,273.899994,273.899994,273.899994,273.899994,0,0.000000
2000-08-31,274.799988,278.299988,274.799988,278.299988,278.299988,0,-0.899994
2000-09-01,277.000000,277.000000,277.000000,277.000000,277.000000,0,1.299988
2000-09-05,275.799988,275.799988,275.799988,275.799988,275.799988,2,1.200012
2000-09-06,274.200012,274.200012,274.200012,274.200012,274.200012,0,1.599976
...,...,...,...,...,...,...,...
2023-10-09,1842.500000,1861.500000,1842.500000,1849.500000,1849.500000,232,-12.300049
2023-10-10,1857.000000,1861.400024,1852.800049,1861.000000,1861.000000,236,-7.500000
2023-10-11,1869.500000,1874.599976,1869.099976,1872.800049,1872.800049,65,-8.500000
2023-10-12,1874.699951,1881.199951,1867.000000,1869.300049,1869.300049,65,-1.899902


In [3]:
df.corr()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Different
Open,1.0,0.999892,0.999849,0.999776,0.999776,0.055759,-0.015435
High,0.999892,1.0,0.999796,0.999875,0.999875,0.054904,-0.015504
Low,0.999849,0.999796,1.0,0.999901,0.999901,0.056527,-0.015173
Close,0.999776,0.999875,0.999901,1.0,1.0,0.055655,-0.015774
Adj Close,0.999776,0.999875,0.999901,1.0,1.0,0.055655,-0.015774
Volume,0.055759,0.054904,0.056527,0.055655,0.055655,1.0,0.024825
Different,-0.015435,-0.015504,-0.015173,-0.015774,-0.015774,0.024825,1.0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5801 entries, 2000-08-30 to 2023-10-13
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       5801 non-null   float64
 1   High       5801 non-null   float64
 2   Low        5801 non-null   float64
 3   Close      5801 non-null   float64
 4   Adj Close  5801 non-null   float64
 5   Volume     5801 non-null   int64  
 6   Different  5801 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 362.6 KB


In [5]:
fig = px.line(x=df.index,y=df['Open'],
              title='Gold Price History Data')
fig.update_layout(xaxis_title="Date",yaxis_title="Price Gold (USD/Oz.)")
fig.show()

In [6]:
fig = px.scatter(x=df.index, y=df['Different'],title="Different")
fig.update_layout(xaxis_title="Date",yaxis_title="Different open and close")
fig.show()

In [7]:
data = df.filter(['Open'])
data

Unnamed: 0_level_0,Open
Date,Unnamed: 1_level_1
2000-08-30,273.899994
2000-08-31,274.799988
2000-09-01,277.000000
2000-09-05,275.799988
2000-09-06,274.200012
...,...
2023-10-09,1842.500000
2023-10-10,1857.000000
2023-10-11,1869.500000
2023-10-12,1874.699951


In [8]:
dataset = data.values
dataset

array([[ 273.8999939 ],
       [ 274.79998779],
       [ 277.        ],
       ...,
       [1869.5       ],
       [1874.69995117],
       [1881.69995117]])

In [9]:
scaler = MinMaxScaler(feature_range=(0,1))
scalar_price = scaler.fit_transform(dataset)
scalar_price

array([[0.01050817],
       [0.01100855],
       [0.01223174],
       ...,
       [0.89764256],
       [0.90053367],
       [0.90442559]])

In [10]:
def DataSeq(data,windows_size):
    X = []
    y = []
    for i in range(windows_size, len(data)):
        X.append(data[i-windows_size:i, 0])
        y.append(data[i , 0])

    return np.array(X), np.array(y)


In [11]:
windows_size = 10

data_size  = int(dataset.shape[0] * 0.90)
Train_data = scalar_price[:data_size - windows_size]
Test_data  = scalar_price[ data_size - windows_size :]


In [12]:
fig = px.scatter(title="Split Data")
fig.add_scatter(x=data[:len(Train_data):].index,
                y =data[:len(Train_data):]['Open'],
                name='Train data')

fig.add_scatter(x=data[len(Train_data):].index,
                y =data[len(Train_data):]['Open'],
                name='Test data')

fig.update_layout(xaxis_title="Date",
                  yaxis_title="Price Gold (USD/Oz.)")
fig.show()

In [13]:
X_train, y_train = DataSeq(data=Train_data,windows_size=windows_size)

In [14]:
X_test, y_test= DataSeq(data=Test_data,windows_size=windows_size)

In [15]:
X_train = X_train.reshape(-1,windows_size,1)
X_test = X_test.reshape(-1,windows_size,1)

In [16]:
print('DataSet Shape     : ', df.shape)
print()
print('---------------------------')
print('Train Shape       : ', Train_data.shape)
print('Test  Shape       : ', Test_data.shape)
print()
print('---------------------------')
print('X_train Shape     : ', X_train.shape)
print('y_train Shape     : ', y_train.shape)
print('X_test  Shape     : ', X_test.shape)
print('y_test  Shape     : ', y_test.shape)

DataSet Shape     :  (5801, 7)

---------------------------
Train Shape       :  (5210, 1)
Test  Shape       :  (591, 1)

---------------------------
X_train Shape     :  (5200, 10, 1)
y_train Shape     :  (5200,)
X_test  Shape     :  (581, 10, 1)
y_test  Shape     :  (581,)


In [17]:
def Model(opt,loss,metrics):
    Model = Sequential()
    Model.add(LSTM(40,activation='relu', return_sequences=True, input_shape=(X_train.shape[1],1)))
    Model.add(LSTM(512, activation='relu', return_sequences=True))
    Model.add(LSTM(32, activation='relu', return_sequences=False))
    Model.add(Dense(16,activation='relu'))
    Model.add(Dense(1,activation='linear'))
    Model.compile(optimizer=opt, loss=loss,metrics=[metrics])
    Model.summary()

    return Model

In [18]:
model = Model(opt=Adam(learning_rate=0.00001),loss=MSE ,metrics='mae')
history = model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10)



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 40)            6720      
                                                                 
 lstm_1 (LSTM)               (None, 10, 512)           1132544   
                                                                 
 lstm_2 (LSTM)               (None, 32)                69760     
                                                                 
 dense (Dense)               (None, 16)                528       
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 1209569 (4.61 MB)
Trainable params: 1209569 (4.61 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2

In [19]:
fig = px.line(title='Loss and MAE')
fig.add_scatter(y=history.history['loss'], name='loss')
fig.add_scatter(y=history.history['val_loss'], name='val_loss')
fig.add_scatter(y=history.history['mae'], name='mae')
fig.add_scatter(y=history.history['val_mae'], name='val_mae')
fig.show()


In [20]:
predictions = model.predict(X_test)



In [21]:
print('MSE       : ', mean_squared_error(y_test,predictions))
print('MAE       : ', mean_absolute_error(y_test,predictions))
print('MAPE      : ', mean_absolute_percentage_error(y_test,predictions))
print('Accuracy  : ', 1 - mean_absolute_percentage_error(y_test,predictions))
print('R-squared : ', r2_score(y_test,predictions))

MSE       :  0.0004953418553644503
MAE       :  0.017764597243409445
MAPE      :  0.020173968860940222
Accuracy  :  0.9798260311390597
R-squared :  0.8177032379333844


In [22]:
predictions = scaler.inverse_transform(predictions)
train = data[:data_size]
test = data[data_size:]

In [24]:
fig = px.scatter(title="Prediction vs Actual Data")
fig.add_scatter(x=test.index, y =test['Open'], name='Actual data')
fig.add_scatter(x=test.index, y=predictions[:,0], name=f'Predictions data ({windows_size} day / 1 day)')
fig.update_layout(xaxis_title="Date",yaxis_title="Price Gold (USD/Oz.)")
fig.show()

In [25]:
fig = px.scatter(title="Prediction vs Data")
fig.add_scatter(x=train.index, y=train['Open'], name='Train data')
fig.add_scatter(x=test.index, y =test['Open'], name='Actual data')
fig.add_scatter(x=test.index, y=predictions[:,0], name='Predictions data')
fig.update_layout(xaxis_title="Date",yaxis_title="Price Gold (USD/Oz.)")
fig.show()