In [1]:
import yfinance as yf

import pandas as pd
import numpy as np
import tensorflow as tf

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from keras.models import Sequential
from keras.layers import Dense,LSTM
from keras.optimizers import Adam
from keras.losses import MSE

from sklearn.preprocessing import MinMaxScaler  
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

In [2]:
df = yf.download("GC=F")
df['Different'] = df['Adj Close'].shift(1) - df['Open']
df = df.fillna(0)
df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Different
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-08-30,273.899994,273.899994,273.899994,273.899994,273.899994,0,0.000000
2000-08-31,274.799988,278.299988,274.799988,278.299988,278.299988,0,-0.899994
2000-09-01,277.000000,277.000000,277.000000,277.000000,277.000000,0,1.299988
2000-09-05,275.799988,275.799988,275.799988,275.799988,275.799988,2,1.200012
2000-09-06,274.200012,274.200012,274.200012,274.200012,274.200012,0,1.599976
...,...,...,...,...,...,...,...
2023-10-05,1826.300049,1826.300049,1815.599976,1816.599976,1816.599976,393,-7.800049
2023-10-06,1819.000000,1831.800049,1809.400024,1830.199951,1830.199951,390,-2.400024
2023-10-09,1842.500000,1861.500000,1842.500000,1849.500000,1849.500000,232,-12.300049
2023-10-10,1857.000000,1861.400024,1852.800049,1861.000000,1861.000000,232,-7.500000


In [3]:
df.corr()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Different
Open,1.0,0.999893,0.999849,0.999776,0.999776,0.053995,-0.015087
High,0.999893,1.0,0.999796,0.999875,0.999875,0.05298,-0.015128
Low,0.999849,0.999796,1.0,0.999901,0.999901,0.054739,-0.01482
Close,0.999776,0.999875,0.999901,1.0,1.0,0.053707,-0.015394
Adj Close,0.999776,0.999875,0.999901,1.0,1.0,0.053707,-0.015394
Volume,0.053995,0.05298,0.054739,0.053707,0.053707,1.0,0.027789
Different,-0.015087,-0.015128,-0.01482,-0.015394,-0.015394,0.027789,1.0


In [4]:
df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Different
count,5799.0,5799.0,5799.0,5799.0,5799.0,5799.0,5799.0
mean,1099.807691,1105.267167,1094.014554,1099.707035,1099.707035,4286.390757,-0.376928
std,526.082911,529.00566,522.949048,526.011491,526.011491,24398.029181,6.728674
min,255.0,256.100006,255.0,255.100006,255.100006,0.0,-61.300049
25%,605.549988,608.200012,606.450012,605.899994,605.899994,20.0,-2.899902
50%,1216.0,1222.699951,1210.0,1217.400024,1217.400024,104.0,-0.100006
75%,1507.599976,1514.150024,1498.450012,1507.200012,1507.200012,393.0,2.0
max,2053.600098,2072.0,2040.0,2051.5,2051.5,386334.0,47.699951


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5799 entries, 2000-08-30 to 2023-10-11
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       5799 non-null   float64
 1   High       5799 non-null   float64
 2   Low        5799 non-null   float64
 3   Close      5799 non-null   float64
 4   Adj Close  5799 non-null   float64
 5   Volume     5799 non-null   int64  
 6   Different  5799 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 362.4 KB


In [6]:
fig = px.line(x=df.index,y=df['Open'],
              title='Gold Price History Data')
fig.update_layout(xaxis_title="Date",yaxis_title="Price Gold (USD/Oz.)")
fig.show()

In [7]:
fig = px.scatter_matrix(df,
                        dimensions=['Open','High','Low','Close','Volume'],
                        color='Volume')
fig.show()

In [8]:
fig = make_subplots(rows=1, cols=5)
fig.add_trace(
    go.Scatter(x=df.index, y=df['Open']),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(x=df.index, y=df['High']),
    row=1, col=2
)
fig.add_trace(
    go.Scatter(x=df.index, y=df['Low']),
    row=1, col=3
)
fig.add_trace(
    go.Scatter(x=df.index, y=df['Close']),
    row=1, col=4
)
fig.add_trace(
    go.Scatter(x=df.index, y=df['Volume']),
    row=1, col=5
)

In [9]:
fig = px.scatter(x=df.index, y=df['Different'],title="Different")
fig.update_layout(xaxis_title="Date",yaxis_title="Different open and close")
fig.show()

In [10]:
data = df.filter(['Open'])
data = data.shift(-1)
data = data.fillna(method="ffill")
data

Unnamed: 0_level_0,Open
Date,Unnamed: 1_level_1
2000-08-30,274.799988
2000-08-31,277.000000
2000-09-01,275.799988
2000-09-05,274.200012
2000-09-06,274.000000
...,...
2023-10-05,1819.000000
2023-10-06,1842.500000
2023-10-09,1857.000000
2023-10-10,1873.699951


In [11]:
fig = px.line(x=data.index ,y=data['Open'])
fig.update_layout(xaxis_title="Date",yaxis_title="MinMaxScaler")
fig.show()

In [12]:
dataset = data.values
dataset

array([[ 274.79998779],
       [ 277.        ],
       [ 275.79998779],
       ...,
       [1857.        ],
       [1873.69995117],
       [1873.69995117]])

In [13]:
scaler = MinMaxScaler(feature_range=(0,1))
scalar_price = scaler.fit_transform(dataset)
scalar_price

array([[0.01100855],
       [0.01223174],
       [0.01156454],
       ...,
       [0.89069271],
       [0.89997768],
       [0.89997768]])

In [14]:
fig = px.line(y=scalar_price[:,0] , title="Data Open")
fig.update_layout(xaxis_title="Count data",yaxis_title="Price Gold (USD/Oz.)")
fig.show()

In [15]:
def DataSeq(data,windows_size,mode):
    X = []
    y = []
    if mode == True:
        for i in range(windows_size, len(data)):
            X.append(data[i-windows_size:i, 0])
            if i <= windows_size + 1:
                print(X)
                print()
        return np.array(X)
    else:
        for i in range(windows_size, len(data)):
            X.append(data[i-windows_size:i, 0])
            y.append(data[i , 0])
            if i <= windows_size + 1:
                print(X)
                print(y)
                print()
        return np.array(X), np.array(y)


In [16]:
windows_size = 60

data_size = int(dataset.shape[0] * 0.80)
Train_data = scalar_price[0:int(data_size),:]
Test_data = scalar_price[ data_size - windows_size : , :]

In [17]:
fig = px.scatter(title="Split Data")
fig.add_scatter(x=data[:data_size].index,y =data[:data_size]['Open'], name='Train data')
fig.add_scatter(x=data[data_size-windows_size:].index,y =data[data_size-windows_size:]['Open'], name='Test data')
fig.update_layout(xaxis_title="Date",yaxis_title="Price Gold (USD/Oz.)")
fig.show()

In [18]:
X_train, y_train = DataSeq(data=Train_data,windows_size=windows_size,mode=False)

[array([0.01100855, 0.01223174, 0.01156454, 0.01067498, 0.01056377,
       0.01017457, 0.01006339, 0.00995218, 0.00989658, 0.00967419,
       0.00961858, 0.0091182 , 0.00939619, 0.00778383, 0.00850661,
       0.00934059, 0.01061937, 0.01050817, 0.01301011, 0.01250973,
       0.01089737, 0.00989658, 0.0095074 , 0.00861781, 0.00850661,
       0.00822862, 0.00783943, 0.00967419, 0.00972979, 0.01056377,
       0.01067498, 0.0091738 , 0.00895141, 0.00850661, 0.00839542,
       0.00900701, 0.00928501, 0.00822862, 0.00622707, 0.00611587,
       0.00583787, 0.00517068, 0.00528189, 0.00494829, 0.00522628,
       0.00533749, 0.00528189, 0.00517068, 0.00555988, 0.00600466,
       0.00528189, 0.00517068, 0.00550428, 0.00561548, 0.00600466,
       0.00567108, 0.00628266, 0.00594908, 0.00628266, 0.00839542])]
[0.008228615028017944]

[array([0.01100855, 0.01223174, 0.01156454, 0.01067498, 0.01056377,
       0.01017457, 0.01006339, 0.00995218, 0.00989658, 0.00967419,
       0.00961858, 0.0091182 , 0.0

In [19]:
y_test = dataset[data_size : , : ]
X_test = DataSeq(data=Test_data,windows_size=windows_size,mode=True)

[array([0.54670299, 0.55298561, 0.5508729 , 0.5505949 , 0.5495941 ,
       0.54859329, 0.54725898, 0.55220722, 0.55643278, 0.54942728,
       0.55893472, 0.55893472, 0.56393862, 0.56805287, 0.56627374,
       0.56877568, 0.57322359, 0.5736684 , 0.57778266, 0.5736684 ,
       0.57016565, 0.56955408, 0.57250078, 0.5761147 , 0.57678192,
       0.57550312, 0.57633711, 0.57633711, 0.57061047, 0.57127763,
       0.57083281, 0.56938726, 0.57956186, 0.58145224, 0.58556649,
       0.58678971, 0.59212718, 0.592294  , 0.59107079, 0.58717887,
       0.58795727, 0.5826198 , 0.5862893 , 0.58845767, 0.5865673 ,
       0.58745687, 0.58378736, 0.58856884, 0.59262758, 0.60447008,
       0.59840987, 0.5944623 , 0.59724224, 0.59396197, 0.59640826,
       0.59273875, 0.58779044, 0.57294559, 0.57377958, 0.57383516])]

[array([0.54670299, 0.55298561, 0.5508729 , 0.5505949 , 0.5495941 ,
       0.54859329, 0.54725898, 0.55220722, 0.55643278, 0.54942728,
       0.55893472, 0.55893472, 0.56393862, 0.56805287, 0.

In [20]:
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [21]:
print('DataSet Shape     : ', df.shape)
print('---------------------------')
print('Train Shape       : ', Train_data.shape)
print('Test  Shape       : ', Test_data.shape)
print('---------------------------')
print('X_train Shape     : ', X_train.shape)
print('y_train Shape     : ', y_train.shape)
print('X_test  Shape     : ', X_test.shape)
print('y_test  Shape     : ', y_test.shape)

DataSet Shape     :  (5799, 7)
---------------------------
Train Shape       :  (4639, 1)
Test  Shape       :  (1220, 1)
---------------------------
X_train Shape     :  (4579, 60, 1)
y_train Shape     :  (4579,)
X_test  Shape     :  (1160, 60, 1)
y_test  Shape     :  (1160, 1)


In [22]:
def Model(opt,loss,metrics):
    Model = Sequential()
    Model.add(LSTM(40,activation='relu', return_sequences=True, input_shape=(X_train.shape[1],1)))
    Model.add(LSTM(512, activation='relu', return_sequences=True))
    Model.add(LSTM(32, activation='relu', return_sequences=False))
    Model.add(Dense(16,activation='relu'))
    Model.add(Dense(1,activation='linear'))
    Model.compile(optimizer=opt, loss=loss,metrics=[metrics])
    Model.summary()
    
    return Model

In [23]:
auto_lr = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.00001, decay_rate=1e-6, decay_steps=1, staircase=False)

In [24]:
model = Model(opt=Adam(learning_rate=auto_lr),loss=MSE ,metrics='mae')
history = model.fit(X_train, y_train, validation_split=0.2, epochs=10)





Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 60, 40)            6720      
                                                                 
 lstm_1 (LSTM)               (None, 60, 512)           1132544   
                                                                 
 lstm_2 (LSTM)               (None, 32)                69760     
                                                                 
 dense (Dense)               (None, 16)                528       
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 1209569 (4.61 MB)
Trainable params: 1209569 (4.61 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2

In [25]:
fig = px.line(title='Loss and MAE')
fig.add_scatter(y=history.history['loss'], name='loss')
fig.add_scatter(y=history.history['val_loss'], name='val_loss')
fig.add_scatter(y=history.history['mae'], name='mae')
fig.add_scatter(y=history.history['val_mae'], name='val_mae')
fig.show()


In [26]:
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)



In [27]:
MAE = mean_absolute_error(y_test,predictions)
MAPE = mean_absolute_percentage_error(y_test,predictions)
Accuracy = 1-MAPE

In [28]:
print('MAE      : ', MAE)
print('MAPE     : ', MAPE)
print('Accuracy : ', Accuracy)

MAE      :  59.63784127070986
MAPE     :  0.03387501373961424
Accuracy :  0.9661249862603858


In [29]:
train = data[:data_size]
test = data[data_size:]

In [30]:
fig = px.scatter(title="Prediction vs Actual Data")
fig.add_scatter(x=test.index, y =test['Open'], name='Actual data')
fig.add_scatter(x=test.index, y=predictions[:,0], name='Predictions data')
fig.update_layout(xaxis_title="Date",yaxis_title="Price Gold (USD/Oz.)")
fig.show()

In [31]:
fig = px.scatter(title="Prediction vs Data")
fig.add_scatter(x=train.index, y=train['Open'], name='Train data')
fig.add_scatter(x=test.index, y =test['Open'], name='Actual data')
fig.add_scatter(x=test.index, y=predictions[:,0], name='Predictions data')
fig.update_layout(xaxis_title="Date",yaxis_title="Price Gold (USD/Oz.)")
fig.show()