In [33]:
import yfinance as yf
import pandas as pd
import pandas_datareader.data as web
from datetime import datetime

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

In [2]:

start = datetime(2010, 1, 1)
end = datetime(2025, 1, 1)

In [3]:
# Get historical market data for gold
gold_data = yf.download("GC=F", start="2010-01-01", end="2025-01-01", interval="1d")


  gold_data = yf.download("GC=F", start="2010-01-01", end="2025-01-01", interval="1d")
[*********************100%***********************]  1 of 1 completed


In [4]:
# Technical Indicators - add columns to dataframe
gold_data['SMA_20'] = gold_data['Close'].rolling(window=20).mean()

gold_data['EMA_20'] = gold_data['Close'].ewm(span=20, adjust=False).mean()

# RSI
def rsi(series, period=14):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

gold_data['RSI_14'] = rsi(gold_data['Close'], 14)

# MACD
ema_12 = gold_data['Close'].ewm(span=12, adjust=False).mean()
ema_26 = gold_data['Close'].ewm(span=26, adjust=False).mean()
gold_data['MACD'] = ema_12 - ema_26
gold_data['MACD_Signal'] = gold_data['MACD'].ewm(span=9, adjust=False).mean()

# Bollinger Bands
window = 20
gold_data['BB_Middle'] = gold_data['Close'].rolling(window=window).mean()
gold_data['BB_STD'] = gold_data['Close'].rolling(window=window).std()
gold_data['BB_Upper'] = gold_data['BB_Middle'] + (gold_data['BB_STD'] * 2)
gold_data['BB_Lower'] = gold_data['BB_Middle'] - (gold_data['BB_STD'] * 2)

# ATR
high_low = gold_data['High'] - gold_data['Low']
high_close = (gold_data['High'] - gold_data['Close'].shift()).abs()
low_close = (gold_data['Low'] - gold_data['Close'].shift()).abs()
true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
gold_data['ATR'] = true_range.rolling(window=14).mean()

# 3. Stochastic Oscillator (%K and %D)
low_min = gold_data['Low'].rolling(window=14).min()
high_max = gold_data['High'].rolling(window=14).max()
gold_data['%K'] = (gold_data['Close'] - low_min) / (high_max - low_min) * 100
gold_data['%D'] = gold_data['%K'].rolling(window=3).mean()

gold_data.dropna(inplace=True)


In [5]:
gold_data.tail()

Price,Close,High,Low,Open,Volume,SMA_20,EMA_20,RSI_14,MACD,MACD_Signal,BB_Middle,BB_STD,BB_Upper,BB_Lower,ATR,%K,%D
Ticker,GC=F,GC=F,GC=F,GC=F,GC=F,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
2024-12-24,2620.0,2620.0,2609.5,2613.0,35,2647.054993,2643.143816,44.979194,-9.144972,-3.899556,2647.054993,31.698883,2710.452758,2583.657227,34.842896,24.983464,25.203196
2024-12-26,2638.800049,2638.800049,2627.899902,2628.5,84,2647.979993,2642.730124,51.858616,-8.295974,-4.77884,2647.979993,31.142093,2710.264178,2585.695807,34.092896,37.376381,27.42251
2024-12-27,2617.199951,2617.699951,2616.399902,2617.699951,642,2646.844995,2640.298679,46.832424,-9.259349,-5.674941,2646.844995,31.857518,2710.560031,2583.129959,33.864328,23.137683,28.499176
2024-12-30,2606.100098,2626.899902,2597.0,2620.699951,794,2644.3,2637.041671,40.88658,-10.794069,-6.698767,2644.3,33.015634,2710.331269,2578.268731,32.692889,15.820704,25.444922
2024-12-31,2629.199951,2629.199951,2604.899902,2608.399902,401,2644.015002,2636.29484,39.073451,-10.030752,-7.365164,2644.015002,33.125467,2710.265937,2577.764068,31.771467,31.048035,23.335474


Macro Economics Variables

In [6]:
# USD Index (DXY)
dxy = yf.download("DX-Y.NYB", start='2010-01-01', end = "2025-01-01")

  dxy = yf.download("DX-Y.NYB", start='2010-01-01', end = "2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [7]:
# US Treasury Yields (Interest Rates)
tnx = yf.download("^TNX", start='2010-01-01', end="2025-01-01")

  tnx = yf.download("^TNX", start='2010-01-01', end="2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [8]:
# Inflation Data (CPI)
inflation = web.DataReader('CPIAUCSL', 'fred', start, end)

In [9]:
# Oil Prices WTI
oil = yf.download("CL=F", start='2010-01-01', end="2025-01-01")


  oil = yf.download("CL=F", start='2010-01-01', end="2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [10]:
# Silver Prices
silver_data = yf.download("SI=F", start='2010-01-01', end="2025-01-01")

  silver_data = yf.download("SI=F", start='2010-01-01', end="2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [11]:
inflation.tail()

Unnamed: 0_level_0,CPIAUCSL
DATE,Unnamed: 1_level_1
2024-09-01,314.851
2024-10-01,315.564
2024-11-01,316.449
2024-12-01,317.603
2025-01-01,319.086


In [12]:
# S&P 500 Index
sp500 = yf.download("^GSPC", start='2010-01-01', end="2025-01-01")


  sp500 = yf.download("^GSPC", start='2010-01-01', end="2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [13]:
# Gold ETF Holdings
gold_etf = yf.download("GLD", start='2010-01-01', end="2025-01-01")


  gold_etf = yf.download("GLD", start='2010-01-01', end="2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [14]:
gold_data['DXY_Close'] = dxy['Close']
gold_data['TNX_Close'] = tnx['Close']
gold_data['Oil_Close'] = oil['Close']
gold_data['SP500_Close'] = sp500['Close']
gold_data['Gold_ETF_Close'] = gold_etf['Close']
gold_data['CPIAUCSL'] = inflation['CPIAUCSL'].reindex(gold_data.index, method='ffill')
gold_data['Silver_Close'] = silver_data['Close']


In [15]:
gold_data.tail()

Price,Close,High,Low,Open,Volume,SMA_20,EMA_20,RSI_14,MACD,MACD_Signal,...,ATR,%K,%D,DXY_Close,TNX_Close,Oil_Close,SP500_Close,Gold_ETF_Close,CPIAUCSL,Silver_Close
Ticker,GC=F,GC=F,GC=F,GC=F,GC=F,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-12-24,2620.0,2620.0,2609.5,2613.0,35,2647.054993,2643.143816,44.979194,-9.144972,-3.899556,...,34.842896,24.983464,25.203196,108.260002,4.591,70.099998,6040.040039,241.440002,317.603,29.974001
2024-12-26,2638.800049,2638.800049,2627.899902,2628.5,84,2647.979993,2642.730124,51.858616,-8.295974,-4.77884,...,34.092896,37.376381,27.42251,108.129997,4.579,69.620003,6037.589844,243.070007,317.603,30.047001
2024-12-27,2617.199951,2617.699951,2616.399902,2617.699951,642,2646.844995,2640.298679,46.832424,-9.259349,-5.674941,...,33.864328,23.137683,28.499176,108.0,4.619,70.599998,5970.839844,241.399994,317.603,29.655001
2024-12-30,2606.100098,2626.899902,2597.0,2620.699951,794,2644.3,2637.041671,40.88658,-10.794069,-6.698767,...,32.692889,15.820704,25.444922,108.129997,4.545,70.989998,5906.939941,240.630005,317.603,29.106001
2024-12-31,2629.199951,2629.199951,2604.899902,2608.399902,401,2644.015002,2636.29484,39.073451,-10.030752,-7.365164,...,31.771467,31.048035,23.335474,108.489998,4.573,71.720001,5881.629883,242.130005,317.603,28.940001


In [23]:
print(gold_data.dtypes)

Close_GC=F        float64
High_GC=F         float64
Low_GC=F          float64
Open_GC=F         float64
Volume_GC=F         int64
SMA_20            float64
EMA_20            float64
RSI_14            float64
MACD              float64
MACD_Signal       float64
BB_Middle         float64
BB_STD            float64
BB_Upper          float64
BB_Lower          float64
ATR               float64
%K                float64
%D                float64
DXY_Close         float64
TNX_Close         float64
Oil_Close         float64
SP500_Close       float64
Gold_ETF_Close    float64
CPIAUCSL          float64
Silver_Close      float64
dtype: object


In [17]:
# Flatten MultiIndex Columns
if isinstance(gold_data.columns, pd.MultiIndex):
    gold_data = gold_data.copy()
    gold_data.columns = [
        (f"{lvl0}" if (lvl1 is None or lvl1 == '') else f"{lvl0}_{lvl1}")
        for lvl0, lvl1 in gold_data.columns
    ]

In [18]:
#Preprocessing Data
data = gold_data.copy().dropna()


target_col = 'Close_GC=F'
feature_col = data.columns # All the features including 'Close'
scalar = MinMaxScaler()
scaled = scalar.fit_transform(data[feature_col])

seq_length = 60 # Past 60 days
horizon = 1  # Predicting the next day


In [20]:
def make_sequencs(arr, target_index, seq_len=60, horizon=1):
    X, y = [], []
    
    for i in range(seq_len, len(arr)-horizon+1):
        X.append(arr[i-seq_len:i,:])
        y.append(arr[i + horizon -1, target_index])
    return np.array(X), np.array(y)

target_index = list(feature_col).index(target_col)
X, y = make_sequencs(scaled, target_index, seq_length, horizon)

# Train / Test Split (chromological)
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((2952, 60, 24), (2952,), (739, 60, 24), (739,))

Model 1: CNN -> LSTM

In [21]:
# CNN - LSTM

def build_cnn_lstm(input_shape):
    inp = layers.Input(shape=input_shape)
    
    #CNN
    x = layers.Conv1D(filters=64, kernel_size=3, padding='causal', activation='relu')(inp)
    x = layers.Conv1D(filters=64, kernel_size=3, padding='causal', activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2)(x)
    x = layers.LSTM(64, return_sequences=False)(x)
    x = layers.Dropout(0.3)(x)
    
    out = layers.Dense(1, activation='linear')(x)
    model = keras.Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

model_cnn_lstm = build_cnn_lstm(X_train.shape[1:])
model_cnn_lstm.summary()

2025-08-24 09:45:32.788748: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Model 2: LSTM -> CNN (use LSTM features across time then temporal co

In [24]:
# LSTM - CNN

def build_lstm_cnn(input_shape):
    
    inp = layers.Input(shape=input_shape)
    
    # LSTM
    x = layers.LSTM(units=64, return_sequences=True)(inp)
    x = layers.LSTM(units=64, return_sequences=True)(x)
    # 1D conv across the sequentce of LSTM hidden states
    x = layers.Conv1D(filters=64, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.3)(x)
    
    out = layers.Dense(1, activation='linear')(x)
    model = keras.Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    
    return model

model_lstm_cnn = build_lstm_cnn(X_train.shape[1:])
model_lstm_cnn.summary()

Model 3: Parallel CNN and LSTM branches

In [28]:
# CNN + LSTM Parallel

def build_parallel_cnn_lstm(input_shape):
    inp = layers.Input(shape=input_shape)
    # CNN Branch
    c = layers.Conv1D(64, 3, padding='causal', activation='relu')(inp)
    c = layers.Conv1D(64, 3, padding='causal', activation='relu')(c)
    c = layers.GlobalAveragePooling1D()(c)
    # LSTM Branch
    l = layers.LSTM(64, return_sequences=True)(inp)  # noqa: E741
    l = layers.LSTM(32)(l)  # noqa: E741
    # Merge (correct usage)
    x = layers.Concatenate()([c, l])
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(1, activation='linear')(x)
    model = keras.Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

model_parallel = build_parallel_cnn_lstm(X_train.shape[1:])
model_parallel.summary()
    
    

Training (shared settings)

In [29]:
callbacks = [
    keras.callbacks.EarlyStopping(patience=8, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(patience=4, factor=0.5, min_lr=1e-5)
]

batch_size = 64
epochs = 50

In [30]:
# CNN - LSTM Model Run
hist_cnn_lstm = model_cnn_lstm.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - loss: 0.0055 - mae: 0.0508 - val_loss: 6.8023e-04 - val_mae: 0.0217 - learning_rate: 0.0010
Epoch 2/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0012 - mae: 0.0255 - val_loss: 0.0016 - val_mae: 0.0362 - learning_rate: 0.0010
Epoch 3/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 9.1363e-04 - mae: 0.0221 - val_loss: 5.6289e-04 - val_mae: 0.0193 - learning_rate: 0.0010
Epoch 4/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 7.9025e-04 - mae: 0.0206 - val_loss: 0.0015 - val_mae: 0.0355 - learning_rate: 0.0010
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 7.7325e-04 - mae: 0.0203 - val_loss: 4.6577e-04 - val_mae: 0.0165 - learning_rate: 0.0010
Epoch 6/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 7.7207e-

In [31]:
# LSTM - CNN Model Run
hist_lstm_cnn = model_lstm_cnn.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=callbacks,
    verbose=1
)


Epoch 1/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - loss: 0.0060 - mae: 0.0530 - val_loss: 0.0028 - val_mae: 0.0450 - learning_rate: 0.0010
Epoch 2/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - loss: 0.0022 - mae: 0.0341 - val_loss: 0.0019 - val_mae: 0.0365 - learning_rate: 0.0010
Epoch 3/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - loss: 0.0018 - mae: 0.0316 - val_loss: 9.4807e-04 - val_mae: 0.0242 - learning_rate: 0.0010
Epoch 4/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0017 - mae: 0.0302 - val_loss: 0.0016 - val_mae: 0.0337 - learning_rate: 0.0010
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0016 - mae: 0.0286 - val_loss: 8.0973e-04 - val_mae: 0.0219 - learning_rate: 5.0000e-04
Epoch 6/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0014 - mae: 0.0272

In [32]:
#  CNN + LSTM Parallel Model Run
hist_cnn_lstm_parallel = model_parallel.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=callbacks,
    verbose=1
)


Epoch 1/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - loss: 0.0074 - mae: 0.0579 - val_loss: 0.0011 - val_mae: 0.0299 - learning_rate: 0.0010
Epoch 2/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 0.0017 - mae: 0.0303 - val_loss: 4.2669e-04 - val_mae: 0.0177 - learning_rate: 0.0010
Epoch 3/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.0015 - mae: 0.0276 - val_loss: 3.6812e-04 - val_mae: 0.0152 - learning_rate: 0.0010
Epoch 4/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - loss: 0.0014 - mae: 0.0267 - val_loss: 4.8927e-04 - val_mae: 0.0189 - learning_rate: 0.0010
Epoch 5/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 40ms/step - loss: 0.0013 - mae: 0.0248 - val_loss: 2.9914e-04 - val_mae: 0.0140 - learning_rate: 5.0000e-04
Epoch 6/50
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - loss: 0.0012 - mae

Evaluation

In [34]:
def evaluate_model(model, X_train, y_train, X_test, y_test, name):
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)
    
    rmse_train = math.sqrt(mean_squared_error(y_train, pred_train))
    rmse_test = math.sqrt(mean_squared_error(y_test, pred_test))
    mae_test = mean_absolute_error(y_test, pred_test)
    
    print(f'{name} - Train RMSE: {rmse_train:.4f}, Test RMSE: {rmse_test:.4f}, Test MAE: {mae_test:.4f}')
    

In [35]:
evaluate_model(model_cnn_lstm, X_train, y_train, X_test, y_test, "CNN-LSTM")
evaluate_model(model_lstm_cnn, X_train, y_train, X_test, y_test, "LSTM-CNN")
evaluate_model(model_parallel, X_train, y_train, X_test, y_test, "Parallel")

[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
CNN-LSTM - Train RMSE: 0.0144, Test RMSE: 0.1016, Test MAE: 0.0901
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
LSTM-CNN - Train RMSE: 0.0384, Test RMSE: 0.1291, Test MAE: 0.1097
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Parallel - Train RMSE: 0.0269, Test RMSE: 0.1294, Test MAE: 0.1170
