In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime, timedelta
import pandas as pd
import time
from dateutil.relativedelta import relativedelta

In [None]:
# Scrape the price-history table for gold futures (GC=F) from Yahoo Finance
# and store it as a CSV file.
driver = webdriver.Chrome()
output_path = "gold_max.csv"

try:
    driver.get("https://finance.yahoo.com/quote/GC%3DF/history/")
    wait = WebDriverWait(driver, 10)

    # NOTE(review): the class name in this XPath looks auto-generated and is
    # presumably the date-range control; it may need updating if the page changes.
    calendar_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[@class='label yf-1th5n0r']")))
    calendar_button.click()
    time.sleep(1)
    date_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Max']")))
    date_input.click()
    time.sleep(5)  # fixed pause before reading the table rows

    columns = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]

    rows = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//tbody//tr")))
    data = []
    for row in rows:
        row_data = [cell.text for cell in row.find_elements(By.TAG_NAME, "td")]
        # Keep only rows that carry every expected column; shorter rows would
        # otherwise be padded with missing values when the frame is built.
        if len(row_data) == len(columns):
            data.append(row_data)

    # Refuse to overwrite the output file with an empty table.
    if not data:
        raise RuntimeError("No complete price rows were found on the page.")

    df = pd.DataFrame(data, columns=columns)

    df.to_csv(output_path, index=False)
    # Report the path that was actually written.
    print(f"Data saved to '{output_path}' ({len(df)} rows).")
    print(df.head())

except Exception as e:
    print("An error occurred:", e)

finally:
    # Always close the browser, even when scraping failed.
    driver.quit()

In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso, QuantileRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score

In [None]:
# Load the price history from gold_max.csv into a DataFrame.
df = pd.read_csv("gold_max.csv")

In [None]:
def date_to_integer(date_str):
    """Convert a date such as "Jan 02, 2020" into a sortable integer.

    The result is encoded year-first (YYYYMMDD, e.g. 20200102) so that a
    later date always maps to a larger integer. A day-first encoding would
    order Dec 31, 1999 after Jan 01, 2000.

    Args:
        date_str: Date text in ``"%b %d, %Y"`` format.

    Returns:
        The date as an int in YYYYMMDD form, or None when the value is not a
        string or does not match the expected format (a message is printed).
    """
    try:
        date_obj = datetime.strptime(date_str, "%b %d, %Y")
    except (TypeError, ValueError) as e:
        # TypeError covers non-string inputs such as NaN or None.
        print(f"Error parsing date: {e}")
        return None
    # Built arithmetically so the encoding does not depend on strftime padding.
    return date_obj.year * 10000 + date_obj.month * 100 + date_obj.day

# Turn the textual date column into integers.
df['Date'] = df['Date'].apply(date_to_integer)

# Price/volume columns are handled as text first: a lone '-' becomes missing
# and thousands separators are removed before casting to float.
columns_to_convert = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
for column in columns_to_convert:
    _as_text = df[column].astype(str).replace('-', np.nan)
    df[column] = _as_text.str.replace(',', '').astype(float)

from sklearn.preprocessing import MinMaxScaler

# Rescale every column (features and target) with one MinMaxScaler.
# NOTE(review): the scaler is fitted on all rows before any train/test split,
# so statistics of the later test rows influence the scaling.
scaler = MinMaxScaler()
_scaled_values = scaler.fit_transform(df)
df = pd.DataFrame(_scaled_values, columns=df.columns).dropna()
print(df)

# Design matrix and regression target used by the model cells.
X = df[['Date', 'Open', 'High', 'Low', 'Adj Close']]
Y = df['Volume']

In [None]:
# Ordinary least-squares regression on a seeded 80/20 split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

model = LinearRegression()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
linearReg = [mae, mse, rmse]

In [None]:
# Lasso (L1-penalised) regression with alpha=0.1 on a seeded 80/20 split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

model = Lasso(alpha=0.1)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
lassoReg = [mae, mse, rmse]

In [None]:
# Ridge (L2-penalised) regression with alpha=0.1 on a seeded 80/20 split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

model = Ridge(alpha=0.1)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
ridgeReg = [mae, mse, rmse]

In [None]:
# Median (quantile=0.5) regression with alpha=0.1 on a seeded 80/20 split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

model = QuantileRegressor(quantile=0.5, alpha=0.1)
model.fit(X_train, Y_train)
Y_pred_quantile = model.predict(X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(Y_test, Y_pred_quantile)
mse = mean_squared_error(Y_test, Y_pred_quantile)
r2 = r2_score(Y_test, Y_pred_quantile)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
quantileReg = [mae, mse, rmse]

In [None]:
from xgboost import XGBRegressor

# Gradient-boosted trees trained with an absolute-error objective on a
# seeded 80/20 split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

model = XGBRegressor(
    objective='reg:absoluteerror',
    colsample_bytree=0.3,
    learning_rate=0.1,
    max_depth=5,
    alpha=10,
    n_estimators=100,
)
model.fit(X_train, Y_train)
Y_pred_xg = model.predict(X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(Y_test, Y_pred_xg)
mse = mean_squared_error(Y_test, Y_pred_xg)
r2 = r2_score(Y_test, Y_pred_xg)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
xgb = [mae, mse, rmse]

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Univariate ARIMA(5, 1, 0) on the Volume series with a sequential 80/20 split.
# NOTE(review): the split follows the row order of `df`; confirm the rows are
# in chronological order, otherwise the model is fitted on the wrong end of
# the series.
Y = df['Volume']

train_size = int(len(Y) * 0.8)
train, test = Y.iloc[:train_size], Y.iloc[train_size:]

model = ARIMA(train, order=(5, 1, 0))
model_fit = model.fit()

forecast = model_fit.forecast(steps=len(test))

# Score the forecast against the sequential hold-out it was produced for.
# Y_test must not be used here: it is the shuffled split from another cell.
mse = mean_squared_error(test, forecast)
mae = mean_absolute_error(test, forecast)
rmse = np.sqrt(mse)
r2 = r2_score(test, forecast)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R-squared Score:", r2)

# Kept as [MAE, MSE, RMSE] for the comparison table.
arima = [mae, mse, rmse]

In [None]:
from sklearn.linear_model import ElasticNet

# Elastic-Net regression (alpha=1.0, equal L1/L2 mix) on a seeded 80/20 split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

elastic_net_model = ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=42)
elastic_net_model.fit(X_train, Y_train)
Y_pred = elastic_net_model.predict(X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(Y_test, Y_pred)
mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
eNet = [mae, mse, rmse]

In [None]:
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Support-vector regression with an RBF kernel on a seeded 80/20 split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

svr_model = SVR(kernel='rbf', C=100, epsilon=0.5)
svr_model.fit(X_train, Y_train)
Y_pred_svm = svr_model.predict(X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(Y_test, Y_pred_svm)
mse = mean_squared_error(Y_test, Y_pred_svm)
r2 = r2_score(Y_test, Y_pred_svm)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
svr = [mae, mse, rmse]


In [None]:
from catboost import CatBoostRegressor

# CatBoost regressor on a seeded 80/20 split.
# The split is unpacked into the same names that are used for fitting and
# scoring below, so this cell does not rely on variables from other cells.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
model = CatBoostRegressor(iterations=1000, depth=6, learning_rate=0.1, silent=True)
model.fit(X_train, Y_train)

predictions_cat = model.predict(X_test)

mse = mean_squared_error(Y_test, predictions_cat)
mae = mean_absolute_error(Y_test, predictions_cat)
rmse = np.sqrt(mse)
r2 = r2_score(Y_test, predictions_cat)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R-squared Score:", r2)

# Kept as [MAE, MSE, RMSE] for the comparison table.
catB = [mae, mse, rmse]

In [None]:
from xgboost import XGBRegressor

# Fit the quantile and XGBoost regressors on one shared split, report each
# model's hold-out metrics, then pair their predictions as meta-features.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# --- Median (quantile=0.5) regression ---
model_quantile = QuantileRegressor(quantile=0.5, alpha=0.1)
model_quantile.fit(X_train, Y_train)
Y_pred_quantile = model_quantile.predict(X_test)

mae = mean_absolute_error(Y_test, Y_pred_quantile)
mse = mean_squared_error(Y_test, Y_pred_quantile)
r2 = r2_score(Y_test, Y_pred_quantile)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# --- XGBoost with an absolute-error objective ---
model_xg = XGBRegressor(
    objective='reg:absoluteerror',
    colsample_bytree=0.3,
    learning_rate=0.1,
    max_depth=5,
    alpha=10,
    n_estimators=100,
)
model_xg.fit(X_train, Y_train)
Y_pred_xg = model_xg.predict(X_test)

mae = mean_absolute_error(Y_test, Y_pred_xg)
mse = mean_squared_error(Y_test, Y_pred_xg)
r2 = r2_score(Y_test, Y_pred_xg)
rmse = np.sqrt(mse)

for _label, _value in (
    ("\nMean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("R-squared Score:", r2),
):
    print(_label, _value)

# Two-column meta-feature matrix (one column per base model's test-set
# predictions) and the matching test-set target.
fused_X = np.column_stack([Y_pred_quantile, Y_pred_xg])
fused_Y = Y_test

In [None]:
# Small dense network trained on the fused base-model predictions.
from keras.models import Sequential
from keras.layers import Dense

fused_X_train, fused_X_test, fused_Y_train, fused_Y_test = train_test_split(
    fused_X, fused_Y, test_size=0.2, random_state=42
)

# Two hidden ReLU layers of width 4 followed by a single linear output unit.
model = Sequential()
model.add(Dense(4, activation='relu', input_dim=fused_X_train.shape[1]))
model.add(Dense(4, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mae')
model.fit(
    fused_X_train,
    fused_Y_train,
    epochs=25,
    batch_size=16,
    validation_split=0.2,
)

fused_predictions = model.predict(fused_X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(fused_Y_test, fused_predictions)
mse = mean_squared_error(fused_Y_test, fused_predictions)
r2 = r2_score(fused_Y_test, fused_predictions)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Final Fused Model MAE:", mae),
    ("Final Fused Model MSE:", mse),
    ("Final Fused Model RMSE:", rmse),
    ("Final Fused Model R-squared Score:", r2),
):
    print(_label, _value)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
import numpy as np

# Hyper-parameters read by tab_transformer_model.
num_transformer_blocks = 3   # number of stacked attention + feed-forward blocks
num_heads = 8                # attention heads per block
feed_forward_dim = 64        # hidden width of each feed-forward sub-layer

# Seeded 80/20 split of the fused meta-features.
fused_X_train, fused_X_test, fused_Y_train, fused_Y_test = train_test_split(
    fused_X, fused_Y, test_size=0.2, random_state=42
)

def tab_transformer_model(input_dim):
    """Build a transformer-style regression network for flat feature vectors.

    The input vector is treated as a sequence of length 1 and passed through
    ``num_transformer_blocks`` blocks, each consisting of multi-head
    self-attention and a two-layer feed-forward network, both wrapped in a
    residual connection and layer normalisation. The result is flattened and
    mapped to a single output through a 16-unit ReLU layer.

    Reads the module-level settings ``num_transformer_blocks``, ``num_heads``
    and ``feed_forward_dim``.

    Args:
        input_dim: Number of features in each input row.

    Returns:
        An uncompiled ``keras.Model`` mapping ``(batch, input_dim)`` inputs to
        ``(batch, 1)`` outputs.
    """
    inputs = layers.Input(shape=(input_dim,))
    # Add the length-1 sequence axis with a Keras layer. Calling a raw
    # TensorFlow op such as tf.expand_dims on a symbolic Keras input is
    # rejected by Keras 3, whereas Reshape works in both Keras 2 and Keras 3.
    x = layers.Reshape((1, input_dim))(inputs)

    for _ in range(num_transformer_blocks):
        # Self-attention: the same tensor is passed as query and value.
        attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=input_dim)(x, x)
        attn_output = layers.Add()([x, attn_output])
        attn_output = layers.LayerNormalization()(attn_output)

        # Feed Forward Network (FFN); projects back to input_dim so the
        # residual addition below has matching shapes.
        ffn_output = layers.Dense(feed_forward_dim, activation='relu')(attn_output)
        ffn_output = layers.Dense(input_dim)(ffn_output)

        x = layers.Add()([attn_output, ffn_output])
        x = layers.LayerNormalization()(x)

    # Regression head.
    x = layers.Flatten()(x)
    x = layers.Dense(16, activation='relu')(x)
    x = layers.Dense(1)(x)

    model = keras.Model(inputs, x)
    return model

# Train the transformer-style meta-model on the fused features and score it.
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

model = tab_transformer_model(fused_X_train.shape[1])
model.compile(optimizer='adam', loss='mae')
model.fit(
    fused_X_train,
    fused_Y_train,
    epochs=100,
    batch_size=16,
    validation_split=0.2,
)

fused_predictions = model.predict(fused_X_test)

# Hold-out metrics; RMSE is derived from MSE.
mae = mean_absolute_error(fused_Y_test, fused_predictions)
mse = mean_squared_error(fused_Y_test, fused_predictions)
r2 = r2_score(fused_Y_test, fused_predictions)
rmse = np.sqrt(mse)

for _label, _value in (
    ("Final TabTransformer Model MAE:", mae),
    ("Final TabTransformer Model MSE:", mse),
    ("Final TabTransformer Model RMSE:", rmse),
    ("Final TabTransformer Model R-squared Score:", r2),
):
    print(_label, _value)

# Kept as [MAE, MSE, RMSE] for the comparison table.
trans = [mae, mse, rmse]

In [None]:
# Gather each model's [MAE, MSE, RMSE] triple into one comparison table.
model_names = [
    "ARIMA",
    "Elastic-Net",
    "Support Vector Regression (SVR)",
    "CatBoost",
    "Multiple Linear Regression (MLR)",
    "Lasso Regression",
    "Ridge Regression",
    "Quantile Regression",
    "XGBoost Regressor",
    "TabTransformer Model"
]

# Same order as model_names.
predictions = [
    arima, eNet, svr, catB, linearReg,
    lassoReg, ridgeReg, quantileReg, xgb, trans
]

# zip() would silently drop entries if the two lists drifted apart.
assert len(model_names) == len(predictions), "model_names and predictions differ in length"

results = {
    "Model": [],
    "MAE": [],
    "MSE": [],
    "RMSE": []
}

for model_name, preds in zip(model_names, predictions):
    mae, mse, rmse = preds

    results["Model"].append(model_name)
    results["MAE"].append(mae)
    results["MSE"].append(mse)
    results["RMSE"].append(rmse)

results_df = pd.DataFrame(results)
metric_columns = ["MAE", "MSE", "RMSE"]
results_df[metric_columns] = results_df[metric_columns].astype(float).round(6)

# Pad a copy for the text printout only. results_df itself keeps clean model
# names and numeric metrics for later use. Column-wise Series.map is used
# because DataFrame.map is only available in recent pandas releases.
_printable = results_df.apply(lambda col: col.map(lambda x: f"{x:<33}"))
print(_printable.to_string(index=False, header=True, justify="left"))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart of MAE per model.
# The cast covers the case where the column holds formatted strings.
results_df["MAE"] = results_df["MAE"].astype(float)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x="Model", y="MAE", data=results_df, palette="Blues_d", ax=ax)

# Write each bar's value just above its top edge, centred horizontally.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f"{bar_height:.4f}",
        (bar_centre, bar_height),
        ha='center', va='bottom', fontsize=9, color='black',
    )

plt.xticks(rotation=90, ha="right")
ax.set_title("Performance Metrics of Different Models")
ax.set_xlabel("Model")
ax.set_ylabel("MAE")
ax.grid(axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart of MSE per model.
# The cast covers the case where the column holds formatted strings.
results_df["MSE"] = results_df["MSE"].astype(float)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x="Model", y="MSE", data=results_df, palette="Blues_d", ax=ax)

# Write each bar's value just above its top edge, centred horizontally.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f"{bar_height:.4f}",
        (bar_centre, bar_height),
        ha='center', va='bottom', fontsize=9, color='black',
    )

plt.xticks(rotation=90, ha="right")
ax.set_title("Performance Metrics of Different Models")
ax.set_xlabel("Model")
ax.set_ylabel("MSE")
ax.grid(axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart of RMSE per model.
# The cast covers the case where the column holds formatted strings.
results_df["RMSE"] = results_df["RMSE"].astype(float)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x="Model", y="RMSE", data=results_df, palette="Blues_d", ax=ax)

# Write each bar's value just above its top edge, centred horizontally.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f"{bar_height:.4f}",
        (bar_centre, bar_height),
        ha='center', va='bottom', fontsize=9, color='black',
    )

plt.xticks(rotation=90, ha="right")
ax.set_title("Performance Metrics of Different Models")
ax.set_xlabel("Model")
ax.set_ylabel("RMSE")
ax.grid(axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Bar chart of R² per model.
# NOTE(review): the scores below are literal constants, not the `r2` values
# computed by the model cells; confirm they match the actual runs.
# NOTE(review): this rebinds `results_df` to a two-column frame, so the
# MAE/MSE/RMSE columns are no longer available afterwards.
model_names = [
    "ARIMA",
    "Elastic-Net",
    "Support Vector Regression (SVR)",
    "CatBoost",
    "Multiple Linear Regression (MLR)",
    "Lasso Regression",
    "Ridge Regression",
    "Quantile Regression",
    "XGBoost Regressor",
    "TabTransformer Model"
]
r2_scores = [0.81, 0.85, 0.79, 0.82, 0.81, 0.85, 0.84, 0.87, 0.87, 0.89]

results_df = pd.DataFrame({"Model": model_names, "R2": r2_scores})
results_df["R2"] = results_df["R2"].astype(float)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x="Model", y="R2", data=results_df, palette="Blues_d", ax=ax)

# Write each bar's value just above its top edge, centred horizontally.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f"{bar_height:.4f}",
        (bar_centre, bar_height),
        ha='center', va='bottom', fontsize=9, color='black',
    )

plt.xticks(rotation=90, ha="right")
ax.set_title("R² Scores of Different Models")
ax.set_xlabel("Model")
ax.set_ylabel("R²")
ax.grid(axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()
