In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error


In [2]:
df = pd.read_csv("./gld_price_data.csv")

In [3]:
print("Dataset Preview:\n", df.head(10))
print("Dataset Info:\n", df.info())

Dataset Preview:
         Date          SPX        GLD        USO        SLV   EUR/USD
0   1/2/2008  1447.160034  84.860001  78.470001  15.180000  1.471692
1   1/3/2008  1447.160034  85.570000  78.370003  15.285000  1.474491
2   1/4/2008  1411.630005  85.129997  77.309998  15.167000  1.475492
3   1/7/2008  1416.180054  84.769997  75.500000  15.053000  1.468299
4   1/8/2008  1390.189941  86.779999  76.059998  15.590000  1.557099
5   1/9/2008  1409.130005  86.550003  75.250000  15.520000  1.466405
6  1/10/2008  1420.329956  88.250000  74.019997  16.061001  1.480100
7  1/11/2008  1401.020020  88.580002  73.089996  16.077000  1.479006
8  1/14/2008  1416.250000  89.540001  74.250000  16.280001  1.486900
9  1/15/2008  1380.949951  87.989998  72.779999  15.834000  1.480210
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2290 entries, 0 to 2289
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Date     2290 non-null   object 
 1 

In [4]:
df = df.drop(columns=["Date", "EUR/USD"])


In [5]:
X = df.drop(columns=["GLD"])
y = df["GLD"]


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
print(f"Training set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")

Training set size: 1603 samples
Test set size: 687 samples


In [8]:
rf_model = RandomForestRegressor(random_state=42, n_estimators=100)
svm_model = SVR(kernel='rbf')
mlr_model = LinearRegression()

In [9]:
rf_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)
mlr_model.fit(X_train, y_train)

In [10]:
rf_pred = rf_model.predict(X_test)
svm_pred = svm_model.predict(X_test)
mlr_pred = mlr_model.predict(X_test)

In [11]:
def evaluate_model(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    return {"Model": model_name, "RMSE": rmse, "MSE": mse, "MAE": mae, "R² Score": r2}

In [12]:
results = [
    evaluate_model(y_test, rf_pred, "Random Forest"),
    evaluate_model(y_test, svm_pred, "SVM"),
    evaluate_model(y_test, mlr_pred, "Multiple Linear Regression")
]


In [13]:
results_df = pd.DataFrame(results)
print(results_df)


                        Model       RMSE         MSE        MAE  R² Score
0               Random Forest   3.058973    9.357318   1.649560  0.983018
1                         SVM  21.855463  477.661274  16.205316  0.133121
2  Multiple Linear Regression   7.649669   58.517437   5.770271  0.893800
