In [None]:
## 1. Import Libraries and Check Device
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import warnings
import os
import joblib

# NOTE: this silences every warning for the rest of the notebook, including
# deprecation and numerical warnings from pandas / torch.
warnings.filterwarnings('ignore')

# Set random seeds
np.random.seed(42)
torch.manual_seed(42)

# Plot settings
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
plt.rcParams['figure.figsize'] = (14, 6)

# Check device
# Look the MPS backend up defensively: PyTorch builds that do not ship
# `torch.backends.mps` would otherwise raise AttributeError here instead of
# falling through to the CPU branch.
_mps_backend = getattr(torch.backends, 'mps', None)

if torch.cuda.is_available():
    device = torch.device('cuda')
    torch.cuda.manual_seed_all(42)
    print(f"✓ Running on CUDA GPU: {torch.cuda.get_device_name(0)}")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device('mps')
    print("✓ Running on Apple Silicon (MPS)")
else:
    device = torch.device('cpu')
    print("⚠ Running on CPU")

In [None]:
## 2. Load and Prepare Data
# Load dataset, parse DATE, order chronologically, index by date and conform
# the index to a business-day calendar in a single chained expression.
# NOTE(review): asfreq('B') inserts all-NaN rows for any business day missing
# from the CSV (e.g. market holidays) -- confirm that is intended, since the
# rolling features computed later propagate those NaNs.
df = (
    pd.read_csv('../data/gold_silver.csv')
    .assign(DATE=lambda raw: pd.to_datetime(raw['DATE']))
    .sort_values('DATE')
    .set_index('DATE')
    .asfreq('B')
)

# Log return r_t = ln(P_t) - ln(P_{t-1}); the first row is NaN.
df['GOLD_LOG_RETURN'] = np.log(df['GOLD_PRICE']).diff()

first_date, last_date = df.index.min(), df.index.max()
print(f"Dataset: {len(df)} observations")
print(f"Date range: {first_date} to {last_date}")

In [None]:
## 3. Feature Engineering (Technical Indicators Only)
# Create feature dataframe
# 1. Target variable (endogenous): seed the frame with the log-return column,
#    keeping the same date index as `df`. Indicator columns are appended to
#    this frame afterwards.
features_df = df[['GOLD_LOG_RETURN']].copy()

# 2. Technical Indicators (LAGGED by 1 period to avoid leakage)

# RSI (Relative Strength Index)
def calculate_rsi(prices, period=14):
    """Relative Strength Index using simple rolling means of gains and losses.

    Args:
        prices: pandas Series of prices.
        period: rolling window length (number of rows).

    Returns:
        Series aligned with ``prices``: ``100 - 100 / (1 + avg_gain / avg_loss)``.
        The first ``period - 1`` rows are NaN because the rolling window is
        not yet full.
    """
    change = prices.diff()
    # Changes that are not strictly positive (including the leading NaN)
    # contribute zero gain; changes that are not strictly negative contribute
    # zero loss.
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)
    avg_gain = upward.rolling(window=period).mean()
    avg_loss = downward.rolling(window=period).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

# Build each indicator on the raw series, then shift by one row so the value
# stored at date t only uses information available through t-1.
gold_close = df['GOLD_PRICE']
lagged_indicators = {
    # RSI with calculate_rsi's default period
    'RSI_LAGGED': calculate_rsi(gold_close),
    # Simple Moving Average (20-day)
    'SMA20_LAGGED': gold_close.rolling(20).mean(),
    # Volatility (20-day rolling std of log returns)
    'VOLATILITY_LAGGED': df['GOLD_LOG_RETURN'].rolling(20).std(),
}
for column_name, indicator in lagged_indicators.items():
    features_df[column_name] = indicator.shift(1)

# Drop rows with NaN values (warm-up rows of the rolling windows and any
# other row where a feature is missing).
features_df = features_df.dropna()

feature_names = features_df.columns.tolist()
print("✓ Features engineered (No GPRD)")
print(f"Features used: {feature_names}")
print(f"Final dataset size: {len(features_df)}")

In [None]:
## 4. Visualize Technical Indicators
fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)

# Price vs SMA
# Recompute the un-lagged SMA from the price series rather than applying a
# positional shift(-1) to features_df: features_df has had rows removed by
# dropna(), so a positional shift misaligns the curve wherever rows were
# dropped and always loses the final point.
sma20 = df['GOLD_PRICE'].rolling(20).mean().reindex(features_df.index)
axes[0].plot(df.index, df['GOLD_PRICE'], label='Gold Price', color='gold')
axes[0].plot(sma20.index, sma20, label='SMA 20', color='blue', linestyle='--')
axes[0].set_title('Gold Price vs SMA 20', fontweight='bold')
axes[0].legend()

# RSI (the series plotted is the 1-row-lagged feature fed to the model)
axes[1].plot(features_df.index, features_df['RSI_LAGGED'], label='RSI', color='purple')
axes[1].axhline(70, color='red', linestyle='--', alpha=0.5)
axes[1].axhline(30, color='green', linestyle='--', alpha=0.5)
axes[1].set_title('Relative Strength Index (RSI)', fontweight='bold')
axes[1].set_ylim(0, 100)
axes[1].legend()  # the label was set but never shown

# Volatility (1-row-lagged 20-day rolling std of log returns)
axes[2].plot(features_df.index, features_df['VOLATILITY_LAGGED'], label='Volatility (20d)', color='orange')
axes[2].set_title('Market Volatility', fontweight='bold')
axes[2].legend()  # the label was set but never shown

plt.tight_layout()
plt.show()

In [None]:
## 5. Define Multivariate LSTM Model
class MultivariateLSTMForecaster(nn.Module):
    """Two stacked LSTM layers followed by a small MLP head.

    The hidden state of the last time step of the second LSTM is mapped
    through ``Linear(hidden_size_2, 16) -> ReLU -> Linear(16, forecast_horizon)``
    so a single forward pass emits the whole multi-step forecast.

    Args:
        input_size: number of features per time step.
        hidden_size_1: hidden units of the first LSTM.
        hidden_size_2: hidden units of the second LSTM.
        forecast_horizon: number of output steps.
        dropout: probability used by the shared ``nn.Dropout`` module.
    """

    def __init__(self, input_size, hidden_size_1=64, hidden_size_2=32,
                 forecast_horizon=5, dropout=0.2):
        super().__init__()

        # nn.LSTM's own `dropout` argument only acts *between* stacked layers;
        # on a single-layer LSTM it does nothing and only emits a UserWarning.
        # Regularisation is provided by the explicit nn.Dropout applied after
        # each stage in forward(), so it is not passed to the LSTM.
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size_1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=hidden_size_1, hidden_size=hidden_size_2, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_size_2, 16)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(16, forecast_horizon)

    def forward(self, x):
        """Map a batch-first sequence tensor to ``(batch, forecast_horizon)``.

        ``x`` is indexed as ``(batch, time, feature)`` (both LSTMs are
        ``batch_first=True`` and the last time step is taken with
        ``out[:, -1, :]``).
        """
        out, _ = self.lstm1(x)
        out = self.dropout(out)
        out, _ = self.lstm2(out)
        out = self.dropout(out)
        # Only the final time step feeds the dense head.
        last_step = out[:, -1, :]
        x = self.fc1(last_step)
        x = self.relu(x)
        x = self.dropout(x)
        return self.fc2(x)

print("✓ Model class defined")

In [None]:
## 6. Prepare Sequences
def create_multivariate_sequences(data, target_col_idx, lookback, forecast_horizon):
    """Slice a 2-D array into sliding (input window, target window) pairs.

    For every start row ``s`` the input is ``data[s:s+lookback, :]`` (all
    columns) and the target is the next ``forecast_horizon`` rows of column
    ``target_col_idx``.

    Returns:
        ``(X, y)`` as NumPy arrays built from the lists of windows; both are
        empty arrays when ``data`` is too short to yield a single pair.
    """
    n_windows = len(data) - lookback - forecast_horizon + 1
    starts = range(n_windows)
    inputs = [data[s:s + lookback, :] for s in starts]
    targets = [
        data[s + lookback:s + lookback + forecast_horizon, target_col_idx]
        for s in starts
    ]
    return np.array(inputs), np.array(targets)

# Window configuration: `lookback` past rows are used to predict the next
# `forecast_horizon` values of the target column.
lookback, forecast_horizon = 20, 5
target_col_idx = 0  # GOLD_LOG_RETURN is first column
data_array = features_df.to_numpy()

X, y = create_multivariate_sequences(
    data_array,
    target_col_idx,
    lookback,
    forecast_horizon,
)
print(f"Input shape: {X.shape} (samples, lookback, features)")

In [None]:
## 7. Train/Test Split and Scaling
# Chronological 80/20 split over the sliding-window samples. `train_size` is
# kept as the boundary index because the walk-forward evaluation later starts
# at row `train_size + lookback` of the feature array.
train_size = int(len(X) * 0.8)

# Purge the last (forecast_horizon - 1) training samples: sample i has targets
# at rows i+lookback .. i+lookback+forecast_horizon-1, so without the purge the
# final training targets (and the target scaler fitted on them) would contain
# the first test-period values, i.e. look-ahead leakage.
train_end = max(train_size - (forecast_horizon - 1), 0)

X_train, X_test = X[:train_end], X[train_size:]
y_train, y_test = y[:train_end], y[train_size:]

n_features = X.shape[2]
scaler_X = MinMaxScaler(feature_range=(-1, 1))
scaler_y = MinMaxScaler(feature_range=(-1, 1))

# Reshape for scaling: the scaler works on 2-D (rows, features) input, so the
# windows are flattened to rows and restored to their 3-D shape afterwards.
X_train_reshaped = X_train.reshape(-1, n_features)
X_test_reshaped = X_test.reshape(-1, n_features)

# Both scalers are fitted on training data only and merely applied to test.
X_train_scaled = scaler_X.fit_transform(X_train_reshaped).reshape(X_train.shape)
X_test_scaled = scaler_X.transform(X_test_reshaped).reshape(X_test.shape)

y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# To Tensor (float32, moved to the selected device)
X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
y_train_tensor = torch.FloatTensor(y_train_scaled).to(device)
X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
y_test_tensor = torch.FloatTensor(y_test_scaled).to(device)

print("✓ Data scaled and converted to tensors")

In [None]:
## 8. Training Setup
# DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_size = int(len(train_dataset) * 0.2)
fit_size = len(train_dataset) - val_size

# Chronological hold-out instead of random_split: the samples are heavily
# overlapping sliding windows, so a random split puts near-duplicates of every
# validation window into the training set. The validation loss that drives
# early stopping and the LR scheduler would then be optimistically biased.
# A short purge keeps the last training targets from overlapping the first
# validation targets.
purge = max(forecast_horizon - 1, 0)
train_subset = torch.utils.data.Subset(
    train_dataset, list(range(max(fit_size - purge, 0)))
)
val_subset = torch.utils.data.Subset(
    train_dataset, list(range(fit_size, len(train_dataset)))
)

# Shuffling batches *within* the training portion is fine; order only matters
# for the split itself.
train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

# Model Init
model = MultivariateLSTMForecaster(
    input_size=n_features,
    hidden_size_1=64,
    hidden_size_2=32,
    forecast_horizon=forecast_horizon
).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate after 10 epochs without validation improvement.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

In [None]:
## 9. Training Loop
def train_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Gradients are clipped to a global L2 norm of 1.0 before every optimiser
    step. The returned value is the unweighted average of the per-batch
    losses (sum of batch losses divided by ``len(loader)``).
    """
    model.train()
    batch_losses = []
    for inputs, targets in loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

def validate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Puts the model in eval mode and returns the unweighted average of the
    per-batch losses (sum of batch losses divided by ``len(loader)``).
    """
    model.eval()
    running = 0
    with torch.no_grad():
        for inputs, targets in loader:
            running += criterion(model(inputs), targets).item()
    return running / len(loader)

print("Starting training...")
best_val_loss = float('inf')
patience = 20  # epochs without validation improvement before stopping
counter = 0    # epochs elapsed since the last improvement

model_dir = '../models/lstm-multivariate-technical'
os.makedirs(model_dir, exist_ok=True)

for epoch in range(100):
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    val_loss = validate(model, val_loader, criterion)
    # ReduceLROnPlateau needs the monitored metric on every step.
    scheduler.step(val_loss)

    if val_loss < best_val_loss:
        # New best: reset the patience counter and checkpoint the weights.
        best_val_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), f'{model_dir}/best_model.pt')
    else:
        counter += 1
        if counter >= patience:
            # Report the 1-based epoch so it matches the progress log below.
            print(f"Early stopping at epoch {epoch+1}")
            break

    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1} | Train: {train_loss:.6f} | Val: {val_loss:.6f}")

# Restore the best checkpoint. map_location keeps the load working when the
# file was written from a different device than the one in use now.
model.load_state_dict(torch.load(f'{model_dir}/best_model.pt', map_location=device))
print("✓ Training completed")

In [None]:
## 10. Walk-Forward Validation
def forward_fill_forecast(model, data_array, train_size, lookback, forecast_horizon,
                          scaler_X, scaler_y, device, target_col_idx=0):
    """Walk-forward evaluation over the hold-out part of ``data_array``.

    Starting at row ``train_size + lookback`` the function repeatedly takes
    the previous ``lookback`` rows as input, predicts the next
    ``forecast_horizon`` target values, and then advances by
    ``forecast_horizon`` rows so the forecast blocks do not overlap.

    Args:
        model: callable torch module mapping ``(1, lookback, n_features)`` to
            ``(1, forecast_horizon)``.
        data_array: 2-D array of unscaled features, one row per time step.
        train_size: number of training samples; defines where testing starts.
        lookback: input window length in rows.
        forecast_horizon: number of steps predicted per window.
        scaler_X: object exposing ``transform`` for the input features.
        scaler_y: object exposing ``inverse_transform`` for model outputs.
        device: torch device the input tensor is created on.
        target_col_idx: column of ``data_array`` holding the target
            (defaults to 0, the previously hard-coded column).

    Returns:
        ``(predictions, actuals)`` as flat NumPy arrays of equal length.
    """
    model.eval()
    # Derive the feature count from the data instead of relying on a global
    # defined in another cell.
    n_features = data_array.shape[1]
    predictions, actuals = [], []
    test_start = train_size + lookback
    last_start = len(data_array) - forecast_horizon + 1

    with torch.no_grad():
        for i in range(test_start, last_start, forecast_horizon):
            X_window = data_array[i - lookback:i, :]
            X_scaled = scaler_X.transform(X_window).reshape(1, lookback, n_features)
            X_tensor = torch.as_tensor(X_scaled, dtype=torch.float32, device=device)

            # Map the scaled model output back to the original target scale.
            y_pred = scaler_y.inverse_transform(model(X_tensor).cpu().numpy())[0]
            y_actual = data_array[i:i + forecast_horizon, target_col_idx]

            predictions.extend(y_pred)
            actuals.extend(y_actual)

    return np.array(predictions), np.array(actuals)

print("Performing walk-forward validation...")
# Predicted and realised log returns for the hold-out period, as flat arrays.
walk_forward_args = (
    model,
    data_array,
    train_size,
    lookback,
    forecast_horizon,
    scaler_X,
    scaler_y,
    device,
)
preds_log, actuals_log = forward_fill_forecast(*walk_forward_args)

In [None]:
## 11. Convert to Prices
# Turn forecast log returns back into price levels, one forecast block at a
# time. Each block is anchored on the last observed price before the block
# and compounds its own predictions forward.
predictions_price, actuals_price = [], []
test_start_idx = train_size + lookback
gold_prices = df.loc[features_df.index, 'GOLD_PRICE']

for block_start in range(0, len(preds_log), forecast_horizon):
    anchor_idx = test_start_idx + block_start
    # Last observed price before this forecast block opens.
    running_price = gold_prices.iloc[anchor_idx - 1]
    block_returns = preds_log[block_start:block_start + forecast_horizon]

    for step, log_ret in enumerate(block_returns):
        # P_t = P_{t-1} * exp(r_t), chained on the previous *predicted* price.
        running_price = running_price * np.exp(log_ret)
        predictions_price.append(running_price)
        actuals_price.append(gold_prices.iloc[anchor_idx + step])

predictions_price = np.array(predictions_price)
actuals_price = np.array(actuals_price)

In [None]:
## 12. Evaluation and Comparison
# Error metrics on reconstructed price levels (same units as GOLD_PRICE).
rmse = np.sqrt(mean_squared_error(actuals_price, predictions_price))
mae = mean_absolute_error(actuals_price, predictions_price)

print("="*60)
print("MODEL RESULTS: LSTM Technical (No GPRD)")
print("="*60)
print(f"RMSE: ${rmse:.2f}")
print(f"MAE:  ${mae:.2f}")

# Compare with GPRD model if available.
# Only the file read and column lookup are guarded, and only for the failures
# they can legitimately produce. A bare `except:` would also swallow
# KeyboardInterrupt and real bugs, and misreport them as "not found".
try:
    gprd_results = pd.read_csv('../models/lstm-multivariate-exogenous/results.csv')
    rmse_gprd = gprd_results['rmse'].values[0]
except FileNotFoundError:
    print("\n(GPRD model results not found for comparison)")
except (KeyError, IndexError, pd.errors.EmptyDataError) as exc:
    print(f"\n(GPRD model results could not be read for comparison: {exc!r})")
else:
    print("\nComparison with GPRD Model:")
    print(f"LSTM (Technical Only): ${rmse:.2f}")
    print(f"LSTM (with GPRD):      ${rmse_gprd:.2f}")

    # Positive diff means this (technical-only) model has the larger error.
    diff = rmse - rmse_gprd
    if diff > 0:
        print(f"-> GPRD model is better by ${diff:.2f} (RMSE)")
        print("Conclusion: Geopolitical risk ADDS predictive value.")
    else:
        print(f"-> Technical model is better by ${-diff:.2f} (RMSE)")
        print("Conclusion: GPRD does NOT add predictive value over technicals.")

In [None]:
## 13. Save Results
# Persist the headline metrics as a one-row CSV and both fitted scalers next
# to the model checkpoint directory.
feature_names = features_df.columns.tolist()
results = dict(
    model='LSTM_Multivariate_Technical_Only',
    rmse=rmse,
    mae=mae,
    features=', '.join(feature_names),
)
pd.DataFrame([results]).to_csv(f'{model_dir}/results.csv', index=False)

for artifact_name, fitted_scaler in (('scaler_X', scaler_X), ('scaler_y', scaler_y)):
    joblib.dump(fitted_scaler, f'{model_dir}/{artifact_name}.pkl')

print(f"\nResults saved to {model_dir}")