### RNN Prediction Model

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load the dataset
data_path = 'combined_ticks.csv'
df = pd.read_csv(data_path)
df.drop(columns=['Unnamed: 0','Iron_Close'], inplace=True)

# Fill null values with the average of previous and next rows
df.fillna((df.shift() + df.shift(-1)) / 2, inplace=True)

# List all features
features = df.columns.tolist()
print("Features:", features)

# Assume 'Gold_Close' is the target variable
target = 'Gold_Close'
X = df.drop(columns=[target]).values
y = df[target].values

# Normalize the features
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(df.isnull().sum())

Features: ['S&P500_Close', 'Crude Oil_Close', 'Silver_Close', 'Natural Gas_Close', 'NYSE Composite_Close', 'Chinese Yuan_Close', 'Candian Dollar_Close', 'Euro_Close', 'VIX_Close', 'Interest Rate 10y_Close', 'Interest Rate 30y_Close', 'Gold_Close']
S&P500_Close               0
Crude Oil_Close            0
Silver_Close               0
Natural Gas_Close          0
NYSE Composite_Close       0
Chinese Yuan_Close         0
Candian Dollar_Close       0
Euro_Close                 0
VIX_Close                  0
Interest Rate 10y_Close    0
Interest Rate 30y_Close    0
Gold_Close                 0
dtype: int64


In [3]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Create a DataLoader for batch processing
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)


In [4]:
# Define the GRU model (replaces the RNN)
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

In [9]:
# Hyperparameters
input_size = X_train.shape[1]
hidden_size = 32  # Reduced hidden size for faster training
num_layers = 2
output_size = 1
learning_rate = 0.001
num_epochs = 1000

In [10]:
# Model, loss function, and optimizer
model = GRUModel(input_size, hidden_size, num_layers, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
# Training loop with batch processing
for epoch in range(num_epochs):
    model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/1000], Loss: 2234935.2500
Epoch [20/1000], Loss: 2371289.7500
Epoch [30/1000], Loss: 2487553.0000
Epoch [40/1000], Loss: 2204820.2500
Epoch [50/1000], Loss: 2665964.7500
Epoch [60/1000], Loss: 2260763.2500
Epoch [70/1000], Loss: 2263360.7500
Epoch [80/1000], Loss: 2233677.5000
Epoch [90/1000], Loss: 2010586.5000
Epoch [100/1000], Loss: 2013828.0000
Epoch [110/1000], Loss: 1954649.1250
Epoch [120/1000], Loss: 1891309.3750
Epoch [130/1000], Loss: 1984965.5000
Epoch [140/1000], Loss: 1948747.3750
Epoch [150/1000], Loss: 1788617.0000
Epoch [160/1000], Loss: 1709181.0000
Epoch [170/1000], Loss: 1608931.6250
Epoch [180/1000], Loss: 1501138.3750
Epoch [190/1000], Loss: 1973523.3750
Epoch [200/1000], Loss: 1411477.6250
Epoch [210/1000], Loss: 1699370.0000
Epoch [220/1000], Loss: 1377452.7500
Epoch [230/1000], Loss: 1556647.2500
Epoch [240/1000], Loss: 1553925.2500
Epoch [250/1000], Loss: 1384946.8750
Epoch [260/1000], Loss: 1478721.7500
Epoch [270/1000], Loss: 1247602.2500
Epoch [280

In [12]:
# Evaluate the model
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)
    test_loss = criterion(predictions, y_test_tensor)
    print(f'Test Loss: {test_loss.item():.4f}')

Test Loss: 121783.2422
