In [3]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
from collections import defaultdict
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
from sklearn.metrics import r2_score, mean_squared_error

# === Device ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# === Config ===
input_path = r"your file path\USD,ZAR AND EUR 2025 DATA.csv"
seq_length = 5
hidden_size = 64
num_layers = 2
dropout_rate = 0.2
batch_size = 64
epochs = 30
learning_rate = 0.001
weight_decay = 1e-4

# === Load Data ===
df = pd.read_csv(input_path, parse_dates=['DateTime'])
df = df.dropna(subset=['Actual', 'Previous', 'Currency', 'Event', 'Impact'])

df['EventMonth'] = df['DateTime'].dt.to_period('M')
df['EventDate'] = df['DateTime'].dt.date
current_month = pd.Timestamp.now().to_period('M')

# Count high-impact events per day
high_impact_counts = (
    df[df['Impact'] == 3]
    .groupby(['Currency', 'EventDate'])
    .size()
    .rename("HighImpactCount")
    .reset_index()
)
df = pd.merge(df, high_impact_counts, how='left', on=['Currency', 'EventDate'])
df['HighImpactCount'] = df['HighImpactCount'].fillna(0)

# Lag features
df = df.sort_values(['Currency', 'Event', 'DateTime'])
df['Actual_lag1'] = df.groupby(['Currency', 'Event'])['Actual'].shift(1)
df['Previous_lag1'] = df.groupby(['Currency', 'Event'])['Previous'].shift(1)
df['Actual_lag1'] = df['Actual_lag1'].fillna(method='bfill').fillna(method='ffill')
df['Previous_lag1'] = df['Previous_lag1'].fillna(method='bfill').fillna(method='ffill')

grouped = df.groupby(['Currency', 'Event'])

all_train_samples = []
all_test_samples = []
event_norm_params = {}
latest_month_data = {}

print("\n=== Preparing sequences per event ===")

def normalize_feature(arr):
    mn = arr.min()
    mx = arr.max()
    rng = mx - mn if mx != mn else 1
    return (arr - mn) / rng, mn, rng

for key, group in grouped:
    group = group.sort_values('DateTime')
    if len(group) < seq_length + 1:
        continue

    actuals = group['Actual'].values
    previous = group['Previous'].values
    impacts = group['Impact'].astype(float).values
    hicount = group['HighImpactCount'].astype(float).values
    actual_lag1 = group['Actual_lag1'].values
    prev_lag1 = group['Previous_lag1'].values
    months = group['EventMonth'].values

    norm_actuals, min_act, range_act = normalize_feature(actuals)
    norm_previous, min_prev, range_prev = normalize_feature(previous)
    norm_impacts, min_imp, range_imp = normalize_feature(impacts)
    norm_hic, min_hic, range_hic = normalize_feature(hicount)
    norm_al1, min_al1, range_al1 = normalize_feature(actual_lag1)
    norm_pl1, min_pl1, range_pl1 = normalize_feature(prev_lag1)

    event_norm_params[key] = {
        'actual': (min_act, range_act),
        'previous': (min_prev, range_prev),
        'impact': (min_imp, range_imp),
        'high_impact_count': (min_hic, range_hic),
        'actual_lag1': (min_al1, range_al1),
        'previous_lag1': (min_pl1, range_pl1),
    }

    features = np.stack([
        norm_actuals, norm_previous, norm_impacts,
        norm_hic, norm_al1, norm_pl1
    ], axis=1)

    targets = norm_actuals
    split_idx = int(len(features) * 0.7)

    for i in range(split_idx - seq_length):
        x = features[i:i+seq_length]
        y = targets[i+seq_length]
        all_train_samples.append((x, y, key))

    for i in range(split_idx, len(features) - seq_length):
        x = features[i:i+seq_length]
        y = targets[i+seq_length]
        all_test_samples.append((x, y, key))

    latest_month_data[key] = {
        'seq': features[-seq_length:],
        'has_current_month': current_month in months
    }

print(f"Total train samples: {len(all_train_samples)}")
print(f"Total test samples:  {len(all_test_samples)}")

# === Dataset + DataLoader ===
class EventDataset(Dataset):
    def __init__(self, samples):
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        x, y, key = self.samples[idx]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32), key

def custom_collate_fn(batch):
    xs, ys, keys = zip(*batch)
    return torch.stack(xs), torch.tensor(ys), list(keys)

train_dl = DataLoader(EventDataset(all_train_samples), batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
test_dl = DataLoader(EventDataset(all_test_samples), batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)

# === Model ===
class RNNModel(nn.Module):
    def __init__(self, input_size=6, hidden_size=64, num_layers=2, dropout=0.2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.dropout(out[:, -1])
        return self.fc(out)

model = RNNModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
loss_fn = nn.MSELoss()
scaler = GradScaler()

# === Training ===
print("\n=== Training Starts ===")
model.train()
for epoch in range(epochs):
    total_loss = 0
    for xb, yb, _ in train_dl:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        with autocast():
            pred = model(xb).squeeze()
            loss = loss_fn(pred, yb)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
    print(f"Epoch {epoch+1:02}/{epochs} - Loss: {total_loss / len(train_dl):.6f}")

# === Evaluation ===
print("\n=== Evaluation Per Event ===")
model.eval()
predictions = defaultdict(list)
actuals = defaultdict(list)

with torch.no_grad():
    for xb, yb, keys in test_dl:
        xb, yb = xb.to(device), yb.to(device)
        with autocast():
            preds = model(xb).squeeze()
        preds = preds.cpu().numpy()
        yb = yb.cpu().numpy()
        for i, key in enumerate(keys):
            predictions[key].append(preds[i])
            actuals[key].append(yb[i])

# === Metrics ===
event_stats = []
print(f"\n{'Currency':<10} | {'Event':<35} | {'R²':>6} | {'MSE':>10} | {'Samples':>8}")
print("-" * 80)

for key in sorted(predictions.keys()):
    if key not in event_norm_params:
        continue

    preds = predictions[key]
    trues = actuals[key]
    n = len(preds)

    if n < 5:
        continue

    min_act, range_act = event_norm_params[key]['actual']
    denorm_preds = [p * range_act + min_act for p in preds]
    denorm_trues = [a * range_act + min_act for a in trues]

    r2 = r2_score(denorm_trues, denorm_preds)
    mse = mean_squared_error(denorm_trues, denorm_preds)

    event_stats.append((key[0], key[1], r2, mse, n))
    print(f"{key[0]:<10} | {key[1][:35]:<35} | {r2:6.4f} | {mse:10.4f} | {n:8d}")

# === Save Output ===
output_dir = os.path.dirname(input_path)
summary_path = os.path.join(output_dir, "event_rnn_evaluation_summary.csv")
forecast_path = os.path.join(output_dir, "event_rnn_month_forecasts.csv")
model_path = os.path.join(output_dir, "event_rnn_model.pth")

summary_df = pd.DataFrame(event_stats, columns=["Currency", "Event", "R2", "MSE", "Samples"])
summary_df.to_csv(summary_path, index=False)
print(f"\n✅ Accuracy summary saved to: {summary_path}")

# === Forecast ===
print("\n=== Forecast for This/Next Month ===")
forecast_rows = []

for key, data in latest_month_data.items():
    seq = data['seq']
    month_target = "THIS" if not data['has_current_month'] else "NEXT"
    input_tensor = torch.tensor(seq, dtype=torch.float32).unsqueeze(0).to(device)
    with torch.no_grad(), autocast():
        pred = model(input_tensor).item()
    min_act, range_act = event_norm_params[key]['actual']
    denorm_pred = pred * range_act + min_act
    forecast_rows.append([key[0], key[1], month_target, denorm_pred])
    print(f"{key[0]} - {key[1][:35]:<35}: 🔮 Forecast for {month_target} month = {denorm_pred:.2f}")

forecast_df = pd.DataFrame(forecast_rows, columns=["Currency", "Event", "ForecastMonth", "ForecastValue"])
forecast_df.to_csv(forecast_path, index=False)
print(f"✅ Forecasts saved to: {forecast_path}")

# === Save Model ===
torch.save(model.state_dict(), model_path)
print(f"✅ Model weights saved to: {model_path}")


Using device: cpu


  df['Actual_lag1'] = df['Actual_lag1'].fillna(method='bfill').fillna(method='ffill')
  df['Previous_lag1'] = df['Previous_lag1'].fillna(method='bfill').fillna(method='ffill')



=== Preparing sequences per event ===
Total train samples: 52553
Total test samples:  21779


  scaler = GradScaler()
  with autocast():



=== Training Starts ===
Epoch 01/30 - Loss: 0.017635
Epoch 02/30 - Loss: 0.012518
Epoch 03/30 - Loss: 0.011722
Epoch 04/30 - Loss: 0.011274
Epoch 05/30 - Loss: 0.011076
Epoch 06/30 - Loss: 0.010879
Epoch 07/30 - Loss: 0.010867
Epoch 08/30 - Loss: 0.010725
Epoch 09/30 - Loss: 0.010689
Epoch 10/30 - Loss: 0.010706
Epoch 11/30 - Loss: 0.010665
Epoch 12/30 - Loss: 0.010604
Epoch 13/30 - Loss: 0.010617
Epoch 14/30 - Loss: 0.010588
Epoch 15/30 - Loss: 0.010692
Epoch 16/30 - Loss: 0.010574
Epoch 17/30 - Loss: 0.010550
Epoch 18/30 - Loss: 0.010526
Epoch 19/30 - Loss: 0.010534
Epoch 20/30 - Loss: 0.010521
Epoch 21/30 - Loss: 0.010526
Epoch 22/30 - Loss: 0.010508
Epoch 23/30 - Loss: 0.010544
Epoch 24/30 - Loss: 0.010549
Epoch 25/30 - Loss: 0.010549
Epoch 26/30 - Loss: 0.010535
Epoch 27/30 - Loss: 0.010548
Epoch 28/30 - Loss: 0.010473
Epoch 29/30 - Loss: 0.010539
Epoch 30/30 - Loss: 0.010566

=== Evaluation Per Event ===


  with autocast():



Currency   | Event                               |     R² |        MSE |  Samples
--------------------------------------------------------------------------------
EUR        | 10-Year BTP Auction                 | 0.3845 | 285517240278.0360 |       37
EUR        | 10-Year Bond Auction                | -0.4041 | 65963759566.2693 |       48
EUR        | 10-Year OAT Auction                 | 0.2031 | 177714576507.7480 |       36
EUR        | 10-Year Obligacion Auction          | 0.8881 | 173541369617.8269 |       45
EUR        | 12-Month BOT Auction                | 0.7340 | 289986109284.6966 |       39
EUR        | 12-Month BTF Auction                | 0.9647 | 54786952329.6801 |      171
EUR        | 12-Month Letras Auction             | 0.7145 | 262891050560.3381 |       38
EUR        | 15-Year Obligacion Auction          | -26.8796 | 1346191677612.7664 |        8
EUR        | 2-Year CTZ Auction                  | -0.3286 | 435241070072.0900 |       21
EUR        | 2-Year Note Auction

  with torch.no_grad(), autocast():


EUR - 10-Year BTP Auction                : 🔮 Forecast for THIS month = 3459141.18
EUR - 10-Year Bond Auction               : 🔮 Forecast for THIS month = 2361680.30
EUR - 10-Year OAT Auction                : 🔮 Forecast for THIS month = 3074328.04
EUR - 10-Year Obligacion Auction         : 🔮 Forecast for THIS month = 3186863.76
EUR - 12-Month BOT Auction               : 🔮 Forecast for THIS month = 1998498.41
EUR - 12-Month BTF Auction               : 🔮 Forecast for THIS month = 1841767.77
EUR - 12-Month Letras Auction            : 🔮 Forecast for THIS month = 1869169.17
EUR - 15-Year Obligacion Auction         : 🔮 Forecast for THIS month = 3450767.84
EUR - 2-Year CTZ Auction                 : 🔮 Forecast for THIS month = 2106620.24
EUR - 2-Year Note Auction                : 🔮 Forecast for THIS month = 1741064.05
EUR - 3-Month BTF Auction                : 🔮 Forecast for THIS month = 1869240.35
EUR - 3-Month Letras Auction             : 🔮 Forecast for THIS month = 1939629.28
EUR - 3-Year BTP