In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from collections import deque
import random
import yfinance as yf

class DuelingDDQN(nn.Module):
    """Dueling Q-network for VaR prediction - 3D market state only.

    A shared MLP embedding feeds two heads: a scalar state-value stream and
    a per-action advantage stream. The heads are recombined as
    ``Q = V + (A - mean(A))``.

    Args:
        state_dim: Number of input features per state.
        action_dim: Number of discrete actions (Q-values produced per state).
    """

    def __init__(self, state_dim=3, action_dim=9):  # 9 quantiles: 1-99%
        super().__init__()
        self.embedding = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU()
        )
        # Dueling streams
        self.value_stream = nn.Sequential(
            nn.Linear(64, 32), nn.ReLU(),
            nn.Linear(32, 1)
        )
        self.advantage_stream = nn.Sequential(
            nn.Linear(64, 32), nn.ReLU(),
            nn.Linear(32, action_dim)
        )

    def forward(self, x):
        """Return Q-values for ``x``.

        Args:
            x: Tensor whose last dimension has size ``state_dim``; either a
                batch ``(batch, state_dim)`` or a single state ``(state_dim,)``.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``action_dim``.
        """
        feat = self.embedding(x)
        value = self.value_stream(feat)
        advantage = self.advantage_stream(feat)
        # Average over the action axis (always the last one) so the same code
        # works for batched and unbatched inputs.
        return value + (advantage - advantage.mean(dim=-1, keepdim=True))

class DDQNVarAgent:
    """Epsilon-greedy Double-DQN agent built on two ``DuelingDDQN`` networks.

    Args:
        state_dim: Number of features per state.
        action_dim: Number of discrete actions.
        gamma: Discount factor applied to the bootstrapped next-state value.

    Note:
        ``quantile_levels`` holds nine fixed levels; ``predict_var`` indexes
        the Q-value vector with a position in that array, so it assumes
        ``action_dim`` is at least as large as the chosen position + 1.
    """

    def __init__(self, state_dim=3, action_dim=9, gamma=0.99):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.q_net = DuelingDDQN(state_dim, action_dim)
        self.target_net = DuelingDDQN(state_dim, action_dim)
        self.target_net.load_state_dict(self.q_net.state_dict())
        self.optimizer = optim.Adam(self.q_net.parameters(), lr=0.001)
        self.memory = deque(maxlen=5000)
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.quantile_levels = np.array([0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99])

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer (oldest entries drop off)."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action index: random with probability ``epsilon``, else greedy."""
        # Strict '<' so that epsilon == 0 never explores.
        if np.random.random() < self.epsilon:
            return random.randrange(self.action_dim)
        state_t = torch.as_tensor(np.asarray(state), dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            q_values = self.q_net(state_t)
        return q_values.argmax(1).item()

    def replay(self, batch_size=32):
        """Run one gradient step on a random minibatch from the replay buffer.

        Args:
            batch_size: Number of transitions to sample.

        Returns:
            The scalar MSE loss of this step as a float, or ``None`` when the
            buffer holds fewer than ``batch_size`` transitions (no update and
            no epsilon decay happen in that case).
        """
        if len(self.memory) < batch_size:
            return None

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)
        states = torch.as_tensor(np.array(states), dtype=torch.float32)
        actions = torch.as_tensor(np.array(actions), dtype=torch.int64).unsqueeze(1)
        rewards = torch.as_tensor(np.array(rewards, dtype=np.float32))
        next_states = torch.as_tensor(np.array(next_states), dtype=torch.float32)
        # 1.0 for non-terminal transitions, 0.0 for terminal ones.
        not_done = 1.0 - torch.as_tensor(np.array(dones, dtype=np.float32))

        # squeeze(1) rather than squeeze() so a batch of one stays 1-D.
        current_q = self.q_net(states).gather(1, actions).squeeze(1)

        with torch.no_grad():
            # Double DQN: the online network chooses the next action and the
            # target network scores it, instead of taking the target
            # network's own max.
            next_actions = self.q_net(next_states).argmax(dim=1, keepdim=True)
            next_q = self.target_net(next_states).gather(1, next_actions).squeeze(1)
            target_q = rewards + self.gamma * next_q * not_done

        loss = nn.functional.mse_loss(current_q, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.q_net.parameters(), 1.0)
        self.optimizer.step()

        # Epsilon decays once per successful replay step, not once per episode.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        return loss.item()

    def update_target(self):
        """Copy the online network's weights into the target network."""
        self.target_net.load_state_dict(self.q_net.state_dict())

    def predict_var(self, state, confidence=0.95):
        """Return the magnitude of the Q-value at the level nearest ``confidence``.

        Args:
            state: Single state vector of length ``state_dim``.
            confidence: Level looked up in ``quantile_levels``; the nearest
                entry's position is used as the Q-value index.

        Returns:
            ``abs(Q[state, idx])`` as a Python float.
        """
        was_training = self.q_net.training
        self.q_net.eval()
        try:
            with torch.no_grad():
                state_t = torch.as_tensor(np.asarray(state), dtype=torch.float32).unsqueeze(0)
                qvals = self.q_net(state_t)
                idx = int(np.argmin(np.abs(self.quantile_levels - confidence)))
                var_pred = abs(qvals[0, idx].item())
        finally:
            # Restore whatever mode the network was in before the call.
            self.q_net.train(was_training)
        return var_pred

# 3D Feature Engineering (No News)
def create_market_features(start='2020-01-01', end='2026-01-27', window=252, var_percentile=5):
    """Download Nifty-50 and India VIX closes and build states plus VaR labels.

    Args:
        start: First date passed to ``yf.download``.
        end: End date passed to ``yf.download``.
        window: Number of trailing rows used for each rolling VaR label.
        var_percentile: Percentile (0-100) of the trailing returns used as VaR.

    Returns:
        A tuple ``(features, true_var, dates)`` of equal length:
        ``features`` is an ``(n, 3)`` array of
        ``[return, vix z-score, median-centred log price]``; ``true_var`` is
        the absolute ``var_percentile``-th percentile of the ``window`` returns
        preceding each row; ``dates`` is the matching index slice.

    Raises:
        ValueError: If fewer than ``window + 2`` usable rows were downloaded.
    """
    print("Fetching Nifty-50 & VIX...")
    nifty = yf.download('^NSEI', start=start, end=end, progress=False)
    vix = yf.download('^INDIAVIX', start=start, end=end, progress=False)

    closes = []
    for frame in (nifty, vix):
        close = frame['Close']
        # If the selection comes back as a one-column DataFrame, reduce it to
        # a Series so the concatenated frame has flat column names.
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]
        closes.append(close)

    df = pd.concat(closes, axis=1, keys=['nifty_close', 'vix'])
    # Forward-fill gaps (carry the last known value) instead of back-filling,
    # which would leak future prices into earlier rows; then drop any leading
    # rows that still have no value.
    df = df.ffill().dropna()

    if len(df) < window + 2:
        raise ValueError(
            f"Need at least {window + 2} rows of market data, got {len(df)}"
        )

    # 3D features exactly as Nifty50 report
    df['returns'] = df['nifty_close'].pct_change()
    log_close = np.log(df['nifty_close'])
    df['log_price_norm'] = log_close - log_close.median()
    df['vix_norm'] = (df['vix'] - df['vix'].mean()) / df['vix'].std()

    # Stack into 3D state vectors; row 0 is skipped because its return is NaN.
    features = np.column_stack([
        df['returns'].values[1:],        # daily return
        df['vix_norm'].values[1:],       # VIX z-score
        df['log_price_norm'].values[1:]  # normalized price
    ])

    # VaR labels: percentile of the `window` returns strictly before row i.
    # The range stops one row early so labels, features and dates line up.
    returns = df['returns']
    true_var = [
        abs(np.percentile(returns.iloc[i - window:i].dropna(), var_percentile))
        for i in range(window, len(df) - 1)
    ]

    # features[k] is df row k + 1, so features[window - 1:-1] covers df rows
    # window .. len(df) - 2, the same rows as true_var and the dates slice.
    return features[window - 1:-1], np.array(true_var), df.index[window:-1]

# Training (10000 episodes as per report)
def train_ddqn_market_only(n_episodes=10000, target_update_every=1000,
                           log_every=1000, reward_clip=None):
    """Train a ``DDQNVarAgent`` on the first 80% of the market features.

    Each episode is one pass over the training rows in order. At every row the
    agent picks an action, the magnitude of that action's Q-value is used as
    the VaR prediction, and the reward is the negative absolute percentage
    error against the rolling VaR label.

    Args:
        n_episodes: Number of passes over the training sequence.
        target_update_every: Sync the target network when
            ``episode % target_update_every == 0``.
        log_every: Print progress when ``episode % log_every == 0``.
        reward_clip: If given, rewards are floored at ``-reward_clip``.
            The prediction is a Q-value magnitude, so the raw reward is
            unbounded below; clipping keeps the TD targets bounded.
            ``None`` leaves rewards unclipped.

    Returns:
        ``(agent, test_features, test_vars, test_dates)`` where the test
        pieces are the trailing 20% held out from training.
    """
    features, true_vars, dates = create_market_features()

    split = int(0.8 * len(features))
    train_features, test_features = features[:split], features[split:]
    train_vars, test_vars = true_vars[:split], true_vars[split:]

    agent = DDQNVarAgent(state_dim=3, action_dim=9)
    last_step = len(train_features) - 1

    print("Training DDQN (Market-only 3D)...")
    for episode in range(n_episodes):
        episode_reward = 0.0

        # Single episode = full training sequence
        for i, (state, true_var) in enumerate(zip(train_features, train_vars)):
            action = agent.act(state)
            # Only the scalar value is needed here, so skip autograd.
            with torch.no_grad():
                qvals = agent.q_net(torch.FloatTensor(state).unsqueeze(0))
            pred_var = abs(qvals[0, action].item())

            # Reward: negative MAPE (denominator guarded against a zero label)
            reward = -abs((true_var - pred_var) / max(true_var, 1e-8))
            if reward_clip is not None:
                reward = max(reward, -reward_clip)

            # The final row has no successor; it points at itself and is
            # flagged done so its next-state value is masked out.
            next_state = train_features[min(i + 1, last_step)]
            done = (i == last_step)

            agent.remember(state, action, reward, next_state, done)
            episode_reward += reward
            agent.replay()

        if episode % target_update_every == 0:
            agent.update_target()

        if episode % log_every == 0:
            avg_reward = episode_reward / max(len(train_features), 1)
            print(f"Episode {episode}, Avg Reward: {avg_reward:.4f}, ε: {agent.epsilon:.3f}")

    return agent, test_features, test_vars, dates[split:]

# Run & Evaluate
agent, test_features, test_vars, test_dates = train_ddqn_market_only()

print("\n=== TEST RESULTS (95% VaR) ===")
# Only the first 20 test rows are evaluated and printed.
errors_pct = []
for date, state, true_var in zip(test_dates[:20], test_features[:20], test_vars[:20]):
    pred_var = agent.predict_var(state, confidence=0.95)
    error_pct = abs((true_var - pred_var) / true_var) * 100
    errors_pct.append(error_pct)
    print(f"{date.date()}: True={true_var:.4f}, Pred={pred_var:.4f}, Error={error_pct:.1f}%")

# Average over the rows actually evaluated; dividing by the full test-set
# size while summing only 20 errors would understate the MAPE.
mape = sum(errors_pct) / len(errors_pct) if errors_pct else float('nan')
print(f"\nDDQN(3D Market) MAPE: {mape:.1f}%")
print("Expected: ~17.8% (report benchmark with news)")


Fetching Nifty-50 & VIX...
Training DDQN (Market-only 3D)...
Episode 0, Avg Reward: -651648.6319, ε: 0.010
Episode 100, Avg Reward: -256211259252.1356, ε: 0.010
Episode 200, Avg Reward: -258222036681.0588, ε: 0.010
Episode 300, Avg Reward: -263933164710.7338, ε: 0.010
Episode 400, Avg Reward: -266046926391.7320, ε: 0.010
Episode 500, Avg Reward: -266046932808.3934, ε: 0.010
Episode 600, Avg Reward: -270248823744.6336, ε: 0.010
Episode 700, Avg Reward: -270248549014.8958, ε: 0.010


KeyboardInterrupt: 