In [None]:
#load data

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque, namedtuple
import random
import matplotlib.pyplot as plt

class DataBasedEconomyEnv:
    """Replay environment that steps through historical macroeconomic data.

    Each step advances one row in the (date-sorted) data set. The observation
    is built from the historical series and the reward from the historical
    inflation and output gap of the new row; the agent's action does not
    influence the transition and is only reported back in ``info``.
    """

    # Columns the CSV must provide; 'date' is needed to order the rows.
    REQUIRED_COLUMNS = ('date', 'inflation', 'output_gap', 'interest_rate')

    # Series that are normalised and for which mean/std are stored.
    STATE_VARIABLES = ('inflation', 'output_gap', 'interest_rate')

    # A deviation from target larger than this (in the data's units) is "large".
    LARGE_DEVIATION_THRESHOLD = 2

    # Multiplier applied to the (negative) reward for each large deviation.
    # It must be > 1: scaling a negative reward by a factor < 1 would make
    # large deviations *cheaper* than small ones.
    LARGE_DEVIATION_PENALTY = 10.0

    def __init__(self, data_path, lookback_periods=2):
        """
        Environment using real economic data

        Args:
            data_path: Path to CSV file with columns ['date', 'inflation', 'output_gap', 'interest_rate']
            lookback_periods: Number of previous periods to include in state

        Raises:
            ValueError: If ``lookback_periods`` is negative, a required column
                is missing, or the data has too few rows for one state.
        """
        if lookback_periods < 0:
            raise ValueError("lookback_periods must be non-negative")

        raw = pd.read_csv(data_path)

        # Validate before touching any column so a malformed file fails with
        # a clear message instead of a KeyError on 'date'.
        required_cols = list(self.REQUIRED_COLUMNS)
        missing = [col for col in required_cols if col not in raw.columns]
        if missing:
            raise ValueError(
                f"Data must contain columns: {required_cols} (missing: {missing})"
            )

        # The first state needs `lookback_periods` past rows plus the current one.
        if len(raw) < lookback_periods + 1:
            raise ValueError(
                f"Data has {len(raw)} rows but at least {lookback_periods + 1} "
                f"are required for lookback_periods={lookback_periods}"
            )

        raw['date'] = pd.to_datetime(raw['date'])
        self.data = raw.sort_values('date').reset_index(drop=True)

        self.lookback_periods = lookback_periods
        self.current_idx = lookback_periods
        self.max_idx = len(self.data) - 1

        # Target values (can be modified based on policy goals)
        self.inflation_target = 2.0
        self.output_gap_target = 0.0

        # Store data statistics for normalization
        self.data_stats = {}
        for var in self.STATE_VARIABLES:
            series = self.data[var]
            std = series.std()
            self.data_stats[f'{var}_mean'] = series.mean()
            # A constant column (std == 0) or a single row (std is NaN) would
            # turn every normalised value into NaN/inf; fall back to unit scale.
            self.data_stats[f'{var}_std'] = std if std > 0 else 1.0

    def normalize_data(self, data, variable):
        """Normalize data using stored statistics"""
        return (data - self.data_stats[f'{variable}_mean']) / self.data_stats[f'{variable}_std']

    def denormalize_data(self, data, variable):
        """Denormalize data using stored statistics"""
        return data * self.data_stats[f'{variable}_std'] + self.data_stats[f'{variable}_mean']

    def get_state(self):
        """Get current state including lookback periods

        Returns:
            1-D array holding, in order, the normalised inflation window, the
            normalised output-gap window (both covering the lookback rows and
            the current row) and the normalised interest-rate window, which
            covers the lookback rows only and leaves out the current row.
        """
        start_idx = self.current_idx - self.lookback_periods
        end_idx = self.current_idx + 1

        # The interest-rate window stops one row earlier than the other two.
        windows = (
            ('inflation', slice(start_idx, end_idx)),
            ('output_gap', slice(start_idx, end_idx)),
            ('interest_rate', slice(start_idx, end_idx - 1)),
        )
        return np.concatenate([
            self.normalize_data(self.data[var].iloc[window].values, var)
            for var, window in windows
        ])

    def reset(self):
        """Reset to start of data (after lookback periods)"""
        self.current_idx = self.lookback_periods
        return self.get_state()

    def compute_reward(self, inflation, output_gap):
        """Compute reward based on paper's specification

        The reward is the negative weighted quadratic loss around the
        inflation and output-gap targets. Each variable that deviates from
        its target by more than ``LARGE_DEVIATION_THRESHOLD`` multiplies the
        reward by ``LARGE_DEVIATION_PENALTY``, making it more negative.

        Args:
            inflation: Inflation value, in the units of the data.
            output_gap: Output gap value, in the units of the data.

        Returns:
            Non-positive reward; 0 means both variables are on target.
        """
        omega_pi = omega_y = 0.5  # Equal weights as per paper

        inflation_dev = inflation - self.inflation_target
        output_gap_dev = output_gap - self.output_gap_target

        # Basic quadratic loss
        reward = -omega_pi * inflation_dev**2 + -omega_y * output_gap_dev**2

        # Additional penalty for large deviations (penalties compound when
        # both variables are far from target).
        if abs(inflation_dev) > self.LARGE_DEVIATION_THRESHOLD:
            reward *= self.LARGE_DEVIATION_PENALTY
        if abs(output_gap_dev) > self.LARGE_DEVIATION_THRESHOLD:
            reward *= self.LARGE_DEVIATION_PENALTY

        return reward

    def _build_info(self, actual_action):
        """Collect the monitoring values for the row at ``current_idx``."""
        row = self.current_idx
        return {
            'actual_inflation': self.data['inflation'].iloc[row],
            'actual_output_gap': self.data['output_gap'].iloc[row],
            'actual_interest_rate': actual_action,
            'date': self.data['date'].iloc[row]
        }

    def step(self, action):
        """
        Execute one step using historical data

        The next state and the reward are read from the historical data; the
        action is denormalised and echoed in ``info`` but does not change them.

        Args:
            action: Normalized interest rate decision

        Returns:
            next_state: Next state
            reward: Reward value
            done: Whether episode is finished
            info: Additional information
        """
        # Denormalize action (interest rate decision)
        actual_action = self.denormalize_data(action, 'interest_rate')

        # Already at the last row: stay put, but still return a fully
        # populated info dict so callers can index it unconditionally.
        if self.current_idx >= self.max_idx:
            return self.get_state(), 0, True, self._build_info(actual_action)

        # Move to next time step
        self.current_idx += 1

        next_state = self.get_state()
        info = self._build_info(actual_action)

        # Reward is computed from the historical values of the new row.
        reward = self.compute_reward(
            info['actual_inflation'], info['actual_output_gap']
        )

        # Check if episode is done
        done = self.current_idx >= self.max_idx

        return next_state, reward, done, info

def main():
    """Train a DDPG agent on the historical-data environment and plot the run.

    Builds the environment from 'economic_data.csv', trains for a fixed number
    of episodes with exploration noise added to every action, prints the
    episode reward every tenth episode and finally hands the collected
    histories to ``plot_training_results``.

    Returns:
        Tuple ``(agent, env)`` with the trained agent and the environment.
    """
    env = DataBasedEconomyEnv(data_path='economic_data.csv', lookback_periods=2)

    # The length of an initial observation fixes the agent's input size.
    action_dim = 1
    state_dim = len(env.reset())

    agent = DDPGAgent(
        state_dim=state_dim,
        action_dim=action_dim,
        hidden_dim=64,
        buffer_size=10000,
        batch_size=64,
        gamma=0.99,
        tau=0.001,
    )

    num_episodes = 500
    noise = OUNoise(action_dim)

    # Per-episode totals, plus per-step traces spanning all episodes.
    reward_per_episode = []
    inflation_trace = []
    output_gap_trace = []
    interest_rate_trace = []

    for episode in range(num_episodes):
        state = env.reset()
        noise.reset()
        episode_reward = 0

        while True:
            # Policy action perturbed by exploration noise
            action = agent.select_action(state) + noise.sample()

            next_state, reward, done, info = env.step(action)

            # Remember the transition, then run one training update
            agent.store_experience(state, action, reward, next_state)
            agent.train()

            state = next_state
            episode_reward += reward

            inflation_trace.append(info['actual_inflation'])
            output_gap_trace.append(info['actual_output_gap'])
            interest_rate_trace.append(info['actual_interest_rate'])

            if done:
                break

        reward_per_episode.append(episode_reward)

        # Report every tenth episode
        if episode % 10 == 9:
            print(f"Episode {episode + 1}/{num_episodes}, Reward: {episode_reward:.2f}")

    plot_training_results(
        reward_per_episode,
        inflation_trace,
        output_gap_trace,
        interest_rate_trace,
    )

    return agent, env

# Entry point: run the training loop when this code is executed directly and
# keep the trained agent and environment available at module level.
if __name__ == "__main__":
    trained_agent, trained_env = main()

## Data Scraping

### 1. Output Gap Data extraction

First I got the quarterly GDP for the period [Office for National Statistics]:
https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/ybha/qna

I got the yearly output gap [Office for Budget Responsibility (OBR)]
https://obr.uk/public-finances-databank-2024-25/

Using the quarterly estimates developed in [OBR: Output gap measurement: judgement and uncertainty], I replicated the shape of the quarterly output gaps (in %).
https://obr.uk/docs/dlm_uploads/WorkingPaperNo5.pdf

In [6]:
import pandas as pd
import datetime as dt

# Raw-file URL of the output-gap workbook, pinned to a specific commit of the
# GitHub repository.
xlsx_url = "https://raw.githubusercontent.com/guri99uy/ST449_Project/b88d036db466e3c752b7118b38301203ee4ac6b5/outputgap.xlsx"

# Download and load the workbook into a DataFrame (requires network access).
df_outputgap = pd.read_excel(xlsx_url, engine='openpyxl')  # 'openpyxl' is the engine used for .xlsx files

# Define a function to parse QQYYYY
def parse_qqyyyy(qqyyyy):
    """Convert a quarter label into the datetime of that quarter's first day.

    The character at index 1 is read as the quarter number (1-4) and
    everything from index 2 onwards as the year; the character at index 0
    is not inspected.

    Args:
        qqyyyy: Quarter label string, e.g. "Q11997".

    Returns:
        ``datetime`` for the first day of the quarter's first month.
    """
    quarter_digit, year_digits = qqyyyy[1], qqyyyy[2:]
    quarter_no = int(quarter_digit)
    year_no = int(year_digits)

    # Quarter number -> first month of that quarter: 1->1, 2->4, 3->7, 4->10
    first_month_of = dict(zip(range(1, 5), range(1, 11, 3)))

    return dt.datetime(year=year_no, month=first_month_of[quarter_no], day=1)

# Overwrite the 'QQYYYY' column in place: each quarter label is replaced by
# the datetime that parse_qqyyyy returns for it.
df_outputgap['QQYYYY'] = df_outputgap['QQYYYY'].apply(parse_qqyyyy)

# Print the first rows as a quick check of the converted DataFrame
print(df_outputgap.head())


      QQYYYY  GDP_Real (m£)   GDP_Pot (m£)  Output_gap (%)
0 1997-01-01         232749  235518.713100        1.190000
1 1997-04-01         235998  239467.170600        1.470000
2 1997-07-01         243388  246211.300800        1.160000
3 1997-10-01         241270  243738.981873        1.023327
4 1998-01-01         245958  248319.196800        0.960000


### 2. Interest Rate
I downloaded the .xlsx file from the [Bank of England]:
https://www.bankofengland.co.uk/boeapps/database/Bank-Rate.asp
