# Environment Building for Earnings Event-Driven RL Trading

## Overview
Complete RL environment with proper State, Action, Reward definitions

## Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import warnings
warnings.filterwarnings('ignore')

print('✓ Libraries imported successfully')

✓ Libraries imported successfully


## Step 2: Load CSV Data

In [5]:
CSV_PATH = 'earnings_events_data.csv'
df = pd.read_csv(CSV_PATH)
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['earnings_date'] = pd.to_datetime(df['earnings_date'])

print(f'✓ Loaded {len(df):,} rows from {CSV_PATH}')
print(f'  - Total Events: {df["event_id"].nunique()}')
print(f'  - Date Range: {df["timestamp"].min()} to {df["timestamp"].max()}')

✓ Loaded 14,815 rows from earnings_events_data.csv
  - Total Events: 138
  - Date Range: 2019-01-25 00:00:00+00:00 to 2024-11-05 23:00:00+00:00


## Step 3: Define State, Action, and Reward

### ENVIRONMENT SPECIFICATION - Complete Definition Block

In [6]:
print('''
╔════════════════════════════════════════════════════════════════════════════════╗
║                    STATE SPACE DEFINITION                                       ║
╚════════════════════════════════════════════════════════════════════════════════╝

State is a 7-dimensional vector:

  state = [momentum, volatility, pre_close, current_price, position, cash, window_pnl]
           [   0   ,      1     ,     2    ,       3     ,    4    ,  5  ,   6   ]

• state[0] - momentum: Pre-earnings 3-day momentum (float)
• state[1] - volatility: Pre-earnings 3-day volatility (float)
• state[2] - pre_close: Closing price before earnings ($)
• state[3] - current_price: Current price at this step ($)
• state[4] - position: Trading position (0=flat, 1=long)
• state[5] - cash: Available cash ($)
• state[6] - window_pnl: Event Window PnL (%)

Observation Space: Box(shape=(7,), dtype=float32)
''')

print('''
╔════════════════════════════════════════════════════════════════════════════════╗
║                    ACTION SPACE DEFINITION                                      ║
╚════════════════════════════════════════════════════════════════════════════════╝

Action is a discrete choice among 3 actions:

  action = 0: HOLD  - Do nothing (always available)
  action = 1: BUY   - Enter long position (only if flat)
  action = 2: SELL  - Exit long position (only if long)

Action Space: Discrete(3)
Transaction Cost: 0.05% (0.0005)
''')

print('''
╔════════════════════════════════════════════════════════════════════════════════╗
║                    REWARD DEFINITION                                            ║
╚════════════════════════════════════════════════════════════════════════════════╝

Reward is based on EVENT WINDOW PnL (not individual trades):

• During episode (intermediate steps):
  reward = 0

• At episode end (done=True):
  reward = (final_portfolio_value - initial_cash) / initial_cash
  This is the total Event Window PnL

Reward Range: [-0.3, 0.3] (typical)

Example:
  • Buy at 150, Sell at 153 in event window
  • After transaction costs: ~1.8% profit
  • Final reward: 0.018 (at episode end)
''')


╔════════════════════════════════════════════════════════════════════════════════╗
║                    STATE SPACE DEFINITION                                       ║
╚════════════════════════════════════════════════════════════════════════════════╝

State is a 7-dimensional vector:

  state = [momentum, volatility, pre_close, current_price, position, cash, window_pnl]
           [   0   ,      1     ,     2    ,       3     ,    4    ,  5  ,   6   ]

• state[0] - momentum: Pre-earnings 3-day momentum (float)
• state[1] - volatility: Pre-earnings 3-day volatility (float)
• state[2] - pre_close: Closing price before earnings ($)
• state[3] - current_price: Current price at this step ($)
• state[4] - position: Trading position (0=flat, 1=long)
• state[5] - cash: Available cash ($)
• state[6] - window_pnl: Event Window PnL (%)

Observation Space: Box(shape=(7,), dtype=float32)


╔════════════════════════════════════════════════════════════════════════════════╗
║                    ACTION

## Step 4: Define EarningsEventEnv Class

In [7]:
class EarningsEventEnv(gym.Env):
    '''Trading environment for earnings event-driven strategy.'''
    
    def __init__(self, event_data, transaction_cost=0.0005, initial_cash=10000):
        super(EarningsEventEnv, self).__init__()
        
        self.event_data = event_data.sort_values('timestamp').reset_index(drop=True)
        self.transaction_cost = transaction_cost
        self.initial_cash = initial_cash
        
        # Extract metadata
        self.ticker = event_data['ticker_event'].iloc[0]
        self.earnings_date = event_data['earnings_date'].iloc[0]
        self.event_id = event_data['event_id'].iloc[0]
        self.momentum = event_data['momentum'].iloc[0]
        self.volatility = event_data['volatility'].iloc[0]
        self.pre_close = event_data['pre_close'].iloc[0]
        
        # Define spaces
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float32)
        
        # Initialize state
        self.current_step = 0
        self.position = 0
        self.cash = initial_cash
        self.trades = []
    
    def reset(self):
        self.current_step = 0
        self.position = 0
        self.cash = self.initial_cash
        self.trades = []
        return self._get_observation()
    
    def _get_observation(self):
        if self.current_step >= len(self.event_data):
            current_price = self.event_data['close'].iloc[-1]
        else:
            current_price = self.event_data['close'].iloc[self.current_step]
        
        portfolio_value = self.cash + (self.position * current_price)
        window_pnl = (portfolio_value - self.initial_cash) / self.initial_cash
        
        state = np.array([
            self.momentum,
            self.volatility,
            self.pre_close,
            current_price,
            float(self.position),
            self.cash,
            window_pnl
        ], dtype=np.float32)
        return state
    
    def step(self, action):
        current_price = self.event_data['close'].iloc[self.current_step]
        reward = 0
        
        if action == 1 and self.position == 0:
            cost = current_price * (1 + self.transaction_cost)
            if self.cash >= cost:
                self.position = 1
                self.cash -= cost
                self.trades.append({'action': 'buy', 'price': current_price})
        
        elif action == 2 and self.position == 1:
            proceeds = current_price * (1 - self.transaction_cost)
            self.cash += proceeds
            self.position = 0
            self.trades.append({'action': 'sell', 'price': current_price})
        
        self.current_step += 1
        done = (self.current_step >= len(self.event_data))
        
        if done and self.position == 1:
            final_price = self.event_data['close'].iloc[-1]
            self.cash += final_price * (1 - self.transaction_cost)
            self.position = 0
        
        if done:
            final_value = self.cash
            reward = (final_value - self.initial_cash) / self.initial_cash
        
        observation = self._get_observation()
        return observation, reward, done, {}
    
    def get_window_pnl(self):
        portfolio = self.cash + (self.position * self.event_data['close'].iloc[-1])
        return (portfolio - self.initial_cash) / self.initial_cash

print('✓ EarningsEventEnv class defined successfully')

✓ EarningsEventEnv class defined successfully


## Step 5: Create Environments

In [9]:
# Extract unique events
events = df.groupby('event_id').first().reset_index()
train_events = events[events['split'] == 'train']
test_events = events[events['split'] == 'test']

print(f'Total Events: {len(events)}')
print(f'Train Events: {len(train_events)}')
print(f'Test Events: {len(test_events)}')

# Create environments
train_envs = {}
for event_id in train_events['event_id']:
    event_data = df[df['event_id'] == event_id]
    train_envs[event_id] = EarningsEventEnv(event_data)

test_envs = {}
for event_id in test_events['event_id']:
    event_data = df[df['event_id'] == event_id]
    test_envs[event_id] = EarningsEventEnv(event_data)

print(f'\n✓ Created {len(train_envs)} training + {len(test_envs)} test environments')

Total Events: 138
Train Events: 110
Test Events: 28

✓ Created 110 training + 28 test environments


## Step 6: Test Environment

In [10]:
env = train_envs[list(train_envs.keys())[0]]
state = env.reset()

print(f'Initial State: {state}')
print(f'State Shape: {state.shape}')
print(f'State Type: {state.dtype}')

# Run a few steps
done = False
step = 0
while not done and step < 10:
    action = np.random.choice([0, 1, 2])
    state, reward, done, _ = env.step(action)
    step += 1

print(f'\nAfter {step} steps:')
print(f'  Reward (intermediate): {reward}')
print(f'  Position: {env.position}')
print(f'  Cash: ${env.cash:.2f}')
print(f'  Window PnL: {env.get_window_pnl():.4f}')

Initial State: [1.15539384e-04 5.19670604e-04 1.73520004e+02 1.68850006e+02
 0.00000000e+00 1.00000000e+04 0.00000000e+00]
State Shape: (7,)
State Type: float32

After 10 steps:
  Reward (intermediate): 0
  Position: 1
  Cash: $9829.69
  Window PnL: 0.0013
