# RL tabular market-maker (MM) analysis
Set the parameters, optionally run `mm_compare` with `--mm-type rl_tabular`, then plot PnL, rewards, actions, inventory, and spread from the resulting logs.

In [None]:
import subprocess
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# Display options: show full cell contents, cap printed tables at 50 rows.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 50)

# ---- parameters ----
# Simulation inputs.
seed = 1
run_simulation = False  # set True to launch a run from the notebook
ticker = 'AAPL'
historical_date = '20000101'
start_time, end_time = '09:30:00', '11:30:00'
mm_type = 'rl_tabular'

# Log locations: the log name is derived from the seed and the directory
# lives under <repo_root>/log/.
repo_root = Path('..')
log_name = f'rl_tabular_seed_{seed}'
log_dir = repo_root.joinpath('log', log_name)
# --------------------

print('Log dir:', log_dir.resolve())

## Run simulation (optional)

In [None]:
# Optionally launch the simulation as a child process, with the repository
# root as its working directory.
if run_simulation:
    cmd = [
        # Use the interpreter running this notebook rather than whichever
        # `python` happens to be first on PATH, so the child process sees the
        # same environment and installed packages as the kernel.
        sys.executable, 'abides.py',
        '-c', 'mm_compare',
        '-t', ticker,
        '-d', historical_date,
        '--start-time', start_time,
        '--end-time', end_time,
        '--seed', str(seed),
        '--log_dir', log_name,
        '--mm-type', mm_type,
    ]
    print('Running:', ' '.join(cmd))
    result = subprocess.run(cmd, cwd=repo_root)
    print('Return code:', result.returncode)
    if result.returncode != 0:
        # Do not raise: later cells may still be able to read earlier logs.
        print('Simulation exited with a non-zero return code; logs may be missing or incomplete.')
else:
    print('Skipping simulation run; set run_simulation=True to execute.')

## Load logs

In [None]:
# Resolve the log artifacts for this run. The order book log is optional
# (None when no file matches); the summary log is required.
summary_path = log_dir.joinpath('summary_log.bz2')
orderbook_matches = log_dir.glob('ORDERBOOK_*_FULL.bz2')
orderbook_file = next(orderbook_matches, None)

if not summary_path.exists():
    raise FileNotFoundError(summary_path)

# The summary log is stored as a pickled pandas object.
summary = pd.read_pickle(summary_path)
summary.head()

### Summary PnL by strategy

In [None]:
# One row per (agent, strategy), one column per event type, keeping the first
# recorded event value for each combination.
agent_keys = ['AgentID', 'AgentStrategy']
wide = summary.pivot_table(
    index=agent_keys,
    columns='EventType',
    values='Event',
    aggfunc='first',
)
# PnL is the change in cash over the run.
wide['PNL'] = wide['ENDING_CASH'].sub(wide['STARTING_CASH'])
# Show the ten agents with the highest PnL.
wide.reset_index().sort_values('PNL', ascending=False).head(10)

## RL tabular agent log

In [None]:
# Collect the RL agent logs. Glob results come back in a filesystem-dependent
# order, so sort them to make the choice of file reproducible between runs.
rl_files = sorted(log_dir.glob('RL_TABULAR_MARKET_MAKER_AGENT_*.bz2'))
if not rl_files:
    raise FileNotFoundError('No RL_TABULAR_MARKET_MAKER_AGENT logs found')
if len(rl_files) > 1:
    # Only the first log is analysed; say which one so the plots are not
    # attributed to the wrong agent.
    print(f'Found {len(rl_files)} RL logs; using {rl_files[0].name}')
rl_log = pd.read_pickle(rl_files[0])
print('RL log columns:', list(rl_log.columns))
rl_log.head()

### Inventory, cash, MTM, spread, actions

In [None]:
# Build a time-indexed frame from the STATE events and plot each tracked
# quantity in its own figure.
state_rows = rl_log[rl_log['EventType'] == 'STATE']
if not state_rows.empty:
    # Each STATE event payload becomes one row; its 'time' field is the index.
    state_df = pd.DataFrame(list(state_rows['Event']))
    state_df = state_df.set_index('time')
    state_df.index = pd.to_datetime(state_df.index)

    time_series_panels = (
        ('inventory', 'RL inventory'),
        ('cash', 'RL cash'),
        ('mtm', 'RL mark-to-market PnL'),
        ('spread', 'Spread observed by RL'),
    )
    for column, title in time_series_panels:
        # DataFrame.plot opens a new figure for every call.
        state_df[[column]].plot(figsize=(10, 3), title=title, legend=True)
        plt.tight_layout()

    # Series.plot without an explicit `ax` draws on the current axes, which
    # would put the histogram on top of the spread chart above. Give it a
    # dedicated figure and axes instead.
    _, hist_ax = plt.subplots(figsize=(6, 3))
    state_df['last_action'].plot(kind='hist', bins=10, ax=hist_ax, title='Action histogram', label='last_action')
    hist_ax.legend()
    plt.tight_layout()
else:
    print('No STATE events found in RL log')

### Rewards and PnL per step

In [None]:
# Plot the step reward, its smoothed version, the cumulative reward, and the
# per-step mark-to-market change, each in its own figure.
if not state_rows.empty:
    # Approx per-step PnL from mtm diffs
    state_df['mtm_diff'] = state_df['mtm'].diff()

    reward_panels = (
        (state_df['reward'], 'Step reward', 'reward'),
        (state_df['reward'].ewm(span=100).mean(), 'Reward EWMA (span=100)', 'reward_ewma'),
        (state_df['cum_reward'], 'Cumulative reward', 'cum_reward'),
        (state_df['mtm_diff'], 'Per-step MTM change', 'mtm_diff'),
    )
    for series, title, label in reward_panels:
        # Series.plot without an explicit `ax` reuses the current axes, so
        # consecutive calls would stack every line on one chart and overwrite
        # its title. Create a fresh figure for each series.
        _, ax = plt.subplots(figsize=(10, 3))
        series.plot(ax=ax, title=title, label=label)
        ax.legend()
        plt.tight_layout()
else:
    print('No STATE events found in RL log')

### Actions over time and Q proxy

In [None]:
# Count how often each action was taken in every 5-minute window and show the
# counts as a stacked area chart.
if not state_rows.empty:
    # '5min' is the supported spelling of the 5-minute frequency; the older
    # 'T' alias is deprecated in recent pandas releases.
    action_share = state_df['last_action'].resample('5min').apply(lambda s: s.value_counts()).unstack(fill_value=0)
    action_share.plot.area(figsize=(10,3), title='Action counts per 5 min')
    plt.legend(title='action')
    plt.tight_layout()
else:
    print('No STATE events found in RL log')

### State visitation heatmap

In [None]:
# Heatmap of how many STATE events fall into each (inventory_bin, spread_bin)
# pair.
if not state_rows.empty:
    visit = state_df.groupby(['inventory_bin', 'spread_bin']).size().unstack(fill_value=0)
    plt.figure(figsize=(6,4))
    plt.imshow(visit, origin='lower', aspect='auto')
    plt.colorbar(label='visits')
    # imshow positions cells by row/column number; label the ticks with the
    # actual bin values so gaps or non-zero-based bins are not misread.
    plt.xticks(range(len(visit.columns)), visit.columns)
    plt.yticks(range(len(visit.index)), visit.index)
    plt.xlabel('spread_bin')
    plt.ylabel('inventory_bin')
    plt.title('State visitation')
    plt.tight_layout()
else:
    print('No STATE events found in RL log')

## Order book and fundamental

In [None]:
# Plot mid price and quoted spread from the order book log when best bid/ask
# price columns can be identified; otherwise show the first rows of the log.
if not orderbook_file:
    print('Orderbook file not found')
else:
    orderbook = pd.read_pickle(orderbook_file)
    print('Orderbook columns:', list(orderbook.columns))

    def find_col(columns, substrings):
        """Return the first column whose lower-cased name contains every substring.

        Tuple labels are joined with spaces before matching. Returns None when
        no column matches.
        """
        labelled = (
            (col, (' '.join(col) if isinstance(col, tuple) else str(col)).lower())
            for col in columns
        )
        return next(
            (col for col, label in labelled if all(sub in label for sub in substrings)),
            None,
        )

    # Prefer "best bid"/"best ask" style names, then fall back to "bid price"/"ask price".
    bid_price_col = find_col(orderbook.columns, ['best', 'bid']) or find_col(orderbook.columns, ['bid', 'price'])
    ask_price_col = find_col(orderbook.columns, ['best', 'ask']) or find_col(orderbook.columns, ['ask', 'price'])

    if not (bid_price_col and ask_price_col):
        display(orderbook.head())
    else:
        top = orderbook[[bid_price_col, ask_price_col]].copy()
        top.columns = ['best_bid_price', 'best_ask_price']
        best_bid, best_ask = top['best_bid_price'], top['best_ask_price']
        top['mid'] = (best_bid + best_ask) / 2
        top['spread'] = best_ask - best_bid
        for column, title in (('mid', 'Mid price'), ('spread', 'Quoted spread')):
            top[[column]].plot(figsize=(10,3), title=title, legend=True)
            plt.tight_layout()

# Plot the fundamental value path if a fundamental log exists for this run.
fund_file = next(log_dir.glob('fundamental_*.bz2'), None)
if fund_file is None:
    print('Fundamental file not found')
else:
    # Move the index into a regular column so it can be used as the x axis.
    fundamental = pd.read_pickle(fund_file)
    fundamental = fundamental.reset_index()
    fundamental.plot(
        x='FundamentalTime',
        y='FundamentalValue',
        legend=True,
        figsize=(10,3),
        title='Fundamental path',
    )
    plt.tight_layout()