In [None]:
# Create detailed tick-by-tick visualization
class DetailedAgentVisualizer:
    """Record and plot an agent's quotes against the market, tick by tick.

    A single ``MarketMakerEnv`` driven by a ``SyntheticFeed`` is created at
    construction time and reused (after ``reset()``) for every agent that is
    analysed with this instance.
    """

    def __init__(self, config_path='configs/ppo.yaml'):
        """Load the config and build the feed and environment.

        Args:
            config_path: Path handed to ``load_config``.
        """
        self.config = load_config(config_path)
        self.feed = SyntheticFeed(seed=42)
        self.env = MarketMakerEnv(self.feed, self.config, seed=42)

    @staticmethod
    def _split_action(action):
        """Return ``(bid, ask)`` as plain floats extracted from ``action``.

        Accepts scalars, lists, tuples and NumPy arrays of any shape (the
        input is flattened). When only one value is present it is used for
        both sides.

        Raises:
            ValueError: If ``action`` contains no values at all.
        """
        values = np.asarray(action, dtype=float).ravel()
        if values.size == 0:
            raise ValueError("agent returned an empty action")
        bid = float(values[0])
        ask = float(values[1]) if values.size > 1 else bid
        return bid, ask

    def visualize_agent_behavior(self, agent, num_ticks=50, agent_name="Agent"):
        """Run ``agent`` in the environment and record one row per tick.

        Args:
            agent: Object exposing either ``predict(obs, deterministic=True)``
                (returning ``(action, state)``) or
                ``get_action(obs, market_state)``.
            num_ticks: Maximum number of environment steps to take.
            agent_name: Name used in the progress message.

        Returns:
            A list of dicts, one per executed tick, holding the market
            snapshot taken before the step, the agent's bid/ask, and the
            inventory / PnL / fill fields read from the step's ``info``.
            The list is shorter than ``num_ticks`` if the episode terminates
            or is truncated early.
        """
        print(f"🔍 Analyzing {agent_name} behavior for {num_ticks} ticks...")

        obs, _ = self.env.reset()
        tick_data = []

        for tick in range(num_ticks):
            # Snapshot the market *before* stepping so the recorded values are
            # the ones the agent actually quoted against.
            current_tick = self.env.current_tick
            market_state = {
                'midprice': current_tick.midprice,
                'spread': current_tick.spread,
                'bid': current_tick.bid,
                'ask': current_tick.ask
            }

            if hasattr(agent, 'predict'):
                action, _ = agent.predict(obs, deterministic=True)
            else:
                action = agent.get_action(obs, market_state)

            obs, reward, terminated, truncated, info = self.env.step(action)

            agent_bid, agent_ask = self._split_action(action)
            tick_data.append({
                'tick': tick,
                'market_midprice': market_state['midprice'],
                'market_bid': market_state['bid'],
                'market_ask': market_state['ask'],
                'market_spread': market_state['spread'],
                'agent_bid': agent_bid,
                'agent_ask': agent_ask,
                'inventory': info.get('inventory', 0),
                'pnl': info.get('total_pnl', 0),
                'reward': reward,
                'filled_bid': info.get('filled_bid', False),
                'filled_ask': info.get('filled_ask', False),
                'position': info.get('position', 0)
            })

            if terminated or truncated:
                break

        return tick_data

    def plot_detailed_behavior(self, tick_data, agent_name="Agent"):
        """Draw a 3x2 grid of diagnostic plots and print a text summary.

        Args:
            tick_data: Rows as produced by ``visualize_agent_behavior``.
            agent_name: Name used in the figure title and the summary.

        Returns:
            The ``pandas.DataFrame`` built from ``tick_data``, or ``None``
            when ``tick_data`` is empty (nothing is plotted in that case).
        """
        if not tick_data:
            print("❌ No tick data to visualize")
            return

        df = pd.DataFrame(tick_data)
        ticks = df['tick']

        fig, axes = plt.subplots(3, 2, figsize=(20, 15))
        fig.suptitle(f'{agent_name} Detailed Behavior Analysis', fontsize=16, fontweight='bold')

        # Plot 1: Price Levels Over Time
        ax1 = axes[0, 0]
        ax1.plot(ticks, df['market_midprice'], 'b-', label='Market Midprice', linewidth=2)
        ax1.plot(ticks, df['market_bid'], 'g--', label='Market Bid', alpha=0.7)
        ax1.plot(ticks, df['market_ask'], 'r--', label='Market Ask', alpha=0.7)
        ax1.plot(ticks, df['agent_bid'], 'go', label='Agent Bid', markersize=4)
        ax1.plot(ticks, df['agent_ask'], 'ro', label='Agent Ask', markersize=4)
        ax1.set_title('Price Levels Over Time')
        ax1.set_xlabel('Tick')
        ax1.set_ylabel('Price')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Plot 2: Spread Analysis
        ax2 = axes[0, 1]
        market_spread = df['market_ask'] - df['market_bid']
        agent_spread = df['agent_ask'] - df['agent_bid']
        ax2.plot(ticks, market_spread, 'b-', label='Market Spread', linewidth=2)
        ax2.plot(ticks, agent_spread, 'r-', label='Agent Spread', linewidth=2)
        ax2.set_title('Spread Comparison')
        ax2.set_xlabel('Tick')
        ax2.set_ylabel('Spread')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # Plot 3: Inventory and PnL on twin y-axes
        ax3 = axes[1, 0]
        ax3_twin = ax3.twinx()

        inventory_lines = ax3.plot(ticks, df['inventory'], 'purple', label='Inventory', linewidth=2)
        ax3.set_ylabel('Inventory', color='purple')
        ax3.tick_params(axis='y', labelcolor='purple')

        pnl_lines = ax3_twin.plot(ticks, df['pnl'], 'orange', label='PnL', linewidth=2)
        ax3_twin.set_ylabel('PnL', color='orange')
        ax3_twin.tick_params(axis='y', labelcolor='orange')

        # The two lines live on different axes, so build one legend by hand.
        twin_handles = inventory_lines + pnl_lines
        ax3.legend(twin_handles, [line.get_label() for line in twin_handles], loc='upper left')

        ax3.set_title('Inventory and PnL Over Time')
        ax3.set_xlabel('Tick')
        ax3.grid(True, alpha=0.3)

        # Plot 4: Reward and Fills
        ax4 = axes[1, 1]
        ax4.plot(ticks, df['reward'], 'green', label='Reward', linewidth=2)

        # Select filled rows with boolean masks rather than treating tick
        # numbers as index labels, so this stays correct for any index.
        bid_filled = df['filled_bid'].astype(bool)
        ask_filled = df['filled_ask'].astype(bool)

        if bid_filled.any():
            ax4.scatter(df.loc[bid_filled, 'tick'], df.loc[bid_filled, 'reward'],
                        color='blue', s=50, label='Bid Filled', marker='^')
        if ask_filled.any():
            ax4.scatter(df.loc[ask_filled, 'tick'], df.loc[ask_filled, 'reward'],
                        color='red', s=50, label='Ask Filled', marker='v')

        ax4.set_title('Reward and Order Fills')
        ax4.set_xlabel('Tick')
        ax4.set_ylabel('Reward')
        ax4.legend()
        ax4.grid(True, alpha=0.3)

        # Plot 5: Agent vs Market Prices (Scatter)
        ax5 = axes[2, 0]
        ax5.scatter(df['market_bid'], df['agent_bid'], alpha=0.6, label='Bid Prices', s=30)
        ax5.scatter(df['market_ask'], df['agent_ask'], alpha=0.6, label='Ask Prices', s=30)

        # Diagonal reference line: points on it mean agent price == market price.
        min_price = min(df['market_bid'].min(), df['agent_bid'].min())
        max_price = max(df['market_ask'].max(), df['agent_ask'].max())
        ax5.plot([min_price, max_price], [min_price, max_price], 'k--', alpha=0.5, label='Market = Agent')

        ax5.set_title('Agent Prices vs Market Prices')
        ax5.set_xlabel('Market Price')
        ax5.set_ylabel('Agent Price')
        ax5.legend()
        ax5.grid(True, alpha=0.3)

        # Plot 6: Price Difference Analysis
        ax6 = axes[2, 1]
        bid_diff = df['agent_bid'] - df['market_bid']
        ask_diff = df['agent_ask'] - df['market_ask']

        ax6.plot(ticks, bid_diff, 'g-', label='Bid Difference (Agent - Market)', linewidth=2)
        ax6.plot(ticks, ask_diff, 'r-', label='Ask Difference (Agent - Market)', linewidth=2)
        ax6.axhline(y=0, color='black', linestyle='-', alpha=0.3)

        ax6.set_title('Price Differences (Agent - Market)')
        ax6.set_xlabel('Tick')
        ax6.set_ylabel('Price Difference')
        ax6.legend()
        ax6.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Text summary of the same data
        print(f"\n📊 {agent_name} Behavior Summary:")
        print(f"   📈 Market Midprice Range: {df['market_midprice'].min():.2f} - {df['market_midprice'].max():.2f}")
        print(f"   💰 Agent Bid Range: {df['agent_bid'].min():.2f} - {df['agent_bid'].max():.2f}")
        print(f"   💰 Agent Ask Range: {df['agent_ask'].min():.2f} - {df['agent_ask'].max():.2f}")
        print(f"   📦 Final Inventory: {df['inventory'].iloc[-1]:.2f}")
        print(f"   💵 Final PnL: {df['pnl'].iloc[-1]:.2f}")
        print(f"   🎯 Total Fills: {df['filled_bid'].sum()} bids, {df['filled_ask'].sum()} asks")
        print(f"   📊 Avg Spread: Market={market_spread.mean():.3f}, Agent={agent_spread.mean():.3f}")

        return df

# Build the single visualizer instance shared by the analysis cells below.
# Construction loads the default config and creates the feed and environment.
detailed_viz = DetailedAgentVisualizer()
print("✅ Detailed agent visualizer initialized!")


In [None]:
# FixedSpread walkthrough: run the strategy for 30 ticks, plot the recorded
# behaviour, then print a short plain-language description of it.
print("🔍 ANALYZING FIXED SPREAD AGENT BEHAVIOR", "=" * 50, sep="\n")

fixed_spread_agent = FixedSpreadStrategy(max_inventory=100.0, spread_ticks=2)

# Record the per-tick rows first; the plotting call turns them into a DataFrame.
tick_data = detailed_viz.visualize_agent_behavior(
    fixed_spread_agent,
    agent_name="FixedSpread",
    num_ticks=30,
)
df = detailed_viz.plot_detailed_behavior(tick_data, agent_name="FixedSpread")

print(
    "\n🎯 What the FixedSpread agent is doing:",
    "   - It sets a fixed spread around the market midprice",
    "   - Bid = Midprice - spread_ticks/2",
    "   - Ask = Midprice + spread_ticks/2",
    "   - It doesn't adapt to market conditions",
    "   - It just maintains a constant spread",
    sep="\n",
)


In [None]:
# Random walkthrough: same procedure as the other agents, with a seeded
# RandomStrategy so the run is repeatable.
print("\n🔍 ANALYZING RANDOM AGENT BEHAVIOR", "=" * 50, sep="\n")

random_agent = RandomStrategy(seed=42)

# Collect 30 ticks of behaviour, then hand the rows to the plotting routine.
tick_data_random = detailed_viz.visualize_agent_behavior(
    random_agent,
    agent_name="Random",
    num_ticks=30,
)
df_random = detailed_viz.plot_detailed_behavior(tick_data_random, agent_name="Random")

print(
    "\n🎯 What the Random agent is doing:",
    "   - It randomly sets bid and ask prices",
    "   - No strategy, just random decisions",
    "   - Sometimes sets prices above/below market",
    "   - Very unpredictable behavior",
    sep="\n",
)


In [None]:
# InventoryMeanReversion walkthrough: run the strategy for 30 ticks, plot it,
# and print a short description of the behaviour.
print("\n🔍 ANALYZING INVENTORY MEAN REVERSION AGENT BEHAVIOR", "=" * 50, sep="\n")

inventory_agent = InventoryMeanReversionStrategy(max_inventory=100.0, base_spread=2)

# Gather the per-tick rows, then render the six-panel figure from them.
tick_data_inventory = detailed_viz.visualize_agent_behavior(
    inventory_agent,
    agent_name="InventoryMeanReversion",
    num_ticks=30,
)
df_inventory = detailed_viz.plot_detailed_behavior(
    tick_data_inventory,
    agent_name="InventoryMeanReversion",
)

print(
    "\n🎯 What the InventoryMeanReversion agent is doing:",
    "   - It adjusts prices based on inventory level",
    "   - When inventory is high (long position), it lowers ask prices to sell",
    "   - When inventory is low (short position), it raises bid prices to buy",
    "   - It tries to keep inventory close to zero",
    "   - This is a smart strategy for market making!",
    sep="\n",
)


In [None]:
# Create a side-by-side comparison of all agents
def compare_agent_behaviors():
    """Run the three baseline strategies and plot them side by side.

    Each strategy is run for up to 20 ticks through the module-level
    ``detailed_viz`` visualizer, and the recorded rows are turned into one
    DataFrame per agent. A 2x3 figure then compares quotes, spreads,
    inventory, PnL and reward, with a summary table in the last panel.

    Agents whose run produced no rows are left out of the figure but are
    still present (as empty DataFrames) in the returned mapping.

    Returns:
        dict mapping agent name to the ``pandas.DataFrame`` of its tick data.
    """
    print("🔄 COMPARING ALL AGENT BEHAVIORS SIDE BY SIDE")
    print("=" * 60)

    agents = {
        'FixedSpread': FixedSpreadStrategy(spread_ticks=2, max_inventory=100.0),
        'Random': RandomStrategy(seed=42),
        'InventoryMeanReversion': InventoryMeanReversionStrategy(base_spread=2, max_inventory=100.0)
    }
    colors = {'FixedSpread': 'blue', 'Random': 'red', 'InventoryMeanReversion': 'green'}

    # Collect data from all agents
    all_data = {}
    for agent_name, agent in agents.items():
        print(f"\n📊 Analyzing {agent_name}...")
        tick_data = detailed_viz.visualize_agent_behavior(agent, num_ticks=20, agent_name=agent_name)
        all_data[agent_name] = pd.DataFrame(tick_data)

    # An empty DataFrame has no columns, so indexing it below would raise;
    # only agents that actually produced rows are plotted.
    plottable = {name: df for name, df in all_data.items() if not df.empty}
    if not plottable:
        print("❌ No tick data to visualize")
        return all_data

    fig, axes = plt.subplots(2, 3, figsize=(24, 12))
    fig.suptitle('Agent Behavior Comparison - Side by Side', fontsize=16, fontweight='bold')

    # Plot 1: Price Levels Comparison
    ax1 = axes[0, 0]
    for position, (agent_name, df) in enumerate(plottable.items()):
        # Label the midprice once so it gets a single legend entry.
        ax1.plot(df['tick'], df['market_midprice'], 'k-', alpha=0.5, linewidth=1,
                 label='Market Midprice' if position == 0 else None)
        ax1.plot(df['tick'], df['agent_bid'], 'o-', color=colors[agent_name],
                 label=f'{agent_name} Bid', markersize=3, alpha=0.7)
        ax1.plot(df['tick'], df['agent_ask'], 's-', color=colors[agent_name],
                 label=f'{agent_name} Ask', markersize=3, alpha=0.7)
    ax1.set_title('Price Levels Comparison')
    ax1.set_xlabel('Tick')
    ax1.set_ylabel('Price')
    ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax1.grid(True, alpha=0.3)

    # Plot 2: Spread Comparison
    ax2 = axes[0, 1]
    for agent_name, df in plottable.items():
        agent_spread = df['agent_ask'] - df['agent_bid']
        ax2.plot(df['tick'], agent_spread, 'o-', color=colors[agent_name],
                 label=f'{agent_name} Spread', linewidth=2)
    ax2.set_title('Spread Comparison')
    ax2.set_xlabel('Tick')
    ax2.set_ylabel('Spread')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # Plots 3-5: one line per agent for a single recorded column each.
    metric_panels = [
        (axes[0, 2], 'inventory', 'Inventory'),
        (axes[1, 0], 'pnl', 'PnL'),
        (axes[1, 1], 'reward', 'Reward'),
    ]
    for ax, column, title in metric_panels:
        for agent_name, df in plottable.items():
            ax.plot(df['tick'], df[column], 'o-', color=colors[agent_name],
                    label=f'{agent_name} {title}', linewidth=2)
        ax.set_title(f'{title} Comparison')
        ax.set_xlabel('Tick')
        ax.set_ylabel(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Plot 6: Summary Statistics (rendered as a table, so hide the axes)
    ax6 = axes[1, 2]
    ax6.axis('off')

    summary_data = [
        [
            agent_name,
            f"{df['agent_bid'].mean():.2f}",
            f"{df['agent_ask'].mean():.2f}",
            f"{(df['agent_ask'] - df['agent_bid']).mean():.2f}",
            f"{df['inventory'].iloc[-1]:.1f}",
            f"{df['pnl'].iloc[-1]:.1f}",
        ]
        for agent_name, df in plottable.items()
    ]

    table = ax6.table(cellText=summary_data,
                      colLabels=['Agent', 'Avg Bid', 'Avg Ask', 'Avg Spread', 'Final Inv', 'Final PnL'],
                      cellLoc='center',
                      loc='center',
                      bbox=[0, 0, 1, 1])
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 2)
    ax6.set_title('Summary Statistics', fontweight='bold', pad=20)

    plt.tight_layout()
    plt.show()

    return all_data

# Run the comparison and keep the returned per-agent DataFrames (keyed by
# agent name) for further inspection.
comparison_data = compare_agent_behaviors()
print("\n✅ Agent behavior comparison complete!")
