# Action Trace Analysis - Stairways V3
## Analyzing Agent Behavior Patterns During Good vs Bad Episodes

This notebook analyzes agent behavior patterns to identify:
- Position holding patterns
- Trade timing and frequency
- Reward hacking detection (flip-flops, position gaming)
- P&L attribution to genuine market moves

**Usage**: Run after Phase 1A freeze-early validation tests

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Plot appearance: matplotlib stylesheet first, then the seaborn palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Notebook banner: title line followed by a 50-character rule.
for banner_line in ("📊 Action Trace Analysis - Stairways V3", "=" * 50):
    print(banner_line)

## 1. Data Loading and Preprocessing

In [None]:
# Configuration
# Inputs are read from the Phase 1A freeze-early run directory; every figure
# and the text summary produced by this notebook are written to RESULTS_PATH.
DIAGNOSTIC_PATH = Path('../diagnostic_runs/phase1a_freeze_early')
RESULTS_PATH = Path('../diagnostic_runs/phase3_actions')
# parents=True: a bare mkdir() raises FileNotFoundError when
# ../diagnostic_runs does not exist yet, which is exactly the situation in
# which the sample-data fallback below is supposed to keep the notebook usable.
RESULTS_PATH.mkdir(parents=True, exist_ok=True)

# Load action traces (generated during Phase 1A evaluation)
try:
    action_traces = pd.read_parquet(DIAGNOSTIC_PATH / 'action_traces.parquet')
except FileNotFoundError:
    print("❌ Action traces not found. Run Phase 1A freeze-early tests first.")
    print("   Expected file: diagnostic_runs/phase1a_freeze_early/action_traces.parquet")
    # Create sample data for demonstration: 100 synthetic episodes of 100
    # steps each, with a fixed seed so the demo output is reproducible.
    np.random.seed(42)
    n_samples = 10000
    action_traces = pd.DataFrame({
        'episode_id': np.repeat(range(100), 100),
        'step': np.tile(range(100), 100),
        # One reward per episode, repeated on each of its rows.
        'episode_reward': np.repeat(np.random.normal(0.2, 1.5, 100), 100),
        'nvda_position': np.random.choice([-1, 0, 1], n_samples, p=[0.3, 0.4, 0.3]),
        'msft_position': np.random.choice([-1, 0, 1], n_samples, p=[0.3, 0.4, 0.3]),
        'step_pnl': np.random.normal(0, 50, n_samples),
        'nvda_price': 485 + np.random.normal(0, 10, n_samples),
        'msft_price': 412 + np.random.normal(0, 8, n_samples),
        'action': np.random.randint(0, 5, n_samples),
        'timestamp': pd.date_range('2024-02-01', periods=n_samples, freq='1min')
    })
    print("📝 Using sample data for demonstration")
else:
    # Only the read itself sits in the try body, so a failure while printing
    # can never be mistaken for a missing file.
    print(f"✅ Loaded {len(action_traces)} action records")

print(f"📈 Data shape: {action_traces.shape}")
# The rest of the notebook never reads 'timestamp', so a trace file without
# it should not abort the run here.
if 'timestamp' in action_traces.columns:
    print(f"📅 Date range: {action_traces['timestamp'].min()} to {action_traces['timestamp'].max()}")
else:
    print("📅 Date range: unavailable (no 'timestamp' column)")

In [None]:
# Separate good vs bad episodes based on reward thresholds
GOOD_THRESHOLD = 0.5
BAD_THRESHOLD = 0.0

# Rows whose episode_reward is NaN satisfy none of the three comparisons and
# therefore land in no bucket.
good_episodes = action_traces[action_traces['episode_reward'] > GOOD_THRESHOLD]
bad_episodes = action_traces[action_traces['episode_reward'] < BAD_THRESHOLD]
neutral_episodes = action_traces[
    (action_traces['episode_reward'] >= BAD_THRESHOLD) &
    (action_traces['episode_reward'] <= GOOD_THRESHOLD)
]

print(f"🟢 Good episodes (reward > {GOOD_THRESHOLD}): {good_episodes['episode_id'].nunique()} episodes")
print(f"🔴 Bad episodes (reward < {BAD_THRESHOLD}): {bad_episodes['episode_id'].nunique()} episodes")
print(f"🟡 Neutral episodes: {neutral_episodes['episode_id'].nunique()} episodes")


def _count_position_changes(positions):
    """Count step-to-step position changes within a single episode.

    The first diff() value is always NaN (there is no previous step) and
    ``NaN != 0`` evaluates to True, so it is dropped; otherwise every episode
    would be credited with one change it never made.
    """
    return int(positions.diff().iloc[1:].ne(0).sum())


# Episode-level statistics
# Sort by step inside each episode so the change counts do not depend on the
# row order of the input file.
episode_stats = (
    action_traces
    .sort_values(['episode_id', 'step'])
    .groupby('episode_id')
    .agg(
        reward=('episode_reward', 'first'),
        episode_length=('step', 'count'),
        total_pnl=('step_pnl', 'sum'),
        pnl_volatility=('step_pnl', 'std'),
        nvda_changes=('nvda_position', _count_position_changes),
        msft_changes=('msft_position', _count_position_changes),
    )
    .round(3)
)

print(f"\n📊 Episode Statistics Summary:")
print(episode_stats.describe())

## 2. Position Pattern Analysis

In [None]:
def analyze_position_patterns(df, title, color='blue'):
    """Draw a 2x3 grid of position diagnostics for a subset of episodes.

    Panels: position histogram, rolling mean of absolute position changes,
    step P&L histograms split by NVDA and by MSFT position, NVDA position
    against per-episode NVDA price change, and action counts.

    Args:
        df: Rows to analyse. Reads 'episode_id', 'step', 'nvda_position',
            'msft_position' and 'step_pnl'; 'nvda_price' and 'action' are
            used only when present.
        title: Prefix for the figure title.
        color: Bar colour of the action-distribution panel.

    Returns:
        The matplotlib figure holding all six panels.
    """
    tickers = (
        ('nvda_position', 'NVDA', 'green'),
        ('msft_position', 'MSFT', 'blue'),
    )
    position_levels = [-1, 0, 1]

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle(f'{title} - Position Pattern Analysis', fontsize=16, fontweight='bold')

    # Panel 1: how often each position level is held, per ticker.
    dist_ax = axes[0, 0]
    for column, label, shade in tickers:
        dist_ax.hist(df[column], bins=[-1.5, -0.5, 0.5, 1.5], alpha=0.7,
                     label=label, color=shade, edgecolor='black')
    dist_ax.set_title('Position Distribution')
    dist_ax.set_xlabel('Position (-1: Short, 0: Neutral, 1: Long)')
    dist_ax.set_ylabel('Frequency')
    dist_ax.legend()
    dist_ax.set_xticks(position_levels)

    # Panel 2: flip-flop detection. Differences are taken inside each episode
    # so the boundary between two episodes is never counted as a change.
    ordered = df.copy().sort_values(['episode_id', 'step'])
    ordered['nvda_pos_change'] = ordered.groupby('episode_id')['nvda_position'].diff().abs()
    ordered['msft_pos_change'] = ordered.groupby('episode_id')['msft_position'].diff().abs()

    window = min(50, len(ordered) // 10)
    if window > 1:
        change_ax = axes[0, 1]
        change_series = (
            ('nvda_pos_change', 'NVDA', 'green'),
            ('msft_pos_change', 'MSFT', 'blue'),
        )
        for change_column, label, shade in change_series:
            smoothed = ordered[change_column].rolling(window, min_periods=1).mean()
            change_ax.plot(smoothed, label=label, color=shade, alpha=0.8)
        change_ax.set_title(f'Position Change Frequency (Rolling {window}-step avg)')
        change_ax.set_xlabel('Time Steps')
        change_ax.set_ylabel('Avg Position Changes')
        change_ax.legend()

    # Panels 3 and 4: step P&L split by the position held, one panel per ticker.
    for pnl_ax, (column, label, _) in zip((axes[0, 2], axes[1, 0]), tickers):
        for level in position_levels:
            pnl_at_level = df.loc[df[column] == level, 'step_pnl']
            if len(pnl_at_level) > 0:
                pnl_ax.hist(pnl_at_level, alpha=0.6, label=f'{label} Pos {level}', bins=20)
        pnl_ax.set_title(f'P&L Distribution by {label} Position')
        pnl_ax.set_xlabel('Step P&L')
        pnl_ax.set_ylabel('Frequency')
        pnl_ax.legend()

    # Panel 5: NVDA position against the price change into the same step.
    if 'nvda_price' in df.columns:
        ordered['nvda_price_change'] = ordered.groupby('episode_id')['nvda_price'].diff()
        with_change = ordered.dropna(subset=['nvda_price_change'])
        if len(with_change) > 0:
            scatter_ax = axes[1, 1]
            scatter_ax.scatter(with_change['nvda_position'], with_change['nvda_price_change'],
                               alpha=0.5, s=10, color='green')
            scatter_ax.set_title('NVDA Position vs Price Change')
            scatter_ax.set_xlabel('NVDA Position')
            scatter_ax.set_ylabel('NVDA Price Change')
            scatter_ax.set_xticks(position_levels)

    # Panel 6: how often each action id was chosen.
    if 'action' in df.columns:
        per_action = df['action'].value_counts().sort_index()
        action_ax = axes[1, 2]
        action_ax.bar(per_action.index, per_action.values, color=color, alpha=0.7)
        action_ax.set_title('Action Distribution')
        action_ax.set_xlabel('Action ID')
        action_ax.set_ylabel('Frequency')

    plt.tight_layout()
    return fig

# Position-pattern figure for the good-episode subset (skipped when empty).
if len(good_episodes) == 0:
    print("⚠️ No good episodes found for analysis")
else:
    good_fig = analyze_position_patterns(
        good_episodes,
        "Good Episodes (Reward > 0.5)",
        color='green',
    )
    good_fig.savefig(RESULTS_PATH / 'good_episodes_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Position-pattern figure for the bad-episode subset (skipped when empty).
if len(bad_episodes) == 0:
    print("⚠️ No bad episodes found for analysis")
else:
    bad_fig = analyze_position_patterns(
        bad_episodes,
        "Bad Episodes (Reward < 0)",
        color='red',
    )
    bad_fig.savefig(RESULTS_PATH / 'bad_episodes_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()

## 3. Reward Hacking Detection

In [None]:
def _count_flips(positions):
    """Count step-to-step changes in one episode's position series."""
    # The first diff() value is always NaN (no previous step) and NaN != 0 is
    # True; dropping it avoids crediting every episode with a phantom flip.
    return int(positions.diff().iloc[1:].ne(0).sum())


def detect_reward_hacking(df, title):
    """Detect potential reward hacking via per-episode position flip rates.

    For every episode with more than one row, the NVDA and MSFT position
    changes between consecutive steps are counted and normalised as
    ``(nvda_flips + msft_flips) / (2 * total_steps)``. Episodes above a flip
    rate of 0.1 are reported, and a two-panel figure (flip-rate histogram,
    flip rate against episode reward) is saved under RESULTS_PATH and shown.

    Args:
        df: Rows to analyse. Reads 'episode_id', 'step', 'nvda_position',
            'msft_position' and 'episode_reward'.
        title: Label used in the printed output, the plot titles and, lower
            cased with spaces replaced by underscores, the output file name.

    Returns:
        A DataFrame with one row per analysed episode and the columns
        'episode_id', 'flip_rate', 'nvda_flips', 'msft_flips', 'total_steps'
        and 'episode_reward'. When no episode qualifies the frame is empty
        but still carries those columns, so callers can index 'flip_rate'
        without a KeyError.
    """
    print(f"\n🔍 {title} - Reward Hacking Analysis")
    print("=" * 60)

    high_flip_threshold = 0.1  # More than 10% flip rate
    result_columns = ['episode_id', 'flip_rate', 'nvda_flips', 'msft_flips',
                      'total_steps', 'episode_reward']

    # Calculate flip-flop rates per episode. One groupby pass replaces a
    # boolean filter of the whole frame per episode; sort=False keeps the
    # episodes in order of first appearance.
    episode_flip_rates = []
    for episode_id, episode_data in df.groupby('episode_id', sort=False):
        total_steps = len(episode_data)
        if total_steps < 2:
            continue  # a single step cannot contain a position change

        episode_data = episode_data.sort_values('step')
        nvda_flips = _count_flips(episode_data['nvda_position'])
        msft_flips = _count_flips(episode_data['msft_position'])

        episode_flip_rates.append({
            'episode_id': episode_id,
            # Normalize by positions and steps
            'flip_rate': (nvda_flips + msft_flips) / (2 * total_steps),
            'nvda_flips': nvda_flips,
            'msft_flips': msft_flips,
            'total_steps': total_steps,
            'episode_reward': episode_data['episode_reward'].iloc[0]
        })

    flip_df = pd.DataFrame(episode_flip_rates, columns=result_columns)

    if flip_df.empty:
        print("❌ No episodes found for analysis")
        return flip_df

    # Statistics
    avg_flip_rate = flip_df['flip_rate'].mean()
    high_flip_episodes = flip_df[flip_df['flip_rate'] > high_flip_threshold]

    print(f"📊 Average flip rate: {avg_flip_rate:.3f}")
    print(f"⚠️  High flip-rate episodes (>10%): {len(high_flip_episodes)}")

    if len(high_flip_episodes) > 0:
        print(f"🚨 Potential reward hacking detected in {len(high_flip_episodes)} episodes")
        print("   Top flip-rate episodes:")
        top_flippers = high_flip_episodes.nlargest(5, 'flip_rate')
        # itertuples keeps each column's own dtype; iterrows would upcast the
        # episode id to float and print it as e.g. "3.0".
        for row in top_flippers.itertuples(index=False):
            print(f"   Episode {row.episode_id}: {row.flip_rate:.3f} flip rate, reward: {row.episode_reward:.3f}")
    else:
        print("✅ No obvious flip-flop patterns detected")

    # Visualization
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Flip rate distribution
    axes[0].hist(flip_df['flip_rate'], bins=20, alpha=0.7, edgecolor='black')
    axes[0].axvline(avg_flip_rate, color='red', linestyle='--', label=f'Mean: {avg_flip_rate:.3f}')
    axes[0].axvline(high_flip_threshold, color='orange', linestyle='--', label='High flip threshold (0.1)')
    axes[0].set_title(f'{title}: Flip Rate Distribution')
    axes[0].set_xlabel('Flip Rate (position changes per step)')
    axes[0].set_ylabel('Number of Episodes')
    axes[0].legend()

    # Flip rate vs reward correlation
    axes[1].scatter(flip_df['flip_rate'], flip_df['episode_reward'], alpha=0.6)
    axes[1].set_title(f'{title}: Flip Rate vs Episode Reward')
    axes[1].set_xlabel('Flip Rate')
    axes[1].set_ylabel('Episode Reward')

    # Add correlation coefficient
    correlation = flip_df['flip_rate'].corr(flip_df['episode_reward'])
    axes[1].text(0.05, 0.95, f'Correlation: {correlation:.3f}',
                 transform=axes[1].transAxes,
                 bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    plt.tight_layout()
    fig.savefig(RESULTS_PATH / f'{title.lower().replace(" ", "_")}_flip_analysis.png',
                dpi=300, bbox_inches='tight')
    plt.show()

    return flip_df

# Flip-rate analysis per subset; each result name is bound only when that
# subset has rows.
if len(good_episodes) != 0:
    good_flip_analysis = detect_reward_hacking(df=good_episodes, title="Good Episodes")

if len(bad_episodes) != 0:
    bad_flip_analysis = detect_reward_hacking(df=bad_episodes, title="Bad Episodes")

## 4. Trade Timeline Analysis

In [None]:
def analyze_trade_timeline(df, title, max_episodes=5):
    """Plot cumulative P&L and positions over time for the top-reward episodes.

    The ``max_episodes`` episodes with the highest 'episode_reward' in ``df``
    are selected. Each gets one panel showing cumulative step P&L (left axis)
    and NVDA/MSFT positions as bars (right axis). The figure is saved under
    RESULTS_PATH and shown, and a one-line summary per episode is printed.

    Args:
        df: Rows to analyse. Reads 'episode_id', 'episode_reward', 'step',
            'step_pnl', 'nvda_position' and 'msft_position'.
        title: Label used in the printed header and, lower cased with spaces
            replaced by underscores, in the output file name.
        max_episodes: Upper bound on the number of episodes plotted.

    Returns:
        None.
    """
    print(f"\n📈 {title} - Trade Timeline Analysis")
    print("=" * 60)

    # Select representative episodes
    episode_rewards = df.groupby('episode_id')['episode_reward'].first().sort_values(ascending=False)
    selected_episodes = episode_rewards.head(max_episodes).index

    # Size the figure by the episodes that actually exist: asking for more
    # than are available used to leave blank panels in the saved image, and a
    # zero-row subplot grid is rejected by matplotlib.
    panel_count = len(selected_episodes)
    if panel_count == 0:
        print("❌ No episodes available for timeline analysis")
        return

    # squeeze=False always yields a 2-D axes array, so the single-panel case
    # needs no special handling.
    fig, axes = plt.subplots(panel_count, 1, figsize=(15, 3 * panel_count), squeeze=False)

    summary_rows = []
    for ax, episode_id in zip(axes[:, 0], selected_episodes):
        episode_data = df[df['episode_id'] == episode_id].sort_values('step')

        # Positions share the x axis but get their own y axis.
        ax2 = ax.twinx()

        # P&L line
        cumulative_pnl = episode_data['step_pnl'].cumsum()
        ax.plot(episode_data['step'], cumulative_pnl, 'b-', linewidth=2, label='Cumulative P&L')
        ax.set_ylabel('Cumulative P&L', color='b')
        ax.tick_params(axis='y', labelcolor='b')

        # Position bars
        # NOTE(review): MSFT is stacked on top of NVDA; with opposite-signed
        # positions the two bars overlap. Confirm this is the intended view.
        ax2.bar(episode_data['step'], episode_data['nvda_position'],
                alpha=0.3, color='green', label='NVDA Position', width=0.8)
        ax2.bar(episode_data['step'], episode_data['msft_position'],
                alpha=0.3, color='blue', label='MSFT Position', width=0.8,
                bottom=episode_data['nvda_position'])
        ax2.set_ylabel('Position', color='g')
        ax2.tick_params(axis='y', labelcolor='g')
        ax2.set_ylim(-2.5, 2.5)

        # Title and labels
        episode_reward = episode_data['episode_reward'].iloc[0]
        final_pnl = cumulative_pnl.iloc[-1]
        ax.set_title(f'Episode {episode_id}: Reward={episode_reward:.3f}, Final P&L={final_pnl:.2f}')
        ax.set_xlabel('Step')

        # Add legends
        ax.legend(loc='upper left')
        ax2.legend(loc='upper right')

        # Add grid
        ax.grid(True, alpha=0.3)

        # Collect the summary figures now so each episode is filtered once.
        avg_position = (
            episode_data['nvda_position'].abs().mean()
            + episode_data['msft_position'].abs().mean()
        ) / 2
        summary_rows.append(
            (episode_id, episode_reward, episode_data['step_pnl'].sum(), avg_position)
        )

    plt.tight_layout()
    fig.savefig(RESULTS_PATH / f'{title.lower().replace(" ", "_")}_timeline.png',
                dpi=300, bbox_inches='tight')
    plt.show()

    # Summary statistics
    print(f"\n📊 Timeline Analysis Summary:")
    for episode_id, reward, total_pnl, avg_position in summary_rows:
        print(f"  Episode {episode_id}: Reward={reward:.3f}, Total P&L={total_pnl:.2f}, Avg |Position|={avg_position:.3f}")

# Timelines for the three highest-reward episodes of each non-empty subset.
if len(good_episodes) != 0:
    analyze_trade_timeline(df=good_episodes, title="Good Episodes", max_episodes=3)

if len(bad_episodes) != 0:
    analyze_trade_timeline(df=bad_episodes, title="Bad Episodes", max_episodes=3)

## 5. Market Alignment Analysis

In [None]:
def _directional_hit_rate(alignment):
    """Return the share of decided steps whose alignment is positive.

    A step is "decided" when its position x price-change product is non-zero,
    i.e. a position was held and the price moved. Flat positions and
    unchanged prices are neither aligned nor opposed, so they are left out of
    the denominator. Returns NaN when there is no decided step.
    """
    decided = alignment.dropna()
    decided = decided[decided != 0]
    if decided.empty:
        return float('nan')
    return float((decided > 0).mean())


def analyze_market_alignment(df, title):
    """Measure how often held positions agree with the price move.

    Per episode, the change of each price from the previous step is
    multiplied by the position recorded at the same step; a positive product
    means position and price change share a sign. The reported rate is the
    fraction of positive products among steps with a non-zero product.
    Including flat steps in the denominator would cap the rate at the share
    of time the agent is invested, so an often-flat agent would be labelled
    as opposing the market.

    NOTE(review): the position at a step is paired with the price change
    *into* that step. Whether the position should be lagged by one step
    depends on how the traces are recorded - confirm against the environment.

    A four-panel figure (two scatters, two histograms) is saved under
    RESULTS_PATH and shown.

    Args:
        df: Rows to analyse. Reads 'episode_id', 'step', 'nvda_position',
            'msft_position', 'nvda_price' and 'msft_price'.
        title: Label used in the printed header and, lower cased with spaces
            replaced by underscores, in the output file name.

    Returns:
        ``{'nvda_alignment': float, 'msft_alignment': float}`` (a value is
        NaN when that ticker has no decided step), or None when the price
        columns are missing or no price change can be computed.
    """
    print(f"\n🎯 {title} - Market Alignment Analysis")
    print("=" * 60)

    if 'nvda_price' not in df.columns or 'msft_price' not in df.columns:
        print("❌ Price data not available for market alignment analysis")
        return

    tickers = ('nvda', 'msft')
    panel_colors = {'nvda': 'green', 'msft': 'blue'}

    # Calculate price changes and position alignment
    df_analysis = df.copy().sort_values(['episode_id', 'step'])
    for ticker in tickers:
        # Differences are taken inside each episode so an episode boundary
        # never produces a spurious price jump.
        df_analysis[f'{ticker}_price_change'] = (
            df_analysis.groupby('episode_id')[f'{ticker}_price'].diff()
        )
        # Position-price alignment (positive when position and price change have same sign)
        df_analysis[f'{ticker}_alignment'] = (
            df_analysis[f'{ticker}_position'] * df_analysis[f'{ticker}_price_change']
        )

    # Remove NaN values (the first step of every episode has no price change)
    df_clean = df_analysis.dropna(subset=['nvda_price_change', 'msft_price_change'])

    if len(df_clean) == 0:
        print("❌ No valid price change data available")
        return

    # Calculate alignment statistics
    hit_rates = {
        ticker: _directional_hit_rate(df_clean[f'{ticker}_alignment'])
        for ticker in tickers
    }

    for ticker in tickers:
        rate = hit_rates[ticker]
        print(f"📊 {ticker.upper()} Position-Price Alignment: {rate:.3f} ({rate*100:.1f}% positive)")

    # Interpretation
    for ticker in tickers:
        name = ticker.upper()
        rate = hit_rates[ticker]
        if pd.isna(rate):
            print(f"🟡 {name}: No active positions with price movement - alignment undefined")
        elif rate > 0.6:
            print(f"✅ {name}: Good market alignment - positions generally follow price direction")
        elif rate < 0.4:
            print(f"⚠️ {name}: Poor market alignment - positions often oppose price direction")
        else:
            print(f"🟡 {name}: Neutral market alignment")

    # Visualization: top row scatters, bottom row alignment histograms
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    for column_index, ticker in enumerate(tickers):
        name = ticker.upper()
        price_change = df_clean[f'{ticker}_price_change']
        alignment = df_clean[f'{ticker}_alignment']

        scatter_ax = axes[0, column_index]
        scatter_ax.scatter(price_change, df_clean[f'{ticker}_position'],
                           alpha=0.5, s=10, c=alignment, cmap='RdYlGn')
        scatter_ax.set_title(f'{name}: Position vs Price Change')
        scatter_ax.set_xlabel(f'{name} Price Change')
        scatter_ax.set_ylabel(f'{name} Position')
        scatter_ax.axhline(y=0, color='k', linestyle='--', alpha=0.5)
        scatter_ax.axvline(x=0, color='k', linestyle='--', alpha=0.5)

        hist_ax = axes[1, column_index]
        hist_ax.hist(alignment, bins=30, alpha=0.7,
                     color=panel_colors[ticker], edgecolor='black')
        hist_ax.axvline(x=0, color='red', linestyle='--', linewidth=2)
        hist_ax.set_title(f'{name} Alignment Distribution')
        hist_ax.set_xlabel('Position × Price Change')
        hist_ax.set_ylabel('Frequency')

    plt.tight_layout()
    fig.savefig(RESULTS_PATH / f'{title.lower().replace(" ", "_")}_market_alignment.png',
                dpi=300, bbox_inches='tight')
    plt.show()

    return {
        'nvda_alignment': hit_rates['nvda'],
        'msft_alignment': hit_rates['msft']
    }

# Analyze market alignment for good and bad episodes
# analyze_market_alignment returns None when price data is missing or
# unusable. Such results are not stored, so a key in alignment_results always
# maps to a real result dict and the summary can subscript it safely.
alignment_results = {}

if len(good_episodes) > 0:
    good_alignment = analyze_market_alignment(good_episodes, "Good Episodes")
    if good_alignment is not None:
        alignment_results['good'] = good_alignment

if len(bad_episodes) > 0:
    bad_alignment = analyze_market_alignment(bad_episodes, "Bad Episodes")
    if bad_alignment is not None:
        alignment_results['bad'] = bad_alignment

## 6. Summary Report Generation

In [None]:
def _usable_flip_table(candidate):
    """Return ``candidate`` when it is a non-empty flip-rate table, else None."""
    if candidate is None or len(candidate) == 0:
        return None
    if 'flip_rate' not in candidate.columns:
        return None
    return candidate


def generate_summary_report():
    """Print the overall assessment and write it to action_trace_summary.txt.

    Combines the notebook-level results: the episode split (action_traces,
    good_episodes, bad_episodes), the flip-rate tables (good_flip_analysis,
    bad_flip_analysis, when they exist) and alignment_results. Takes no
    arguments and returns None; the text summary is written under
    RESULTS_PATH.
    """
    print("\n📋 ACTION TRACE ANALYSIS - SUMMARY REPORT")
    print("=" * 70)

    # The analysis results are notebook-level variables. Inside a function
    # locals() only contains this function's own names, so testing
    # `'good_flip_analysis' in locals()` was always False and the
    # reward-hacking assessment was silently skipped. Look the names up in the
    # module namespace instead; a missing name simply yields None.
    notebook_scope = globals()
    high_flip_threshold = 0.1

    def percent_of(count, total):
        """Return count as a percentage of total, or 0.0 when total is zero."""
        return count / total * 100 if total else 0.0

    # Episode statistics
    total_episodes = len(action_traces['episode_id'].unique())
    good_count = len(good_episodes['episode_id'].unique())
    bad_count = len(bad_episodes['episode_id'].unique())
    good_pct = percent_of(good_count, total_episodes)
    bad_pct = percent_of(bad_count, total_episodes)

    print(f"📊 EPISODE BREAKDOWN:")
    print(f"   Total Episodes: {total_episodes}")
    print(f"   Good Episodes (reward > 0.5): {good_count} ({good_pct:.1f}%)")
    print(f"   Bad Episodes (reward < 0): {bad_count} ({bad_pct:.1f}%)")

    # Reward hacking assessment
    print(f"\n🔍 REWARD HACKING ASSESSMENT:")

    good_flips = None
    if len(good_episodes) > 0:
        good_flips = _usable_flip_table(notebook_scope.get('good_flip_analysis'))
    bad_flips = None
    if len(bad_episodes) > 0:
        bad_flips = _usable_flip_table(notebook_scope.get('bad_flip_analysis'))

    good_flip_concern = False
    if good_flips is not None:
        good_high_flip = int((good_flips['flip_rate'] > high_flip_threshold).sum())
        good_avg_flip = good_flips['flip_rate'].mean()
        # More than 20% of the good episodes have high flip rates
        good_flip_concern = good_high_flip > good_count * 0.2
        print(f"   Good Episodes - Avg flip rate: {good_avg_flip:.3f}, High flip episodes: {good_high_flip}")

        if good_flip_concern:
            print(f"   🚨 CONCERN: {good_high_flip} good episodes show high flip rates (potential reward hacking)")
        else:
            print(f"   ✅ Good episodes show reasonable trading patterns")

    if bad_flips is not None:
        bad_high_flip = int((bad_flips['flip_rate'] > high_flip_threshold).sum())
        bad_avg_flip = bad_flips['flip_rate'].mean()
        print(f"   Bad Episodes - Avg flip rate: {bad_avg_flip:.3f}, High flip episodes: {bad_high_flip}")

    # Market alignment assessment
    print(f"\n🎯 MARKET ALIGNMENT ASSESSMENT:")

    # An entry can be None when the alignment analysis bailed out early;
    # treat that exactly like a missing entry instead of subscripting it.
    alignment = notebook_scope.get('alignment_results') or {}
    good_alignment = alignment.get('good')
    bad_alignment = alignment.get('bad')

    strong_alignment = False
    poor_alignment = False
    if good_alignment:
        good_nvda = good_alignment['nvda_alignment']
        good_msft = good_alignment['msft_alignment']
        strong_alignment = good_nvda > 0.6 and good_msft > 0.6
        poor_alignment = (not strong_alignment) and (good_nvda < 0.4 or good_msft < 0.4)
        print(f"   Good Episodes - NVDA alignment: {good_nvda:.3f}, MSFT alignment: {good_msft:.3f}")

        if strong_alignment:
            print(f"   ✅ Good episodes show strong market alignment (genuine trading)")
        elif poor_alignment:
            print(f"   ⚠️ Good episodes show poor market alignment (potential issues)")
        else:
            print(f"   🟡 Good episodes show mixed market alignment")

    if bad_alignment:
        bad_nvda = bad_alignment['nvda_alignment']
        bad_msft = bad_alignment['msft_alignment']
        print(f"   Bad Episodes - NVDA alignment: {bad_nvda:.3f}, MSFT alignment: {bad_msft:.3f}")

    # Overall assessment
    print(f"\n🎯 OVERALL ASSESSMENT:")

    concerns = []
    positives = []

    # Check for major issues
    if good_count < total_episodes * 0.1:
        concerns.append("Very few good episodes - potential fundamental issues")

    if good_flips is not None:
        if good_flip_concern:
            concerns.append("High flip rates in good episodes - potential reward hacking")
        else:
            positives.append("Good episodes show reasonable trading patterns")

    if strong_alignment:
        positives.append("Strong market alignment in good episodes")
    elif poor_alignment:
        concerns.append("Poor market alignment in good episodes")

    if concerns:
        print(f"   🚨 CONCERNS IDENTIFIED:")
        for concern in concerns:
            print(f"      - {concern}")

    if positives:
        print(f"   ✅ POSITIVE FINDINGS:")
        for positive in positives:
            print(f"      - {positive}")

    # Recommendations (first matching rule wins)
    print(f"\n💡 RECOMMENDATIONS:")

    if not concerns:
        print(f"   ✅ No major issues detected - proceed with Phase 2 temporal validation")
    elif any("reward hacking" in concern for concern in concerns):
        print(f"   🔧 Address reward system - implement stronger anti-gaming measures")
    elif any("market alignment" in concern for concern in concerns):
        print(f"   🔧 Review position logic - ensure trades align with market movements")
    else:
        print(f"   🔧 Investigate fundamental training issues before proceeding")

    # Save summary to file
    concern_lines = "\n".join(f'- {c}' for c in concerns)
    positive_lines = "\n".join(f'- {p}' for p in positives)
    summary_text = f"""
ACTION TRACE ANALYSIS SUMMARY
Generated: {pd.Timestamp.now()}

EPISODE BREAKDOWN:
- Total Episodes: {total_episodes}
- Good Episodes: {good_count} ({good_pct:.1f}%)
- Bad Episodes: {bad_count} ({bad_pct:.1f}%)

CONCERNS: {len(concerns)}
{concern_lines}

POSITIVES: {len(positives)}
{positive_lines}

FILES GENERATED:
- good_episodes_analysis.png
- bad_episodes_analysis.png
- good_episodes_flip_analysis.png
- bad_episodes_flip_analysis.png
- good_episodes_timeline.png
- bad_episodes_timeline.png
- good_episodes_market_alignment.png
- bad_episodes_market_alignment.png
"""

    summary_path = RESULTS_PATH / 'action_trace_summary.txt'
    # Explicit encoding keeps the file identical across platforms whose
    # default text encoding differs.
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write(summary_text)

    print(f"\n📁 Results saved to: {RESULTS_PATH}")
    print(f"📄 Summary report: {summary_path}")

# Generate the summary report
generate_summary_report()

## 7. Next Steps

Based on this analysis:

1. **If no major issues detected**: Proceed with Phase 2 temporal validation
2. **If reward hacking detected**: Implement stronger anti-gaming measures in reward system
3. **If poor market alignment**: Review position logic and ensure trades align with market movements
4. **If fundamental issues**: Investigate training process before proceeding

**Files generated in `diagnostic_runs/phase3_actions/`:**
- `good_episodes_analysis.png` - Position patterns for good episodes
- `bad_episodes_analysis.png` - Position patterns for bad episodes  
- `*_flip_analysis.png` - Reward hacking detection charts
- `*_timeline.png` - Trade timeline visualizations
- `*_market_alignment.png` - Market alignment analysis
- `action_trace_summary.txt` - Text summary of findings