# In [None]:
#@title MSMARCO minmax
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import sys

# Configuration
# All paths are relative, so they resolve against the current working directory.
# Training log read by load_data(): one JSON object per line.
HISTORY_FILE = "../MABhybrid/data/linucb_training_history.jsonl"
# Bandit dataset, one JSON object per line; the code in this file reads the
# 'rewards' and 'features' keys of each record.
DATA_FILE = "../MABhybrid/data/bandit_data_train.jsonl"
# Directory that receives the PNG figures written by the plot_* functions.
OUTPUT_DIR = "../MABhybrid/fig"
# Create the figure directory up front; exist_ok avoids an error on re-runs.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def _read_jsonl(path):
    """Return the JSON objects stored one per line in *path*, skipping blank lines."""
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def load_data(history_file=None, data_file=None, static_arm=2):
    """Load the training history and enrich it with baseline rewards.

    Each history row is matched to a ground-truth record through
    ``step % len(ground_truth)``, i.e. the training loop is assumed to have
    walked the data file sequentially (wrapping around if it ran longer).

    Args:
        history_file: Path of the JSONL training history. Defaults to the
            module-level ``HISTORY_FILE``.
        data_file: Path of the JSONL ground-truth file whose records carry a
            ``'rewards'`` list (one reward per arm). Defaults to ``DATA_FILE``.
        static_arm: Index of the arm used for the static baseline. The
            default, 2, is the arm this file labels as alpha=0.5.

    Returns:
        ``(df, ground_truth)`` where ``df`` is the history DataFrame with the
        added columns ``static_reward``, ``random_reward``, ``optimal_reward``,
        ``static_regret`` and ``random_regret``, and ``ground_truth`` is the
        list of parsed data records. Returns ``None`` (after printing an
        error) when a file is missing, the ground truth is empty, or the
        history has no usable ``'step'`` column.
    """
    # Resolve the defaults lazily so explicit paths never touch the globals.
    history_file = HISTORY_FILE if history_file is None else history_file
    data_file = DATA_FILE if data_file is None else data_file

    if not os.path.exists(history_file) or not os.path.exists(data_file):
        print("Error: Missing history or data file.")
        return None

    print("Loading training history...")
    df = pd.DataFrame(_read_jsonl(history_file))

    print("Loading ground truth data for baselines...")
    ground_truth = _read_jsonl(data_file)

    # Without ground truth the modulo below would divide by zero.
    n_data = len(ground_truth)
    if n_data == 0:
        print("Error: Ground truth data file is empty.")
        return None
    # An empty history yields a column-less DataFrame; fail with a message
    # instead of a KeyError.
    if df.empty or 'step' not in df.columns:
        print("Error: Training history is empty or has no 'step' column.")
        return None

    # Calculate Baseline Rewards
    # Map every step to its record in the data file (handling wrap-around).
    entries = [ground_truth[int(step) % n_data] for step in df['step']]

    # Static baseline: always pull the same arm.
    df['static_reward'] = [entry['rewards'][static_arm] for entry in entries]
    # Random baseline: its expected value is the mean over all arms.
    df['random_reward'] = [np.mean(entry['rewards']) for entry in entries]
    # Best achievable reward per step (reference point for regret).
    df['optimal_reward'] = [max(entry['rewards']) for entry in entries]

    # Calculate Regrets
    df['static_regret'] = df['optimal_reward'] - df['static_reward']
    df['random_regret'] = df['optimal_reward'] - df['random_reward']
    # The agent's own regret is read from the history's 'regret' column by
    # the plotting code; it is not recomputed here.

    return df, ground_truth

def plot_comparative_cumulative_reward(df):
    """
    Draw the running-average reward of the agent next to both baselines.

    Adds the columns 'agent_cum_mean', 'static_cum_mean' and
    'random_cum_mean' to ``df`` (expanding means of 'reward',
    'static_reward' and 'random_reward'), plots each against 'step', and
    writes the figure to OUTPUT_DIR as 'comparative_reward.png'.
    """
    # (source column, derived column, legend label, line styling)
    curves = (
        ('reward', 'agent_cum_mean', 'LinUCB Agent',
         {'color': 'blue', 'linewidth': 2}),
        ('static_reward', 'static_cum_mean', 'Static (α=0.5)',
         {'color': 'green', 'linestyle': '--'}),
        ('random_reward', 'random_cum_mean', 'Random',
         {'color': 'gray', 'linestyle': ':'}),
    )

    plt.figure(figsize=(10, 6))

    # First derive every cumulative average, then draw them.
    for source_col, mean_col, _label, _style in curves:
        df[mean_col] = df[source_col].expanding().mean()

    for _source, mean_col, legend_label, line_style in curves:
        plt.plot(df['step'], df[mean_col], label=legend_label, **line_style)

    plt.title('Performance Comparison: Cumulative Mean Reward (NDCG@10)')
    plt.xlabel('Training Steps')
    plt.ylabel('Average Reward (NDCG)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    target = os.path.join(OUTPUT_DIR, 'comparative_reward.png')
    plt.savefig(target)
    print(f"Saved Comparative Reward plot to {target}")
    plt.close()

def plot_comparative_cumulative_regret(df):
    """
    Draw the running total of regret for the agent and both baselines.

    Plots the cumulative sums of the 'regret', 'static_regret' and
    'random_regret' columns against 'step' (a lower curve means less lost
    reward) and writes the figure to OUTPUT_DIR as 'comparative_regret.png'.
    """
    # (regret column, legend label, line styling)
    curves = (
        ('regret', 'LinUCB Agent', {'color': 'red', 'linewidth': 2}),
        ('static_regret', 'Static (α=0.5)', {'color': 'green', 'linestyle': '--'}),
        ('random_regret', 'Random', {'color': 'gray', 'linestyle': ':'}),
    )

    plt.figure(figsize=(10, 6))

    for regret_col, legend_label, line_style in curves:
        plt.plot(df['step'], df[regret_col].cumsum(), label=legend_label, **line_style)

    plt.title('Performance Comparison: Cumulative Regret')
    plt.xlabel('Training Steps')
    plt.ylabel('Cumulative Regret (Lost NDCG)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    target = os.path.join(OUTPUT_DIR, 'comparative_regret.png')
    plt.savefig(target)
    print(f"Saved Comparative Regret plot to {target}")
    plt.close()

def plot_arm_distribution(df):
    """
    Draw a bar chart of how often each arm appears in 'chosen_arm'.

    Arms 0-4 are labelled with their alpha value; any other arm id is
    labelled with its plain string form. The figure is written to
    OUTPUT_DIR as 'arm_distribution.png'.
    """
    alpha_labels = {
        0: '0.0 (Dense)',
        1: '0.25',
        2: '0.5 (Hybrid)',
        3: '0.75',
        4: '1.0 (Sparse)',
    }

    plt.figure(figsize=(8, 6))

    # Frequency of each arm, ordered by arm id.
    per_arm = df['chosen_arm'].value_counts().sort_index()
    arm_ids = per_arm.index.tolist()
    frequencies = per_arm.values.tolist()
    tick_labels = [
        alpha_labels[arm_id] if arm_id in alpha_labels else str(arm_id)
        for arm_id in arm_ids
    ]

    plt.bar(arm_ids, frequencies, color='purple', alpha=0.7)
    plt.xticks(arm_ids, tick_labels, rotation=45)
    plt.title('Agent Choice Distribution')
    plt.xlabel('Arm (Alpha Value)')
    plt.ylabel('Count')
    plt.tight_layout()

    target = os.path.join(OUTPUT_DIR, 'arm_distribution.png')
    plt.savefig(target)
    plt.close()

def analyze_feature_importance(history_df, ground_truth_data, agent=None, max_steps=50000):
    """
    Replay the logged decisions into an agent and report its feature weights.

    Every replayed history row is matched to a ground-truth record through
    ``step % len(ground_truth_data)``; that record's 'features' together with
    the row's 'chosen_arm' and 'reward' are fed to ``agent.update``. The
    weight vector of arm 0 and arm 4 is then obtained by solving
    ``A[arm] @ theta = b[arm]`` and printed feature by feature.

    Args:
        history_df: DataFrame with 'step', 'chosen_arm' and 'reward' columns.
        ground_truth_data: Sequence of records that each hold a 'features'
            vector.
        agent: Object exposing ``update(arm, features, reward)`` and the
            per-arm containers ``A`` and ``b``. When omitted, a fresh
            ``LinUCBAgent(n_arms=5, n_features=5, alpha=0.1)`` is built.
        max_steps: Replay at most this many leading history rows to bound the
            run time on long histories; ``None`` replays all of them.

    Returns:
        Dict mapping the reported arm index (0 and 4) to its 1-D weight
        array, or an empty dict when there is no ground truth to replay.
    """
    print("\n--- Reconstructing Agent for Feature Analysis ---")

    # Without ground truth the modulo below would divide by zero.
    n_data = len(ground_truth_data)
    if n_data == 0:
        print("No ground truth data available; skipping feature analysis.")
        return {}

    # 5 features: Length, MaxIDF, AvgIDF, QFlag, Bias
    feature_names = ['Length', 'MaxIDF', 'AvgIDF', 'QFlag', 'Bias']

    if agent is None:
        # NOTE(review): LinUCBAgent is not imported anywhere in the visible
        # code; presumably it is defined earlier in the notebook or comes
        # from MABhybrid/linucb.py -- confirm it is in scope before calling.
        agent = LinUCBAgent(n_arms=5, n_features=len(feature_names), alpha=0.1)

    # Replay history to update agent. Iterating the three columns avoids
    # building a Series object for every single row.
    replay = history_df.iloc[:max_steps]
    for step, chosen_arm, reward in zip(replay['step'], replay['chosen_arm'], replay['reward']):
        # Get context from ground truth
        features = np.array(ground_truth_data[int(step) % n_data]['features'])
        agent.update(int(chosen_arm), features, float(reward))

    print("\n[Interpretability Result] Learned Feature Weights:")

    # Helper to compute, print and return one arm's weight vector
    def print_arm_weights(arm_idx, name):
        # Solving the linear system is cheaper and numerically safer than
        # forming the explicit inverse; ravel() flattens a column-vector b so
        # every weight formats as a plain float.
        theta = np.ravel(np.linalg.solve(agent.A[arm_idx], agent.b[arm_idx]))
        print(f"\nArm {arm_idx} ({name}):")
        for feature, weight in zip(feature_names, theta):
            print(f"  {feature:10s}: {weight: .4f}")
        return theta

    # Report the two pure strategies: Dense (Arm 0) vs Sparse (Arm 4)
    return {
        0: print_arm_weights(0, "Dense Retrieval"),
        4: print_arm_weights(4, "Sparse Retrieval"),
    }

def main():
    """Load the data, produce the three figures and print the feature weights.

    Returns early without plotting when load_data() yields None.
    """
    loaded = load_data()
    if loaded is None:
        return

    df, ground_truth = loaded
    print(f"Loaded {len(df)} training steps.")

    # Each plotting routine takes the enriched history and saves one figure.
    plotters = (
        plot_comparative_cumulative_reward,
        plot_comparative_cumulative_regret,
        plot_arm_distribution,
    )
    for make_plot in plotters:
        make_plot(df)

    analyze_feature_importance(df, ground_truth)

    print(f"\nAnalysis complete. Check {OUTPUT_DIR} for plots.")

# Run the analysis only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()