# Meta Analysis Workbook

This notebook walks through a meta-analysis of policy performance: it pulls the top policies and their evaluation results from the Observatory API, then analyzes how performance correlates across multiple evaluation tasks.

## Overview
- Query top policies from Observatory
- Evaluate policies across all specified tasks
- Analyze performance correlations
- Visualize results with heatmaps and PCA
- Save summary statistics (JSON) and export key figures (PNG)

In [1]:
# Add ipykernel, seaborn and scikit-learn to the project environment with uv.
! uv add ipykernel seaborn scikit-learn

[2mResolved [1m183 packages[0m [2min 12ms[0m[0m
[2mAudited [1m151 packages[0m [2min 997ms[0m[0m


In [2]:
# Import required libraries and make the project's `analysis` package importable.
import sys
from pathlib import Path

# Resolve the directory this code runs from. `__file__` is only present in the
# module globals when the code runs as a script; otherwise fall back to the
# current working directory.
notebook_dir = Path(__file__).parent if '__file__' in globals() else Path.cwd()
analysis_dir = notebook_dir.parent.parent / 'analysis'

# Put the *parent* of `analysis/` on sys.path so `import analysis.<module>` resolves.
if analysis_dir.exists():
    sys.path.insert(0, str(analysis_dir.parent))
    print(f"✅ Added {analysis_dir.parent} to Python path")
else:
    print(f"⚠️ Analysis directory not found at {analysis_dir}")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Dict, Optional
import warnings
# NOTE(review): this silences every warning category for the whole kernel session.
warnings.filterwarnings('ignore')

# Import our analysis modules
try:
    from analysis.observatory_client import ObservatoryClient
    from analysis.correlation_analyzer import CorrelationAnalyzer
    from analysis.meta_analyzer import MetaAnalyzer
    from analysis.visualization_utils import VisualizationUtils
    print("✅ All analysis modules imported successfully")
except ImportError as e:
    print(f"❌ Import error: {e}")
    # Every later cell instantiates one of these classes. Re-raise so a
    # "Run All" stops here with the real cause instead of failing further
    # down with an unrelated NameError.
    raise

✅ Added /Users/bullm/Documents/GitHub/metta/mettabook to Python path
✅ All analysis modules imported successfully


## 1. Initialize Observatory Client

In [3]:
# Create the Observatory client and confirm the API is reachable by listing
# the available environments.
client = ObservatoryClient()

try:
    envs = client.get_environments()
except Exception as e:
    print(f"❌ Connection failed: {e}")
    print("Please check your Observatory API configuration")
    # `envs` is consumed by the policy query in section 2; continuing without
    # it would only surface later as a NameError, so stop at the real failure.
    raise
else:
    print("✅ Connected to Observatory API")
    print(f"📊 Found {len(envs)} available environments")

✅ Connected to Observatory API
📊 Found 80 available environments


## 2. Query Top Policies

In [4]:
# --- Configuration -----------------------------------------------------------
TOP_N_POLICIES = 20   # number of policies to request
ENVIRONMENTS = envs   # environment list obtained when the client was initialised

# --- Query -------------------------------------------------------------------
# NOTE(review): the recorded run of this cell ended with an API 408
# ("Query execution timed out after 20 seconds"). Whether a smaller
# environment list or a smaller N avoids that is not visible here — confirm
# against the Observatory client.
print(f"🔍 Querying top {TOP_N_POLICIES} policies...")
policies = client.get_top_policies(n=TOP_N_POLICIES, environments=ENVIRONMENTS)

print(f"✅ Found {len(policies)} policies")
print("\nTop 5 policies:")
# Number the preview from 1 for display.
for rank, entry in enumerate(policies[:5], start=1):
    print(f"  {rank}. {entry['name']} (ID: {entry['id']})")

🔍 Querying top 20 policies...
🔍 Running diagnostic query...


Exception: API request failed: 408 - {"detail":"Query execution timed out after 20 seconds"}

## 3. Evaluate Policies Across All Tasks

In [5]:
# Environments to evaluate on, requested from the client again for this step.
all_envs = client.get_environments()
print(f"📋 Evaluating across {len(all_envs)} environments")

# Collect the 'id' field of every policy returned by the top-policies query.
policy_ids = [entry['id'] for entry in policies]

# Pull the evaluation records for those policies over all environments.
print("🔄 Fetching evaluation data...")
eval_data = client.get_policy_evaluations(policy_ids=policy_ids, environments=all_envs)

print(f"✅ Retrieved {len(eval_data)} evaluation records")

📋 Evaluating across 80 environments


NameError: name 'policies' is not defined

## 4. Create Performance Matrix

In [None]:
# Load the evaluation records into a DataFrame.
df = pd.DataFrame(eval_data)

# Reshape to one row per policy_id and one column per environment; when a
# (policy, environment) pair has several 'value' entries they are averaged.
performance_matrix = pd.pivot_table(
    df,
    values='value',
    index='policy_id',
    columns='environment',
    aggfunc='mean',
)

print(f"📊 Performance matrix shape: {performance_matrix.shape}")
print(f"\nEnvironments: {list(performance_matrix.columns)}")
print(f"\nPolicies: {len(performance_matrix.index)}")

# Leave the preview as the cell's last expression so it renders as a table.
performance_matrix.head()

## 5. Correlation Analysis

In [None]:
# Build the analyzer and compute the correlation matrix from the performance matrix.
corr_analyzer = CorrelationAnalyzer()
correlation_matrix = corr_analyzer.calculate_correlations(performance_matrix)

# Headline numbers reported by the analyzer.
print("🔍 Correlation Analysis Results:")
print(f"\nAverage correlation: {corr_analyzer.get_average_correlation():.3f}")
print(f"Correlation range: {corr_analyzer.get_correlation_range()}")

# Three strongest and three weakest pairs, as returned by the analyzer.
most_corr = corr_analyzer.get_most_correlated_pairs(n=3)
least_corr = corr_analyzer.get_least_correlated_pairs(n=3)

# Both lists are printed in the same "<first> ↔ <second>: <coefficient>" layout.
for heading, ranked_pairs in (
    ("\nMost correlated pairs:", most_corr),
    ("\nLeast correlated pairs:", least_corr),
):
    print(heading)
    for task_pair, coeff in ranked_pairs:
        print(f"  {task_pair[0]} ↔ {task_pair[1]}: {coeff:.3f}")

## 6. Visualization

In [None]:
# Instantiate the project's visualization helper (not used further in this cell).
viz_utils = VisualizationUtils()

# Reset to matplotlib's default style, then set the default figure size.
plt.style.use('default')
plt.rcParams['figure.figsize'] = (12, 8)

# One 2x2 figure; give each panel a name instead of indexing the axes array.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
(ax_corr, ax_box), (ax_perf, ax_hist) = axes
title_font = dict(fontsize=14, fontweight='bold')

# Top-left: annotated correlation heatmap, diverging colormap centred on zero.
sns.heatmap(
    correlation_matrix,
    ax=ax_corr,
    annot=True,
    fmt='.2f',
    cmap='RdBu_r',
    center=0,
    square=True,
)
ax_corr.set_title('Task Performance Correlations', **title_font)

# Top-right: one box per column of the performance matrix.
performance_matrix.boxplot(ax=ax_box)
ax_box.set_title('Performance Distribution by Task', **title_font)
ax_box.set_ylabel('Performance Score')
ax_box.tick_params(axis='x', rotation=45)

# Bottom-left: raw performance values as a heatmap.
sns.heatmap(
    performance_matrix,
    ax=ax_perf,
    cmap='viridis',
    cbar_kws={'label': 'Performance'},
)
ax_perf.set_title('Policy Performance Heatmap', **title_font)
ax_perf.set_xlabel('Tasks')
ax_perf.set_ylabel('Policies')

# Bottom-right: histogram of the entries strictly above the diagonal (k=1),
# which leaves out the diagonal itself.
corr_array = correlation_matrix.values
corr_values = corr_array[np.triu_indices_from(corr_array, k=1)]
ax_hist.hist(corr_values, bins=20, alpha=0.7, edgecolor='black')
ax_hist.set_xlabel('Correlation Coefficient')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Correlation Distribution', **title_font)

plt.tight_layout()
plt.show()

## 7. PCA Analysis

In [None]:
# Run PCA on the performance matrix through the project's MetaAnalyzer, then
# plot the explained variance per component and the component loadings.
meta_analyzer = MetaAnalyzer()

# Perform PCA
pca_results = meta_analyzer.perform_pca(performance_matrix)

print("🔬 PCA Analysis Results:")
print(f"Explained variance ratio: {pca_results['explained_variance_ratio']}")
print(f"Cumulative explained variance: {np.cumsum(pca_results['explained_variance_ratio'])}")

# Plot PCA results
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Scree plot: explained variance ratio against 1-based component number.
n_components = len(pca_results['explained_variance_ratio'])
axes[0].plot(range(1, n_components + 1), pca_results['explained_variance_ratio'], 'bo-')
axes[0].set_xlabel('Principal Component')
axes[0].set_ylabel('Explained Variance Ratio')
axes[0].set_title('PCA Scree Plot', fontsize=14, fontweight='bold')
axes[0].grid(True)

# PCA loadings
loadings = np.asarray(pca_results['components'])
n_tasks = len(performance_matrix.columns)
# The y tick labels below are the task names and the x tick labels are PC
# numbers, so the image needs one row per task and one column per component.
# NOTE(review): the layout returned by `perform_pca` is not visible here. If it
# follows scikit-learn's `components_` convention (components x features) the
# array arrives transposed, so flip it when the shape is unambiguous. A square
# or unrecognised shape is left exactly as received.
if loadings.ndim == 2 and loadings.shape[0] != n_tasks and loadings.shape[1] == n_tasks:
    loadings = loadings.T

im = axes[1].imshow(loadings, cmap='RdBu_r', aspect='auto')
axes[1].set_xticks(range(loadings.shape[1]))
axes[1].set_xticklabels([f'PC{i+1}' for i in range(loadings.shape[1])])
axes[1].set_yticks(range(loadings.shape[0]))
axes[1].set_yticklabels(performance_matrix.columns)
axes[1].set_title('PCA Component Loadings', fontsize=14, fontweight='bold')
plt.colorbar(im, ax=axes[1])

plt.tight_layout()
plt.show()

## 8. Summary Statistics

In [None]:
# Ask the MetaAnalyzer for summary statistics over the three earlier results,
# print them, and save them to disk.
summary_stats = meta_analyzer.generate_summary_statistics(
    performance_matrix=performance_matrix,
    correlation_matrix=correlation_matrix,
    pca_results=pca_results
)

print("📈 Summary Statistics:")
print("=" * 50)
for key, value in summary_stats.items():
    # Floats are shown with three decimals; everything else uses its default repr.
    if isinstance(value, float):
        print(f"{key}: {value:.3f}")
    else:
        print(f"{key}: {value}")

# Save results
output_file = '../data/meta_analysis_results.json'
# Make sure the target directory exists before writing, as the export step
# does for its own output directory. Whether `save_results` creates it is not
# visible here, and creating it twice is harmless.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
meta_analyzer.save_results(summary_stats, output_file)
print(f"\n💾 Results saved to {output_file}")

## 9. Export Visualizations

In [None]:
# Write the three key figures to disk as 300-dpi PNG files.
viz_dir = '../visualizations/'
# Create the output directory with pathlib (imported in the notebook's import
# cell). This avoids relying on `os`, which the notebook does not import.
Path(viz_dir).mkdir(parents=True, exist_ok=True)

# Correlation heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='RdBu_r', center=0,
            square=True, fmt='.2f', ax=ax)
ax.set_title('Task Performance Correlations', fontsize=14, fontweight='bold')
fig.tight_layout()
fig.savefig(f'{viz_dir}correlation_heatmap.png', dpi=300, bbox_inches='tight')
plt.close(fig)  # close by handle so the figure is released and not shown inline

# Performance matrix
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(performance_matrix, cmap='viridis', cbar_kws={'label': 'Performance'}, ax=ax)
ax.set_title('Policy Performance Matrix', fontsize=14, fontweight='bold')
fig.tight_layout()
fig.savefig(f'{viz_dir}policy_performance.png', dpi=300, bbox_inches='tight')
plt.close(fig)

# PCA scree plot
fig, ax = plt.subplots(figsize=(8, 6))
n_components = len(pca_results['explained_variance_ratio'])
ax.plot(range(1, n_components + 1), pca_results['explained_variance_ratio'], 'bo-')
ax.set_xlabel('Principal Component')
ax.set_ylabel('Explained Variance Ratio')
ax.set_title('PCA Scree Plot', fontsize=14, fontweight='bold')
ax.grid(True)
fig.tight_layout()
fig.savefig(f'{viz_dir}pca_scree_plot.png', dpi=300, bbox_inches='tight')
plt.close(fig)

print(f"✅ Visualizations saved to {viz_dir}")