# Experiment 2: Context Window Size Impact Analysis

This notebook analyzes how increasing the context window size affects accuracy, latency, and token usage.

In [None]:
# Import required libraries
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add src to path
sys.path.append(str(Path.cwd().parent / 'src'))

# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('Set2')
%matplotlib inline

## Load and Analyze Results

Load experimental results across different context sizes.

In [None]:
# Locate the Experiment 2 results file relative to the parent of the
# current working directory.
exp2_path = Path.cwd().parent.joinpath('results', 'exp2', 'results.json')

# Read and parse the JSON payload.
with exp2_path.open('r', encoding='utf-8') as f:
    exp2_results = json.loads(f.read())

# Build a DataFrame from the 'results_summary' entry of the payload.
summary_records = exp2_results['results_summary']
exp2_df = pd.DataFrame(summary_records)

# Show the full table without the index column.
print("Experiment 2 Summary:")
print(exp2_df.to_string(index=False))

## Performance Degradation Analysis

Quantify how performance degrades with context size.

In [None]:
def _relative_pct(delta, baseline):
    """Return ``delta`` as a percentage of ``baseline``.

    Returns NaN when ``baseline`` is zero, because a relative change is
    undefined there; this avoids a ZeroDivisionError or an inf result.
    """
    if baseline == 0:
        return float('nan')
    return (delta / baseline) * 100


# Compare the first and last rows of the summary table.
# NOTE(review): this assumes the rows of exp2_df are ordered from the smallest
# to the largest context size -- confirm against how results.json is written.
first_row = exp2_df.iloc[0]
last_row = exp2_df.iloc[-1]

# Calculate degradation (positive value = accuracy dropped)
initial_acc = first_row['accuracy_mean']
final_acc = last_row['accuracy_mean']
degradation = _relative_pct(initial_acc - final_acc, initial_acc)

print(f"\nAccuracy degradation: {degradation:.1f}% ({initial_acc:.3f} → {final_acc:.3f})")

# Latency increase (positive value = slower)
initial_lat = first_row['latency_mean']
final_lat = last_row['latency_mean']
lat_increase = _relative_pct(final_lat - initial_lat, initial_lat)

print(f"Latency increase: {lat_increase:.1f}% ({initial_lat:.2f}s → {final_lat:.2f}s)")

# Token usage increase (positive value = more tokens)
initial_tokens = first_row['tokens_mean']
final_tokens = last_row['tokens_mean']
token_increase = _relative_pct(final_tokens - initial_tokens, initial_tokens)

print(f"Token usage increase: {token_increase:.1f}% ({initial_tokens:.0f} → {final_tokens:.0f})")

## Visualization

Display the multi-panel impact analysis plot.

In [None]:
from IPython.display import Image, display

# Show the saved impact plot if it exists; otherwise report where it was
# expected to be.
plot_path = Path.cwd().parent.joinpath('results', 'exp2', 'context_size_impact.png')
if not plot_path.exists():
    print(f"Plot not found at {plot_path}")
else:
    print("Context Size Impact (Accuracy, Latency, Tokens):")
    display(Image(filename=str(plot_path)))

## Conclusions

**Key Findings:**
1. Accuracy degrades as context size increases (adding more documents hurts rather than helps)
2. Latency increases significantly with larger contexts
3. Token usage grows proportionally with document count

**Trade-offs:**
- **Small contexts (2-5 docs)**: High accuracy, low latency, efficient
- **Medium contexts (10-20 docs)**: Moderate accuracy, acceptable latency
- **Large contexts (50+ docs)**: Degraded accuracy, high latency, expensive

**Recommendations:**
- Use RAG to limit context size to the most relevant chunks
- Optimal context size depends on accuracy requirements and budget
- Monitor token limits to avoid truncation