# Experiment 1: Needle in Haystack Analysis

Analyzing the "Lost in the Middle" phenomenon: how the position of a fact within the context window affects retrieval accuracy.

In [None]:
# Import required libraries
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add the `src` directory (a sibling of the current working directory) to the
# module search path.
# NOTE(review): the location is derived from Path.cwd(), so it depends on where
# the kernel was started; no visible cell imports from `src` — confirm it is still needed.
sys.path.append(str(Path.cwd().parent / 'src'))

# Set plotting style
# NOTE(review): the 'seaborn-v0_8-*' style names appear to exist only in newer
# matplotlib releases — confirm against the pinned matplotlib version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('Set2')
# IPython magic: render matplotlib figures inline in the notebook output.
# Modern Jupyter does this by default, so the line is harmless but usually optional.
%matplotlib inline

## Load and Analyze Results

Load the experimental results and compute summary statistics by position.

In [None]:
# Locate the Experiment 1 results file under the parent of the working directory
exp1_path = Path.cwd().parent / 'results' / 'exp1' / 'results.json'

# Parse the JSON payload
with exp1_path.open('r', encoding='utf-8') as f:
    exp1_results = json.load(f)

# Per-position summary section of the results
exp1_summary = exp1_results['results_by_position']


def _position_row(position, stats):
    """Build one display row from a position name and its summary entry."""
    return {
        'Position': position.capitalize(),
        'Mean Accuracy': stats['mean_accuracy'],
        'Success Rate': stats['success_rate'],
        'Correct': stats['correct_count'],
        'Total': stats['total_count'],
    }


# One row per position, in the order the positions appear in the results
exp1_df = pd.DataFrame(
    [_position_row(position, stats) for position, stats in exp1_summary.items()]
)

# Plain-text table without the index column
print("Experiment 1 Summary:", exp1_df.to_string(index=False), sep="\n")

## Statistical Analysis

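Compare mean accuracy across positions and quantify the relative drop at the middle position. Note that this is a descriptive comparison; no formal significance test is computed.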

In [None]:
# Compare the middle position against the two edge positions.
# The conclusion is derived from the loaded numbers rather than hard-coded, so
# re-running the notebook on different results cannot print a finding that the
# data contradicts.
print("\nKey Finding:")
middle_acc = exp1_summary['middle']['mean_accuracy']
start_acc = exp1_summary['start']['mean_accuracy']
end_acc = exp1_summary['end']['mean_accuracy']

if middle_acc < min(start_acc, end_acc):
    # NOTE(review): "significantly" is descriptive here — this cell runs no
    # hypothesis test; the wording is kept so the expected-case output is unchanged.
    print(f"Middle position accuracy ({middle_acc:.3f}) is significantly lower than")
    print(f"Start ({start_acc:.3f}) and End ({end_acc:.3f}) positions.")
    print("This demonstrates the 'Lost in the Middle' phenomenon.")
else:
    print(f"Middle position accuracy ({middle_acc:.3f}) is not lower than both")
    print(f"Start ({start_acc:.3f}) and End ({end_acc:.3f}) positions.")
    print("These results do not show the 'Lost in the Middle' phenomenon.")

# Relative drop of the middle position versus the mean of the two edge positions
avg_edge_acc = (start_acc + end_acc) / 2
if avg_edge_acc != 0:
    degradation = ((avg_edge_acc - middle_acc) / avg_edge_acc) * 100
    print(f"\nMiddle position shows {degradation:.1f}% degradation compared to edge positions.")
else:
    # Both edge accuracies are zero: the ratio is undefined, so report that
    # instead of raising ZeroDivisionError. `degradation` stays defined for later cells.
    degradation = float('nan')
    print("\nEdge accuracy is zero; relative degradation is undefined.")

## Visualization

Display the pre-generated accuracy-by-position plot saved alongside the experiment results.

In [None]:
from IPython.display import Image, display

# Show the saved accuracy figure if it is on disk; otherwise report where it was expected
plot_path = Path.cwd().parent.joinpath('results', 'exp1', 'accuracy_by_position.png')
if not plot_path.exists():
    print(f"Plot not found at {plot_path}")
else:
    print("Accuracy by Position:")
    display(Image(filename=str(plot_path)))

## Conclusions

**Key Findings:**
1. Information positioned in the middle of the context window is retrieved significantly less accurately
2. Start and end positions show comparably high accuracy
3. This validates the "Lost in the Middle" phenomenon in LLM context processing

**Implications:**
- Critical information should be placed at the beginning or end of prompts
- Middle sections should contain less critical context
- RAG systems should place the most relevant retrieved chunks at the edges of the prompt