# Pilot Study Analysis

## Quick analysis of pilot data to validate the approach before running the full studies

**Goals:**
1. Verify metrics are calculating correctly
2. Check for expected patterns (even with low N)
3. Identify any methodological issues
4. Estimate effect sizes for power analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from pathlib import Path

# Plot appearance: seaborn's white-grid theme and a wide default canvas
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Directory holding the processed study outputs (relative path)
DATA_DIR = Path('..') / 'data' / 'processed'

## 1. Load Pilot Data

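Load the most recent pilot results from Study 1, i.e. the last `study1_results_*.csv` file in `../data/processed` when the filenames are sorted alphabetically.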

In [None]:
# Find the most recent pilot file. Files are ordered lexicographically by
# name, so "most recent" assumes the part matched by `*` is a sortable
# timestamp -- TODO confirm against the script that writes these files.
pilot_files = sorted(DATA_DIR.glob('study1_results_*.csv'))

if not pilot_files:
    # Nothing to analyse yet: tell the user how to produce the data.
    # `df` is left undefined here, so the cells below cannot run until the
    # pilot has been executed.
    print("❌ No pilot data found. Run: python3 src/studies/study_1_habituation.py --pilot")
else:
    latest_file = pilot_files[-1]
    df = pd.read_csv(latest_file)
    print(f"✅ Loaded: {latest_file.name}")
    print(f"\nShape: {df.shape}")
    print(f"\nColumns: {df.columns.tolist()}")
    # A bare `df.head()` inside an `else:` branch is never rendered (a
    # notebook only displays a cell's final top-level expression), so the
    # preview has to be printed explicitly.
    print(df.head())

## 2. Basic Descriptive Statistics

In [None]:
# Mean, standard deviation and sample count of each metric,
# broken down by model and condition
metric_columns = ['entropy', 'mtld', 'token_count']
grouped_metrics = df.groupby(['model', 'condition'])[metric_columns]
summary = grouped_metrics.agg(['mean', 'std', 'count'])
print(summary)

## 3. Visualize Key Metrics

In [None]:
# Entropy by condition: one box-plot panel per model
models = df['model'].unique()

# Size the grid from the data rather than assuming exactly three models: a
# fixed 1x3 grid raises IndexError as soon as a fourth model appears and
# leaves blank panels when there are fewer. Five inches per panel keeps the
# original (15, 5) canvas for three models.
n_panels = max(len(models), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)
axes = axes[0]  # squeeze=False always yields a 2-D array; use its single row

for ax, model in zip(axes, models):
    model_df = df[df['model'] == model]

    sns.boxplot(data=model_df, x='condition', y='entropy', ax=ax)
    ax.set_title(f'{model} - Entropy by Condition')
    ax.set_ylabel('Shannon Entropy (normalized)')

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists
figure_path = Path('../results/figures/pilot_entropy_comparison.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

## 4. Statistical Tests (Preliminary)

In [None]:
# Independent-samples t-test (repetitive vs. novel entropy) for each model,
# with Cohen's d as the accompanying effect size
print("="*60)
print("PRELIMINARY STATISTICAL TESTS (Pilot Data - Low Power)")
print("="*60 + "\n")

for model in df['model'].unique():
    model_df = df[df['model'] == model]

    rep = model_df[model_df['condition'] == 'repetitive']['entropy']
    nov = model_df[model_df['condition'] == 'novel']['entropy']

    # Called with scipy's defaults (equal-variance Student's t-test)
    t_stat, p_value = stats.ttest_ind(rep, nov)

    # Cohen's d, standardised by the root of the averaged sample variances
    pooled_std = np.sqrt((rep.var() + nov.var()) / 2)
    # The pooled SD is NaN when a group has fewer than two observations and
    # zero when neither group varies. In both cases d is undefined, so report
    # NaN instead of 0, which would read as a measured null effect.
    cohens_d = (rep.mean() - nov.mean()) / pooled_std if pooled_std > 0 else float('nan')

    print(f"Model: {model}")
    print(f"  Repetitive mean: {rep.mean():.4f}")
    print(f"  Novel mean:      {nov.mean():.4f}")
    print(f"  t-statistic:     {t_stat:.4f}")
    # '**' flags p < 0.05; a NaN p-value compares False and stays unflagged
    print(f"  p-value:         {p_value:.4f} {'**' if p_value < 0.05 else ''}")
    print(f"  Cohen's d:       {cohens_d:.4f}")
    print()

## 5. Time Series: Entropy Over Trials

In [None]:
# Plot the entropy trajectory over trials: one panel per model, one line per
# condition
models = df['model'].unique()

# Size the grid from the data rather than assuming exactly three models: a
# fixed 1x3 grid raises IndexError as soon as a fourth model appears and
# leaves blank panels when there are fewer. Five inches per panel keeps the
# original (15, 5) canvas for three models.
n_panels = max(len(models), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)
axes = axes[0]  # squeeze=False always yields a 2-D array; use its single row

for ax, model in zip(axes, models):
    model_df = df[df['model'] == model]

    for condition in ['repetitive', 'novel']:
        cond_df = model_df[model_df['condition'] == condition]
        # Points are connected in the order the rows appear in the data
        ax.plot(cond_df['prompt_index'], cond_df['entropy'],
                marker='o', label=condition, alpha=0.7)

    ax.set_xlabel('Trial Index')
    ax.set_ylabel('Entropy')
    ax.set_title(f'{model}')
    ax.legend()

# y > 1 places the figure title above the panels
plt.suptitle('Entropy Trajectories Over Trials (Pilot)', y=1.02)
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists
figure_path = Path('../results/figures/pilot_entropy_trajectories.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

## 6. Power Analysis for Full Study

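Estimate the required N per condition from the effect sizes observed in the pilot. Pilot effect sizes are noisy at low N, so treat these estimates as rough guides rather than precise targets.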

In [None]:
from statsmodels.stats.power import TTestIndPower

# Planned sample size per condition for the full study; the required N is
# compared against this value below.
PLANNED_N = 100

print("\n" + "="*60)
print("POWER ANALYSIS FOR FULL STUDY")
print("="*60 + "\n")

power_analysis = TTestIndPower()

for model in df['model'].unique():
    model_df = df[df['model'] == model]

    rep = model_df[model_df['condition'] == 'repetitive']['entropy']
    nov = model_df[model_df['condition'] == 'novel']['entropy']

    # Absolute Cohen's d, standardised by the root of the averaged sample
    # variances. A NaN or zero pooled SD collapses to 0, which marks the
    # effect size as unusable for the power calculation.
    pooled_std = np.sqrt((rep.var() + nov.var()) / 2)
    observed_d = abs((rep.mean() - nov.mean()) / pooled_std) if pooled_std > 0 else 0

    print(f"Model: {model}")
    if observed_d > 0:
        # Solve for the sample size that reaches power=0.80 at alpha=0.05
        required_n = power_analysis.solve_power(effect_size=observed_d, alpha=0.05, power=0.80)
        print(f"  Observed Cohen's d: {observed_d:.4f}")
        print(f"  Required N per condition (power=0.80): {int(np.ceil(required_n))}")
        print(f"  Planned N: {PLANNED_N}")
        print(f"  Status: {'✅ Adequate' if required_n <= PLANNED_N else '⚠️  Underpowered'}")
    else:
        # Report the skip explicitly so the model does not silently vanish
        # from the power report.
        print("  Effect size is zero or undefined (too few observations or no variance); power analysis skipped.")
    print()

## 7. Lexical Diversity (MTLD)

In [None]:
# Compare MTLD across conditions: one box-plot panel per model
models = df['model'].unique()

# Size the grid from the data rather than assuming exactly three models: a
# fixed 1x3 grid raises IndexError as soon as a fourth model appears and
# leaves blank panels when there are fewer. Five inches per panel keeps the
# original (15, 5) canvas for three models.
n_panels = max(len(models), 1)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)
axes = axes[0]  # squeeze=False always yields a 2-D array; use its single row

for ax, model in zip(axes, models):
    model_df = df[df['model'] == model]

    sns.boxplot(data=model_df, x='condition', y='mtld', ax=ax)
    ax.set_title(f'{model} - MTLD by Condition')
    ax.set_ylabel('MTLD Score')

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists
figure_path = Path('../results/figures/pilot_mtld_comparison.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

## 8. Conclusions from Pilot

**Questions to address:**
1. Do we see the expected direction of effects (even if not significant)?
2. Are metrics behaving as expected?
3. Any methodological issues to fix?
4. Is N=100 per condition adequate for the full study?

**Next steps:**
- Refine prompts if needed
- Adjust N if power analysis suggests
- Proceed to full Study 1

In [None]:
# Write the grouped summary table to the results folder as CSV
summary_path = '../results/pilot_summary_statistics.csv'
summary.to_csv(summary_path)
print("✅ Pilot analysis complete! Summary saved.")