# AILS Statistical Analysis

This notebook performs comprehensive statistical analysis for the AILS paper:
- Paired t-tests for performance comparison
- Cohen's d effect size calculations
- ANOVA for multi-group comparisons
- Confidence interval estimation
- Tukey HSD post-hoc analysis

**Author:** Amr Elshahed  
**Institution:** Universiti Sains Malaysia

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os
import glob

# Set style.
# The seaborn style is applied FIRST: seaborn's style presets carry their own
# 'font.family' entry, so calling sns.set_style() after setting the font would
# silently overwrite the serif font requested for the paper figures.
sns.set_style('whitegrid')
plt.rcParams['figure.dpi'] = 150
plt.rcParams['font.family'] = 'serif'

# Banner with the run timestamp (local time) so outputs can be traced back.
print("AILS Statistical Analysis Notebook")
print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 1. Load Experiment Data

In [None]:
# Load experiment results. Lookup order: synthetic result files first, then
# MovingAI result files; if neither exists, fabricate demonstration data.
results_dir = '../data/results/'

# Sorted so that the last entry is the lexicographically greatest file name.
synthetic_files = sorted(glob.glob(f'{results_dir}synthetic_results_*.csv'))
movingai_files = sorted(glob.glob(f'{results_dir}movingai_results_*.csv'))

# First non-empty family of result files (empty list when nothing was found).
available_files = synthetic_files or movingai_files

if available_files:
    latest_file = available_files[-1]
    df = pd.read_csv(latest_file)
    print(f"Loaded: {latest_file}")
else:
    # Generate sample data for demonstration
    print("No existing results found. Generating sample data...")
    np.random.seed(42)

    n_samples = 500
    methods = ['standard_astar', 'ails_base', 'ails_adaptive']

    # Baseline (time in ms, node count) per method; anything not listed
    # falls back to the 'ails_adaptive' baseline.
    baselines = {
        'standard_astar': (10, 1000),
        'ails_base': (7, 700),
    }

    data = []
    for method in methods:
        base_time, base_nodes = baselines.get(method, (5, 500))
        for i in range(n_samples):
            # The order of the four random draws is kept fixed so that the
            # seeded sample is reproducible.
            time_ms = base_time + np.random.exponential(base_time * 0.3)
            nodes_visited = int(base_nodes + np.random.exponential(base_nodes * 0.2))
            grid_size = np.random.choice([100, 200, 300])
            pattern = np.random.choice(['uniform', 'clustered'])

            data.append({
                'method': method,
                'pair_id': i,
                'time_ms': time_ms,
                'nodes_visited': nodes_visited,
                'path_found': True,
                'grid_size': grid_size,
                'pattern': pattern,
            })

    df = pd.DataFrame(data)
    print(f"Generated {len(df)} sample records")

# Keep only the runs in which a path was found.
df = df.loc[df['path_found']].copy()
print(f"\nTotal records: {len(df)}")
print(f"Methods: {df['method'].unique()}")

## 2. Descriptive Statistics

In [None]:
# Per-method summary table: location/spread statistics for both metrics,
# plus the sample count (reported once, on the time column).
time_aggregations = ['mean', 'std', 'median', 'min', 'max', 'count']
node_aggregations = ['mean', 'std', 'median', 'min', 'max']

by_method = df.groupby('method')
summary = by_method.agg({
    'time_ms': time_aggregations,
    'nodes_visited': node_aggregations,
})
summary = summary.round(3)

section_rule = "=" * 70
print("\n" + section_rule)
print("DESCRIPTIVE STATISTICS")
print(section_rule)
print(summary.to_string())

## 3. Paired t-Tests

Compare the AILS variants against standard A*, and against each other, using paired t-tests on runs matched by `pair_id`.

In [None]:
def paired_ttest(df, method1, method2, metric):
    """Perform a paired t-test between two methods on one metric.

    Observations are paired through the ``pair_id`` column. Each method is
    first reduced to exactly one value per ``pair_id`` (the mean, when a
    ``pair_id`` occurs more than once for a method), and the two series are
    then aligned on the pair ids they share. This guarantees that element
    ``k`` of both samples refers to the same pair even when the input
    contains duplicated, missing, or unordered pair ids.

    Args:
        df: DataFrame with at least the columns ``method``, ``pair_id`` and
            the column named by ``metric``.
        method1: Value of ``method`` identifying the first sample.
        method2: Value of ``method`` identifying the second sample.
        metric: Name of the numeric column to compare.

    Returns:
        ``None`` when fewer than 10 pair ids are shared by both methods.
        Otherwise a dict with the keys ``n_pairs``, ``mean_1``, ``mean_2``,
        ``mean_diff`` (method1 minus method2), ``t_statistic``, ``p_value``,
        ``cohens_d`` (mean difference divided by the sample standard
        deviation of the differences) and ``significant`` (p < 0.05).
    """
    # One value per pair_id for each method, indexed by pair_id.
    s1 = df.loc[df['method'] == method1].groupby('pair_id')[metric].mean()
    s2 = df.loc[df['method'] == method2].groupby('pair_id')[metric].mean()

    # Only pairs measured under both methods can enter a paired test.
    common_pairs = s1.index.intersection(s2.index)

    if len(common_pairs) < 10:
        return None

    # Selecting both series with the same index keeps them row-aligned.
    v1 = s1.loc[common_pairs].to_numpy()
    v2 = s2.loc[common_pairs].to_numpy()

    t_stat, p_value = stats.ttest_rel(v1, v2)

    # Cohen's d for paired samples
    diff = v1 - v2
    cohens_d = np.mean(diff) / np.std(diff, ddof=1)

    return {
        'n_pairs': len(common_pairs),
        'mean_1': np.mean(v1),
        'mean_2': np.mean(v2),
        'mean_diff': np.mean(diff),
        't_statistic': t_stat,
        'p_value': p_value,
        'cohens_d': cohens_d,
        'significant': p_value < 0.05
    }

# Run every configured pairwise comparison on both metrics, echo each
# result, and collect them into a single DataFrame (df_ttest).
section_rule = "=" * 70
print("\n" + section_rule)
print("PAIRED T-TESTS (vs Standard A*)")
print(section_rule)

comparisons = [
    ('standard_astar', 'ails_base'),
    ('standard_astar', 'ails_adaptive'),
    ('ails_base', 'ails_adaptive'),
]

ttest_results = []

for first_method, second_method in comparisons:
    comparison_label = f"{first_method} vs {second_method}"
    for metric in ('time_ms', 'nodes_visited'):
        outcome = paired_ttest(df, first_method, second_method, metric)
        if not outcome:
            # No result was produced for this comparison; skip it.
            continue

        outcome['comparison'] = comparison_label
        outcome['metric'] = metric
        ttest_results.append(outcome)

        report_lines = [
            f"\n{outcome['comparison']} ({metric}):",
            f"  n = {outcome['n_pairs']} pairs",
            f"  Mean difference: {outcome['mean_diff']:.4f}",
            f"  t-statistic: {outcome['t_statistic']:.4f}",
            f"  p-value: {outcome['p_value']:.6f}",
            f"  Cohen's d: {outcome['cohens_d']:.4f}",
            f"  Significant (p<0.05): {outcome['significant']}",
        ]
        print("\n".join(report_lines))

df_ttest = pd.DataFrame(ttest_results)

## 4. Effect Size Interpretation

In [None]:
def interpret_cohens_d(d):
    """Map a Cohen's d value to a qualitative effect-size label.

    The sign of ``d`` is ignored; only its magnitude is classified.

    Args:
        d: Cohen's d effect size (any real number, or NaN).

    Returns:
        ``"Negligible"`` for |d| < 0.2, ``"Small"`` for 0.2 <= |d| < 0.5,
        ``"Medium"`` for 0.5 <= |d| < 0.8, ``"Large"`` for |d| >= 0.8, and
        ``"Undefined"`` when ``d`` is NaN.
    """
    magnitude = abs(d)

    # NaN compares False against every threshold, so without this guard an
    # undefined effect size would fall through to the "Large" branch.
    if np.isnan(magnitude):
        return "Undefined"

    if magnitude < 0.2:
        return "Negligible"
    elif magnitude < 0.5:
        return "Small"
    elif magnitude < 0.8:
        return "Medium"
    else:
        return "Large"

# Print the threshold legend, then the label for every computed effect size.
section_rule = "=" * 70
print("\n" + section_rule)
print("EFFECT SIZE INTERPRETATION")
print(section_rule)

thresholds_legend = """
Cohen's d thresholds:
  |d| < 0.2:  Negligible effect
  0.2 <= |d| < 0.5:  Small effect
  0.5 <= |d| < 0.8:  Medium effect
  |d| >= 0.8:  Large effect
"""
print(thresholds_legend)

if len(df_ttest) > 0:
    print("\nEffect sizes from our experiments:")
    effect_rows = zip(df_ttest['comparison'], df_ttest['metric'], df_ttest['cohens_d'])
    for comparison_label, metric_name, effect in effect_rows:
        interp = interpret_cohens_d(effect)
        print(f"  {comparison_label} ({metric_name}): d={effect:.3f} ({interp})")

## 5. ANOVA Analysis

In [None]:
# One-way ANOVA across all methods, run separately for each metric.
section_rule = "=" * 70
print("\n" + section_rule)
print("ONE-WAY ANOVA")
print(section_rule)

# Methods in order of first appearance in the data.
method_names = df['method'].unique()

for metric in ('time_ms', 'nodes_visited'):
    # One array of observations per method.
    groups = [df.loc[df['method'] == name, metric].values for name in method_names]

    f_stat, p_value = stats.f_oneway(*groups)

    anova_lines = [
        f"\n{metric}:",
        f"  F-statistic: {f_stat:.4f}",
        f"  p-value: {p_value:.6f}",
        f"  Significant difference between groups: {p_value < 0.05}",
    ]
    print("\n".join(anova_lines))

## 6. Tukey HSD Post-hoc Analysis

In [None]:
# Tukey HSD post-hoc test across methods, run separately for each metric.
print("\n" + "="*70)
print("TUKEY HSD POST-HOC ANALYSIS")
print("="*70)

# Result tables keyed by metric, so that neither table is lost when the
# loop moves on to the next metric.
tukey_tables = {}

for metric in ['time_ms', 'nodes_visited']:
    print(f"\n{metric}:")

    tukey = pairwise_tukeyhsd(
        endog=df[metric],
        groups=df['method'],
        alpha=0.05
    )

    print(tukey)

    # Save as DataFrame. The table is read through the public summary()
    # accessor rather than the private `_results_table` attribute; the first
    # row of the table data holds the column headers.
    header, *body = tukey.summary().data
    tukey_df = pd.DataFrame(data=body, columns=header)
    tukey_tables[metric] = tukey_df

    print(f"\nTukey HSD Summary for {metric}:")
    print(tukey_df.to_string())

## 7. Confidence Intervals

In [None]:
def confidence_interval(data, confidence=0.95):
    """Return the mean of ``data`` and its two-sided t-based confidence bounds.

    Args:
        data: Sequence of numeric observations.
        confidence: Coverage level of the interval (0.95 gives a 95% CI).

    Returns:
        Tuple ``(mean, lower, upper)``, where the bounds sit one half-width
        (standard error times the Student-t critical value with
        ``len(data) - 1`` degrees of freedom) either side of the mean.
    """
    center = np.mean(data)
    degrees_of_freedom = len(data) - 1

    # Two-sided interval: the leftover probability is split over both tails.
    upper_quantile = (1 + confidence) / 2
    critical_value = stats.t.ppf(upper_quantile, degrees_of_freedom)

    half_width = stats.sem(data) * critical_value
    return center, center - half_width, center + half_width

# 95% confidence interval of the mean for every (method, metric) combination.
section_rule = "=" * 70
print("\n" + section_rule)
print("95% CONFIDENCE INTERVALS")
print(section_rule)

ci_results = []

for method in df['method'].unique():
    method_rows = df.loc[df['method'] == method]

    for metric in ('time_ms', 'nodes_visited'):
        center, lower, upper = confidence_interval(method_rows[metric])
        ci_results.append(dict(
            method=method,
            metric=metric,
            mean=center,
            ci_lower=lower,
            ci_upper=upper,
            ci_width=upper - lower,
        ))

df_ci = pd.DataFrame(ci_results)
print(df_ci.to_string())

## 8. Visualization

In [None]:
# Create visualizations: a 2x2 panel with per-method box plots on the top
# row and mean +/- 95% CI plots on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# 1. Box plot - Time
ax = axes[0, 0]
df.boxplot(column='time_ms', by='method', ax=ax)
ax.set_title('Execution Time by Method')
ax.set_xlabel('Method')
ax.set_ylabel('Time (ms)')
plt.suptitle('')  # clear the figure-level title

# 2. Box plot - Nodes
ax = axes[0, 1]
df.boxplot(column='nodes_visited', by='method', ax=ax)
ax.set_title('Nodes Visited by Method')
ax.set_xlabel('Method')
ax.set_ylabel('Nodes')
plt.suptitle('')  # clear the figure-level title

# 3. Confidence interval plot - Time
ax = axes[1, 0]
df_time_ci = df_ci[df_ci['metric'] == 'time_ms']
x = range(len(df_time_ci))
# yerr takes the distances below and above the mean, not the bounds.
ax.errorbar(x, df_time_ci['mean'],
            yerr=[df_time_ci['mean'] - df_time_ci['ci_lower'],
                  df_time_ci['ci_upper'] - df_time_ci['mean']],
            fmt='o', capsize=5, capthick=2, markersize=8)
ax.set_xticks(x)
ax.set_xticklabels(df_time_ci['method'], rotation=45, ha='right')
ax.set_ylabel('Time (ms)')
ax.set_title('Mean Time with 95% CI')

# 4. Confidence interval plot - Nodes
ax = axes[1, 1]
df_nodes_ci = df_ci[df_ci['metric'] == 'nodes_visited']
# Positions are derived from this subset itself rather than reused from the
# time plot, so the panel does not depend on both subsets having equal size.
x = range(len(df_nodes_ci))
ax.errorbar(x, df_nodes_ci['mean'],
            yerr=[df_nodes_ci['mean'] - df_nodes_ci['ci_lower'],
                  df_nodes_ci['ci_upper'] - df_nodes_ci['mean']],
            fmt='o', capsize=5, capthick=2, markersize=8)
ax.set_xticks(x)
ax.set_xticklabels(df_nodes_ci['method'], rotation=45, ha='right')
ax.set_ylabel('Nodes Visited')
ax.set_title('Mean Nodes with 95% CI')

plt.tight_layout()

# The results directory may not exist yet (e.g. when the notebook ran on
# generated sample data), and savefig does not create missing directories.
figure_path = '../data/results/statistical_analysis.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path, dpi=150, bbox_inches='tight')
plt.show()

## 9. Generate LaTeX Tables for Paper

In [None]:
# Render the paired t-test results as a LaTeX table, print it, and write it
# to disk. The table uses \toprule/\midrule/\bottomrule, so the paper's
# preamble needs the `booktabs` package.
print("\n" + "="*70)
print("LATEX TABLES FOR PAPER")
print("="*70)

# Statistical summary table
latex_header = """\\begin{table}[ht]
\\centering
\\caption{Statistical Comparison of Pathfinding Methods}
\\label{tab:statistics}
\\begin{tabular}{lcccccc}
\\toprule
Comparison & Metric & Mean Diff & t-stat & p-value & Cohen's d & Sig. \\\\
\\midrule
"""

latex_footer = """\\bottomrule
\\end{tabular}
\\end{table}"""

latex_rows = []
if len(df_ttest) > 0:
    for _, row in df_ttest.iterrows():
        sig = "Yes" if row['significant'] else "No"
        # A p-value below the printed precision would show up as "0.0000",
        # which misreports it as exactly zero; report it as a bound instead.
        if row['p_value'] < 0.0001:
            p_text = "$<$0.0001"
        else:
            p_text = f"{row['p_value']:.4f}"
        # Underscores are replaced because a bare '_' is not valid in LaTeX text.
        latex_rows.append(
            f"{row['comparison'].replace('_', ' ')} & {row['metric'].replace('_', ' ')} & {row['mean_diff']:.3f} & {row['t_statistic']:.2f} & {p_text} & {row['cohens_d']:.2f} & {sig} \\\\ \n"
        )

latex_stats = latex_header + "".join(latex_rows) + latex_footer

print(latex_stats)

# Save. The results directory may not exist yet (e.g. when the notebook ran
# on generated sample data), and open() does not create missing directories.
latex_path = '../data/results/statistical_tables.tex'
os.makedirs(os.path.dirname(latex_path), exist_ok=True)
with open(latex_path, 'w', encoding='utf-8') as f:
    f.write(latex_stats)
print(f"\nSaved to {latex_path}")

In [None]:
# Closing banner marking the end of the analysis run.
closing_rule = "=" * 70
print(f"\n{closing_rule}")
print("Statistical analysis complete!")
print(closing_rule)