# Loop 5 Analysis: Comprehensive Ensemble Strategy

## Key Findings
1. The strict ensemble (70.615745) was REJECTED due to overlaps in N=3
2. The simple ensemble uses baseline N=3 coordinates and should be valid
3. Current ensemble only scans 4 directories (~30 CSVs) out of 107 directories (3339 CSVs)
4. Gap to target (68.89138): 1.72 points (2.5%)

In [None]:
import pandas as pd
import numpy as np
import glob
import os
from collections import defaultdict

# Count CSV files across all snapshot directories
SNAPSHOT_ROOT = '/home/nonroot/snapshots/santa-2025'


def snapshot_dir_of(path, root=SNAPSHOT_ROOT):
    """Return the name of the snapshot directory that contains *path*.

    The snapshot directory is the first path component below *root*.
    Returns None when *path* sits directly in *root* or lies outside it.
    """
    relative = os.path.relpath(path, root)
    first, sep, _ = relative.partition(os.sep)
    if not sep or first == os.pardir:
        return None
    return first


all_csvs = glob.glob(os.path.join(SNAPSHOT_ROOT, '**', '*.csv'), recursive=True)
print(f'Total CSV files in snapshots: {len(all_csvs)}')

# Count by directory
# Group by the component directly below SNAPSHOT_ROOT. Indexing the raw
# '/'-split absolute path at a fixed position is fragile: position 4 of a
# path under this root is 'santa-2025' itself, which would put every file
# into a single bucket.
dir_counts = defaultdict(int)
for f in all_csvs:
    snapshot = snapshot_dir_of(f)
    if snapshot is not None:
        dir_counts[snapshot] += 1

print(f'\nNumber of snapshot directories: {len(dir_counts)}')
print('\nTop 10 directories by CSV count:')
for d, c in sorted(dir_counts.items(), key=lambda x: -x[1])[:10]:
    print(f'  {d}: {c} CSVs')

In [None]:
# Check what the current ensemble is using vs what's available
current_dirs = [
    '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/',
    '/home/nonroot/snapshots/santa-2025/21116303805/code/experiments/',
    '/home/nonroot/snapshots/santa-2025/21328309254/code/experiments/',
    '/home/code/experiments/',
]

current_csvs = []
for d in current_dirs:
    current_csvs.extend(glob.glob(d + '**/*.csv', recursive=True))

# "Unused" means snapshot CSVs the ensemble does not read. A plain
# len(all_csvs) - len(current_csvs) is off because one of the current
# directories ('/home/code/experiments/') is not under the snapshot root,
# so its files are not part of all_csvs in the first place.
used_paths = set(current_csvs)
unused_csvs = [path for path in all_csvs if path not in used_paths]
# Guard the percentage: all_csvs is empty when the snapshot root is missing.
unused_pct = 100 * len(unused_csvs) / len(all_csvs) if all_csvs else 0.0

print(f'Current ensemble uses: {len(current_csvs)} CSVs')
print(f'Total available: {len(all_csvs)} CSVs')
print(f'Unused: {len(unused_csvs)} CSVs ({unused_pct:.1f}%)')

In [None]:
# Analyze the current best ensemble
import json

# Load the metrics file of the 006_safe_ensemble experiment and report its
# scores next to the target score.
with open('/home/code/experiments/006_safe_ensemble/metrics.json') as metrics_file:
    metrics = json.load(metrics_file)

print('Current Simple Ensemble:')
# The three stored values share one output format, so print them in a loop.
for label, key in (
    ('Score', 'cv_score'),
    ('Baseline', 'baseline_score'),
    ('Improvement', 'improvement'),
):
    print(f'  {label}: {metrics[key]:.6f}')
print(f'  Target: 68.89138')

# Absolute gap between the CV score and the target, and the same gap as a
# percentage of the target.
gap_to_target = metrics["cv_score"] - 68.89138
gap_pct = 100 * gap_to_target / 68.89138
print(f'  Gap: {gap_to_target:.6f} ({gap_pct:.2f}%)')

In [None]:
# Print every recorded submission and the number of submissions left,
# as stored in the session state file.
with open('/home/code/session_state.json') as state_file:
    state = json.load(state_file)

print('Submission History:')
for submission in state['submissions']:
    # 'error' and 'lb_score' are optional; fall back to placeholder text.
    error = submission.get('error', 'None')
    lb = submission.get('lb_score', 'N/A')
    name = submission['model_name']
    cv = submission['cv_score']
    print(f"  {name}: CV={cv:.6f}, LB={lb}, Error={error}")

remaining = state["remaining_submissions"]
print(f'\nRemaining submissions: {remaining}')
print('\nKey insight: The simple ensemble (70.615745) has NOT been submitted yet!')
print('The last submission (005_strict_ensemble) was REJECTED due to overlaps in N=3.')

In [None]:
# Back-of-envelope extrapolation: the current ensemble found a 0.06
# improvement from 30 CSVs; scale that per-CSV rate linearly to all 3339.
observed_improvement = 0.06
csvs_scanned = 30
csvs_available = 3339

improvement_per_csv = observed_improvement / csvs_scanned  # rough estimate
potential_from_all = improvement_per_csv * csvs_available

# Emit the report line by line; the empty strings produce the blank
# separator lines.
report_lines = (
    f'Rough estimate of potential improvement from all CSVs: {potential_from_all:.2f} points',
    '(This is a very rough upper bound - actual improvement will be less due to overlap)',
    '',
    'More realistic estimate:',
    '  - Current: 0.06 improvement from 30 CSVs',
    '  - If we find 10x more unique good configs: 0.6 improvement',
    '  - If we find 5x more: 0.3 improvement',
    '  - If we find 2x more: 0.12 improvement',
    '',
    'Even 0.1 improvement would be significant!',
)
for line in report_lines:
    print(line)