# Experiment 006: Find Better Complete Snapshot

Strategy: The ensemble approach is broken (5 failed submissions). Instead, we look for a COMPLETE snapshot that:
1. Has a better (lower) score than the baseline (70.676102)
2. Is used AS-IS, without mixing configurations
3. Should pass Kaggle validation, since it comes from a known-good source

In [1]:
import sys
# Put /home/code at the front of the module search path before the
# `from utils import ...` line below runs.
# NOTE(review): presumably `utils` lives in /home/code — confirm.
sys.path.insert(0, '/home/code')

import pandas as pd
import numpy as np
import os
import glob
# Project-local helpers used by the cells below to validate and score submissions.
from utils import load_submission, score_submission, is_valid_submission
import json

# Only reached if every import above succeeded.
print("Utilities loaded successfully!")

Utilities loaded successfully!


In [2]:
# Find all CSV files in snapshots (recursive search below snapshot_dir).
snapshot_dir = '/home/nonroot/snapshots/santa-2025/'
all_csvs = glob.glob(f'{snapshot_dir}/**/*.csv', recursive=True)
print(f"Found {len(all_csvs)} CSV files in snapshots")

# Filter to valid submissions only.
# Files that cannot be read or validated are still skipped (best-effort scan),
# but they are recorded in `unreadable_csvs` instead of being silently dropped,
# so the gap between "found" and "valid" can be explained afterwards.
valid_submissions = []
unreadable_csvs = []  # (path, repr(error)) for every file that raised
for csv_path in all_csvs:
    try:
        df = pd.read_csv(csv_path)
        if is_valid_submission(df):
            valid_submissions.append(csv_path)
    except Exception as e:
        unreadable_csvs.append((csv_path, repr(e)))

print(f"Found {len(valid_submissions)} valid submission files")
if unreadable_csvs:
    print(f"Skipped {len(unreadable_csvs)} files that raised while being read or validated")
    # Show a handful of examples; the full list stays available in `unreadable_csvs`.
    for failed_path, failed_error in unreadable_csvs[:5]:
        print(f"  {failed_path}: {failed_error}")

Found 3392 CSV files in snapshots


Found 3353 valid submission files


In [3]:
# Score each complete submission and find the best ones.
# `scores` collects (score, path) tuples; it is sorted by a later cell.
scores = []
failed_scoring = []  # (path, repr(error)) for every submission that could not be scored

for i, csv_path in enumerate(valid_submissions):
    try:
        df = pd.read_csv(csv_path)
        # NOTE(review): overlap checking is switched off here, so presumably a
        # low score does not by itself mean the submission is valid — confirm
        # against utils.score_submission.
        score, _, _ = score_submission(df, check_overlaps=False)
        scores.append((score, csv_path))
    except Exception as e:
        # Keep the scan best-effort, but remember what failed and why.
        failed_scoring.append((csv_path, repr(e)))

    # Progress is reported outside the try so a failure on the 100th item
    # does not suppress the progress line.
    if (i + 1) % 100 == 0:
        print(f"Processed {i + 1}/{len(valid_submissions)} submissions...")

print(f"\nScored {len(scores)} submissions")
if failed_scoring:
    print(f"Failed to score {len(failed_scoring)} submissions")
    # Show a handful of examples; the full list stays available in `failed_scoring`.
    for failed_path, failed_error in failed_scoring[:5]:
        print(f"  {failed_path}: {failed_error}")

Processed 100/3353 submissions...


Processed 200/3353 submissions...


Processed 300/3353 submissions...


Processed 400/3353 submissions...


Processed 500/3353 submissions...


Processed 600/3353 submissions...


Processed 700/3353 submissions...


Processed 800/3353 submissions...


Processed 900/3353 submissions...


Processed 1000/3353 submissions...


Processed 1100/3353 submissions...


Processed 1200/3353 submissions...


Processed 1300/3353 submissions...






Processed 1400/3353 submissions...


Processed 1500/3353 submissions...


Processed 1600/3353 submissions...


Processed 1700/3353 submissions...


Processed 1800/3353 submissions...


Processed 1900/3353 submissions...


Processed 2000/3353 submissions...


Processed 2100/3353 submissions...


Processed 2200/3353 submissions...


Processed 2300/3353 submissions...


Processed 2400/3353 submissions...


Processed 2500/3353 submissions...


Processed 2600/3353 submissions...


Processed 2700/3353 submissions...


Processed 2800/3353 submissions...


Processed 2900/3353 submissions...


Processed 3000/3353 submissions...


Processed 3100/3353 submissions...


Processed 3200/3353 submissions...


Processed 3300/3353 submissions...



Scored 3346 submissions


In [4]:
# Rank in place, lowest score first (lower is better). The sort is in place
# on purpose: scores[0] is the best entry from here on.
scores.sort(key=lambda entry: entry[0])

# Print the 20 leading entries, shortening each path to its last three components.
top_entries = scores[:20]
print("Top 20 best complete submissions:")
print("-" * 80)
for rank, (score, path) in enumerate(top_entries, start=1):
    path_tail = path.split('/')[-3:]
    short_path = '/'.join(path_tail)
    print(f"{rank:2d}. Score: {score:.6f} - {short_path}")

# Compare the winner against the baseline score.
best_found = scores[0][0]
print(f"\nBaseline score: 70.676102")
print(f"Best found: {best_found:.6f}")
print(f"Improvement over baseline: {70.676102 - best_found:.6f}")

Top 20 best complete submissions:
--------------------------------------------------------------------------------
 1. Score: 34.338312 - 21122900208/code/submission_sa_parallel.csv
 2. Score: 34.338312 - 21122900208/code/submission_sa.csv
 3. Score: 39.508442 - 21145963314/code/submission.csv
 4. Score: 51.423527 - code/submission_candidates/candidate_004.csv
 5. Score: 51.663965 - experiments/004_jonathanchan_optimizer/submission_opt.csv
 6. Score: 67.727119 - code/experiments/submission_v21.csv
 7. Score: 67.772662 - code/experiments/submission.csv
 8. Score: 67.772662 - experiments/001_baseline/ensemble_submission.csv
 9. Score: 69.653351 - experiments/005_cpp_fixed_overlap/test_invalid.csv
10. Score: 69.653351 - experiments/004_cpp_optimizer/optimized20.csv
11. Score: 69.653351 - experiments/004_cpp_optimizer/optimized21.csv
12. Score: 69.653351 - code/submission_candidates/candidate_002.csv
13. Score: 69.684578 - experiments/004_cpp_optimizer/optimized19.csv
14. Score: 69.703316 

In [5]:
# Check if the best submission is different from our baseline.
# Relies on `scores` already being sorted ascending, so scores[0] is the best entry.
best_score, best_path = scores[0]
print(f"\nBest submission: {best_path}")
print(f"Score: {best_score:.6f}")

# Load and compare with baseline
baseline_path = '/home/code/experiments/000_baseline/submission.csv'
baseline_df = pd.read_csv(baseline_path)
best_df = pd.read_csv(best_path)

# Check if they're the same
if baseline_df.equals(best_df):
    print("\nBest submission is IDENTICAL to baseline.")
else:
    print("\nBest submission is DIFFERENT from baseline!")
    # `!=` between DataFrames raises ValueError unless both index and columns
    # are identical, so only do the element-wise count when labels line up.
    same_labels = (
        baseline_df.index.equals(best_df.index)
        and baseline_df.columns.equals(best_df.columns)
    )
    if same_labels:
        # Check how many values differ
        diff_count = (baseline_df != best_df).sum().sum()
        print(f"Number of differing values: {diff_count}")
    else:
        print(
            f"Frames are not identically labelled "
            f"(baseline shape {baseline_df.shape}, best shape {best_df.shape}); "
            f"skipping element-wise comparison."
        )


Best submission: /home/nonroot/snapshots/santa-2025/21122900208/code/submission_sa_parallel.csv
Score: 34.338312

Best submission is DIFFERENT from baseline!
Number of differing values: 55745


In [6]:
# If best is different and better, use it; otherwise fall back to the baseline.
# Either way the chosen file is copied into the experiment folder and the
# submission folder, and a metrics.json is written next to it.
#
# NOTE(review): the choice is made on score alone; nothing in this cell
# validates the chosen file before it is copied to /home/submission.
import shutil

BASELINE_SCORE = 70.676102
MIN_IMPROVEMENT = 0.0001  # At least 0.0001 improvement
work_dir = '/home/code/experiments/006_find_better_snapshot'
submission_targets = (f'{work_dir}/submission.csv', '/home/submission/submission.csv')

found_better = best_score < BASELINE_SCORE - MIN_IMPROVEMENT

if found_better:
    print(f"\nFound better submission!")
    print(f"Score: {best_score:.6f} (improvement: {BASELINE_SCORE - best_score:.6f})")
    source_path = best_path
    metrics = {
        'cv_score': best_score,
        'baseline_score': BASELINE_SCORE,
        'improvement': BASELINE_SCORE - best_score,
        'source_path': best_path
    }
else:
    print(f"\nNo better complete submission found.")
    print(f"Best score: {best_score:.6f}")
    print(f"Baseline score: {BASELINE_SCORE:.6f}")
    # Use baseline as fallback
    source_path = baseline_path
    metrics = {
        'cv_score': BASELINE_SCORE,
        'baseline_score': BASELINE_SCORE,
        'improvement': 0.0,
        'notes': 'No better complete snapshot found, using baseline'
    }

# Copy to experiment folder and submission folder. Create the target
# directories first so a fresh checkout does not fail inside shutil.copy.
for target in submission_targets:
    os.makedirs(os.path.dirname(target), exist_ok=True)
    shutil.copy(source_path, target)
if found_better:
    print(f"Copied to {work_dir}/submission.csv and /home/submission/submission.csv")

# Save metrics
with open(f'{work_dir}/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

if found_better:
    print(f"Metrics saved: {metrics}")
else:
    print(f"Using baseline as fallback.")


Found better submission!
Score: 34.338312 (improvement: 36.337790)
Copied to /home/code/experiments/006_find_better_snapshot/submission.csv and /home/submission/submission.csv
Metrics saved: {'cv_score': 34.33831234020458, 'baseline_score': 70.676102, 'improvement': 36.33778965979542, 'source_path': '/home/nonroot/snapshots/santa-2025/21122900208/code/submission_sa_parallel.csv'}


In [7]:
# Summary: assemble the report lines first, then emit them in one print call.
rule = "=" * 60
gap_to_target = best_score - 68.888293
gap_percent = gap_to_target / 68.888293 * 100

summary_lines = [
    rule,
    "EXPERIMENT 006: FIND BETTER SNAPSHOT SUMMARY",
    rule,
    f"Total submissions scanned: {len(valid_submissions)}",
    f"Baseline score: 70.676102",
    f"Best complete snapshot: {best_score:.6f}",
    f"Improvement: {70.676102 - best_score:.6f}",
    # The leading newline leaves a blank line before the target section.
    f"\nTarget: 68.888293",
    f"Gap to target: {gap_to_target:.6f} ({gap_percent:.2f}%)",
    rule,
]
print("\n".join(summary_lines))

EXPERIMENT 006: FIND BETTER SNAPSHOT SUMMARY
Total submissions scanned: 3353
Baseline score: 70.676102
Best complete snapshot: 34.338312
Improvement: 36.337790

Target: 68.888293
Gap to target: -34.549981 (-50.15%)


In [8]:
# This score seems too good to be true! Let's verify the submission.
# This cell shows the frame's shape and first rows and runs the overlap check.
# NOTE(review): nothing here checks that all N values are present — the only
# automated check is verify_submission_no_overlaps.

print("Verifying the best submission...")
print(f"Path: {best_path}")
print(f"Shape: {best_df.shape}")

# Check the structure
# (Single backslash: the original "\\n" printed a literal backslash-n.)
print("\nFirst 10 rows:")
print(best_df.head(10))

# Check for overlaps using the project helper
from utils import verify_submission_no_overlaps

print("\nChecking for overlaps...")
is_valid, overlapping_ns = verify_submission_no_overlaps(best_df)
print(f"Is valid (no overlaps): {is_valid}")
# Cap the listing at 20 entries so a badly broken file does not flood the output.
print(f"Overlapping N values: {overlapping_ns[:20] if len(overlapping_ns) > 20 else overlapping_ns}")

Verifying the best submission...
Path: /home/nonroot/snapshots/santa-2025/21122900208/code/submission_sa_parallel.csv
Shape: (20100, 4)
\nFirst 10 rows:
    id                      x                      y                  deg
0  1_0   s-48.196086194214246    s58.770984615214225                s45.0
1  2_0    s0.1540970696213559  s-0.03854074269479465  s203.62937773065684
2  2_1  s-0.15409706962137285   s-0.5614592573052241  s23.629377730656792
3  3_0     s1.123655816140301     s0.781101815992563    s111.125132292893
4  3_1      s1.23405569584216     s1.275999500663759     s66.370622269343
5  3_2     s0.641714640229075     s1.180458566613381  s155.13405193710082
6  4_0  s-0.32474778958937217    s0.1321099780881854   s156.3706221456364
7  4_1    s0.3153543462426377    s0.1321099780634755   s156.3706222692641
8  4_2    s0.3247477895923792   s-0.7321099780694755    s336.370622269264
9  4_3  s-0.31535434813481833    s-0.732109978094186  s336.37062214563645
\nChecking for overlaps...


Is valid (no overlaps): True
Overlapping N values: []


In [None]:
# The ID format is different! "1_0" vs "001_0"
# This means our scoring function is not finding trees correctly
# Let's check what N values are actually present

print("Checking ID format...")
print(f"Sample IDs from best submission: {best_df['id'].head(20).tolist()}")

print(f"Sample IDs from baseline: {baseline_df['id'].head(20).tolist()}")

# Check if the issue is the ID format: the N prefix is the part of the id
# before the first underscore.
print("\nUnique N prefixes in best submission:")
best_df['n_prefix'] = best_df['id'].str.split('_').str[0]
print(sorted(best_df['n_prefix'].unique())[:20])

print("\nUnique N prefixes in baseline:")
baseline_df['n_prefix'] = baseline_df['id'].str.split('_').str[0]
print(sorted(baseline_df['n_prefix'].unique())[:20])