# Loop 3 Analysis: Check Better Ensemble from Snapshots

Found a potentially better ensemble (score 70.559) in the snapshots. It needs to be verified as valid (free of overlaps) before it can be used.

In [1]:
# `sys` is needed only to extend the module search path below.
import sys
# Put /home/code at the front of the search path so the `utils` import further
# down can be resolved (presumably utils.py lives there — confirm).
sys.path.insert(0, '/home/code')

# NOTE(review): pd and np are not referenced in the visible cells of this
# notebook; they may be leftovers — confirm before removing.
import pandas as pd
import numpy as np
# Project helpers used below to load a submission, score it, and verify that
# it contains no overlaps.
from utils import (
    load_submission, score_submission, verify_submission_no_overlaps
)

# Simple confirmation that the imports above succeeded.
print("Utilities loaded!")

Utilities loaded!


In [2]:
# Candidate ensemble found among the snapshots; it still has to be validated
# before it can replace the current best submission.
better_path = '/home/nonroot/snapshots/santa-2025/21156852373/code/ensemble_70.559.csv'
better_df = load_submission(better_path)
print("Loaded:", better_df.shape)

# Score the candidate with the scorer's overlap check switched off. The scorer
# returns three values; the third is not used here.
scoring_result = score_submission(better_df, check_overlaps=False)
score, scores_by_n, _ = scoring_result
print("Calculated score: {:.6f}".format(score))

Loaded: (20100, 4)


Calculated score: 70.559048


In [3]:
# Validate the candidate: the helper returns a validity flag together with the
# list of N values for which overlaps were detected.
# NOTE(review): the message below advertises a 1e-15 tolerance, but no
# tolerance is passed to the helper here — presumably it is the helper's
# default; confirm in utils.
print()
print("Checking for overlaps with strict tolerance (1e-15)...")
overlap_check = verify_submission_no_overlaps(better_df)
is_valid, overlapping_ns = overlap_check
print("Is valid:", is_valid)
print("Overlapping N values:", overlapping_ns)


Checking for overlaps with strict tolerance (1e-15)...


Is valid: False
Overlapping N values: [2, 4, 5, 16, 35, 36, 40, 46, 47, 48, 53, 54, 55, 56, 59, 62, 63, 64, 69, 70, 71, 77, 78, 79, 80, 87, 88, 94, 95, 96, 97, 99, 100, 107, 108, 109, 110, 118, 119, 120, 124, 125, 126, 128, 129, 130, 131, 139, 140, 150, 152, 153, 154, 155, 156, 166, 167, 168, 175, 176, 177, 178, 179, 185, 190, 191, 192]


In [4]:
# Compare the snapshot ensemble with our current best submission and with the
# target score. Lower scores are better, so a positive "Improvement" means the
# snapshot ensemble beats the current best.
# NOTE(review): both scores are computed with check_overlaps=False, so this
# comparison says nothing about whether either submission is valid.

# Single source of truth for the target score (previously hard-coded twice).
TARGET_SCORE = 68.888293

current_path = '/home/code/experiments/002_fixed_ensemble/submission.csv'
current_df = load_submission(current_path)
# Only the total score is needed here; the other two return values are discarded.
current_score, _, _ = score_submission(current_df, check_overlaps=False)

print("\nComparison:")
print(f"  Current best: {current_score:.6f}")
print(f"  Snapshot ensemble: {score:.6f}")
print(f"  Improvement: {current_score - score:.6f}")
print(f"\nTarget: {TARGET_SCORE:.6f}")
print(f"Gap to target: {score - TARGET_SCORE:.6f}")


Comparison:
  Current best: 70.615786
  Snapshot ensemble: 70.559048
  Improvement: 0.056738

Target: 68.888293
Gap to target: 1.670755


In [5]:
# Promote the snapshot ensemble to the official submission only when the
# overlap verification passed; otherwise report which N values still overlap.
import shutil
from pathlib import Path

if is_valid:
    print("\n=== VALID SUBMISSION FOUND ===")
    print(f"Score: {score:.6f}")
    print("Copying to submission folder...")

    submission_path = Path('/home/submission/submission.csv')
    # shutil.copy raises FileNotFoundError when the destination directory is
    # missing, so make sure it exists before copying.
    submission_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(better_path, submission_path)
    print("Done!")
else:
    print("\n=== SUBMISSION HAS OVERLAPS ===")
    print(f"Overlapping N values: {overlapping_ns}")
    print("Need to fix these before using.")


=== SUBMISSION HAS OVERLAPS ===
Overlapping N values: [2, 4, 5, 16, 35, 36, 40, 46, 47, 48, 53, 54, 55, 56, 59, 62, 63, 64, 69, 70, 71, 77, 78, 79, 80, 87, 88, 94, 95, 96, 97, 99, 100, 107, 108, 109, 110, 118, 119, 120, 124, 125, 126, 128, 129, 130, 131, 139, 140, 150, 152, 153, 154, 155, 156, 166, 167, 168, 175, 176, 177, 178, 179, 185, 190, 191, 192]
Need to fix these before using.
