# Check Snapshot Files for Valid Submissions

Looking for files with better scores that have 0 overlaps.

In [None]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
import warnings
warnings.filterwarnings('ignore')

# Decimal arithmetic in this notebook uses 25 significant digits.
getcontext().prec = 25
# Every coordinate is multiplied by this factor before being converted to
# float and handed to shapely (see create_tree_polygon_lb).
scale_factor = Decimal('1e15')

# Tree geometry (unscaled Decimal values). The *_w constants are full widths;
# the polygon builder divides them by 2 (outer points) or 4 (inner notches)
# to get x offsets on either side of x = 0.
trunk_w = Decimal('0.15')   # trunk width
trunk_h = Decimal('0.2')    # trunk height (extends below base_y)
base_w = Decimal('0.7')     # width of the bottom tier, at base_y
mid_w = Decimal('0.4')      # width of the middle tier, at tier_2_y
top_w = Decimal('0.25')     # width of the top tier, at tier_1_y
tip_y = Decimal('0.8')      # y of the tree tip (x = 0)
tier_1_y = Decimal('0.5')   # y of the top tier's lower edge
tier_2_y = Decimal('0.25')  # y of the middle tier's lower edge
base_y = Decimal('0.0')     # y of the bottom tier's lower edge / trunk top
trunk_bottom_y = -trunk_h   # y of the trunk's bottom edge

def create_tree_polygon_lb(center_x_str, center_y_str, angle_str):
    """Build the scaled shapely polygon of one tree.

    The three arguments are parsed with ``Decimal``. The 15-vertex tree
    outline is built around the origin from the module-level geometry
    constants, with every coordinate multiplied by ``scale_factor`` and
    converted to float. The outline is rotated about (0, 0) by the angle
    and then translated to the (scaled) centre.

    Args:
        center_x_str: x position of the tree, in a form ``Decimal`` accepts.
        center_y_str: y position of the tree, in a form ``Decimal`` accepts.
        angle_str: rotation angle passed to ``affinity.rotate`` (which
            interprets it as degrees by default).

    Returns:
        The rotated and translated ``Polygon`` in scaled coordinates.
    """
    center_x = Decimal(center_x_str)
    center_y = Decimal(center_y_str)
    angle = Decimal(angle_str)

    def scaled(value):
        # Scale in Decimal first, convert to float last.
        return float(value * scale_factor)

    two = Decimal('2')
    four = Decimal('4')

    # Half widths (outer corners) and quarter widths (inner notches).
    top_half, top_quarter = top_w / two, top_w / four
    mid_half, mid_quarter = mid_w / two, mid_w / four
    base_half = base_w / two
    trunk_half = trunk_w / two

    # Scaled y levels, from the tip down to the bottom of the trunk.
    y_tip = scaled(tip_y)
    y_tier_1 = scaled(tier_1_y)
    y_tier_2 = scaled(tier_2_y)
    y_base = scaled(base_y)
    y_trunk_bottom = scaled(trunk_bottom_y)

    outline = [
        # Tip
        (scaled(Decimal('0.0')), y_tip),
        # Right side, walking down
        (scaled(top_half), y_tier_1),
        (scaled(top_quarter), y_tier_1),
        (scaled(mid_half), y_tier_2),
        (scaled(mid_quarter), y_tier_2),
        (scaled(base_half), y_base),
        # Trunk
        (scaled(trunk_half), y_base),
        (scaled(trunk_half), y_trunk_bottom),
        (scaled(-trunk_half), y_trunk_bottom),
        (scaled(-trunk_half), y_base),
        # Left side, walking back up
        (scaled(-base_half), y_base),
        (scaled(-mid_quarter), y_tier_2),
        (scaled(-mid_half), y_tier_2),
        (scaled(-top_quarter), y_tier_1),
        (scaled(-top_half), y_tier_1),
    ]

    # Rotate about the local origin first, then move to the tree's centre.
    turned = affinity.rotate(Polygon(outline), float(angle), origin=(0, 0))
    return affinity.translate(
        turned,
        xoff=scaled(center_x),
        yoff=scaled(center_y),
    )

def check_lb_overlaps(polygons):
    """Return the index pairs of polygons that overlap.

    A pair ``(i, j)`` with ``i < j`` is reported when the two polygons
    intersect but do not merely touch. Pairs are listed in increasing
    ``i``, then increasing ``j``.
    """
    total = len(polygons)
    return [
        (i, j)
        for i, first in enumerate(polygons)
        for j in range(i + 1, total)
        if first.intersects(polygons[j]) and not first.touches(polygons[j])
    ]

def check_file_overlaps(filepath):
    """Count the tree groups in a submission CSV that contain overlaps.

    The file must have exactly 20100 rows, and for every ``n`` in 1..200
    exactly ``n`` rows whose ``id`` starts with the zero-padded prefix
    ``'NNN_'``. Each row's ``x``, ``y`` and ``deg`` values are stringified
    and stripped of every ``'s'`` character (presumably the submission
    format prefixes values with ``s`` -- confirm against the data) before
    being turned into a polygon.

    Args:
        filepath: path of the CSV file to read with ``pd.read_csv``.

    Returns:
        ``(count, groups)`` on success, where ``groups`` lists the group
        numbers with at least one overlapping pair and ``count`` is its
        length. ``(None, message)`` when the file fails validation or any
        exception is raised while processing it.
    """
    try:
        submission = pd.read_csv(filepath)
        row_count = len(submission)
        if row_count != 20100:
            return None, f'Wrong row count: {row_count}'

        bad_groups = []
        for n in range(1, 201):
            in_group = submission['id'].str.startswith(f'{n:03d}_')
            group_rows = submission[in_group]
            group_size = len(group_rows)
            if group_size != n:
                return None, f'Group {n} has {group_size} trees'

            trees = [
                create_tree_polygon_lb(
                    str(row['x']).replace('s', ''),
                    str(row['y']).replace('s', ''),
                    str(row['deg']).replace('s', ''),
                )
                for _, row in group_rows.iterrows()
            ]
            if check_lb_overlaps(trees):
                bad_groups.append(n)
    except Exception as e:
        # Best-effort scan: report the failure to the caller, do not raise.
        return None, str(e)
    else:
        return len(bad_groups), bad_groups

# Progress marker printed once the helper definitions above have run.
print('Functions defined')

In [None]:
# Validate the two candidate files from the 70.749 run
print('Checking 70.749 files...')

files_to_check = [
    '/tmp/test_70749.csv',
    '/home/nonroot/snapshots/santa-2025/21198893057/code/experiments/013_long_sa/solutions/submission_70.749531.csv',
]

for filepath in files_to_check:
    result, details = check_file_overlaps(filepath)
    if result is None:
        # On failure, details holds the error message.
        print(f'{filepath}: ERROR - {details}')
        continue
    print(f'{filepath}: {result} groups with overlaps')
    # Only list the offending groups when there are a handful of them.
    if 0 < result <= 10:
        print(f'  Overlap groups: {details}')

In [None]:
# Scan the snapshot tree for further overlap-free submissions
import os
import glob

print('Searching for valid submissions in snapshots...')

# Any submission*.csv at any depth below each snapshot's code/ directory
pattern = '/home/nonroot/snapshots/santa-2025/*/code/**/submission*.csv'
files = glob.glob(pattern, recursive=True)

print(f'Found {len(files)} submission files')

# Validate only files whose name contains a promising score marker
valid_files = []
for filepath in files:
    basename = os.path.basename(filepath)
    if not any(marker in basename for marker in ('70.', '69.', '68.')):
        continue
    result, details = check_file_overlaps(filepath)
    if result is None:
        # Unreadable or malformed file: skip silently.
        continue
    if result == 0:
        print(f'VALID: {filepath}')
        valid_files.append(filepath)
    elif result <= 5:
        print(f'{result} overlaps: {filepath}')

print(f'\nFound {len(valid_files)} valid files')

In [None]:
# Repeat the scan for CSVs sitting directly in each snapshot's preoptimized/ folder
print('Checking preoptimized folders...')

pattern = '/home/nonroot/snapshots/santa-2025/*/code/preoptimized/*.csv'
files = glob.glob(pattern, recursive=True)

print(f'Found {len(files)} preoptimized files')

for filepath in files:
    basename = os.path.basename(filepath)
    if 'sample' in basename.lower():
        # sample_submission.csv is not a candidate
        continue
    result, details = check_file_overlaps(filepath)
    if result is None:
        # Unreadable or malformed file: skip silently.
        continue
    if result == 0:
        print(f'VALID: {filepath}')
        valid_files.append(filepath)
    elif result <= 3:
        print(f'{result} overlaps: {filepath}')