# Check Snapshot Files for Valid Submissions

This notebook scans the snapshot submission files for ones with better scores whose tree layouts contain zero overlaps.

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
import warnings
warnings.filterwarnings('ignore')

# Decimal arithmetic below is carried out with 25 significant digits.
getcontext().prec = 25
# Every coordinate is multiplied by this factor before being converted to float
# (see create_tree_polygon_lb); get_bounding_box_side divides it back out.
scale_factor = Decimal('1e15')

# Tree geometry
# Unscaled tree dimensions. The *_w values are full widths (they are halved or
# quartered to form x-coordinates); the *_y values are the y-levels of the
# outline vertices. The outline is defined around the origin (0, 0), which is
# also the rotation origin used when placing a tree.
trunk_w = Decimal('0.15')
trunk_h = Decimal('0.2')
base_w = Decimal('0.7')
mid_w = Decimal('0.4')
top_w = Decimal('0.25')
tip_y = Decimal('0.8')
tier_1_y = Decimal('0.5')
tier_2_y = Decimal('0.25')
base_y = Decimal('0.0')
# The trunk hangs below the base line, so its bottom edge has negative y.
trunk_bottom_y = -trunk_h

def create_tree_polygon_lb(center_x_str, center_y_str, angle_str):
    """Build the scaled shapely polygon for a single placed tree.

    Args:
        center_x_str: x offset of the tree origin, as a decimal string.
        center_y_str: y offset of the tree origin, as a decimal string.
        angle_str: rotation as a decimal string, forwarded to
            ``affinity.rotate`` (which interprets it in degrees by default).

    Returns:
        A shapely ``Polygon`` whose coordinates are multiplied by
        ``scale_factor``: the tree outline rotated about (0, 0) and then
        translated to the requested centre.
    """
    offset_x = Decimal(center_x_str)
    offset_y = Decimal(center_y_str)
    rotation = Decimal(angle_str)

    two = Decimal('2')
    four = Decimal('4')

    # Right-hand half of the outline, from the tip down to the trunk bottom,
    # in unscaled Decimal coordinates.
    right_outline = [
        (Decimal('0.0'), tip_y),
        (top_w / two, tier_1_y),
        (top_w / four, tier_1_y),
        (mid_w / two, tier_2_y),
        (mid_w / four, tier_2_y),
        (base_w / two, base_y),
        (trunk_w / two, base_y),
        (trunk_w / two, trunk_bottom_y),
    ]
    # The left half mirrors the right half (tip excluded) walked back upwards.
    left_outline = [(-x, y) for x, y in reversed(right_outline[1:])]

    scaled_vertices = [
        (float(x * scale_factor), float(y * scale_factor))
        for x, y in right_outline + left_outline
    ]
    upright_tree = Polygon(scaled_vertices)

    turned_tree = affinity.rotate(upright_tree, float(rotation), origin=(0, 0))
    return affinity.translate(
        turned_tree,
        xoff=float(offset_x * scale_factor),
        yoff=float(offset_y * scale_factor),
    )

def check_lb_overlaps(polygons):
    """Return index pairs of polygons that overlap.

    A pair ``(i, j)`` with ``i < j`` is reported when the two polygons
    intersect but do not merely touch. Pairs are listed in ascending
    ``(i, j)`` order; an empty list means no overlaps were found.
    """
    count = len(polygons)
    return [
        (first, second)
        for first in range(count)
        for second in range(first + 1, count)
        # touches() is only consulted once intersects() is true.
        if polygons[first].intersects(polygons[second])
        and not polygons[first].touches(polygons[second])
    ]

def check_file_overlaps(filepath):
    """Validate one submission CSV and report which groups contain overlaps.

    The file must have exactly 20100 rows, and for every n in 1..200 exactly
    n rows whose ``id`` starts with the zero-padded prefix ``'{n:03d}_'``.

    Returns:
        ``(count, groups)`` where ``groups`` lists every n whose trees overlap
        and ``count == len(groups)``; or ``(None, message)`` when the file
        fails a structural check or any exception is raised while reading or
        processing it.
    """
    def _strip_marker(value):
        # Values may carry an 's' marker (e.g. 's45.0'); drop it before parsing.
        return str(value).replace('s', '')

    try:
        submission = pd.read_csv(filepath)
        row_total = len(submission)
        if row_total != 20100:
            return None, f'Wrong row count: {row_total}'

        groups_with_overlap = []
        for n in range(1, 201):
            group = submission[submission['id'].str.startswith(f'{n:03d}_')]
            group_size = len(group)
            if group_size != n:
                return None, f'Group {n} has {group_size} trees'

            trees = [
                create_tree_polygon_lb(
                    _strip_marker(x), _strip_marker(y), _strip_marker(deg)
                )
                for x, y, deg in zip(group['x'], group['y'], group['deg'])
            ]
            if check_lb_overlaps(trees):
                groups_with_overlap.append(n)

        return len(groups_with_overlap), groups_with_overlap
    except Exception as e:
        # Best-effort: any failure is reported to the caller as a message.
        return None, str(e)

print('Functions defined')

Functions defined


In [2]:
# Check the 70.749 files
print('Checking 70.749 files...')

files_to_check = [
    '/tmp/test_70749.csv',
    '/home/nonroot/snapshots/santa-2025/21198893057/code/experiments/013_long_sa/solutions/submission_70.749531.csv',
]

for filepath in files_to_check:
    result, details = check_file_overlaps(filepath)
    # A None result means the file could not be validated; details holds the reason.
    if result is None:
        print(f'{filepath}: ERROR - {details}')
        continue
    print(f'{filepath}: {result} groups with overlaps')
    # Only list the offending groups when there are few enough to read.
    if 0 < result <= 10:
        print(f'  Overlap groups: {details}')

Checking 70.749 files...


/tmp/test_70749.csv: 198 groups with overlaps


/home/nonroot/snapshots/santa-2025/21198893057/code/experiments/013_long_sa/solutions/submission_70.749531.csv: 198 groups with overlaps


In [3]:
# Check more snapshot files
import os
import glob

print('Searching for valid submissions in snapshots...')

# Collect every submission*.csv found anywhere under a snapshot's code tree
pattern = '/home/nonroot/snapshots/santa-2025/*/code/**/submission*.csv'
files = glob.glob(pattern, recursive=True)

print(f'Found {len(files)} submission files')

# Only validate files whose name contains a promising score marker (70., 69. or 68.)
valid_files = []
for filepath in files:
    basename = os.path.basename(filepath)
    if not any(marker in basename for marker in ('70.', '69.', '68.')):
        continue
    result, details = check_file_overlaps(filepath)
    if result == 0:
        print(f'VALID: {filepath}')
        valid_files.append(filepath)
    elif result is not None and result <= 5:
        # Near misses are reported but not kept.
        print(f'{result} overlaps: {filepath}')

print(f'\nFound {len(valid_files)} valid files')

Searching for valid submissions in snapshots...
Found 1078 submission files


VALID: /home/nonroot/snapshots/santa-2025/21198893057/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21190224310/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21191207951/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21222392487/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21180221700/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21222390477/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21191209482/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21198891805/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21222373488/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21191212682/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21191206469/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21180223864/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21191211160/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21180219583/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21198925328/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21198928571/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21198790429/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21222377956/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21190222820/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21222375510/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21165874980/code/exploration/datasets/solutions/submission_70.926137.csv


VALID: /home/nonroot/snapshots/santa-2025/21198927060/code/exploration/datasets/solutions/submission_70.926137.csv

Found 22 valid files


In [4]:
# Also check the preoptimized folders in snapshots
print('Checking preoptimized folders...')

# The pattern contains no '**', so a recursive glob would match nothing extra.
pattern = '/home/nonroot/snapshots/santa-2025/*/code/preoptimized/*.csv'
files = glob.glob(pattern)

print(f'Found {len(files)} preoptimized files')

for filepath in files:
    basename = os.path.basename(filepath)
    # Skip sample_submission.csv
    if 'sample' in basename.lower():
        continue
    result, details = check_file_overlaps(filepath)
    if result == 0:
        print(f'VALID: {filepath}')
        # valid_files is shared with the previous cell; guard the append so
        # re-running this cell does not add the same path twice.
        if filepath not in valid_files:
            valid_files.append(filepath)
    elif result is not None and result <= 3:
        # Near misses are reported but not kept.
        print(f'{result} overlaps: {filepath}')

Checking preoptimized folders...
Found 26 preoptimized files


VALID: /home/nonroot/snapshots/santa-2025/21129617858/code/preoptimized/submission_70_926149550346.csv


VALID: /home/nonroot/snapshots/santa-2025/21129617858/code/preoptimized/best_public.csv


VALID: /home/nonroot/snapshots/santa-2025/21129617858/code/preoptimized/71.97.csv


VALID: /home/nonroot/snapshots/santa-2025/21322577324/code/preoptimized/chistyakov_best.csv


VALID: /home/nonroot/snapshots/santa-2025/21322577324/code/preoptimized/submission_70_926.csv


2 overlaps: /home/nonroot/snapshots/santa-2025/21322577324/code/preoptimized/best_snapshot.csv


VALID: /home/nonroot/snapshots/santa-2025/21322578388/code/preoptimized/chistyakov_best.csv


VALID: /home/nonroot/snapshots/santa-2025/21322578388/code/preoptimized/submission_70_926.csv


2 overlaps: /home/nonroot/snapshots/santa-2025/21322578388/code/preoptimized/best_snapshot.csv


In [6]:
# Check the 71.97.csv file
print('Checking 71.97.csv...')
df_7197 = pd.read_csv(
    '/home/nonroot/snapshots/santa-2025/21129617858/code/preoptimized/71.97.csv'
)
# Show the row count and the first rows as a quick sanity check.
row_count_7197 = len(df_7197)
print(f'Rows: {row_count_7197}')
preview_7197 = df_7197.head()
print(preview_7197)

# Calculate score
from shapely.ops import unary_union

def get_bounding_box_side(polygons):
    """Return the longer side of the axis-aligned box enclosing all polygons.

    The polygons carry coordinates multiplied by ``scale_factor``; the result
    is divided by that factor again. An empty input yields 0.0.
    """
    if not polygons:
        return 0.0
    # bounds is (minx, miny, maxx, maxy) of the merged geometry.
    extent = unary_union(polygons).bounds
    span_x = extent[2] - extent[0]
    span_y = extent[3] - extent[1]
    return max(span_x, span_y) / float(scale_factor)

# Reference score the computed total is compared against (single source of
# truth for both the "Target" and "Gap" lines below).
TARGET_SCORE = 68.896973

# Total score = sum over n = 1..200 of (bounding-square side)^2 / n.
total_score = 0.0
skipped_groups = []
for n in range(1, 201):
    prefix = f'{n:03d}_'
    config_df = df_7197[df_7197['id'].str.startswith(prefix)]

    if len(config_df) != n:
        print(f'Group {n} has {len(config_df)} trees')
        # Remember the gap so the final score is not mistaken for a complete one.
        skipped_groups.append(n)
        continue

    polygons = []
    for _, row in config_df.iterrows():
        # Values may carry an 's' marker (e.g. 's45.0'); drop it before parsing.
        x = str(row['x']).replace('s', '')
        y = str(row['y']).replace('s', '')
        deg = str(row['deg']).replace('s', '')
        poly = create_tree_polygon_lb(x, y, deg)
        polygons.append(poly)

    side = get_bounding_box_side(polygons)
    total_score += (side ** 2) / n

if skipped_groups:
    print(f'WARNING: score is partial; {len(skipped_groups)} group(s) skipped: {skipped_groups}')

print(f'Score: {total_score:.6f}')
print(f'Target: {TARGET_SCORE:.6f}')
print(f'Gap: {total_score - TARGET_SCORE:.6f}')

Checking 71.97.csv...
Rows: 20100
      id                         x                        y  \
0  001_0           s-47.8763389434      s58.451237364399994   
1  002_0  s-6.52823020647238028147  s2.41591800757018626555   
2  002_1  s-6.83642434570761903245  s1.89299949280238766214   
3  003_0   s0.67780483906596800114  s1.22664656548317063667   
4  003_1   s1.27014589560965207582  s1.32218749553771219851   

                         deg  
0                      s45.0  
1  s203.62937770382222879562  
2   s23.62937768419021722366  
3  s155.13405193982077889814  
4   s66.37062189402990952658  


Score: 71.972027
Target: 68.896973
Gap: 3.075054
