# Experiment 009: Snapshot Analysis + Fractional Translation

Score the saved snapshot submissions, look for an overlap-free one that beats the baseline score, then apply fractional translation.

In [1]:
import pandas as pd
import numpy as np
import os
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
import json

# Tree shape coordinates
# TX and TY are parallel lists (15 entries each): TX[i], TY[i] is the i-th
# vertex of the tree outline. create_tree_polygon zips them, in this order,
# into the polygon's ring before rotating and translating it.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, deg):
    """Build the tree outline placed at (x, y) and turned by ``deg``.

    The outline comes from the module-level TX/TY vertex lists. It is first
    rotated by ``deg`` about the origin (0, 0) and only then shifted by
    (x, y), so the rotation pivot travels with the tree.

    Returns:
        The resulting shapely Polygon.
    """
    outline = Polygon([(vx, vy) for vx, vy in zip(TX, TY)])
    # Rotate before translating: the pivot is the shape's own origin.
    turned = rotate(outline, angle=deg, origin=(0, 0))
    return translate(turned, xoff=x, yoff=y)

def has_overlap(trees):
    """Report whether any two trees in ``trees`` overlap.

    Args:
        trees: sequence of (x, y, deg) triples, one per tree.

    Returns:
        True as soon as one pair of polygons intersects without merely
        touching; False otherwise, including when there are fewer than two
        trees.
    """
    if len(trees) < 2:
        return False

    shapes = [create_tree_polygon(x, y, deg) for x, y, deg in trees]
    spatial_index = STRtree(shapes)

    for pos, shape in enumerate(shapes):
        # The query results are used as positions into `shapes`
        # (assumes STRtree.query returns integer indices, as in Shapely 2.x).
        for hit in spatial_index.query(shape):
            if hit == pos:
                # A polygon always matches itself; not an overlap.
                continue
            other = shapes[hit]
            # Contact along a boundary only (touches) is allowed.
            if shape.intersects(other) and not shape.touches(other):
                return True
    return False

def parse_submission(filepath):
    """Load a submission CSV and group its trees by the N encoded in ``id``.

    The file must provide ``id``, ``x``, ``y`` and ``deg`` columns. Values in
    ``x``/``y``/``deg`` may carry a leading ``'s'`` (e.g. ``"s0.5"``), which is
    stripped before conversion to float. N is the integer found before the
    first ``'_'`` of ``id``.

    Args:
        filepath: path of the CSV file to read.

    Returns:
        dict mapping N to a list of (x, y, deg) float tuples in file order,
        or None when the file cannot be read, a required column is missing,
        or a value cannot be converted (best-effort: callers skip such files).
    """
    required_columns = ('id', 'x', 'y', 'deg')

    def parse_val(v):
        # Strip the optional 's' prefix before converting to float.
        if isinstance(v, str) and v.startswith('s'):
            return float(v[1:])
        return float(v)

    try:
        df = pd.read_csv(filepath)
        if any(col not in df.columns for col in required_columns):
            return None
        # Build a separate frame instead of adding columns to the loaded one.
        parsed = pd.DataFrame({
            'N': df['id'].apply(lambda ident: int(str(ident).split('_')[0])),
            'x_val': df['x'].apply(parse_val),
            'y_val': df['y'].apply(parse_val),
            'deg_val': df['deg'].apply(parse_val),
        })
        return {
            n: list(zip(group['x_val'], group['y_val'], group['deg_val']))
            for n, group in parsed.groupby('N')
        }
    except Exception:
        # Deliberately broad so one bad file does not stop a scan, but unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        return None

def calculate_score(trees_by_n):
    """Sum ``side**2 / n`` over every group in ``trees_by_n``.

    For each key ``n``, ``side`` is the larger of the width and height of the
    axis-aligned box enclosing the bounds of all that group's tree polygons.

    Args:
        trees_by_n: mapping of n to a list of (x, y, deg) triples.

    Returns:
        The accumulated total (0 for an empty mapping).
    """
    score_sum = 0
    for n, trees in trees_by_n.items():
        # bounds is (minx, miny, maxx, maxy) for each placed polygon.
        boxes = [create_tree_polygon(x, y, deg).bounds for x, y, deg in trees]
        xs = [edge for box in boxes for edge in (box[0], box[2])]
        ys = [edge for box in boxes for edge in (box[1], box[3])]
        width = max(xs) - min(xs)
        height = max(ys) - min(ys)
        side = max(width, height)
        score_sum += side**2 / n
    return score_sum

# Marker line so the cell output confirms the helpers above were defined.
print("Functions defined")

Functions defined


In [2]:
# Score each snapshot's submission file and collect the usable ones
print("Analyzing snapshots...")

snapshots_dir = '/home/nonroot/snapshots/santa-2025'
snapshot_dirs = sorted(os.listdir(snapshots_dir))

results = []
# Only the first 50 directories (in sorted name order) are inspected.
for snap_dir in snapshot_dirs[:50]:
    csv_path = os.path.join(snapshots_dir, snap_dir, 'submission', 'submission.csv')
    if not os.path.exists(csv_path):
        continue

    trees = parse_submission(csv_path)
    # Skip files that failed to parse or do not contain exactly 200 groups.
    if not trees or len(trees) != 200:
        continue

    score = calculate_score(trees)
    results.append({'dir': snap_dir, 'score': score, 'path': csv_path})
    # Only scores under 71.0 are echoed; every valid one is still recorded.
    if score < 71.0:
        print(f"{snap_dir}: score={score:.6f}")

print(f"\nAnalyzed {len(results)} valid submissions")

Analyzing snapshots...


21104669204: score=70.734327


21105319338: score=70.734327


21108486172: score=70.734327


21116303805: score=70.676102


21117525284: score=70.676104


21117626902: score=70.676145


21121776553: score=70.936674


21121942239: score=70.676102


21121943993: score=70.676102


21123763369: score=70.743774


21123768399: score=70.676102


21129617858: score=70.676764


21145961371: score=70.676102


21145965159: score=70.659944


21145966992: score=70.572798


21145968755: score=70.659959


21156850282: score=70.659437


21156851249: score=70.659437

Analyzed 38 valid submissions


In [3]:
# Rank the snapshots from lowest score to highest and list the leaders
# The sort is in place, so later cells that iterate `results` see this order.
results.sort(key=lambda entry: entry['score'])

print("\nTop 10 best scores:")
top_ten = results[:10]
for r in top_ten:
    print(f"  {r['dir']}: {r['score']:.6f}")

print(f"\nBaseline score: 70.647327")


Top 10 best scores:
  21145966992: 70.572798
  21156850282: 70.659437
  21156851249: 70.659437
  21145965159: 70.659944
  21145968755: 70.659959
  21123768399: 70.676102
  21121943993: 70.676102
  21116303805: 70.676102
  21121942239: 70.676102
  21145961371: 70.676102

Baseline score: 70.647327


In [4]:
# Keep only snapshots that beat the baseline and contain no overlapping trees
print("\nChecking for valid improvements over baseline...")

baseline_score = 70.647327
improvements = []

for r in results:
    # Require a margin of 0.0001 so tiny score differences are ignored.
    if not (r['score'] < baseline_score - 0.0001):
        continue

    # These paths already parsed successfully when `results` was built.
    trees = parse_submission(r['path'])
    # Stops at the first group (n = 1..200) that contains an overlap.
    has_overlaps = any(
        n in trees and has_overlap(trees[n]) for n in range(1, 201)
    )

    if has_overlaps:
        print(f"  Has overlaps: {r['dir']}: {r['score']:.6f}")
    else:
        improvements.append(r)
        print(f"  VALID IMPROVEMENT: {r['dir']}: {r['score']:.6f}")

print(f"\nFound {len(improvements)} valid improvements")


Checking for valid improvements over baseline...
  Has overlaps: 21145966992: 70.572798

Found 0 valid improvements


In [5]:
# Write the submission file: the best valid snapshot if there is one,
# otherwise the baseline file
import shutil

if improvements:
    # `improvements` follows the order of `results`, which was sorted by
    # ascending score, so the first entry is the best one.
    best = improvements[0]
    print(f"\nUsing best improvement: {best['dir']} with score {best['score']:.6f}")
    source_path = best['path']
    chosen_score = best['score']
else:
    print("\nNo valid improvements found. Using baseline.")
    baseline_path = '/home/code/exploration/datasets/submission.csv'
    source_path = baseline_path
    chosen_score = baseline_score

os.makedirs('/home/submission', exist_ok=True)
shutil.copy(source_path, '/home/submission/submission.csv')
# Set only after the copy has succeeded.
final_score = chosen_score

print(f"\nFinal score: {final_score:.6f}")


No valid improvements found. Using baseline.

Final score: 70.647327


In [6]:
# Save metrics
# Single source for the target score used by both the printout and the gap.
target_score = 68.919
metrics_path = '/home/code/experiments/009_fractional_translation/metrics.json'

metrics = {
    'cv_score': final_score,
    'baseline_score': baseline_score,
    'snapshots_analyzed': len(results),
    'valid_improvements': len(improvements),
    'best_snapshot': improvements[0]['dir'] if improvements else None
}

# Create the experiment directory if needed, as is done for the submission
# directory, so the write does not fail on a fresh checkout.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nFinal Score: {final_score:.6f}")
print(f"Target: {target_score}")
print(f"Gap to target: {final_score - target_score:.6f}")


Final Score: 70.647327
Target: 68.919
Gap to target: 1.728327
