# Experiment 003: Fractional Translation Optimization

Implement fractional translation in pure Python to optimize N=2-20.

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
import json
import time
from decimal import Decimal, getcontext

# Decimal arithmetic in this notebook is carried out with 30 significant digits.
getcontext().prec = 30

# Tree outline as (x, y) vertex pairs in the tree's local frame.
# The apex is at (0, 0.8); the lowest points are the trunk corners at y = -0.2.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]

# Parallel coordinate lists consumed by create_tree_polygon.
TX = [vx for vx, _ in _TREE_OUTLINE]
TY = [vy for _, vy in _TREE_OUTLINE]

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` and then shifted to (x, y).

    The outline is built from the TX/TY vertex lists, rotated about its local
    origin (0, 0) (``affinity.rotate`` takes the angle in degrees by default),
    and finally translated by ``x`` along X and ``y`` along Y.
    """
    outline = Polygon(list(zip(TX, TY)))
    return affinity.translate(
        affinity.rotate(outline, deg, origin=(0, 0)),
        xoff=x,
        yoff=y,
    )

def check_overlap(poly1, poly2):
    """Tell whether two polygons share interior area rather than only a boundary.

    Returns False when the polygons do not intersect or merely touch.
    Otherwise the area of their intersection must exceed 1e-15 to count as an
    overlap, which filters out numerically negligible slivers.
    """
    if poly1.intersects(poly2) and not poly1.touches(poly2):
        shared_region = poly1.intersection(poly2)
        return shared_region.area > 1e-15
    return False

def parse_value(s):
    """Convert a submission field to float, dropping a leading 's' if present.

    Strings that start with 's' have that first character removed before
    conversion; every other input is passed to ``float`` unchanged.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    numeric_part = s[1:] if has_prefix else s
    return float(numeric_part)

# Progress marker: reached only after the definitions above ran without error.
print("Functions defined successfully")

Functions defined successfully


In [2]:
# Load baseline solution
baseline_df = pd.read_csv('/home/code/experiments/002_valid_baseline/submission.csv')

# x, y and deg all go through the same parser, so convert them in one loop.
for coord_col in ('x', 'y', 'deg'):
    baseline_df[coord_col] = baseline_df[coord_col].apply(parse_value)

# The part of the id before the first underscore is the configuration size N.
baseline_df['n'] = baseline_df['id'].map(lambda tree_id: int(tree_id.split('_')[0]))

print(f"Loaded baseline with {len(baseline_df)} rows")
print(f"N values: {baseline_df['n'].min()} to {baseline_df['n'].max()}")

# Load baseline per-N scores
with open('/home/code/experiments/002_valid_baseline/metrics.json', 'r') as metrics_file:
    baseline_metrics = json.load(metrics_file)

baseline_per_n = baseline_metrics['per_n_scores']
print(f"\nBaseline total score: {baseline_metrics['cv_score']:.6f}")

Loaded baseline with 20100 rows
N values: 1 to 200

Baseline total score: 70.615102


In [3]:
def get_trees_for_n(df, n):
    """Collect the rows whose 'n' column equals ``n`` as a list of tree dicts.

    Each dict carries the 'x', 'y', 'deg' and 'id' values of one row, in the
    order the rows appear in ``df``. An empty list is returned when no row
    matches.
    """
    matching_rows = df[df['n'] == n]
    return [
        {'x': record['x'], 'y': record['y'], 'deg': record['deg'], 'id': record['id']}
        for _, record in matching_rows.iterrows()
    ]

def get_bounding_box_side(trees):
    """Return the longer side of the axis-aligned box enclosing all trees.

    Every tree dict is turned into its polygon, the polygons are merged with
    ``unary_union``, and the larger of the merged shape's width and height is
    returned.
    """
    footprint = unary_union(
        [create_tree_polygon(tree['x'], tree['y'], tree['deg']) for tree in trees]
    )
    extent = footprint.bounds  # (minx, miny, maxx, maxy)
    width = extent[2] - extent[0]
    height = extent[3] - extent[1]
    return max(width, height)

def has_any_overlap(trees, idx):
    """Report whether the tree at position ``idx`` overlaps any other tree.

    The polygon of ``trees[idx]`` is compared against the polygon of every
    other list position using ``check_overlap``; the search stops at the
    first overlap found.
    """
    subject = trees[idx]
    subject_poly = create_tree_polygon(subject['x'], subject['y'], subject['deg'])
    for position, neighbour in enumerate(trees):
        if position == idx:
            continue
        neighbour_poly = create_tree_polygon(
            neighbour['x'], neighbour['y'], neighbour['deg']
        )
        if check_overlap(subject_poly, neighbour_poly):
            return True
    return False

# Test on N=2
# Sanity check: the recomputed bounding-box side for N=2 is printed next to
# the score stored in the baseline metrics so the two can be compared by eye.
test_trees = get_trees_for_n(baseline_df, 2)
print(f"N=2 has {len(test_trees)} trees")
print(f"N=2 bounding box side: {get_bounding_box_side(test_trees):.6f}")
# per_n_scores is keyed by the string form of N (hence '2', not 2).
print(f"N=2 baseline score: {baseline_per_n['2']['score']:.6f}")

N=2 has 2 trees
N=2 bounding box side: 0.949504
N=2 baseline score: 0.450779


In [6]:
def fractional_translation(trees, max_iter=50, verbose=False):
    """Greedily shrink the bounding box with small per-tree shifts and turns.

    Each pass tries, for every tree, every translation step in 8 directions
    and then every rotation step in both senses. A move is kept only when the
    moved tree overlaps no other tree AND the bounding-box side shrinks by
    more than 1e-12; otherwise the tree is restored. Passes repeat until one
    pass keeps nothing or ``max_iter`` passes have run.

    Args:
        trees: list of tree dicts with 'x', 'y' and 'deg' keys. The dicts are
            modified in place.
        max_iter: maximum number of full passes.
        verbose: when true, print a line for every accepted move.

    Returns:
        ``(trees, best_side)``: the same list object that was passed in and
        the final bounding-box side length.
    """
    # Step sizes to try (from largest to smallest)
    frac_steps = [0.01, 0.005, 0.002, 0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00002, 0.00001]

    # 8 directions: up, down, left, right, and diagonals
    directions = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]

    # Rotation steps
    rotation_steps = [5, 2, 1, 0.5, 0.2, 0.1, 0.05]

    best_side = get_bounding_box_side(trees)
    initial_side = best_side

    def accepted_side(moved_idx):
        """Return the new side if the pending move is valid and helps, else None."""
        if has_any_overlap(trees, moved_idx):
            return None
        candidate = get_bounding_box_side(trees)
        if candidate < best_side - 1e-12:
            return candidate
        return None

    for iteration in range(max_iter):
        improved = False

        # Try translations
        for idx, tree in enumerate(trees):
            for step in frac_steps:
                for dx, dy in directions:
                    # Re-read the position each time: an accepted move becomes
                    # the starting point for the next attempt.
                    old_x, old_y = tree['x'], tree['y']
                    tree['x'] = old_x + dx * step
                    tree['y'] = old_y + dy * step

                    shrunk = accepted_side(idx)
                    if shrunk is None:
                        tree['x'], tree['y'] = old_x, old_y
                        continue

                    best_side = shrunk
                    improved = True
                    if verbose:
                        print(f"  Iter {iteration}, tree {idx} trans: {initial_side:.6f} -> {best_side:.6f}")

        # Try rotations
        for idx, tree in enumerate(trees):
            for rot_step in rotation_steps:
                for d_rot in (rot_step, -rot_step):
                    old_deg = tree['deg']
                    tree['deg'] = (old_deg + d_rot) % 360

                    shrunk = accepted_side(idx)
                    if shrunk is None:
                        tree['deg'] = old_deg
                        continue

                    best_side = shrunk
                    improved = True
                    if verbose:
                        print(f"  Iter {iteration}, tree {idx} rot: {initial_side:.6f} -> {best_side:.6f}")

        # A full pass without any accepted move means a local optimum.
        if not improved:
            break

    return trees, best_side

# Progress marker: reached only after fractional_translation was defined.
print("Fractional translation function defined (with rotation)")

Fractional translation function defined (with rotation)


In [7]:
# Test on N=2 first
# Trial run on the smallest multi-tree case, with per-move logging enabled,
# before spending time on the full N range.
print("Testing fractional translation on N=2...")
test_trees = get_trees_for_n(baseline_df, 2)
initial_side = get_bounding_box_side(test_trees)
print(f"Initial side: {initial_side:.6f}")

# Wall-clock timing of a single optimizer call.
start_time = time.time()
optimized_trees, new_side = fractional_translation(test_trees, max_iter=20, verbose=True)
elapsed = time.time() - start_time

print(f"\nOptimized side: {new_side:.6f}")
print(f"Improvement: {initial_side - new_side:.6f}")
print(f"Time: {elapsed:.2f}s")

# Score is side^2 / N; the divisor 2 is hard-coded because this cell only handles N=2.
old_score = initial_side ** 2 / 2
new_score = new_side ** 2 / 2
print(f"Score: {old_score:.6f} -> {new_score:.6f} (improvement: {old_score - new_score:.6f})")

Testing fractional translation on N=2...
Initial side: 0.949504

Optimized side: 0.949504
Improvement: 0.000000
Time: 0.05s
Score: 0.450779 -> 0.450779 (improvement: 0.000000)


In [8]:
# Optimize N=2 to N=20
print("Optimizing N=2 to N=20...")
print("="*60)

# Accumulators shared with the later N=21..50 cell and the save cell.
improvements, optimized_solutions = {}, {}
total_improvement = 0

for n in range(2, 21):
    trees = get_trees_for_n(baseline_df, n)
    initial_side = get_bounding_box_side(trees)
    old_score = initial_side ** 2 / n

    start_time = time.time()
    optimized_trees, new_side = fractional_translation(trees, max_iter=30)
    elapsed = time.time() - start_time

    new_score = new_side ** 2 / n
    improvement = old_score - new_score

    # Gains of 1e-8 or less are treated as noise and not recorded.
    if not improvement > 1e-8:
        print(f"   N={n:3d}: {old_score:.6f} (no improvement) [{elapsed:.1f}s]")
        continue

    improvements[n] = improvement
    optimized_solutions[n] = optimized_trees
    total_improvement += improvement
    print(f"✅ N={n:3d}: {old_score:.6f} -> {new_score:.6f} (improved by {improvement:.6f}) [{elapsed:.1f}s]")

print("="*60)
print(f"Total improvement from N=2-20: {total_improvement:.6f}")
print(f"Improved {len(improvements)} N values")

Optimizing N=2 to N=20...
   N=  2: 0.450779 (no improvement) [0.0s]
   N=  3: 0.434745 (no improvement) [0.1s]


   N=  4: 0.416545 (no improvement) [0.1s]


   N=  5: 0.416850 (no improvement) [0.2s]


   N=  6: 0.399610 (no improvement) [0.3s]


   N=  7: 0.399897 (no improvement) [0.3s]


   N=  8: 0.385407 (no improvement) [0.4s]


   N=  9: 0.387415 (no improvement) [0.5s]


   N= 10: 0.376630 (no improvement) [0.4s]


   N= 11: 0.374924 (no improvement) [0.6s]


   N= 12: 0.372724 (no improvement) [0.7s]


   N= 13: 0.372294 (no improvement) [0.7s]


   N= 14: 0.369543 (no improvement) [0.9s]


   N= 15: 0.376949 (no improvement) [1.3s]


   N= 16: 0.374128 (no improvement) [1.2s]


   N= 17: 0.370040 (no improvement) [1.4s]


   N= 18: 0.368771 (no improvement) [1.6s]


   N= 19: 0.368615 (no improvement) [1.5s]


   N= 20: 0.376057 (no improvement) [1.7s]
Total improvement from N=2-20: 0.000000
Improved 0 N values


In [9]:
# The baseline is already well-optimized. Let's try a per-N ensemble approach
# to find better solutions from the 116 snapshots
# NOTE(review): the snapshot count above is not derived from the code below;
# the number of files actually found is printed by this cell. Confirm or drop it.

import glob
import os

print("Searching for better per-N solutions in snapshots...")

# Find all submission files
# One level of wildcard: each snapshot directory may hold submission/submission.csv.
snapshot_submissions = glob.glob('/home/nonroot/snapshots/santa-2025/*/submission/submission.csv')
print(f"Found {len(snapshot_submissions)} snapshot submissions")

# Also check code folders
code_submissions = glob.glob('/home/nonroot/snapshots/santa-2025/*/code/submission.csv')
print(f"Found {len(code_submissions)} code submissions")

# Candidate list for the ensemble scan: snapshot files first, then code-folder files.
all_submissions = snapshot_submissions + code_submissions
print(f"Total: {len(all_submissions)} submissions to check")

Searching for better per-N solutions in snapshots...
Found 90 snapshot submissions
Found 49 code submissions
Total: 139 submissions to check


In [10]:
# Load and score each submission, tracking best per-N
print("Loading and scoring submissions...")

# Maps n -> {'score', 'side', 'trees', 'source'} for the best layout seen so far.
best_per_n = {}

# Initialize with baseline
for n in range(1, 201):
    trees = get_trees_for_n(baseline_df, n)
    side = get_bounding_box_side(trees)
    score = side ** 2 / n
    best_per_n[n] = dict(score=score, side=side, trees=trees, source='baseline')

print(f"Initialized with baseline scores")
print(f"Baseline total: {sum(best_per_n[n]['score'] for n in range(1, 201)):.6f}")

# Check a sample of submissions
# Only the first 30 candidates are scanned.
sample_submissions = all_submissions[:30]
print(f"\nChecking {len(sample_submissions)} submissions...")

Loading and scoring submissions...


Initialized with baseline scores
Baseline total: 70.615102

Checking 30 submissions...


In [11]:
# Scan the sampled submissions and keep, per N, the best overlap-free layout.
# Each entry is (n, score gain, path of the submission that supplied it).
improvements_found = []

for sub_path in sample_submissions:
    try:
        # Load submission
        sub_df = pd.read_csv(sub_path)
        # Files with fewer than 20000 rows are skipped as incomplete.
        if len(sub_df) < 20000:
            continue

        for coord_col in ('x', 'y', 'deg'):
            sub_df[coord_col] = sub_df[coord_col].apply(parse_value)
        sub_df['n'] = sub_df['id'].apply(lambda x: int(x.split('_')[0]))

        # Check each N
        for n in range(1, 201):
            trees = get_trees_for_n(sub_df, n)
            if len(trees) != n:
                continue

            side = get_bounding_box_side(trees)
            score = side ** 2 / n

            # Capture the incumbent score BEFORE best_per_n[n] is replaced, so
            # the gain and the log line compare old against new.
            previous_score = best_per_n[n]['score']
            if not score < previous_score - 1e-10:
                continue

            # Validate no overlaps (stops at the first overlapping tree)
            if any(has_any_overlap(trees, i) for i in range(len(trees))):
                continue

            improvement = previous_score - score
            improvements_found.append((n, improvement, sub_path))
            best_per_n[n] = {
                'score': score,
                'side': side,
                'trees': trees,
                'source': sub_path
            }
            print(f"✅ N={n}: Found better solution! {previous_score:.6f} -> {score:.6f} (improved by {improvement:.6f})")
    except Exception as e:
        # Best effort: one unreadable or malformed submission must not abort the
        # scan, but the failure is reported instead of being silently dropped.
        print(f"⚠️ Skipping {sub_path}: {type(e).__name__}: {e}")

print(f"\nFound {len(improvements_found)} improvements")
new_total = sum(best_per_n[n]['score'] for n in range(1, 201))
print(f"New total score: {new_total:.6f}")

✅ N=154: Found better solution! 0.334158 -> 0.334158 (improved by 0.000000)


✅ N=138: Found better solution! 0.341028 -> 0.341028 (improved by 0.000000)


✅ N=22: Found better solution! 0.375258 -> 0.375258 (improved by 0.000000)


✅ N=67: Found better solution! 0.353854 -> 0.353854 (improved by 0.000000)


✅ N=74: Found better solution! 0.352709 -> 0.352709 (improved by 0.000000)


✅ N=80: Found better solution! 0.344880 -> 0.344880 (improved by 0.000000)


✅ N=96: Found better solution! 0.346397 -> 0.346397 (improved by 0.000000)


✅ N=111: Found better solution! 0.343919 -> 0.343919 (improved by 0.000000)


✅ N=112: Found better solution! 0.346450 -> 0.346450 (improved by 0.000000)


✅ N=113: Found better solution! 0.348370 -> 0.348370 (improved by 0.000000)


✅ N=114: Found better solution! 0.345620 -> 0.345620 (improved by 0.000000)


✅ N=117: Found better solution! 0.341955 -> 0.341955 (improved by 0.000000)


✅ N=119: Found better solution! 0.339077 -> 0.339077 (improved by 0.000000)


✅ N=120: Found better solution! 0.337683 -> 0.337683 (improved by 0.000000)


✅ N=124: Found better solution! 0.346839 -> 0.346839 (improved by 0.000000)


✅ N=125: Found better solution! 0.345548 -> 0.345548 (improved by 0.000000)


✅ N=140: Found better solution! 0.340098 -> 0.340098 (improved by 0.000000)


✅ N=155: Found better solution! 0.332069 -> 0.332069 (improved by 0.000000)


✅ N=167: Found better solution! 0.332835 -> 0.332835 (improved by 0.000000)


✅ N=168: Found better solution! 0.332475 -> 0.332475 (improved by 0.000000)


✅ N=170: Found better solution! 0.341484 -> 0.341484 (improved by 0.000000)


✅ N=175: Found better solution! 0.338682 -> 0.338682 (improved by 0.000000)


✅ N=178: Found better solution! 0.334442 -> 0.334442 (improved by 0.000000)


✅ N=182: Found better solution! 0.329988 -> 0.329988 (improved by 0.000000)


✅ N=185: Found better solution! 0.338562 -> 0.338562 (improved by 0.000000)


✅ N=188: Found better solution! 0.339480 -> 0.339480 (improved by 0.000000)


✅ N=189: Found better solution! 0.338821 -> 0.338821 (improved by 0.000000)


✅ N=190: Found better solution! 0.338231 -> 0.338231 (improved by 0.000000)


✅ N=194: Found better solution! 0.332999 -> 0.332999 (improved by 0.000000)



Found 29 improvements
New total score: 70.615102


In [None]:
# Continue with N=21 to N=50 (larger N, may take longer)
print("\nOptimizing N=21 to N=50...")
print("="*60)

# Results are added to the improvements / optimized_solutions / total_improvement
# accumulators created by the N=2..20 cell.
for n in range(21, 51):
    trees = get_trees_for_n(baseline_df, n)
    initial_side = get_bounding_box_side(trees)
    old_score = initial_side ** 2 / n

    start_time = time.time()
    # Use fewer iterations for larger N
    optimized_trees, new_side = fractional_translation(trees, max_iter=15)
    elapsed = time.time() - start_time

    new_score = new_side ** 2 / n
    improvement = old_score - new_score

    # Gains of 1e-8 or less are treated as noise and not recorded.
    if not improvement > 1e-8:
        print(f"   N={n:3d}: {old_score:.6f} (no improvement) [{elapsed:.1f}s]")
        continue

    improvements[n] = improvement
    optimized_solutions[n] = optimized_trees
    total_improvement += improvement
    print(f"✅ N={n:3d}: {old_score:.6f} -> {new_score:.6f} (improved by {improvement:.6f}) [{elapsed:.1f}s]")

print("="*60)
print(f"Total improvement from N=2-50: {total_improvement:.6f}")
print(f"Improved {len(improvements)} N values")

In [None]:
# Create new submission with improvements
print("\nCreating submission with improvements...")

# Start with baseline
final_df = baseline_df.copy()

# Replace improved N values
if optimized_solutions:
    # Drop every improved N in one pass and append all replacement rows with a
    # single concat; calling pd.concat once per row copies the whole frame
    # each time.
    final_df = final_df[~final_df['n'].isin(list(optimized_solutions))]
    replacement_rows = [
        {'id': tree['id'], 'x': tree['x'], 'y': tree['y'], 'deg': tree['deg'], 'n': n}
        for n, trees in optimized_solutions.items()
        for tree in trees
    ]
    final_df = pd.concat([final_df, pd.DataFrame(replacement_rows)], ignore_index=True)

# baseline_df holds parsed floats, so without this step the baseline rows
# would be written without the 's' prefix while replaced rows carry it.
# Format every coordinate the same way; values that are already strings are
# left untouched.
for coord_col in ('x', 'y', 'deg'):
    final_df[coord_col] = final_df[coord_col].map(
        lambda value: value if isinstance(value, str) else f"s{float(value)!r}"
    )

# Sort by id
final_df = final_df.sort_values('id').reset_index(drop=True)

print(f"Final submission has {len(final_df)} rows")
print(f"Improved {len(optimized_solutions)} N values")

In [None]:
# Calculate new total score
# Progress marker for the scoring step that follows.
print("\nCalculating new total score...")

def calculate_total_score(df):
    """Sum side**2 / n over every N from 1 to 200 that has rows in ``df``.

    Coordinates may be stored as numbers or as strings; strings are converted
    with ``parse_value``. N values with no rows are skipped and contribute
    nothing.

    Returns:
        ``(total, per_n)`` where ``per_n`` maps each scored N to a dict with
        its 'side' and 'score'.
    """
    def as_number(raw):
        # String cells go through the parser; numeric cells are used as they are.
        return parse_value(raw) if isinstance(raw, str) else raw

    total = 0
    per_n = {}
    for n in range(1, 201):
        group = df[df['n'] == n]
        if len(group) == 0:
            continue

        trees = [
            {
                'x': as_number(record['x']),
                'y': as_number(record['y']),
                'deg': as_number(record['deg']),
            }
            for _, record in group.iterrows()
        ]

        side = get_bounding_box_side(trees)
        score = side ** 2 / n
        per_n[n] = {'side': side, 'score': score}
        total += score
    return total, per_n

# Re-score the assembled submission and compare it with the total stored in
# the baseline metrics file; a positive difference means the new total is lower.
new_total, new_per_n = calculate_total_score(final_df)
old_total = baseline_metrics['cv_score']

print(f"Old total score: {old_total:.6f}")
print(f"New total score: {new_total:.6f}")
print(f"Total improvement: {old_total - new_total:.6f}")

In [None]:
# Validate no overlaps using strict validation
print("\nValidating no overlaps...")

# Multiplier applied to every vertex coordinate in validate_n_strict before
# the value is truncated to an integer.
SCALE = 10**18

def validate_n_strict(df, n):
    """Check the trees of configuration ``n`` pairwise for overlap.

    Every tree polygon is rebuilt with its vertex coordinates multiplied by
    SCALE and truncated to integers; each pair is then tested. A pair counts
    as overlapping when it intersects, does not merely touch, and has an
    intersection of positive area.

    NOTE(review): shapely is understood to store coordinates as double
    precision, so the scaled integers may be rounded when the Polygon is
    built. Confirm whether this check is actually stricter than
    check_overlap.

    Returns:
        ``(True, "OK")`` when there is at most one tree or no pair overlaps,
        otherwise ``(False, message)`` naming the first overlapping pair.
    """
    group = df[df['n'] == n]
    if len(group) <= 1:
        return True, "OK"

    def as_number(raw):
        # String cells go through the parser; numeric cells are used as they are.
        return parse_value(raw) if isinstance(raw, str) else raw

    def scaled(value):
        # Go through Decimal(str(...)) so the scaling works on the printed digits.
        return int(Decimal(str(value)) * SCALE)

    polygons = []
    for _, record in group.iterrows():
        tree_poly = create_tree_polygon(
            as_number(record['x']), as_number(record['y']), as_number(record['deg'])
        )
        ring = list(tree_poly.exterior.coords)
        polygons.append(Polygon([(scaled(pt[0]), scaled(pt[1])) for pt in ring]))

    for i, first in enumerate(polygons):
        for j in range(i + 1, len(polygons)):
            second = polygons[j]
            if not first.intersects(second) or first.touches(second):
                continue
            if first.intersection(second).area > 0:
                return False, f"Trees {i} and {j} overlap"
    return True, "OK"

# Validate all improved N values
# One message is collected per configuration that fails validation.
overlap_errors = []
for n in optimized_solutions:
    ok, msg = validate_n_strict(final_df, n)
    if ok:
        print(f"✅ N={n}: Valid")
        continue
    overlap_errors.append(f"N={n}: {msg}")
    print(f"❌ N={n}: {msg}")

if not overlap_errors:
    print(f"\n✅ All {len(optimized_solutions)} improved N values are valid!")
else:
    print(f"\n⚠️ Found {len(overlap_errors)} overlap errors!")

In [None]:
# Save submission
import os

# Save to experiment folder
# Create the folder first, as is done for /home/submission below; otherwise
# to_csv and the metrics write fail when the directory does not exist yet.
os.makedirs('/home/code/experiments/003_fractional_translation', exist_ok=True)
# Only the four submission columns are written; the helper 'n' column is dropped.
submission_df = final_df[['id', 'x', 'y', 'deg']].copy()
submission_df.to_csv('/home/code/experiments/003_fractional_translation/submission.csv', index=False)

# Save to submission folder
os.makedirs('/home/submission', exist_ok=True)
submission_df.to_csv('/home/submission/submission.csv', index=False)

print(f"Saved submission with {len(submission_df)} rows")
print(f"New score: {new_total:.6f}")

# Save metrics
metrics = {
    'cv_score': new_total,
    'baseline_score': old_total,
    'improvement': old_total - new_total,
    'improved_n_values': list(optimized_solutions),
    # Keys are stringified so the JSON shows them explicitly as strings.
    'per_n_improvements': {str(n): gain for n, gain in improvements.items()}
}

with open('/home/code/experiments/003_fractional_translation/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print(f"\nMetrics saved")
print(f"CV Score: {new_total:.6f}")
print(f"Improvement over baseline: {old_total - new_total:.6f}")