# Experiment 008: Fractional Translation

Implement fractional translation from jonathanchan kernel:
- Move trees in tiny steps (0.001 to 0.00001) in 8 directions
- Keep move if it reduces bounding box without overlap
- Use strict 1e18 validation

In [1]:
import pandas as pd
import numpy as np
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
from shapely.ops import unary_union
from numba import njit
import json
import time
import warnings
warnings.filterwarnings('ignore')

getcontext().prec = 30
SCALE_FACTOR = Decimal('1e18')

print("Setup complete")

Setup complete


In [2]:
# Tree shape vertices
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def create_tree_polygon(x, y, angle):
    """Create tree polygon for scoring."""
    x, y, angle = float(x), float(y), float(angle)
    coords = list(zip(TX, TY))
    poly = Polygon(coords)
    poly = affinity.rotate(poly, angle, origin=(0, 0))
    poly = affinity.translate(poly, x, y)
    return poly

def create_high_precision_tree(x, y, angle):
    """Create tree polygon with 1e18 scaling for validation."""
    x = Decimal(str(x))
    y = Decimal(str(y))
    angle = Decimal(str(angle))
    sf = SCALE_FACTOR
    vertices = [
        (float(Decimal('0.0') * sf), float(Decimal('0.8') * sf)),
        (float(Decimal('0.125') * sf), float(Decimal('0.5') * sf)),
        (float(Decimal('0.0625') * sf), float(Decimal('0.5') * sf)),
        (float(Decimal('0.2') * sf), float(Decimal('0.25') * sf)),
        (float(Decimal('0.1') * sf), float(Decimal('0.25') * sf)),
        (float(Decimal('0.35') * sf), float(Decimal('0.0') * sf)),
        (float(Decimal('0.075') * sf), float(Decimal('0.0') * sf)),
        (float(Decimal('0.075') * sf), float(Decimal('-0.2') * sf)),
        (float(Decimal('-0.075') * sf), float(Decimal('-0.2') * sf)),
        (float(Decimal('-0.075') * sf), float(Decimal('0.0') * sf)),
        (float(Decimal('-0.35') * sf), float(Decimal('0.0') * sf)),
        (float(Decimal('-0.1') * sf), float(Decimal('0.25') * sf)),
        (float(Decimal('-0.2') * sf), float(Decimal('0.25') * sf)),
        (float(Decimal('-0.0625') * sf), float(Decimal('0.5') * sf)),
        (float(Decimal('-0.125') * sf), float(Decimal('0.5') * sf)),
    ]
    poly = Polygon(vertices)
    poly = affinity.rotate(poly, float(angle), origin=(0, 0))
    poly = affinity.translate(poly, xoff=float(x * sf), yoff=float(y * sf))
    return poly

def validate_no_overlap_strict(trees_data):
    """Check for overlaps using 1e18 scaling."""
    if len(trees_data) <= 1:
        return True
    polygons = [create_high_precision_tree(t['x'], t['y'], t['deg']) for t in trees_data]
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]) and not polygons[i].touches(polygons[j]):
                return False
    return True

def get_bbox_side(trees):
    """Get bounding box side length."""
    if len(trees) == 0:
        return 0
    polygons = [create_tree_polygon(t['x'], t['y'], t['deg']) for t in trees]
    union = unary_union(polygons)
    bounds = union.bounds
    return max(bounds[2] - bounds[0], bounds[3] - bounds[1])

def get_score(trees, n):
    """Get score contribution for N trees."""
    side = get_bbox_side(trees)
    return (side ** 2) / n

print("Core functions defined")

Core functions defined


In [3]:
# Fast scoring using numba
@njit
def make_polygon_template():
    tw=0.15; th=0.2; bw=0.7; mw=0.4; ow=0.25
    tip=0.8; t1=0.5; t2=0.25; base=0.0; tbot=-th
    x=np.array([0,ow/2,ow/4,mw/2,mw/4,bw/2,tw/2,tw/2,-tw/2,-tw/2,-bw/2,-mw/4,-mw/2,-ow/4,-ow/2],np.float64)
    y=np.array([tip,t1,t1,t2,t2,base,base,tbot,tbot,base,base,t2,t2,t1,t1],np.float64)
    return x,y

@njit
def get_bbox_side_fast(xs, ys, degs, tx, ty):
    """Fast bbox side calculation using numba."""
    n = xs.size
    V = tx.size
    mnx = 1e300; mny = 1e300; mxx = -1e300; mxy = -1e300
    for i in range(n):
        r = degs[i] * np.pi / 180.0
        c = np.cos(r); s = np.sin(r)
        xi = xs[i]; yi = ys[i]
        for j in range(V):
            X = c * tx[j] - s * ty[j] + xi
            Y = s * tx[j] + c * ty[j] + yi
            if X < mnx: mnx = X
            if X > mxx: mxx = X
            if Y < mny: mny = Y
            if Y > mxy: mxy = Y
    return max(mxx - mnx, mxy - mny)

def strip(a):
    return np.array([float(str(v).replace("s","")) for v in a], np.float64)

tx, ty = make_polygon_template()
print("Fast scoring compiled")

Fast scoring compiled


In [4]:
# Load exp_007 submission (our current best)
print("Loading exp_007 submission...")
df = pd.read_csv('/home/code/experiments/007_rotation_backprop/submission.csv')
df["N"] = df["id"].astype(str).str.split("_").str[0].astype(int)

best_trees = {}
best_scores = {}

for n, g in df.groupby("N"):
    trees = []
    for _, row in g.iterrows():
        x = str(row['x']).replace('s', '')
        y = str(row['y']).replace('s', '')
        deg = str(row['deg']).replace('s', '')
        trees.append({'x': x, 'y': y, 'deg': deg})
    best_trees[n] = trees
    best_scores[n] = get_score(trees, n)

baseline_total = sum(best_scores.values())
print(f"Loaded with score: {baseline_total:.6f}")

Loading exp_007 submission...


Loaded with score: 70.615107


In [5]:
# Implement fractional translation
print("\n" + "=" * 60)
print("FRACTIONAL TRANSLATION")
print("=" * 60)

def fractional_translation(trees, n, max_iter=50):
    """Move trees in tiny steps to reduce bounding box.
    Based on jonathanchan kernel approach.
    """
    # Step sizes from jonathanchan kernel
    frac_steps = [0.001, 0.0005, 0.0002, 0.0001, 0.00005, 0.00002, 0.00001]
    # 8 directions: N, S, E, W, NE, NW, SE, SW
    directions = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
    
    # Make deep copy
    current_trees = [dict(t) for t in trees]
    current_score = get_score(current_trees, n)
    
    best_trees = [dict(t) for t in current_trees]
    best_score = current_score
    
    for iteration in range(max_iter):
        improved = False
        
        for i in range(len(current_trees)):
            for step in frac_steps:
                for dx, dy in directions:
                    # Try moving tree i
                    test_trees = [dict(t) for t in current_trees]
                    test_trees[i]['x'] = str(float(test_trees[i]['x']) + dx * step)
                    test_trees[i]['y'] = str(float(test_trees[i]['y']) + dy * step)
                    
                    # Check for overlaps with strict validation
                    if not validate_no_overlap_strict(test_trees):
                        continue
                    
                    # Calculate new score
                    new_score = get_score(test_trees, n)
                    
                    if new_score < best_score - 1e-12:
                        best_score = new_score
                        best_trees = [dict(t) for t in test_trees]
                        current_trees = [dict(t) for t in test_trees]
                        improved = True
        
        if not improved:
            break
    
    return best_trees, best_score, best_score < current_score - 1e-12

print("Fractional translation function defined")


FRACTIONAL TRANSLATION
Fractional translation function defined


In [6]:
# Test on a few N values first
print("\nTesting fractional translation on N=10, 20, 30...")

for test_n in [10, 20, 30]:
    trees = best_trees[test_n]
    old_score = best_scores[test_n]
    
    start = time.time()
    new_trees, new_score, improved = fractional_translation(trees, test_n, max_iter=20)
    elapsed = time.time() - start
    
    if improved:
        print(f"  N={test_n}: {old_score:.6f} -> {new_score:.6f} (+{old_score - new_score:.6f}) in {elapsed:.1f}s")
    else:
        print(f"  N={test_n}: No improvement found in {elapsed:.1f}s")


Testing fractional translation on N=10, 20, 30...


  N=10: No improvement found in 0.6s


  N=20: No improvement found in 2.5s


  N=30: No improvement found in 6.0s


In [7]:
# Apply fractional translation to ALL N values
print("\n" + "=" * 60)
print("APPLYING TO ALL N VALUES")
print("=" * 60)

improvements = []
start_time = time.time()

for n in range(2, 201):  # Skip N=1 (already optimal)
    trees = best_trees[n]
    old_score = best_scores[n]
    
    # Use fewer iterations for larger N (speed vs quality tradeoff)
    max_iter = 30 if n <= 50 else 20 if n <= 100 else 10
    
    new_trees, new_score, improved = fractional_translation(trees, n, max_iter=max_iter)
    
    if improved:
        best_trees[n] = new_trees
        best_scores[n] = new_score
        improvement = old_score - new_score
        improvements.append((n, improvement))
        print(f"  N={n:3d}: +{improvement:.6f}")
    
    if n % 50 == 0:
        elapsed = time.time() - start_time
        print(f"  Progress: N={n}/200, elapsed={elapsed:.1f}s")

print(f"\nTotal time: {time.time() - start_time:.1f}s")
print(f"N values improved: {len(improvements)}")
if improvements:
    total_improvement = sum(imp for _, imp in improvements)
    print(f"Total improvement: {total_improvement:.6f}")


APPLYING TO ALL N VALUES


  N= 35: +0.000005


  N= 36: +0.000014


  Progress: N=50/200, elapsed=423.1s


  N= 63: +0.000000


  N= 73: +0.000001


  N= 78: +0.000001


  N= 81: +0.000001


  N= 88: +0.000000


  N= 91: +0.000001


  N= 93: +0.000001


  N= 94: +0.000002


  N= 95: +0.000000


  N= 98: +0.000001


  N=100: +0.000000
  Progress: N=100/200, elapsed=5716.9s


  N=123: +0.000001


  N=131: +0.000002


  N=143: +0.000000


  Progress: N=150/200, elapsed=19394.4s


  N=159: +0.000000


  N=160: +0.000001


  N=184: +0.000002


  Progress: N=200/200, elapsed=52590.5s

Total time: 52590.5s
N values improved: 19
Total improvement: 0.000033


In [8]:
# Calculate final score
final_total = sum(best_scores.values())
print(f"\n" + "=" * 60)
print("RESULTS")
print("=" * 60)
print(f"Original score: {baseline_total:.6f}")
print(f"Final score: {final_total:.6f}")
print(f"Improvement: {baseline_total - final_total:.6f}")


RESULTS
Original score: 70.615107
Final score: 70.615074
Improvement: 0.000033


In [9]:
# Final validation
print("\n" + "=" * 60)
print("FINAL VALIDATION")
print("=" * 60)

final_overlaps = []
for n in range(1, 201):
    if not validate_no_overlap_strict(best_trees[n]):
        final_overlaps.append(n)

if final_overlaps:
    print(f"WARNING: {len(final_overlaps)} N values have overlaps!")
else:
    print("✅ All N values pass strict validation!")


FINAL VALIDATION


✅ All N values pass strict validation!


In [10]:
# Create submission
print("\n" + "=" * 60)
print("CREATE SUBMISSION")
print("=" * 60)

rows = []
for n in range(1, 201):
    trees = best_trees[n]
    for i, t in enumerate(trees):
        rows.append({
            'id': f"{n:03d}_{i}",
            'x': f"s{t['x']}",
            'y': f"s{t['y']}",
            'deg': f"s{t['deg']}"
        })

submission_df = pd.DataFrame(rows)
print(f"Submission shape: {submission_df.shape}")

submission_df.to_csv('/home/code/experiments/008_fractional_translation/submission.csv', index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)
print("Submission saved!")


CREATE SUBMISSION
Submission shape: (20100, 4)
Submission saved!


In [11]:
# Save metrics
metrics = {
    'cv_score': final_total,
    'baseline_score': baseline_total,
    'improvement': baseline_total - final_total,
    'n_improved': len(improvements),
    'final_overlaps': len(final_overlaps),
    'target': 68.888293,
    'gap': final_total - 68.888293
}

with open('/home/code/experiments/008_fractional_translation/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("\nMetrics saved!")
print(f"\n" + "=" * 60)
print("FINAL RESULTS")
print("=" * 60)
print(f"Original score: {baseline_total:.6f}")
print(f"Final score: {final_total:.6f}")
print(f"Improvement: {baseline_total - final_total:.6f}")
print(f"Target: 68.888293")
print(f"Gap to target: {final_total - 68.888293:.6f}")


Metrics saved!

FINAL RESULTS
Original score: 70.615107
Final score: 70.615074
Improvement: 0.000033
Target: 68.888293
Gap to target: 1.726781
