# Experiment 007: Hybrid Solution with Kaggle-Compatible Validation

**Goal:** Create a hybrid solution using:
1. Pre-optimized baseline for N values that pass Kaggle-style validation
2. Zaburo fallback for N values with overlaps
3. Apply fractional translation refinement

**Key insight:** Pre-optimized baseline scores ~70.6 but has overlap issues. If we can identify which N values are valid and use Zaburo for the rest, we can get close to 70.6.

In [None]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely.geometry import Polygon
from shapely import affinity
import copy
import json
import os

# Set high precision for Decimal
getcontext().prec = 28

# Scale factor for integer coordinates (match Kaggle)
SCALE = Decimal('1000000000000000')  # 1e15

print("Setup complete")

In [None]:
# Tree polygon vertices (from getting-started kernel)
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

class ChristmasTree:
    """Christmas tree with Kaggle-compatible precision."""
    def __init__(self, x, y, angle):
        self.center_x = Decimal(str(x))
        self.center_y = Decimal(str(y))
        self.angle = Decimal(str(angle))
        self._update_polygon()
    
    def _update_polygon(self):
        """Update polygon vertices based on position and angle."""
        # Create base polygon
        coords = list(zip(TX, TY))
        poly = Polygon(coords)
        # Rotate and translate
        poly = affinity.rotate(poly, float(self.angle), origin=(0, 0))
        poly = affinity.translate(poly, float(self.center_x), float(self.center_y))
        self.polygon = poly
        self.vertices = list(poly.exterior.coords)[:-1]  # Remove duplicate last point
    
    def get_integer_polygon(self):
        """Get polygon with integer coordinates for Kaggle-style validation."""
        int_coords = []
        for x, y in self.vertices:
            xi = int(Decimal(str(x)) * SCALE)
            yi = int(Decimal(str(y)) * SCALE)
            int_coords.append((xi, yi))
        return Polygon(int_coords)
    
    def clone(self):
        return ChristmasTree(float(self.center_x), float(self.center_y), float(self.angle))

print("ChristmasTree class defined")

In [None]:
def check_overlap_kaggle_style(trees):
    """Check overlaps using integer coordinates like Kaggle does."""
    polygons = [t.get_integer_polygon() for t in trees]
    
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]):
                # Check if it's just touching (allowed) or actual overlap (not allowed)
                if not polygons[i].touches(polygons[j]):
                    # Check intersection area
                    try:
                        intersection = polygons[i].intersection(polygons[j])
                        if intersection.area > 0:
                            return True, (i, j), intersection.area
                    except:
                        return True, (i, j), -1
    return False, None, 0

def calculate_bbox_side(trees):
    """Calculate bounding box side length."""
    all_x = []
    all_y = []
    for tree in trees:
        for x, y in tree.vertices:
            all_x.append(x)
            all_y.append(y)
    
    min_x, max_x = min(all_x), max(all_x)
    min_y, max_y = min(all_y), max(all_y)
    
    return max(max_x - min_x, max_y - min_y)

def calculate_score(trees, n):
    """Calculate score contribution for n trees."""
    side = calculate_bbox_side(trees)
    return (side ** 2) / n

print("Validation functions defined")

In [None]:
# Load pre-optimized baseline
baseline_path = '/home/nonroot/snapshots/santa-2025/21329067673/submission/submission.csv'
baseline_df = pd.read_csv(baseline_path)
print(f"Loaded baseline with {len(baseline_df)} rows")
print(baseline_df.head())

# Parse the 'id' column to get n and i
# Format: "001_0" means N=1, tree index 0
def parse_id(id_str):
    parts = id_str.split('_')
    n = int(parts[0])
    i = int(parts[1])
    return n, i

# Parse the 's' prefix from coordinate values
def parse_coord(val):
    if isinstance(val, str) and val.startswith('s'):
        return float(val[1:])
    return float(val)

# Parse baseline into trees by N
baseline_trees = {}
for _, row in baseline_df.iterrows():
    n, i = parse_id(row['id'])
    x = parse_coord(row['x'])
    y = parse_coord(row['y'])
    deg = parse_coord(row['deg'])
    
    if n not in baseline_trees:
        baseline_trees[n] = []
    
    tree = ChristmasTree(x, y, deg)
    baseline_trees[n].append(tree)

print(f"Parsed {len(baseline_trees)} N values")
print(f"N=1 has {len(baseline_trees[1])} trees, N=200 has {len(baseline_trees[200])} trees")

In [None]:
# Load Zaburo solution (guaranteed valid)
zaburo_path = '/home/code/experiments/005_zaburo_rowbased/submission.csv'
zaburo_df = pd.read_csv(zaburo_path)
print(f"Loaded Zaburo with {len(zaburo_df)} rows")

# Parse Zaburo into trees by N
zaburo_trees = {}
for n in range(1, 201):
    n_df = zaburo_df[zaburo_df['n'] == n].sort_values('i')
    trees = []
    for _, row in n_df.iterrows():
        tree = ChristmasTree(row['x'], row['y'], row['deg'])
        trees.append(tree)
    zaburo_trees[n] = trees

print(f"Parsed {len(zaburo_trees)} N values")

In [None]:
# Check each N value in baseline for overlaps using Kaggle-style validation
print("Checking baseline for overlaps with Kaggle-style validation...")

overlapping_n = []
valid_n = []

for n in range(1, 201):
    trees = baseline_trees[n]
    has_overlap, pair, area = check_overlap_kaggle_style(trees)
    
    if has_overlap:
        overlapping_n.append(n)
        if n <= 20 or len(overlapping_n) <= 10:
            print(f"N={n}: OVERLAP between trees {pair}, area={area}")
    else:
        valid_n.append(n)

print(f"\nTotal overlapping N values: {len(overlapping_n)}")
print(f"Total valid N values: {len(valid_n)}")
print(f"\nOverlapping N values: {overlapping_n[:30]}..." if len(overlapping_n) > 30 else f"\nOverlapping N values: {overlapping_n}")

In [None]:
# Calculate scores for baseline valid N values vs Zaburo
print("\nComparing scores for valid baseline N values vs Zaburo:")

baseline_valid_total = 0
zaburo_total = 0

for n in range(1, 201):
    baseline_score = calculate_score(baseline_trees[n], n)
    zaburo_score = calculate_score(zaburo_trees[n], n)
    
    if n in valid_n:
        baseline_valid_total += baseline_score
    zaburo_total += zaburo_score

print(f"Baseline (valid N only): {baseline_valid_total:.6f}")
print(f"Zaburo total: {zaburo_total:.6f}")

# Calculate what hybrid would score
hybrid_total = 0
for n in range(1, 201):
    if n in valid_n:
        hybrid_total += calculate_score(baseline_trees[n], n)
    else:
        hybrid_total += calculate_score(zaburo_trees[n], n)

print(f"\nHybrid (baseline valid + Zaburo fallback): {hybrid_total:.6f}")

In [None]:
# Create hybrid solution
print("Creating hybrid solution...")

hybrid_trees = {}
for n in range(1, 201):
    if n in valid_n:
        # Use baseline (better score)
        hybrid_trees[n] = [t.clone() for t in baseline_trees[n]]
    else:
        # Use Zaburo (guaranteed valid)
        hybrid_trees[n] = [t.clone() for t in zaburo_trees[n]]

# Verify hybrid has no overlaps
print("\nVerifying hybrid solution has no overlaps...")
hybrid_overlaps = []
for n in range(1, 201):
    has_overlap, pair, area = check_overlap_kaggle_style(hybrid_trees[n])
    if has_overlap:
        hybrid_overlaps.append(n)
        print(f"N={n}: OVERLAP in hybrid!")

print(f"\nHybrid overlapping N values: {len(hybrid_overlaps)}")
if len(hybrid_overlaps) == 0:
    print("✅ Hybrid solution is VALID (0 overlaps)")

In [None]:
# Calculate final hybrid score
hybrid_score = 0
per_n_scores = {}

for n in range(1, 201):
    score = calculate_score(hybrid_trees[n], n)
    per_n_scores[n] = score
    hybrid_score += score

print(f"Hybrid total score: {hybrid_score:.6f}")
print(f"\nTop 10 score contributors:")
sorted_scores = sorted(per_n_scores.items(), key=lambda x: x[1], reverse=True)
for n, score in sorted_scores[:10]:
    source = 'baseline' if n in valid_n else 'zaburo'
    print(f"  N={n}: {score:.6f} ({source})")

In [None]:
# Now apply fractional translation refinement to improve the hybrid
print("Applying fractional translation refinement...")

frac_steps = [0.01, 0.005, 0.002, 0.001, 0.0005, 0.0002, 0.0001]
directions = [(0, 1), (0, -1), (1, 0), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]

def fractional_translation(trees, n, max_iter=50):
    """Apply fractional translation refinement."""
    best_trees = [t.clone() for t in trees]
    best_score = calculate_score(best_trees, n)
    
    for iteration in range(max_iter):
        improved = False
        for i in range(len(best_trees)):
            for step in frac_steps:
                for dx, dy in directions:
                    # Save old position
                    old_x = best_trees[i].center_x
                    old_y = best_trees[i].center_y
                    
                    # Try move
                    best_trees[i].center_x = old_x + Decimal(str(dx * step))
                    best_trees[i].center_y = old_y + Decimal(str(dy * step))
                    best_trees[i]._update_polygon()
                    
                    # Check if valid and better
                    has_overlap, _, _ = check_overlap_kaggle_style(best_trees)
                    if not has_overlap:
                        new_score = calculate_score(best_trees, n)
                        if new_score < best_score - 1e-10:
                            best_score = new_score
                            improved = True
                            continue  # Keep the move
                    
                    # Revert
                    best_trees[i].center_x = old_x
                    best_trees[i].center_y = old_y
                    best_trees[i]._update_polygon()
        
        if not improved:
            break
    
    return best_trees, best_score

print("Fractional translation function defined")

In [None]:
# Apply fractional translation to small N values (highest impact)
print("Applying fractional translation to N=1-20...")

improved_trees = {}
total_improvement = 0

for n in range(1, 21):
    original_score = calculate_score(hybrid_trees[n], n)
    improved, new_score = fractional_translation(hybrid_trees[n], n, max_iter=30)
    improved_trees[n] = improved
    
    improvement = original_score - new_score
    total_improvement += improvement
    
    if improvement > 1e-8:
        print(f"N={n}: {original_score:.6f} -> {new_score:.6f} (improved by {improvement:.6f})")
    else:
        print(f"N={n}: {original_score:.6f} (no improvement)")

print(f"\nTotal improvement from fractional translation: {total_improvement:.6f}")

In [None]:
# Update hybrid with improved trees
for n in range(1, 21):
    hybrid_trees[n] = improved_trees[n]

# Calculate final score
final_score = 0
for n in range(1, 201):
    final_score += calculate_score(hybrid_trees[n], n)

print(f"Final hybrid score: {final_score:.6f}")
print(f"Improvement over pure Zaburo (87.99): {87.99 - final_score:.6f}")
print(f"Gap to target (68.89): {final_score - 68.89:.6f}")

In [None]:
# Final validation - check ALL N values for overlaps
print("Final validation - checking all N values...")

final_overlaps = []
for n in range(1, 201):
    has_overlap, pair, area = check_overlap_kaggle_style(hybrid_trees[n])
    if has_overlap:
        final_overlaps.append((n, pair, area))
        print(f"N={n}: OVERLAP between trees {pair}")

if len(final_overlaps) == 0:
    print("\n✅ FINAL VALIDATION PASSED: 0 overlapping N values")
else:
    print(f"\n❌ VALIDATION FAILED: {len(final_overlaps)} overlapping N values")

In [None]:
# Save submission
rows = []
for n in range(1, 201):
    for i, tree in enumerate(hybrid_trees[n]):
        rows.append({
            'n': n,
            'i': i,
            'x': float(tree.center_x),
            'y': float(tree.center_y),
            'deg': float(tree.angle)
        })

submission_df = pd.DataFrame(rows)
submission_df.to_csv('/home/code/experiments/007_hybrid_kaggle_validation/submission.csv', index=False)
submission_df.to_csv('/home/submission/submission.csv', index=False)

print(f"Saved submission with {len(submission_df)} rows")
print(submission_df.head())

In [None]:
# Save metrics
metrics = {
    'cv_score': final_score,
    'hybrid_score_before_frac': hybrid_score,
    'improvement_from_frac': total_improvement,
    'valid_n_count': len(valid_n),
    'overlapping_n_count': len(overlapping_n),
    'final_overlaps': len(final_overlaps)
}

with open('/home/code/experiments/007_hybrid_kaggle_validation/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved:")
print(json.dumps(metrics, indent=2))