# Experiment 002: Valid Baseline + bbox3 Optimization

Goal:
1. Validate the baseline submission with high precision (official metric)
2. Run bbox3 optimizer to improve the score
3. Validate the optimized result

**Starting Score:** 70.676102
**Target Score:** 68.894234
**Gap:** 1.781868

In [1]:
import json
import math
import os
import shutil
import subprocess
from decimal import Decimal, getcontext

import numpy as np
import pandas as pd
from shapely import STRtree
from shapely.geometry import Polygon

# Decimal context precision and scale factor intended for validation.
# NOTE(review): scale_factor is only printed in the visible cells; nothing
# else shown here reads it - confirm whether it is still needed.
getcontext().prec = 50
scale_factor = Decimal('1e18')

# Tree outline as 15 (x, y) vertex pairs, in drawing order. TX / TY are the
# per-axis coordinate lists consumed by the geometry helpers.
_TREE_OUTLINE = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
TX = [vx for vx, _ in _TREE_OUTLINE]
TY = [vy for _, vy in _TREE_OUTLINE]

print(f"Tree has {len(TX)} vertices")
print(f"Scale factor for precision: {scale_factor}")

Tree has 15 vertices
Scale factor for precision: 1E+18


In [2]:
def get_tree_polygon(x, y, deg, use_high_precision=False):
    """Build the shapely Polygon for one tree placed at (x, y), rotated by deg degrees.

    Every template vertex (TX[i], TY[i]) is rotated about the origin with the
    standard 2D rotation matrix for ``deg`` degrees and then shifted by (x, y).

    Args:
        x, y: Translation applied after the rotation.
        deg: Rotation angle in degrees.
        use_high_precision: When true, x, y and deg are first passed through
            ``Decimal(str(value))`` and immediately converted back to
            ``float``. All arithmetic below is still plain float arithmetic.

    Returns:
        A ``Polygon`` with one vertex per template point.
    """
    if use_high_precision:
        x, y, deg = (float(Decimal(str(value))) for value in (x, y, deg))

    theta = math.radians(deg)
    cos_t, sin_t = math.cos(theta), math.sin(theta)

    return Polygon([
        (tx * cos_t - ty * sin_t + x, tx * sin_t + ty * cos_t + y)
        for tx, ty in zip(TX, TY)
    ])

def get_bounding_box_side(polygons):
    """Return the side of the smallest axis-aligned square enclosing all polygons.

    Args:
        polygons: Objects exposing ``.bounds`` as (min_x, min_y, max_x, max_y).

    Returns:
        ``0`` when ``polygons`` is empty/falsy; otherwise the larger of the
        overall width and height of the combined bounds.
    """
    if not polygons:
        return 0

    inf = float('inf')
    boxes = [shape.bounds for shape in polygons]

    # Seed each fold with +/-inf so the comparison order matches a running
    # min/max that starts from an "empty" extent.
    left = min([inf, *(box[0] for box in boxes)])
    bottom = min([inf, *(box[1] for box in boxes)])
    right = max([-inf, *(box[2] for box in boxes)])
    top = max([-inf, *(box[3] for box in boxes)])

    return max(right - left, top - bottom)

def check_overlaps_high_precision(polygons):
    """Find pairs of polygons that intersect without merely touching.

    An STRtree built over ``polygons`` supplies candidate indices for each
    polygon; a candidate ``j`` is only examined when ``j > i`` so each
    unordered pair is tested once. A pair counts as overlapping when
    ``intersects`` is true and ``touches`` is false (the original author
    describes this as the official metric's check).

    NOTE(review): despite the name, the test runs on the polygons exactly as
    given; no rescaling or Decimal arithmetic happens in this function.

    Args:
        polygons: Sequence of shapely geometries.

    Returns:
        ``(has_overlap, pairs)`` where ``pairs`` is a list of ``(i, j)``
        index tuples with ``i < j``. Fewer than two polygons yields
        ``(False, [])``.
    """
    if len(polygons) < 2:
        return False, []

    index = STRtree(polygons)
    found = []

    for i, shape in enumerate(polygons):
        for j in index.query(shape):
            if j > i:
                other = polygons[j]
                # Official metric check: intersects but not just touches
                if shape.intersects(other) and not shape.touches(other):
                    found.append((i, j))

    return bool(found), found

print("Functions defined")

Functions defined


In [3]:
def parse_value(val):
    """Return ``val`` as a string, dropping one leading 's' from string inputs.

    Strings that start with 's' are returned without that first character;
    every other value is returned as ``str(val)``.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return val[1:] if has_prefix else str(val)

def load_submission(filepath):
    """Read a submission CSV and add parsed numeric helper columns.

    The ``x``, ``y`` and ``deg`` columns are run through ``parse_value`` and
    converted to float into ``x_val``, ``y_val`` and ``deg_val``. The ``id``
    column is split on '_' into integer columns ``n`` (first part) and
    ``idx`` (second part).

    Args:
        filepath: Path passed to ``pd.read_csv``.

    Returns:
        The loaded DataFrame with the five derived columns appended.
    """
    def to_float(raw):
        return float(parse_value(raw))

    def id_part(position):
        return lambda ident: int(ident.split('_')[position])

    df = pd.read_csv(filepath)

    parsed_columns = {'x': 'x_val', 'y': 'y_val', 'deg': 'deg_val'}
    for source, target in parsed_columns.items():
        df[target] = df[source].apply(to_float)

    df['n'] = df['id'].apply(id_part(0))
    df['idx'] = df['id'].apply(id_part(1))
    return df

def calculate_score(df):
    """Compute the per-configuration score table for a loaded submission.

    For each n in 1..200 the rows with ``df['n'] == n`` are turned into tree
    polygons; the configuration contributes ``side ** 2 / n`` where ``side``
    is the bounding-square side of those polygons. A configuration whose row
    count differs from n is reported with an ERROR line and left out of the
    result.

    Args:
        df: DataFrame with ``n``, ``x_val``, ``y_val`` and ``deg_val`` columns.

    Returns:
        DataFrame with one row per scored configuration and the columns
        ``n``, ``side`` and ``score_contrib``.
    """
    rows = []
    for n in range(1, 201):
        config = df[df['n'] == n]
        if len(config) == n:
            trees = [
                get_tree_polygon(x, y, deg)
                for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
            ]
            side = get_bounding_box_side(trees)
            rows.append({'n': n, 'side': side, 'score_contrib': (side ** 2) / n})
        else:
            print(f"ERROR: n={n} has {len(config)} trees")

    return pd.DataFrame(rows)

print("Utility functions defined")

Utility functions defined


In [4]:
# Read the baseline CSV into `df`, then report its size and the range of
# configuration sizes it contains.
df = load_submission('/home/code/experiments/002_valid_baseline_bbox3/valid_baseline.csv')
print(
    f"Loaded submission with {len(df)} trees",
    f"N range: {df['n'].min()} to {df['n'].max()}",
    sep="\n",
)

Loaded submission with 20100 trees
N range: 1 to 200


In [5]:
# Calculate baseline score
# The target is named once so the printed value and the gap cannot drift apart.
TARGET_SCORE = 68.894234

print("Calculating baseline score...")
scores_df = calculate_score(df)
total_score = scores_df['score_contrib'].sum()

# calculate_score() drops any configuration whose tree count is wrong, which
# would silently lower the total; make that visible next to the number.
if len(scores_df) != 200:
    print(f"WARNING: only {len(scores_df)} of 200 configurations were scored; total is incomplete")

print("\n=== BASELINE SCORE ===")
print(f"Total Score: {total_score:.6f}")
print(f"Target Score: {TARGET_SCORE:.6f}")
print(f"Gap to target: {total_score - TARGET_SCORE:.6f}")

Calculating baseline score...



=== BASELINE SCORE ===
Total Score: 70.676102
Target Score: 68.894234
Gap to target: 1.781868


In [6]:
# Check every configuration n = 1..200 of the baseline for overlapping trees.
# Polygons are built with get_tree_polygon's default arguments (plain floats).
print("Validating ALL configurations for overlaps...")

overlapping_groups = []
for n in range(1, 201):
    config = df[df['n'] == n]
    polygons = [
        get_tree_polygon(x, y, deg)
        for x, y, deg in zip(config['x_val'], config['y_val'], config['deg_val'])
    ]
    has_overlap, pairs = check_overlaps_high_precision(polygons)
    if not has_overlap:
        continue
    overlapping_groups.append(n)
    if len(overlapping_groups) <= 10:  # Show first 10
        print(f"n={n}: OVERLAP DETECTED - {len(pairs)} pairs")

if not overlapping_groups:
    print("\n✓ NO OVERLAPS DETECTED - Submission is valid!")
else:
    print(f"\nTotal overlapping groups: {len(overlapping_groups)}")
    # Only the first 20 group ids are listed when there are more than 20.
    if len(overlapping_groups) > 20:
        print(f"Groups: {overlapping_groups[:20]}...")
    else:
        print(f"Groups: {overlapping_groups}")

Validating ALL configurations for overlaps...
n=12: OVERLAP DETECTED - 1 pairs
n=16: OVERLAP DETECTED - 1 pairs
n=20: OVERLAP DETECTED - 1 pairs
n=35: OVERLAP DETECTED - 1 pairs
n=36: OVERLAP DETECTED - 1 pairs
n=50: OVERLAP DETECTED - 1 pairs
n=52: OVERLAP DETECTED - 1 pairs


n=64: OVERLAP DETECTED - 1 pairs
n=67: OVERLAP DETECTED - 1 pairs
n=75: OVERLAP DETECTED - 1 pairs



Total overlapping groups: 27
Groups: [12, 16, 20, 35, 36, 50, 52, 64, 67, 75, 77, 78, 92, 95, 103, 116, 117, 118, 124, 128]...


In [7]:
# Prepare an isolated working directory for the bbox3 optimizer.
print("Setting up bbox3 optimization...")

work_dir = '/home/code/experiments/002_valid_baseline_bbox3/bbox3_work'
os.makedirs(work_dir, exist_ok=True)

# Stage the inputs in order: the baseline under the name bbox3 expects
# (submission.csv), then the optimizer binary itself.
staged_files = (
    ('/home/code/experiments/002_valid_baseline_bbox3/valid_baseline.csv', 'submission.csv'),
    ('/home/code/bbox3', 'bbox3'),
)
for source_path, staged_name in staged_files:
    shutil.copy(source_path, os.path.join(work_dir, staged_name))

# Make sure the copied binary is executable.
os.chmod(os.path.join(work_dir, 'bbox3'), 0o755)

print(f"Work directory: {work_dir}")
print(f"Files: {os.listdir(work_dir)}")

Setting up bbox3 optimization...
Work directory: /home/code/experiments/002_valid_baseline_bbox3/bbox3_work
Files: ['bbox3', 'submission.csv']


In [8]:
# Run bbox3 with moderate parameters
# -n: iterations per round
# -r: number of rounds (will take ~5-10 minutes at these settings)
# The values live in one place so the comment, the log line and the actual
# command line cannot disagree.
BBOX3_ITERATIONS = 2000
BBOX3_ROUNDS = 30

print("Running bbox3 optimizer...")
print(f"Parameters: -n {BBOX3_ITERATIONS} -r {BBOX3_ROUNDS}")
print("This may take 5-10 minutes...")

# Run inside work_dir via `cwd=` rather than os.chdir(): chdir would change
# the working directory of the whole kernel for every later cell.
result = subprocess.run(
    [os.path.join(work_dir, 'bbox3'), '-n', str(BBOX3_ITERATIONS), '-r', str(BBOX3_ROUNDS)],
    cwd=work_dir,
    capture_output=True,
    text=True,
    timeout=1200,  # 20 min timeout
)

print("\n=== bbox3 Output ===")
print(result.stdout[-2000:])  # Last 2000 chars (whole output when shorter)
if result.stderr:
    print("\n=== Errors ===")
    print(result.stderr[-500:])
# A non-zero exit status means submission.csv may not have been updated.
if result.returncode != 0:
    print(f"\nWARNING: bbox3 exited with status {result.returncode}")

Running bbox3 optimizer...
Parameters: -n 2000 -r 30
This may take 5-10 minutes...



=== bbox3 Output ===
Loading submission.csv...
Initial score: 70.676102
Round 1/30
Score after round 1: 70.676102
Round 2/30
Score after round 2: 70.676102
Round 3/30
Score after round 3: 70.676102
Round 4/30
Score after round 4: 70.676102
Round 5/30
Score after round 5: 70.676102
Round 6/30
Score after round 6: 70.676102
Round 7/30
Score after round 7: 70.676102
Round 8/30
Score after round 8: 70.676102
Round 9/30
Score after round 9: 70.676102
Round 10/30
Score after round 10: 70.676102
Round 11/30
Score after round 11: 70.676102
Round 12/30
Score after round 12: 70.676102
Round 13/30
Score after round 13: 70.676102
Round 14/30
Score after round 14: 70.676102
Round 15/30
Score after round 15: 70.676102
Round 16/30
Score after round 16: 70.676102
Round 17/30
Score after round 17: 70.676102
Round 18/30
Score after round 18: 70.676102
Round 19/30
Score after round 19: 70.676102
Round 20/30
Score after round 20: 70.676102
Round 21/30
Score after round 21: 70.676102
Round 22/30
Score aft

In [9]:
# Re-read submission.csv from the bbox3 working directory and score it with
# the same routine used for the baseline.
print("\nLoading optimized submission...")
df_opt = load_submission(os.path.join(work_dir, 'submission.csv'))

print("Calculating optimized score...")
scores_opt_df = calculate_score(df_opt)
total_score_opt = scores_opt_df['score_contrib'].sum()

# Positive "Improvement" means the optimized score is lower than the original.
report_lines = (
    f"\n=== OPTIMIZATION RESULTS ===",
    f"Original Score: {total_score:.6f}",
    f"Optimized Score: {total_score_opt:.6f}",
    f"Improvement: {total_score - total_score_opt:.6f}",
    f"Target Score: 68.894234",
    f"Gap to target: {total_score_opt - 68.894234:.6f}",
)
print("\n".join(report_lines))


Loading optimized submission...
Calculating optimized score...



=== OPTIMIZATION RESULTS ===
Original Score: 70.676102
Optimized Score: 70.676102
Improvement: 0.000000
Target Score: 68.894234
Gap to target: 1.781868


In [10]:
# Run the overlap check on every configuration n = 1..200 of the optimized
# submission and collect the n values that contain at least one overlap.
print("\nValidating optimized submission for overlaps...")

overlapping_groups_opt = []
for n in range(1, 201):
    config = df_opt[df_opt['n'] == n]
    placements = config[['x_val', 'y_val', 'deg_val']].itertuples(index=False, name=None)
    polygons = [get_tree_polygon(x, y, deg) for x, y, deg in placements]
    has_overlap, pairs = check_overlaps_high_precision(polygons)
    if has_overlap:
        overlapping_groups_opt.append(n)

if not overlapping_groups_opt:
    print("✓ NO OVERLAPS - Optimized submission is valid!")
else:
    print(f"WARNING: {len(overlapping_groups_opt)} overlapping groups in optimized submission!")
    print(f"Groups: {overlapping_groups_opt[:20]}")


Validating optimized submission for overlaps...


Groups: [4, 5, 12, 16, 17, 20, 21, 22, 23, 30, 35, 40, 41, 42, 43, 45, 48, 49, 50, 52]


In [11]:
# Save the best submission
# (json is imported in the notebook's top import cell.)
experiment_dir = '/home/code/experiments/002_valid_baseline_bbox3'

# Selection order: the optimized file wins only if it is overlap-free AND
# strictly better; otherwise fall back to the baseline if it is overlap-free;
# otherwise there is nothing valid to submit.
if not overlapping_groups_opt and total_score_opt < total_score:
    # Optimized is better and valid
    final_score = total_score_opt
    final_submission = os.path.join(work_dir, 'submission.csv')
    print(f"Using OPTIMIZED submission (score: {total_score_opt:.6f})")
elif not overlapping_groups:
    # Use original valid baseline
    final_score = total_score
    final_submission = os.path.join(experiment_dir, 'valid_baseline.csv')
    print(f"Using ORIGINAL baseline (score: {total_score:.6f})")
else:
    print("ERROR: No valid submission available!")
    final_score = None
    final_submission = None

# final_score / final_submission stay None when nothing valid was found, so
# later cells must handle that case.
if final_submission is not None:
    # Copy to submission folder
    os.makedirs('/home/submission', exist_ok=True)
    shutil.copy(final_submission, '/home/submission/submission.csv')
    print(f"\nSubmission saved to /home/submission/submission.csv")

    # Save metrics
    metrics = {'cv_score': final_score}
    with open(os.path.join(experiment_dir, 'metrics.json'), 'w') as f:
        json.dump(metrics, f)
    print(f"Metrics saved: {metrics}")

ERROR: No valid submission available!


In [12]:
# Let's check the sample submission from competition data
# (This cell was stored as one escaped line beginning with '#', so Python saw
# only a comment and executed nothing; it is restored here as real code.)
print("Checking sample submission from competition data...")
df_sample = load_submission('/home/data/sample_submission.csv')
print(f"Loaded {len(df_sample)} trees")

# Calculate score
scores_sample = calculate_score(df_sample)
total_score_sample = scores_sample['score_contrib'].sum()
print(f"Sample submission score: {total_score_sample:.6f}")

# Validate for overlaps
overlapping_sample = []
for n in range(1, 201):
    config = df_sample[df_sample['n'] == n]
    polygons = [get_tree_polygon(row['x_val'], row['y_val'], row['deg_val'])
                for _, row in config.iterrows()]
    has_overlap, pairs = check_overlaps_high_precision(polygons)
    if has_overlap:
        overlapping_sample.append(n)

if overlapping_sample:
    print(f"Sample has {len(overlapping_sample)} overlapping groups")
else:
    print("✓ Sample submission is VALID (no overlaps)!")

In [13]:
# Print results from sample submission check
# (This cell was stored as one escaped line beginning with '#', so it ran as a
# bare comment; it is restored here as real code.)
# total_score_sample / overlapping_sample only exist once the sample
# submission has actually been scored, so report that instead of raising
# NameError when they are missing.
if 'total_score_sample' in globals() and 'overlapping_sample' in globals():
    print(f"Sample submission score: {total_score_sample:.6f}")
    print(f"Overlapping groups: {len(overlapping_sample)}")
    if overlapping_sample:
        print(f"Groups: {overlapping_sample[:20]}")
    else:
        print("✓ Sample submission is VALID!")
else:
    print("Sample submission results are not available - it has not been scored in this session.")

In [None]:
# Check sample submission results
# (This cell was stored as one escaped line beginning with '#', so it ran as a
# bare comment; it is restored here as real code.)
# Guarded because total_score_sample / overlapping_sample only exist once the
# sample submission has actually been scored.
if 'total_score_sample' in globals() and 'overlapping_sample' in globals():
    print("Sample submission analysis:")
    print(f"  Score: {total_score_sample:.6f}")
    print(f"  Overlapping groups count: {len(overlapping_sample)}")
    print(f"  Valid: {len(overlapping_sample) == 0}")
else:
    print("Sample submission analysis unavailable - it has not been scored in this session.")

In [None]:
# Summary
print("\n" + "="*50)
print("EXPERIMENT SUMMARY")
print("="*50)
print(f"Original baseline score: {total_score:.6f}")
print(f"Optimized score: {total_score_opt:.6f}")
print(f"Improvement: {total_score - total_score_opt:.6f}")

# final_score is None when the save step found no overlap-free submission;
# formatting None with ':.6f' raises TypeError, so branch on it explicitly.
if final_score is None:
    print("Final score: n/a (no valid submission)")
    print(f"Target: 68.894234")
    print("Gap remaining: n/a")
else:
    print(f"Final score: {final_score:.6f}")
    print(f"Target: 68.894234")
    print(f"Gap remaining: {final_score - 68.894234:.6f}")

# The save step only sets a final score after choosing an overlap-free file,
# so its presence is the authoritative validity signal.
print(f"\nSubmission valid: {final_score is not None}")

# Submission cell for model class
# NOTE(review): the class body is empty and no visible cell reads `model`;
# presumably an external harness expects these names to exist - confirm.
class BBox3Optimizer:
    """Empty placeholder class; it defines no attributes or methods."""
    pass

# Module-level instance of the placeholder class.
model = BBox3Optimizer()
print("\nModel class defined for submission")