# Experiment 004: Use chistyakov_best.csv as Baseline

The evaluator found that our previous overlap validation did not match the leaderboard (LB) check. The LB uses:
- `intersects() and not touches()` for overlap detection
- Coordinates scaled by a factor of 1e15

chistyakov_best.csv has 0 overlaps and should pass LB validation.

In [1]:
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import warnings
warnings.filterwarnings('ignore')

# Set precision for Decimal (matching LB)
# NOTE(review): neither the Decimal context nor `scale_factor` is referenced by
# any other cell visible in this notebook -- the polygon helpers work in plain
# NumPy floats -- so the 1e15 scaling is declared here but never applied.
getcontext().prec = 25
scale_factor = Decimal('1e15')

print('Libraries loaded')

Libraries loaded


In [2]:
# Tree geometry constants
# Widths are full widths (the vertex table below halves or quarters them);
# the *_Y values are the heights at which each tier of the outline sits.
TRUNK_W = 0.15  # full width of the trunk
TRUNK_H = 0.2   # trunk height (extends below BASE_Y)
BASE_W = 0.7    # full width of the lowest tier, at BASE_Y
MID_W = 0.4     # full width of the middle tier, at TIER_2_Y
TOP_W = 0.25    # full width of the top tier, at TIER_1_Y
TIP_Y = 0.8     # y of the single tip vertex
TIER_1_Y = 0.5
TIER_2_Y = 0.25
BASE_Y = 0.0
TRUNK_BOTTOM_Y = -TRUNK_H

# Outline of one un-rotated tree as a (15, 2) array of (x, y) rows.
# Order: start at the tip, run down the right-hand side (each tier has an outer
# point at W/2 and an inner notch at W/4), across the trunk bottom, then back up
# the left-hand side as the mirror image.  The first vertex is not repeated.
BASE_TREE_VERTICES = np.array([
    [0.0, TIP_Y],
    [TOP_W/2, TIER_1_Y],
    [TOP_W/4, TIER_1_Y],
    [MID_W/2, TIER_2_Y],
    [MID_W/4, TIER_2_Y],
    [BASE_W/2, BASE_Y],
    [TRUNK_W/2, BASE_Y],
    [TRUNK_W/2, TRUNK_BOTTOM_Y],
    [-TRUNK_W/2, TRUNK_BOTTOM_Y],
    [-TRUNK_W/2, BASE_Y],
    [-BASE_W/2, BASE_Y],
    [-MID_W/4, TIER_2_Y],
    [-MID_W/2, TIER_2_Y],
    [-TOP_W/4, TIER_1_Y],
    [-TOP_W/2, TIER_1_Y],
])

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` degrees and moved to ``(x, y)``.

    The template in ``BASE_TREE_VERTICES`` is rotated about its own origin
    (0, 0) and then translated so that origin lands on ``(x, y)``.  All
    arithmetic is ordinary NumPy floating point; no Decimal scaling is applied.

    Args:
        x: Offset added to the first coordinate of every vertex.
        y: Offset added to the second coordinate of every vertex.
        deg: Rotation angle in degrees (converted with ``np.radians``).
            Positive values rotate counter-clockwise in an x-right / y-up frame.

    Returns:
        A ``Polygon`` built from the transformed vertex array.
    """
    angle_rad = np.radians(deg)
    cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    # Standard 2-D rotation matrix.
    rotation_matrix = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
    # Vertices are stored as rows, so multiply by the transpose to rotate each row.
    rotated = BASE_TREE_VERTICES @ rotation_matrix.T
    translated = rotated + np.array([x, y])
    return Polygon(translated)

def get_bounding_box_side(polygons):
    """Return the longer side of the axis-aligned box enclosing all polygons.

    The enclosing box is accumulated from each geometry's own ``bounds`` tuple
    rather than by first merging everything with ``unary_union``: the extremes
    of a merged shape are the extremes of its members, so the overlay step was
    pure overhead (and one more place for floating-point noise to creep in).

    Args:
        polygons: Sequence of objects exposing ``bounds`` as
            ``(minx, miny, maxx, maxy)``, e.g. shapely geometries.

    Returns:
        ``max(width, height)`` of the joint bounding box as a float, or ``0.0``
        when the sequence is empty or contains only empty geometries.
    """
    if not polygons:
        return 0.0

    min_x = min_y = float('inf')
    max_x = max_y = float('-inf')
    for polygon in polygons:
        # An empty geometry has no extent to contribute.
        if getattr(polygon, 'is_empty', False):
            continue
        left, bottom, right, top = polygon.bounds
        min_x = min(min_x, left)
        min_y = min(min_y, bottom)
        max_x = max(max_x, right)
        max_y = max(max_y, top)

    # Still at the sentinels: nothing with an extent was seen.
    if min_x > max_x:
        return 0.0
    return max(max_x - min_x, max_y - min_y)

def parse_submission(df):
    """Return a copy of a submission frame with numeric ``x``, ``y``, ``deg``.

    Submission files store these three columns as text with a leading ``'s'``
    (for example ``'s0.354635418797758'``).  Only that leading marker is
    removed; any other stray character is left in place so that malformed
    values fail loudly in the float conversion instead of being silently
    repaired.  Columns that are already numeric are accepted and simply cast
    to float.

    Args:
        df: DataFrame containing at least the columns ``x``, ``y`` and ``deg``.

    Returns:
        A new DataFrame; ``df`` itself is not modified.

    Raises:
        ValueError: If a value is not a valid number once the prefix is removed.
    """
    result = df.copy()
    for col in ['x', 'y', 'deg']:
        values = result[col]
        if not pd.api.types.is_numeric_dtype(values):
            # Anchored, explicit regex: strips just the prefix and does not
            # depend on the pandas-version-specific default of `regex`.
            values = values.str.replace(r'^\s*s', '', regex=True)
        result[col] = values.astype(float)
    return result

print('Helper functions defined')

Helper functions defined


In [3]:
# Overlap predicate used for validation throughout this notebook.
# The LB rule quoted above is: intersects() and not touches()
def check_lb_overlaps(polygons):
    """Find the first pair of polygons that intersect without merely touching.

    Pairs are visited in index order (0,1), (0,2), ..., (1,2), ... and the scan
    stops at the first hit.  The geometries are tested exactly as passed in;
    no coordinate scaling happens inside this function.

    Returns:
        ``(True, (i, j))`` for the first offending pair, else ``(False, None)``.
    """
    for first_idx, first in enumerate(polygons):
        later = polygons[first_idx + 1:]
        for second_idx, second in enumerate(later, start=first_idx + 1):
            if not first.intersects(second):
                continue
            if first.touches(second):
                # Boundary contact only: allowed.
                continue
            return True, (first_idx, second_idx)
    return False, None

print('LB overlap check defined')

LB overlap check defined


In [4]:
# Load chistyakov_best.csv
# The x / y / deg columns arrive as text with an 's' prefix (visible in the
# head() printout), so the raw frame is kept untouched and a numeric copy is
# derived from it below.
print('Loading chistyakov_best.csv...')
chistyakov_df = pd.read_csv('/home/code/preoptimized/chistyakov_best.csv')
print(f'Shape: {chistyakov_df.shape}')
print(chistyakov_df.head())

# Numeric copy used by the scoring, overlap and hybrid cells that follow.
chistyakov_parsed = parse_submission(chistyakov_df)

Loading chistyakov_best.csv...
Shape: (20100, 4)
      id                     x                    y                   deg
0  001_0  s-48.462025735646897  s59.036924156646883   s45.000000000000000
1  002_0    s0.354635418797758   s0.795710882346675  s203.629377730552051
2  002_1    s0.046441279555199   s0.272792367735944   s23.629377730552051
3  003_0    s0.647087199308396   s1.202535626558404  s155.134051937100821
4  003_1    s1.239428254921482   s1.298076560608782   s66.370622269343002


In [5]:
# Calculate score
def calculate_score(df, max_n=200):
    """Score a parsed submission as the sum over n of ``side_n ** 2 / n``.

    For every configuration size ``n`` in ``1..max_n`` the rows whose ``id``
    starts with the zero-padded prefix ``'{n:03d}_'`` are turned into polygons
    and the longer side of their joint bounding box is taken as ``side_n``.

    A configuration that does not contain exactly ``n`` rows is reported with a
    warning and skipped.  It then contributes nothing to the total, so the
    returned score is understated whenever a warning is printed.

    Args:
        df: Parsed submission with columns ``id`` (str) and numeric ``x``,
            ``y``, ``deg``.
        max_n: Largest configuration size to score (default 200, the value
            that was previously hard-coded).

    Returns:
        ``(total_score, scores)`` where ``scores`` maps each scored ``n`` to
        its bounding-box side.
    """
    scores = {}
    total_score = 0.0
    ids = df['id']
    for n in range(1, max_n + 1):
        config_df = df[ids.str.startswith(f'{n:03d}_')]
        if len(config_df) != n:
            print(f'Warning: Config {n} has {len(config_df)} trees')
            continue
        # Walk the three columns in lock-step instead of materialising a
        # Series per row with iterrows().
        polygons = [
            create_tree_polygon(x, y, deg)
            for x, y, deg in zip(config_df['x'], config_df['y'], config_df['deg'])
        ]
        side = get_bounding_box_side(polygons)
        scores[n] = side
        total_score += (side ** 2) / n
    return total_score, scores

print('Calculating chistyakov score...')
chistyakov_score, chistyakov_sides = calculate_score(chistyakov_parsed)
print(f'Score: {chistyakov_score:.6f}')

Calculating chistyakov score...


Score: 70.926150


In [6]:
# Run the pairwise overlap predicate over every configuration of the baseline
print('Checking for LB overlaps (intersects and not touches)...')
overlap_configs = []
baseline_ids = chistyakov_parsed['id']
for n in range(1, 201):
    config_df = chistyakov_parsed[baseline_ids.str.startswith(f'{n:03d}_')]
    polygons = []
    for _, row in config_df.iterrows():
        polygons.append(create_tree_polygon(row['x'], row['y'], row['deg']))
    has_overlap, _ = check_lb_overlaps(polygons)
    if has_overlap:
        overlap_configs.append(n)

print(f'Configs with LB overlaps: {len(overlap_configs)}')
if not overlap_configs:
    print('NO OVERLAPS! chistyakov_best.csv should pass LB validation.')
else:
    print(f'Overlap configs: {overlap_configs}')

Checking for LB overlaps (intersects and not touches)...


Configs with LB overlaps: 0
NO OVERLAPS! chistyakov_best.csv should pass LB validation.


In [7]:
# Measure how close neighbouring trees get, to see how much slack there is
print('\nChecking minimum distances between trees...')

def get_min_distance(polygons):
    """Smallest pairwise ``distance`` over all unordered pairs of polygons.

    Returns ``float('inf')`` when there are fewer than two polygons.
    """
    closest = float('inf')
    for idx, poly_a in enumerate(polygons):
        for poly_b in polygons[idx + 1:]:
            gap = poly_a.distance(poly_b)
            if gap < closest:
                closest = gap
    return closest

# Spot-check a handful of configuration sizes
for n in (5, 10, 50, 100, 200):
    in_config = chistyakov_parsed['id'].str.startswith(f'{n:03d}_')
    config_df = chistyakov_parsed[in_config]
    polygons = [
        create_tree_polygon(row['x'], row['y'], row['deg'])
        for _, row in config_df.iterrows()
    ]
    print(f'N={n}: min_dist = {get_min_distance(polygons):.2e}')


Checking minimum distances between trees...
N=5: min_dist = 1.47e-14
N=10: min_dist = 3.77e-13
N=50: min_dist = 1.84e-12
N=100: min_dist = 1.08e-13


N=200: min_dist = 8.58e-14


In [8]:
# Copy chistyakov_best.csv as our submission
import os
import shutil

# Only the submission directory is created on demand; the experiment
# directory is expected to exist already.
os.makedirs('/home/submission', exist_ok=True)
for destination in (
    '/home/submission/submission.csv',
    '/home/code/experiments/004_chistyakov_baseline/submission.csv',
):
    shutil.copy('/home/code/preoptimized/chistyakov_best.csv', destination)

print('Submission saved!')
print(f'Score: {chistyakov_score:.6f}')
print('Target: 68.896973')
gap_to_target = chistyakov_score - 68.896973
print(f'Gap: {gap_to_target:.6f}')

Submission saved!
Score: 70.926150
Target: 68.896973
Gap: 2.029177


In [9]:
# Summary of the baseline run, assembled once and printed in a single call
banner = '=' * 60
summary_lines = [
    '\n' + banner,
    'EXPERIMENT 004 SUMMARY',
    banner,
    'Using: chistyakov_best.csv',
    f'Score: {chistyakov_score:.6f}',
    f'LB overlaps: {len(overlap_configs)}',
    'Target: 68.896973',
    f'Gap: {chistyakov_score - 68.896973:.6f}',
    banner,
    '\nThis submission should pass LB validation.',
    'If it passes, we can create a hybrid to improve score.',
]
print('\n'.join(summary_lines))


EXPERIMENT 004 SUMMARY
Using: chistyakov_best.csv
Score: 70.926150
LB overlaps: 0
Target: 68.896973
Gap: 2.029177

This submission should pass LB validation.
If it passes, we can create a hybrid to improve score.


In [10]:
# Gather every pre-optimised submission so that, per N, a better configuration
# can replace the baseline one where it is VALID
print('Creating hybrid submission...')

# The baseline is already parsed; it goes in first.
submissions = {'chistyakov_best.csv': chistyakov_parsed}

other_files = [
    'best_snapshot.csv',
    'better_ensemble.csv',
    'saspav_best.csv',
    'bucket_of_chump.csv',
    'ensemble_70_627.csv',
]
for filename in other_files:
    try:
        submissions[filename] = parse_submission(
            pd.read_csv(f'/home/code/preoptimized/{filename}')
        )
    except Exception as e:
        # Best effort: a file that cannot be read or parsed is reported and left out.
        print(f'Error loading {filename}: {e}')

print(f'Loaded {len(submissions)} submissions')

Creating hybrid submission...
Loaded 6 submissions


In [11]:
# Per N, keep the overlap-free configuration with the smallest bounding box
print('Finding best valid configuration for each N...')

best_configs, best_sources, best_sides = {}, {}, {}

for n in range(1, 201):
    prefix = f'{n:03d}_'
    best_side, best_config, best_source = float('inf'), None, None

    for filename, df in submissions.items():
        config_df = df[df['id'].str.startswith(prefix)].copy()

        # A source that does not hold exactly n trees for this N is not a candidate.
        if len(config_df) != n:
            continue

        polygons = [
            create_tree_polygon(row['x'], row['y'], row['deg'])
            for _, row in config_df.iterrows()
        ]
        has_overlap, _ = check_lb_overlaps(polygons)
        if has_overlap:
            continue

        side = get_bounding_box_side(polygons)
        # Strict comparison: on a tie the source seen first keeps its place.
        if side < best_side:
            best_side, best_config, best_source = side, config_df, filename

    if best_config is None:
        print(f'WARNING: No valid config found for N={n}')
        continue

    best_configs[n] = best_config
    best_sources[n] = best_source
    best_sides[n] = best_side

print(f'Found valid configs for {len(best_configs)} out of 200 N values')

Finding best valid configuration for each N...


Found valid configs for 200 out of 200 N values


In [12]:
# Stitch the selected per-N configurations into a single submission frame
print('Building hybrid ensemble...')

ensemble_data = [
    record
    for n in range(1, 201)
    if n in best_configs
    for record in best_configs[n][['id', 'x', 'y', 'deg']].to_dict('records')
]

hybrid_ensemble = pd.DataFrame(ensemble_data)
print(f'Hybrid ensemble shape: {hybrid_ensemble.shape}')

# Score from the sides recorded during selection (same order as they were stored)
hybrid_score = sum(side ** 2 / n for n, side in best_sides.items())
print(f'Hybrid ensemble score: {hybrid_score:.6f}')
improvement = chistyakov_score - hybrid_score
print(f'Improvement over chistyakov: {improvement:.6f}')

Building hybrid ensemble...


Hybrid ensemble shape: (20100, 4)
Hybrid ensemble score: 70.627589
Improvement over chistyakov: 0.298561


In [13]:
# How many N values each source file ended up supplying
print('Source breakdown for hybrid ensemble:')
from collections import Counter
source_counts = Counter(best_sources[n] for n in best_sources)
for source, count in source_counts.most_common():
    line = f'{source}: {count} configs'
    print(line)

Source breakdown for hybrid ensemble:
best_snapshot.csv: 148 configs
ensemble_70_627.csv: 34 configs
better_ensemble.csv: 9 configs
bucket_of_chump.csv: 8 configs
chistyakov_best.csv: 1 configs


In [14]:
# Re-run the overlap predicate on the assembled hybrid frame
print('Final LB overlap verification...')
final_overlaps = 0
hybrid_ids = hybrid_ensemble['id']
for n in range(1, 201):
    config_df = hybrid_ensemble[hybrid_ids.str.startswith(f'{n:03d}_')]
    polygons = [
        create_tree_polygon(row['x'], row['y'], row['deg'])
        for _, row in config_df.iterrows()
    ]
    has_overlap, pair = check_lb_overlaps(polygons)
    if not has_overlap:
        continue
    final_overlaps += 1
    print(f'N={n}: overlap at {pair}')

if final_overlaps:
    print(f'Found {final_overlaps} configs with LB overlaps')
else:
    print('NO LB OVERLAPS! Hybrid should pass LB validation.')

Final LB overlap verification...


NO LB OVERLAPS! Hybrid should pass LB validation.


In [15]:
# Save the hybrid ensemble if it's better than chistyakov
if hybrid_score < chistyakov_score and final_overlaps == 0:
    print('Hybrid is better! Saving...')

    # Format with 's' prefix.
    # The overlap check above ran on the float64 values held in
    # `hybrid_ensemble`.  A fixed '.15f' format cannot reproduce a float64
    # exactly once |value| is around 0.1 or larger, so the file would describe
    # slightly different geometry from what was validated -- and the recorded
    # gaps between trees are only ~1e-14.  Writing the shortest positional text
    # that round-trips keeps the saved coordinates equal to the validated ones.
    submission = hybrid_ensemble.copy()
    for col in ['x', 'y', 'deg']:
        submission[col] = 's' + submission[col].map(
            lambda value: np.format_float_positional(value, unique=True, trim='0')
        )

    submission.to_csv('/home/submission/submission.csv', index=False)
    submission.to_csv('/home/code/experiments/004_chistyakov_baseline/submission.csv', index=False)

    print(f'Saved hybrid with score: {hybrid_score:.6f}')
    final_score = hybrid_score
else:
    # Nothing is written here: the baseline copy made earlier stays in place.
    print('Keeping chistyakov_best.csv as submission')
    final_score = chistyakov_score

print(f'Final score: {final_score:.6f}')
print(f'Target: 68.896973')
print(f'Gap: {final_score - 68.896973:.6f}')

Hybrid is better! Saving...
Saved hybrid with score: 70.627589
Final score: 70.627589
Target: 68.896973
Gap: 1.730616
