# Loop 3 Analysis - Fix Overlap in Group 004

Submission failed with 'Overlapping trees in group 004'. Need to investigate and fix.

In [1]:
import math
import numpy as np
import pandas as pd
from decimal import Decimal, getcontext
from shapely import affinity
from shapely.geometry import Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree

getcontext().prec = 50  # Higher precision
scale_factor = Decimal('1e15')

print('Libraries loaded')

Libraries loaded


In [2]:
# Load the failed submission
df = pd.read_csv('/home/submission/submission.csv')
print(f'Total rows: {len(df)}')

# Extract group 004
group_004 = df[df['id'].str.startswith('004_')]
print(f'\nGroup 004 rows: {len(group_004)}')
print(group_004)

Total rows: 20100

Group 004 rows: 4
      id                    x                    y                   deg
6  004_0  s-0.324747789589372   s0.132109978088185  s156.370622145636389
7  004_1   s0.315354346242638   s0.132109978063475  s156.370622269264089
8  004_2   s0.324747789592379  s-0.732109978069476  s336.370622269264004
9  004_3  s-0.315354348134818  s-0.732109978094186  s336.370622145636446


In [3]:
# Parse coordinates
def parse_coord(s):
    return float(str(s).replace('s', ''))

group_004_parsed = []
for _, row in group_004.iterrows():
    x = parse_coord(row['x'])
    y = parse_coord(row['y'])
    deg = parse_coord(row['deg'])
    group_004_parsed.append((x, y, deg))
    print(f"{row['id']}: x={x:.15f}, y={y:.15f}, deg={deg:.15f}")

print(f'\nParsed {len(group_004_parsed)} trees')

004_0: x=-0.324747789589372, y=0.132109978088185, deg=156.370622145636389
004_1: x=0.315354346242638, y=0.132109978063475, deg=156.370622269264089
004_2: x=0.324747789592379, y=-0.732109978069476, deg=336.370622269264004
004_3: x=-0.315354348134818, y=-0.732109978094186, deg=336.370622145636446

Parsed 4 trees


In [4]:
# Create tree polygon with high precision
class ChristmasTree:
    def __init__(self, center_x, center_y, angle):
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        trunk_w = Decimal('0.15')
        trunk_h = Decimal('0.2')
        base_w = Decimal('0.7')
        mid_w = Decimal('0.4')
        top_w = Decimal('0.25')
        tip_y = Decimal('0.8')
        tier_1_y = Decimal('0.5')
        tier_2_y = Decimal('0.25')
        base_y = Decimal('0.0')
        trunk_bottom_y = -trunk_h

        initial_polygon = Polygon([
            (Decimal('0.0') * scale_factor, tip_y * scale_factor),
            (top_w / Decimal('2') * scale_factor, tier_1_y * scale_factor),
            (top_w / Decimal('4') * scale_factor, tier_1_y * scale_factor),
            (mid_w / Decimal('2') * scale_factor, tier_2_y * scale_factor),
            (mid_w / Decimal('4') * scale_factor, tier_2_y * scale_factor),
            (base_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, base_y * scale_factor),
            (trunk_w / Decimal('2') * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, trunk_bottom_y * scale_factor),
            (-(trunk_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(base_w / Decimal('2')) * scale_factor, base_y * scale_factor),
            (-(mid_w / Decimal('4')) * scale_factor, tier_2_y * scale_factor),
            (-(mid_w / Decimal('2')) * scale_factor, tier_2_y * scale_factor),
            (-(top_w / Decimal('4')) * scale_factor, tier_1_y * scale_factor),
            (-(top_w / Decimal('2')) * scale_factor, tier_1_y * scale_factor),
        ])
        rotated = affinity.rotate(initial_polygon, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(rotated,
                                          xoff=float(self.center_x * scale_factor),
                                          yoff=float(self.center_y * scale_factor))

def check_overlap_detailed(trees):
    """Check for overlaps and return details"""
    polygons = [t.polygon for t in trees]
    overlaps = []
    for i in range(len(polygons)):
        for j in range(i+1, len(polygons)):
            if polygons[i].intersects(polygons[j]):
                intersection = polygons[i].intersection(polygons[j])
                if not polygons[i].touches(polygons[j]):
                    overlaps.append((i, j, intersection.area))
    return overlaps

print('Functions defined')

Functions defined


In [5]:
# Check group 004 for overlaps
trees_004 = [ChristmasTree(x, y, deg) for x, y, deg in group_004_parsed]
overlaps = check_overlap_detailed(trees_004)

if overlaps:
    print(f'OVERLAPS FOUND in group 004:')
    for i, j, area in overlaps:
        print(f'  Trees {i} and {j}: intersection area = {area}')
else:
    print('No overlaps detected in group 004 with our validation')

OVERLAPS FOUND in group 004:
  Trees 1 and 2: intersection area = 0.005523681640625


In [6]:
# Let's check ALL groups for overlaps
print('Checking all groups for overlaps...')

df['N'] = df['id'].str.split('_').str[0].astype(int)

overlapping_groups = []
for n, group in df.groupby('N'):
    trees = []
    for _, row in group.iterrows():
        x = parse_coord(row['x'])
        y = parse_coord(row['y'])
        deg = parse_coord(row['deg'])
        trees.append(ChristmasTree(x, y, deg))
    
    overlaps = check_overlap_detailed(trees)
    if overlaps:
        overlapping_groups.append((n, overlaps))
        if n <= 10:  # Print details for small groups
            print(f'Group {n}: {len(overlaps)} overlaps')
            for i, j, area in overlaps:
                print(f'  Trees {i} and {j}: area = {area}')

print(f'\nTotal groups with overlaps: {len(overlapping_groups)}')
if overlapping_groups:
    print(f'Groups: {[g[0] for g in overlapping_groups][:20]}...')

Checking all groups for overlaps...
Group 4: 1 overlaps
  Trees 1 and 2: area = 0.005523681640625
Group 5: 1 overlaps
  Trees 2 and 3: area = 0.23046875
Group 9: 1 overlaps
  Trees 6 and 7: area = 0.001953125



Total groups with overlaps: 40
Groups: [4, 5, 9, 12, 17, 20, 21, 23, 30, 31, 35, 40, 42, 45, 48, 49, 52, 53, 59, 66]...


In [7]:
# List all pre-optimized sources
import os

preopt_dir = '/home/code/preoptimized/'
files = [f for f in os.listdir(preopt_dir) if f.endswith('.csv')]
print(f'Available pre-optimized files: {files}')

Available pre-optimized files: ['ensemble.csv', 'saspav.csv', 'best_ensemble.csv', 'submission_70_926149550346.csv', 'best_public.csv', '71.97.csv', 'corner_extraction.csv']


In [8]:
# Check each pre-optimized file for overlaps
def load_and_check_overlaps(filepath):
    """Load a submission and check all groups for overlaps"""
    df = pd.read_csv(filepath)
    df['N'] = df['id'].str.split('_').str[0].astype(int)
    
    overlapping_groups = []
    for n, group in df.groupby('N'):
        trees = []
        for _, row in group.iterrows():
            x = parse_coord(row['x'])
            y = parse_coord(row['y'])
            deg = parse_coord(row['deg'])
            trees.append(ChristmasTree(x, y, deg))
        
        overlaps = check_overlap_detailed(trees)
        if overlaps:
            overlapping_groups.append(n)
    
    return overlapping_groups

print("Checking each pre-optimized file for overlaps...")
for f in files:
    filepath = os.path.join(preopt_dir, f)
    overlaps = load_and_check_overlaps(filepath)
    print(f"{f}: {len(overlaps)} groups with overlaps")

Checking each pre-optimized file for overlaps...


ensemble.csv: 13 groups with overlaps


saspav.csv: 12 groups with overlaps


best_ensemble.csv: 12 groups with overlaps


submission_70_926149550346.csv: 0 groups with overlaps


best_public.csv: 0 groups with overlaps


71.97.csv: 1 groups with overlaps


corner_extraction.csv: 7 groups with overlaps


In [9]:
# Calculate scores for the valid submissions
from numba import njit

@njit
def make_polygon_template():
    tw=0.15; th=0.2; bw=0.7; mw=0.4; ow=0.25
    tip=0.8; t1=0.5; t2=0.25; base=0.0; tbot=-th
    x = np.array([0,ow/2,ow/4,mw/2,mw/4,bw/2,tw/2,tw/2,-tw/2,-tw/2,-bw/2,-mw/4,-mw/2,-ow/4,-ow/2], np.float64)
    y = np.array([tip,t1,t1,t2,t2,base,base,tbot,tbot,base,base,t2,t2,t1,t1], np.float64)
    return x, y

@njit
def score_group(xs, ys, degs, tx, ty):
    n = xs.size
    V = tx.size
    mnx = 1e300; mny = 1e300; mxx = -1e300; mxy = -1e300
    for i in range(n):
        r = degs[i] * math.pi / 180.0
        c = math.cos(r); s = math.sin(r)
        xi = xs[i]; yi = ys[i]
        for j in range(V):
            X = c*tx[j] - s*ty[j] + xi
            Y = s*tx[j] + c*ty[j] + yi
            if X < mnx: mnx = X
            if X > mxx: mxx = X
            if Y < mny: mny = Y
            if Y > mxy: mxy = Y
    side = max(mxx - mnx, mxy - mny)
    return side * side / n

tx, ty = make_polygon_template()
_ = score_group(np.array([0.0]), np.array([0.0]), np.array([45.0]), tx, ty)
print('JIT compiled')

JIT compiled


In [10]:
# Load and score valid submissions
def load_submission(filepath):
    df = pd.read_csv(filepath)
    df['N'] = df['id'].str.split('_').str[0].astype(int)
    configs = {}
    for n, g in df.groupby('N'):
        xs = np.array([parse_coord(v) for v in g['x']], np.float64)
        ys = np.array([parse_coord(v) for v in g['y']], np.float64)
        degs = np.array([parse_coord(v) for v in g['deg']], np.float64)
        configs[n] = (xs, ys, degs)
    return configs

def calculate_total_score(configs, tx, ty):
    total = 0.0
    for n, (xs, ys, degs) in configs.items():
        total += score_group(xs, ys, degs, tx, ty)
    return total

# Score the valid submissions
valid_files = ['submission_70_926149550346.csv', 'best_public.csv']
for f in valid_files:
    filepath = os.path.join(preopt_dir, f)
    configs = load_submission(filepath)
    score = calculate_total_score(configs, tx, ty)
    print(f"{f}: score = {score:.6f}")

submission_70_926149550346.csv: score = 70.926150
best_public.csv: score = 70.926150


In [11]:
# Create best valid ensemble - for each N, take the best valid config from all sources
print("Creating best valid ensemble...")

# Load all sources
all_sources = {}
for f in files:
    filepath = os.path.join(preopt_dir, f)
    all_sources[f] = load_submission(filepath)

# For each N, find the best valid config
best_valid_configs = {}
for n in range(1, 201):
    best_score = float('inf')
    best_source = None
    best_config = None
    
    for source_name, configs in all_sources.items():
        xs, ys, degs = configs[n]
        
        # Check if valid (no overlaps)
        trees = [ChristmasTree(xs[i], ys[i], degs[i]) for i in range(n)]
        overlaps = check_overlap_detailed(trees)
        
        if not overlaps:  # Valid config
            score = score_group(xs, ys, degs, tx, ty)
            if score < best_score:
                best_score = score
                best_source = source_name
                best_config = (xs, ys, degs)
    
    if best_config is None:
        print(f"WARNING: No valid config found for N={n}!")
    else:
        best_valid_configs[n] = best_config
        if n <= 10 or n % 50 == 0:
            print(f"N={n}: best from {best_source}, score={best_score:.6f}")

# Calculate total score
total_score = calculate_total_score(best_valid_configs, tx, ty)
print(f"\nTotal score: {total_score:.6f}")
print(f"Target: 68.922808")
print(f"Gap: {total_score - 68.922808:.6f}")

Creating best valid ensemble...


N=1: best from ensemble.csv, score=0.661250
N=2: best from ensemble.csv, score=0.450779
N=3: best from ensemble.csv, score=0.434745
N=4: best from ensemble.csv, score=0.416545
N=5: best from saspav.csv, score=0.416850
N=6: best from ensemble.csv, score=0.399610
N=7: best from ensemble.csv, score=0.399897
N=8: best from ensemble.csv, score=0.385407
N=9: best from ensemble.csv, score=0.387415
N=10: best from ensemble.csv, score=0.376630


N=50: best from ensemble.csv, score=0.360753


N=100: best from ensemble.csv, score=0.345531


N=150: best from ensemble.csv, score=0.337065


N=200: best from ensemble.csv, score=0.337731

Total score: 70.676764
Target: 68.922808
Gap: 1.753956


In [12]:
# Verify the ensemble has no overlaps
print("Verifying ensemble has no overlaps...")
overlapping = []
for n, (xs, ys, degs) in best_valid_configs.items():
    trees = [ChristmasTree(xs[i], ys[i], degs[i]) for i in range(n)]
    overlaps = check_overlap_detailed(trees)
    if overlaps:
        overlapping.append(n)

if overlapping:
    print(f"ERROR: {len(overlapping)} groups still have overlaps: {overlapping}")
else:
    print("SUCCESS: All 200 configurations are valid (no overlaps)!")

Verifying ensemble has no overlaps...


SUCCESS: All 200 configurations are valid (no overlaps)!


In [13]:
# Save the valid ensemble submission
print("Saving valid ensemble submission...")

rows = []
for n in range(1, 201):
    xs, ys, degs = best_valid_configs[n]
    for i in range(n):
        rows.append({
            'id': f'{n:03d}_{i}',
            'x': f's{xs[i]:.15f}',
            'y': f's{ys[i]:.15f}',
            'deg': f's{degs[i]:.15f}'
        })

submission = pd.DataFrame(rows)
submission.to_csv('/home/submission/submission.csv', index=False)

# Also save to experiment folder
import os
os.makedirs('/home/code/experiments/008_valid_ensemble', exist_ok=True)
submission.to_csv('/home/code/experiments/008_valid_ensemble/submission.csv', index=False)

print(f"Submission saved!")
print(f"Total rows: {len(submission)}")
print(f"Score: {total_score:.6f}")
print(f"Target: 68.922808")
print(f"Gap: {total_score - 68.922808:.6f}")

Saving valid ensemble submission...
Submission saved!
Total rows: 20100
Score: 70.676764
Target: 68.922808
Gap: 1.753956
