# Baseline Experiment - Pre-optimized Submission

This notebook loads the best pre-optimized submission from previous runs, scores it, and uses it as the baseline.

In [1]:
import json
import shutil
from decimal import Decimal, getcontext
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
from shapely import affinity
from shapely.geometry import Polygon
from shapely.strtree import STRtree

getcontext().prec = 30

# Load submission
submission_path = '/home/code/experiments/001_baseline/submission.csv'
# Read every column as text: the coordinates are later handed to Decimal via
# parse_value, so they must never be coerced to float64 by pandas' type
# inference (which would happen if a file lacked the 's' prefix).
df = pd.read_csv(submission_path, dtype=str)
# One row per tree, with n trees for each configuration n = 1..200.
expected_rows = sum(range(1, 201))
print(f"Submission shape: {df.shape}")
print(f"Expected rows: {expected_rows} (1+2+...+200)")
print(f"Columns: {df.columns.tolist()}")
print(df.head(10))

Submission shape: (20100, 4)
Expected rows: 20100 (1+2+...+200)
Columns: ['id', 'x', 'y', 'deg']
      id                     x                    y                   deg
0  001_0  s-48.196086194214246  s58.770984615214225   s45.000000000000000
1  002_0    s0.154097069621356  s-0.038540742694795  s203.629377730656842
2  002_1   s-0.154097069621373  s-0.561459257305224   s23.629377730656792
3  003_0    s1.123655816140301   s0.781101815992563  s111.125132292892999
4  003_1    s1.234055695842160   s1.275999500663759   s66.370622269343002
5  003_2    s0.641714640229075   s1.180458566613381  s155.134051937100821
6  004_0   s-0.324747789589372   s0.132109978088185  s156.370622145636389
7  004_1    s0.315354346242638   s0.132109978063475  s156.370622269264089
8  004_2    s0.324747789592379  s-0.732109978069476  s336.370622269264004
9  004_3   s-0.315354348134818  s-0.732109978094186  s336.370622145636446


In [2]:
# Christmas Tree class
class ChristmasTree:
    """A tree-shaped polygon placed at a given position and rotation.

    The constructor keeps the placement as ``Decimal`` values in
    ``center_x``, ``center_y`` and ``angle`` and exposes the placed shape as
    a shapely polygon in ``polygon``.

    Args:
        center_x: X offset applied to the outline (anything ``str()`` can
            turn into a valid ``Decimal`` literal).
        center_y: Y offset applied to the outline.
        angle: Rotation passed to ``affinity.rotate`` (degrees, its default
            unit), applied about the origin before the translation.
    """

    def __init__(self, center_x='0', center_y='0', angle='0'):
        # Going through str() lets floats and strings share one code path.
        self.center_x = Decimal(str(center_x))
        self.center_y = Decimal(str(center_y))
        self.angle = Decimal(str(angle))

        # Fixed 15-vertex outline, listed from the tip down the right side
        # and back up the left side.
        outline_vertices = [
            (0.0, 0.8),                       # tip
            (0.125, 0.5), (0.0625, 0.5),      # right, top tier
            (0.2, 0.25), (0.1, 0.25),         # right, middle tier
            (0.35, 0.0),                      # right, base
            (0.075, 0.0), (0.075, -0.2),      # right side of trunk
            (-0.075, -0.2), (-0.075, 0.0),    # left side of trunk
            (-0.35, 0.0),                     # left, base
            (-0.1, 0.25), (-0.2, 0.25),       # left, middle tier
            (-0.0625, 0.5), (-0.125, 0.5),    # left, top tier
        ]
        outline = Polygon(outline_vertices)

        # Rotate about the outline's own origin first, then move it into place.
        turned = affinity.rotate(outline, float(self.angle), origin=(0, 0))
        self.polygon = affinity.translate(
            turned,
            xoff=float(self.center_x),
            yoff=float(self.center_y),
        )

def parse_value(val):
    """Return a submission cell as a plain string, without its 's' marker.

    Strings that start with ``'s'`` lose exactly that first character; every
    other value is returned as ``str(val)``.
    """
    is_prefixed = isinstance(val, str) and val.startswith('s')
    return val[1:] if is_prefixed else str(val)

def load_trees_for_n(df, n):
    """Build the ``ChristmasTree`` objects belonging to configuration ``n``.

    Rows are selected by their ``id`` starting with the zero-padded,
    three-digit ``n`` followed by an underscore (e.g. ``"005_"``). The
    ``x``, ``y`` and ``deg`` cells of each selected row are passed through
    ``parse_value`` before constructing the tree. Trees are returned in the
    order the rows appear in ``df``.
    """
    id_prefix = f"{n:03d}_"
    matching = df[df['id'].str.startswith(id_prefix)]
    return [
        ChristmasTree(
            parse_value(record['x']),
            parse_value(record['y']),
            parse_value(record['deg']),
        )
        for _, record in matching.iterrows()
    ]

# Visible confirmation that the definitions in this cell were executed.
print("ChristmasTree class defined")

ChristmasTree class defined


In [3]:
# Overlap detection
def has_overlap(trees):
    """Return True if any two trees overlap, False otherwise.

    Two polygons count as overlapping when they intersect and are not merely
    touching. Zero or one tree can never overlap. An ``STRtree`` over the
    polygons supplies the candidates that are then tested exactly.
    """
    if len(trees) < 2:
        return False

    shapes = [tree.polygon for tree in trees]
    spatial_index = STRtree(shapes)

    def collides(position, shape):
        # query() yields positions into `shapes`; the shape itself is always
        # among its own candidates and has to be skipped.
        for candidate in spatial_index.query(shape):
            if candidate == position:
                continue
            other = shapes[candidate]
            if shape.intersects(other) and not shape.touches(other):
                return True
        return False

    return any(collides(position, shape) for position, shape in enumerate(shapes))

def get_bounding_box_side(trees):
    """Get the side length of the axis-aligned bounding square of ``trees``.

    The exterior coordinates of every tree polygon are pooled, and the larger
    of the pooled x-extent and y-extent is returned.

    Args:
        trees: Non-empty sequence of objects exposing
            ``.polygon.exterior.coords`` as (x, y) pairs.

    Returns:
        The larger of the two extents, as a numpy scalar.

    Raises:
        ValueError: If ``trees`` is empty. Without this check the same
            exception type would surface from ``np.vstack`` with a message
            about concatenation rather than about the missing trees.
    """
    if len(trees) == 0:
        raise ValueError("get_bounding_box_side requires at least one tree")

    all_points = np.vstack(
        [np.asarray(tree.polygon.exterior.coords) for tree in trees]
    )

    min_x, min_y = all_points.min(axis=0)
    max_x, max_y = all_points.max(axis=0)

    return max(max_x - min_x, max_y - min_y)

# Visible confirmation that the helper definitions in this cell were executed.
print("Helper functions defined")

Helper functions defined


In [4]:
# Calculate total score
def calculate_score(df, max_n=200):
    """Calculate the total score for a submission.

    For each configuration ``n`` in ``1..max_n`` the trees are loaded, the
    side of their bounding square is measured, and ``side**2 / n`` is added
    to the total.

    Args:
        df: Submission frame as accepted by ``load_trees_for_n``.
        max_n: Largest configuration size to score (inclusive). Defaults to
            200, the range this notebook originally hard-coded.

    Returns:
        ``(total_score, scores_by_n)``, where ``scores_by_n`` is a list of
        dicts with keys ``'n'``, ``'side'``, ``'contribution'`` and
        ``'trees'``, one per configuration that was scored.

    Note:
        A configuration whose tree count differs from ``n`` only triggers a
        printed warning and is left out of the total, so the returned score
        is understated whenever a warning was printed.
    """
    # Start from a float so the result is a float even if nothing is scored.
    total_score = 0.0
    scores_by_n = []

    for n in range(1, max_n + 1):
        trees = load_trees_for_n(df, n)
        if len(trees) != n:
            # Best-effort: report the malformed configuration and keep going.
            print(f"Warning: N={n} has {len(trees)} trees instead of {n}")
            continue

        side = get_bounding_box_side(trees)
        contribution = (side ** 2) / n
        total_score += contribution
        scores_by_n.append({
            'n': n,
            'side': side,
            'contribution': contribution,
            'trees': len(trees)
        })

    return total_score, scores_by_n

# Reference score this baseline is compared against. Kept in one place so the
# printed target and the computed gap cannot drift apart.
TARGET_SCORE = 68.919154

print("Calculating score...")
total_score, scores_by_n = calculate_score(df)
score_gap = total_score - TARGET_SCORE
print(f"\nTotal Score: {total_score:.6f}")
print(f"Target Score: {TARGET_SCORE:.6f}")
print(f"Gap: {score_gap:.6f} ({score_gap / TARGET_SCORE * 100:.2f}%)")


Calculating score...



Total Score: 70.676102
Target Score: 68.919154
Gap: 1.756948 (2.55%)


In [5]:
# Analyze score contributions
scores_df = pd.DataFrame(scores_by_n)

# Ten configurations that add the most to the total score.
top_contributions = scores_df[['n', 'side', 'contribution']].nlargest(10, 'contribution')
print("\nTop 10 highest score contributions:")
print(top_contributions)

# Summed contribution per band of configuration sizes (both ends inclusive).
print("\nScore contribution by N range:")
for start, end in [(1, 10), (11, 50), (51, 100), (101, 150), (151, 200)]:
    subset = scores_df[scores_df['n'].between(start, end)]
    print(f"  N={start}-{end}: {subset['contribution'].sum():.4f}")


Top 10 highest score contributions:
     n      side  contribution
0    1  0.813173      0.661250
1    2  0.949504      0.450779
2    3  1.142031      0.434745
4    5  1.443692      0.416850
3    4  1.290806      0.416545
6    7  1.673104      0.399897
5    6  1.548438      0.399610
8    9  1.867280      0.387415
7    8  1.755921      0.385407
14  15  2.384962      0.379203

Score contribution by N range:
  N=1-10: 4.3291
  N=11-50: 14.7130
  N=51-100: 17.6411
  N=101-150: 17.1441
  N=151-200: 16.8487


In [6]:
# Check for overlaps (sample a few configurations)
print("Checking for overlaps in sample configurations...")
# Only a handful of configuration sizes are inspected here, not all of them.
sample_sizes = (1, 5, 10, 50, 100, 150, 200)
overlap_found = False
for n in sample_sizes:
    trees = load_trees_for_n(df, n)
    overlapping = has_overlap(trees)
    overlap_found = overlap_found or overlapping
    print(f"  N={n}: OVERLAP DETECTED!" if overlapping else f"  N={n}: OK")

if not overlap_found:
    print("\nNo overlaps detected in sampled configurations.")

Checking for overlaps in sample configurations...
  N=1: OK
  N=5: OVERLAP DETECTED!
  N=10: OK
  N=50: OK
  N=100: OK
  N=150: OK
  N=200: OK


In [7]:
# Save submission to /home/submission/
# NOTE(review): this copy is unconditional. The sampled overlap check earlier
# in the notebook reported an overlap for N=5, so review that result before
# treating the copied file as a valid submission.
submission_dest = Path('/home/submission/submission.csv')
# Create the destination directory on a fresh machine instead of failing.
submission_dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy(submission_path, submission_dest)
print(f"Submission copied to {submission_dest}")

# Save metrics next to the submission they were computed from, so the
# experiment directory is spelled out in exactly one place.
# float() keeps the stored and printed value a plain Python float regardless
# of the numpy scalar type produced by the score calculation.
metrics = {'cv_score': float(total_score)}
metrics_path = Path(submission_path).with_name('metrics.json')
with open(metrics_path, 'w') as f:
    json.dump(metrics, f)
print(f"Metrics saved: {metrics}")

Submission copied to /home/submission/submission.csv
Metrics saved: {'cv_score': 70.67610239461072}


In [None]:
# Investigate the N=5 overlap issue
trees_5 = load_trees_for_n(df, 5)
print(f"N=5 has {len(trees_5)} trees")

# Check each pair for overlap with the same predicate has_overlap uses
# (intersecting, but not merely touching), and report how much area each
# offending pair shares.
overlapping_pairs = []
for i, j in combinations(range(len(trees_5)), 2):
    poly_i = trees_5[i].polygon
    poly_j = trees_5[j].polygon
    if poly_i.intersects(poly_j) and not poly_i.touches(poly_j):
        intersection = poly_i.intersection(poly_j)
        overlapping_pairs.append((i, j))
        print(f"Trees {i} and {j} overlap!")
        print(f"  Intersection area: {intersection.area}")
        print(f"  Tree {i} area: {poly_i.area}")
        print(f"  Tree {j} area: {poly_j.area}")

# Say so explicitly when nothing was found, rather than printing nothing.
if not overlapping_pairs:
    print("No overlapping pairs found for N=5")