# Loop 20 Strategic Analysis

## Situation
- Best valid score: 70.627634 (snapshot 21191207951)
- Target: 68.919154
- Gap: 1.708 (2.48%)
- Experiments: 21, all converging to ~70.63

## Key Insight
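The INVALID snapshot 21145966992 has score 70.572798 (0.055 better than the best valid score).
This suggests there is room for improvement, but does not prove it: the gain only counts where it comes from per-N configurations that are themselves overlap-free, so we need to find VALID configurations in that region.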

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
import os

# Outline of a single tree as 15 (x, y) vertices, mirror-symmetric about x = 0.
# The ring starts at the tip (0.0, 0.8), steps down the right-hand side through
# three widening tiers to the trunk (y from 0.0 down to -0.2), then returns up
# the left-hand side. create_tree_polygon() turns it into a shapely Polygon.
TREE_TEMPLATE = [
    (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5), (0.2, 0.25), (0.1, 0.25),
    (0.35, 0.0), (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2), (-0.075, 0.0),
    (-0.35, 0.0), (-0.1, 0.25), (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5)
]

def parse_s_value(val):
    """Return `val` as a float, dropping a leading 's' when `val` is a string.

    Non-string inputs and strings without the 's' prefix are passed to
    float() unchanged.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    return float(val[1:] if has_prefix else val)

def create_tree_polygon(x, y, angle):
    """Build the tree outline placed at (x, y) with the given rotation.

    The TREE_TEMPLATE polygon is rotated by `angle` degrees about the
    origin (0, 0) first and only then shifted by (x, y).
    """
    rotated = rotate(Polygon(TREE_TEMPLATE), angle, origin=(0, 0), use_radians=False)
    return translate(rotated, x, y)

def check_overlap(tree1, tree2):
    """Report whether two geometries collide.

    A collision is a partial overlap, or either geometry containing the
    other. The checks run in that order and stop at the first truthy one.
    """
    partially_overlapping = tree1.overlaps(tree2)
    return partially_overlapping or tree1.contains(tree2) or tree2.contains(tree1)

# Confirmation output once the definitions above have been executed.
print("Functions defined")

In [None]:
# Analyze the INVALID snapshot to understand WHERE the overlaps are
invalid_path = '/home/nonroot/snapshots/santa-2025/21145966992/submission/submission.csv'
valid_path = '/home/nonroot/snapshots/santa-2025/21191207951/submission/submission.csv'


def load_submission(path):
    """Read a submission CSV and prepare it for analysis.

    The 'x', 'y' and 'deg' columns are converted to floats with
    parse_s_value, and an integer 'n' column is derived from the part of
    'id' that precedes the first underscore.

    Args:
        path: location of the CSV file to read.

    Returns:
        The loaded DataFrame with numeric 'x', 'y', 'deg' and the added 'n'.
    """
    df = pd.read_csv(path)
    for column in ('x', 'y', 'deg'):
        df[column] = df[column].apply(parse_s_value)
    df['n'] = df['id'].apply(lambda row_id: int(row_id.split('_')[0]))
    return df


# Both snapshots go through the same parsing so they stay directly comparable.
df_invalid = load_submission(invalid_path)
df_valid = load_submission(valid_path)

print(f"Invalid snapshot: {len(df_invalid)} rows")
print(f"Valid snapshot: {len(df_valid)} rows")

In [None]:
# Collect every N whose layout in the invalid snapshot contains a colliding pair
overlap_ns = []
for n in range(1, 201):
    group = df_invalid[df_invalid['n'] == n]
    trees = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in group.iterrows()]

    # Each unordered pair is visited once; any() stops at the first collision.
    collides = any(
        check_overlap(first, second)
        for idx, first in enumerate(trees)
        for second in trees[idx + 1:]
    )
    if collides:
        overlap_ns.append(n)

print(f"N values with overlaps: {len(overlap_ns)}")
# Long lists are truncated to their first 20 entries in the report.
if len(overlap_ns) > 20:
    print(f"Overlap N values: {overlap_ns[:20]}...")
else:
    print(f"Overlap N values: {overlap_ns}")

In [None]:
# Compare scores per N between invalid and valid
def get_score_per_n(df, max_n=200):
    """Compute the per-N score of a submission.

    For each n from 1 to `max_n`, the trees whose 'n' column equals n are
    built with create_tree_polygon, the axis-aligned extent of all their
    bounds is measured, and the score is side**2 / n, where side is the
    larger of the x-extent and the y-extent.

    Args:
        df: DataFrame with numeric 'x', 'y', 'deg' columns and an 'n' column.
        max_n: largest n to score (inclusive); defaults to 200.

    Returns:
        dict mapping each n to its score.

    Raises:
        ValueError: if `df` has no rows for some n in the scored range.
    """
    scores = {}
    for n in range(1, max_n + 1):
        group = df[df['n'] == n]
        if group.empty:
            # Without trees there is no bounding square; fail with the offending n
            # rather than with an opaque "max() arg is an empty sequence".
            raise ValueError(f"no rows found for n={n}; cannot compute its score")

        # Each entry is (minx, miny, maxx, maxy) for one placed tree.
        bounds = [
            create_tree_polygon(row['x'], row['y'], row['deg']).bounds
            for _, row in group.iterrows()
        ]
        width = max(b[2] for b in bounds) - min(b[0] for b in bounds)
        height = max(b[3] for b in bounds) - min(b[1] for b in bounds)
        side = max(width, height)
        scores[n] = (side ** 2) / n
    return scores

invalid_scores = get_score_per_n(df_invalid)
valid_scores = get_score_per_n(df_valid)

# Sum each snapshot once and reuse the totals in the report below.
invalid_total = sum(invalid_scores.values())
valid_total = sum(valid_scores.values())

print(f"Invalid total: {invalid_total:.6f}")
print(f"Valid total: {valid_total:.6f}")
print(f"Difference: {valid_total - invalid_total:.6f}")

In [None]:
# N values whose invalid-snapshot layout is overlap-free AND scores lower than the valid one.
# Each entry is (n, improvement, invalid score, valid score).
better_ns = [
    (n, valid_scores[n] - invalid_scores[n], invalid_scores[n], valid_scores[n])
    for n in range(1, 201)
    if n not in overlap_ns and invalid_scores[n] < valid_scores[n]
]

# Largest improvement first; the sort is stable, so ties keep ascending N.
better_ns.sort(key=lambda entry: entry[1], reverse=True)

print(f"N values where invalid is better AND valid (no overlap): {len(better_ns)}")
print("\nTop 20 improvements:")
for n, gain, invalid_score, valid_score in better_ns[:20]:
    print(f"  N={n}: improvement={gain:.6f} (invalid={invalid_score:.6f}, valid={valid_score:.6f})")

In [None]:
# Hybrid score table: take the invalid snapshot's score for an N only when that
# layout is overlap-free and strictly better; otherwise keep the valid score.
hybrid_scores = {
    n: (
        invalid_scores[n]
        if n not in overlap_ns and invalid_scores[n] < valid_scores[n]
        else valid_scores[n]
    )
    for n in range(1, 201)
}

hybrid_sum = sum(hybrid_scores.values())
valid_sum = sum(valid_scores.values())

print(f"Hybrid total: {hybrid_sum:.6f}")
print(f"Valid total: {valid_sum:.6f}")
print(f"Improvement: {valid_sum - hybrid_sum:.6f}")

In [None]:
# Create the hybrid submission
output_path = '/home/submission/submission.csv'

hybrid_rows = []
for n in range(1, 201):
    # Take the invalid snapshot's layout only when it is overlap-free and
    # strictly better for this N; otherwise fall back to the valid snapshot.
    use_invalid = n not in overlap_ns and invalid_scores[n] < valid_scores[n]
    source = df_invalid if use_invalid else df_valid
    group = source[source['n'] == n]

    # NOTE(review): values are re-serialised from the parsed floats, so the
    # written text may differ from the digits in the source snapshot -- confirm
    # the grader accepts this formatting and precision.
    for _, row in group.iterrows():
        hybrid_rows.append({
            'id': row['id'],
            'x': f"s{row['x']}",
            'y': f"s{row['y']}",
            'deg': f"s{row['deg']}"
        })

hybrid_df = pd.DataFrame(hybrid_rows)
# Make sure the target directory exists so the write cannot fail after all
# the work above merely because the folder is missing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
hybrid_df.to_csv(output_path, index=False)
print(f"Saved hybrid submission with {len(hybrid_df)} rows")

In [None]:
# Verify the hybrid submission
TARGET_SCORE = 68.919154

df_hybrid = pd.read_csv('/home/submission/submission.csv')
df_hybrid['x'] = df_hybrid['x'].apply(parse_s_value)
df_hybrid['y'] = df_hybrid['y'].apply(parse_s_value)
df_hybrid['deg'] = df_hybrid['deg'].apply(parse_s_value)
df_hybrid['n'] = df_hybrid['id'].apply(lambda x: int(x.split('_')[0]))

# Verify no overlaps.
# Every N is checked in full and independently: a single flag shared across
# groups would cut the pair scan short for all groups after the first hit and
# leave their overlaps unreported.
overlapping_ns = []
for n in range(1, 201):
    group = df_hybrid[df_hybrid['n'] == n]
    trees = [create_tree_polygon(row['x'], row['y'], row['deg']) for _, row in group.iterrows()]

    group_has_overlap = any(
        check_overlap(first, second)
        for idx, first in enumerate(trees)
        for second in trees[idx + 1:]
    )
    if group_has_overlap:
        overlapping_ns.append(n)
        print(f"OVERLAP at N={n}!")

has_any_overlap = bool(overlapping_ns)
if not has_any_overlap:
    print("NO OVERLAPS - submission is valid!")

# Calculate final score
hybrid_total = sum(get_score_per_n(df_hybrid).values())
print(f"\nFinal hybrid score: {hybrid_total:.6f}")
print(f"Target: {TARGET_SCORE:.6f}")
print(f"Gap: {hybrid_total - TARGET_SCORE:.6f}")