# Loop 7 Analysis: Better Baseline Discovery

Key discovery: a better pre-optimized baseline was found in the snapshots, scoring 70.659959 versus our previous 70.675457 (lower is better).

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import matplotlib.pyplot as plt

# Tree geometry: the outline of one tree, given as parallel x / y coordinate
# sequences (15 vertices, starting from the tip at (0, 0.8)).
TX = [
    0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075,
    -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2,
    -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5,
]
# Pair the coordinates into (x, y) tuples, the form Polygon() is given below.
TREE_VERTICES = [(vx, vy) for vx, vy in zip(TX, TY)]

def parse_s_value(s):
    """Convert a submission field to ``float``.

    A string value that begins with the character ``'s'`` has that single
    leading character removed before conversion; any other value is handed
    to ``float`` as-is.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    raw = s[1:] if has_marker else s
    return float(raw)

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` about the origin, then moved by ``(x, y)``.

    The rotation is applied before the translation, so ``(x, y)`` is where the
    outline's local origin ends up.
    """
    rotated = affinity.rotate(Polygon(TREE_VERTICES), deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def get_bounding_box_side(polygons):
    """Return the longer side of the axis-aligned box enclosing all polygons.

    Only the exterior-ring coordinates of each polygon are examined. The
    result is ``max(width, height)`` of the combined extent.
    """
    xs, ys = [], []
    for shape in polygons:
        for point in shape.exterior.coords:
            xs.append(point[0])
            ys.append(point[1])
    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

# Progress marker: printed once this cell's definitions have executed.
print('Functions defined')

In [None]:
# Read both submissions: the newly found baseline and the previous best,
# which is kept for comparison.
df_new = pd.read_csv('/home/code/experiments/009_better_baseline/submission.csv')
df_old = pd.read_csv('/home/code/experiments/006_corner_extraction/ensemble_best.csv')

# Add numeric x_val / y_val / deg_val columns next to the raw x / y / deg ones.
for frame in (df_new, df_old):
    for column in ('x', 'y', 'deg'):
        frame[f'{column}_val'] = frame[column].apply(parse_s_value)

print(f'New baseline rows: {len(df_new)}')
print(f'Old baseline rows: {len(df_old)}')

In [None]:
# Score both baselines for every tree count N from 1 to 200.
# A configuration's score is (bounding-square side)^2 / N.
results = []
for n in range(1, 201):
    prefix = f'{n:03d}_'  # rows of one configuration share this id prefix

    # Measure the new baseline first, then the old one, with identical steps.
    side = {}
    for label, frame in (('new', df_new), ('old', df_old)):
        group = frame[frame['id'].str.startswith(prefix)]
        trees = [
            create_tree_polygon(row['x_val'], row['y_val'], row['deg_val'])
            for _, row in group.iterrows()
        ]
        side[label] = get_bounding_box_side(trees)

    new_score = side['new']**2 / n
    old_score = side['old']**2 / n

    results.append({
        'n': n,
        'score_new': new_score,
        'score_old': old_score,
        'improvement': old_score - new_score,  # positive when the new baseline is better
        'side_new': side['new'],
        'side_old': side['old']
    })

results_df = pd.DataFrame(results)
total_new = results_df["score_new"].sum()
total_old = results_df["score_old"].sum()
total_gain = results_df["improvement"].sum()
print(f'Total score new: {total_new:.6f}')
print(f'Total score old: {total_old:.6f}')
print(f'Total improvement: {total_gain:.6f}')

In [None]:
# List the N values where the new baseline wins by more than 1e-4,
# largest gain first.
is_better = results_df['improvement'] > 0.0001
improved = results_df.loc[is_better].sort_values('improvement', ascending=False)
print(f'N values where new baseline is better: {len(improved)}')
print(improved.head(20))

In [None]:
# Rough lower bound on the total score, and the remaining gap to the target.
#
# Every N contributes side^2 / N to the total. With an assumed footprint of
# AREA_PER_TREE per tree, the smallest conceivable square has
# side^2 = AREA_PER_TREE * N, so each N contributes at least AREA_PER_TREE
# regardless of N. The bound on the total is therefore AREA_PER_TREE per
# configuration (200 of them), not AREA_PER_TREE / N.
#
# NOTE(review): the shoelace area of the TREE_VERTICES outline is 0.245625, so
# 0.33 presumably already allows for some unavoidable packing waste - confirm.
AREA_PER_TREE = 0.33
TARGET_SCORE = 68.919154
N_CONFIGURATIONS = len(range(1, 201))

theoretical_min = AREA_PER_TREE * N_CONFIGURATIONS
current_score = results_df["score_new"].sum()
gap_to_target = current_score - TARGET_SCORE

print(f'Theoretical minimum (rough): {theoretical_min:.2f}')
print(f'Current score: {current_score:.6f}')
print(f'Target score: {TARGET_SCORE:.6f}')
print(f'Gap to target: {gap_to_target:.6f}')
print(f'Gap percentage: {gap_to_target / TARGET_SCORE * 100:.2f}%')

In [None]:
# Analyze packing quality per N.
#
# 'efficiency' is side / N (sqrt(score * n) recovers the side length). For a
# packing of roughly constant density it falls off like 1/sqrt(N), so it is
# NOT comparable between different N; the column is kept unchanged because
# other cells read it.
results_df['efficiency'] = np.sqrt(results_df['score_new'] * results_df['n']) / results_df['n']
results_df['theoretical_side'] = np.sqrt(0.33 * results_df['n'])  # Rough theoretical minimum side
results_df['side_ratio'] = results_df['side_new'] / results_df['theoretical_side']

# Rank by side_ratio instead: it is scale-free, and a larger value means the
# bounding square is further above the rough minimum, i.e. more wasted space.
# Taking the smallest side / N would simply pick the largest N, which are the
# tighter packings rather than the worst ones.
worst_efficiency = results_df.nlargest(20, 'side_ratio')
print('N values with worst efficiency (most room for improvement):')
print(worst_efficiency[['n', 'score_new', 'side_new', 'efficiency', 'side_ratio']])

In [None]:
# Estimate how much the total score would drop if every worse-than-average N
# were packed as well as the average N.
#
# Packing quality is measured as side / sqrt(N). Unlike side / N, this does
# not shrink systematically as N grows, so averaging it across N is
# meaningful. Lower is better.
side_per_sqrt_n = results_df['side_new'] / np.sqrt(results_df['n'])
avg_efficiency = side_per_sqrt_n.mean()
print(f'Average efficiency: {avg_efficiency:.4f}')

# A configuration at the average has side = avg * sqrt(N), hence a score of
# (avg * sqrt(N))**2 / N = avg**2 for every N. Only rows currently ABOVE that
# score can gain; rows already better than average are left untouched, so the
# estimate can never be negative.
average_quality_score = avg_efficiency**2
excess = results_df['score_new'] - average_quality_score
potential_improvement = excess[excess > 0].sum()

print(f'Potential improvement if worst N matched average: {potential_improvement:.4f}')