# Loop 7 Analysis: Better Baseline Discovery

Key discovery: Found a better pre-optimized baseline in snapshots with score 70.659959 (vs our previous 70.675457)

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import matplotlib.pyplot as plt

# Tree geometry
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
TREE_VERTICES = list(zip(TX, TY))

def parse_s_value(s):
    if isinstance(s, str) and s.startswith('s'):
        return float(s[1:])
    return float(s)

def create_tree_polygon(x, y, deg):
    poly = Polygon(TREE_VERTICES)
    poly = affinity.rotate(poly, deg, origin=(0, 0))
    poly = affinity.translate(poly, x, y)
    return poly

def get_bounding_box_side(polygons):
    all_coords = []
    for poly in polygons:
        all_coords.extend(list(poly.exterior.coords))
    xs = [c[0] for c in all_coords]
    ys = [c[1] for c in all_coords]
    return max(max(xs) - min(xs), max(ys) - min(ys))

print('Functions defined')

Functions defined


In [2]:
# Load the new better baseline
df_new = pd.read_csv('/home/code/experiments/009_better_baseline/submission.csv')
df_new['x_val'] = df_new['x'].apply(parse_s_value)
df_new['y_val'] = df_new['y'].apply(parse_s_value)
df_new['deg_val'] = df_new['deg'].apply(parse_s_value)

# Load the old baseline for comparison
df_old = pd.read_csv('/home/code/experiments/006_corner_extraction/ensemble_best.csv')
df_old['x_val'] = df_old['x'].apply(parse_s_value)
df_old['y_val'] = df_old['y'].apply(parse_s_value)
df_old['deg_val'] = df_old['deg'].apply(parse_s_value)

print(f'New baseline rows: {len(df_new)}')
print(f'Old baseline rows: {len(df_old)}')

New baseline rows: 20100
Old baseline rows: 20100


In [3]:
# Compare scores per N
results = []
for n in range(1, 201):
    prefix = f'{n:03d}_'
    
    # New baseline
    group_new = df_new[df_new['id'].str.startswith(prefix)]
    polygons_new = [create_tree_polygon(row['x_val'], row['y_val'], row['deg_val']) for _, row in group_new.iterrows()]
    side_new = get_bounding_box_side(polygons_new)
    score_new = side_new**2 / n
    
    # Old baseline
    group_old = df_old[df_old['id'].str.startswith(prefix)]
    polygons_old = [create_tree_polygon(row['x_val'], row['y_val'], row['deg_val']) for _, row in group_old.iterrows()]
    side_old = get_bounding_box_side(polygons_old)
    score_old = side_old**2 / n
    
    results.append({
        'n': n,
        'score_new': score_new,
        'score_old': score_old,
        'improvement': score_old - score_new,
        'side_new': side_new,
        'side_old': side_old
    })

results_df = pd.DataFrame(results)
print(f'Total score new: {results_df["score_new"].sum():.6f}')
print(f'Total score old: {results_df["score_old"].sum():.6f}')
print(f'Total improvement: {results_df["improvement"].sum():.6f}')

Total score new: 70.659959
Total score old: 70.675457
Total improvement: 0.015498


In [4]:
# Show where the new baseline is better
improved = results_df[results_df['improvement'] > 0.0001].sort_values('improvement', ascending=False)
print(f'N values where new baseline is better: {len(improved)}')
print(improved.head(20))

N values where new baseline is better: 24
       n  score_new  score_old  improvement  side_new  side_old
56    57   0.354108   0.358045     0.003937  4.492679  4.517582
53    54   0.359200   0.360686     0.001486  4.404181  4.413282
100  101   0.349036   0.350389     0.001353  5.937390  5.948890
161  162   0.337058   0.338332     0.001274  7.389411  7.403365
73    74   0.353127   0.354139     0.001012  5.111891  5.119210
74    75   0.352898   0.353773     0.000875  5.144644  5.151018
122  123   0.347904   0.348717     0.000813  6.541576  6.549213
156  157   0.341371   0.341876     0.000505  7.320878  7.326288
186  187   0.340237   0.340604     0.000367  7.976487  7.980786
194  195   0.332617   0.332901     0.000284  8.053589  8.057024
140  141   0.343470   0.343724     0.000254  6.959112  6.961687
75    76   0.351372   0.351603     0.000231  5.167619  5.169315
142  143   0.341149   0.341362     0.000214  6.984571  6.986761
141  142   0.341148   0.341339     0.000192  6.960098  6.96205

In [5]:
# Calculate theoretical minimum and efficiency
theoretical_min = sum(0.33 / n for n in range(1, 201))  # Approximate minimum based on tree area
print(f'Theoretical minimum (rough): {theoretical_min:.2f}')
print(f'Current score: {results_df["score_new"].sum():.6f}')
print(f'Target score: 68.919154')
print(f'Gap to target: {results_df["score_new"].sum() - 68.919154:.6f}')
print(f'Gap percentage: {(results_df["score_new"].sum() - 68.919154) / 68.919154 * 100:.2f}%')

Theoretical minimum (rough): 1.94
Current score: 70.659959
Target score: 68.919154
Gap to target: 1.740805
Gap percentage: 2.53%


In [6]:
# Analyze efficiency per N
results_df['efficiency'] = np.sqrt(results_df['score_new'] * results_df['n']) / results_df['n']
results_df['theoretical_side'] = np.sqrt(0.33 * results_df['n'])  # Rough theoretical minimum side
results_df['side_ratio'] = results_df['side_new'] / results_df['theoretical_side']

# Find N values with worst efficiency (most room for improvement)
worst_efficiency = results_df.nsmallest(20, 'efficiency')
print('N values with worst efficiency (most room for improvement):')
print(worst_efficiency[['n', 'score_new', 'side_new', 'efficiency', 'side_ratio']])

N values with worst efficiency (most room for improvement):
       n  score_new  side_new  efficiency  side_ratio
199  200   0.337564  8.216619    0.041083    1.011396
198  199   0.338269  8.204610    0.041229    1.012452
195  196   0.333268  8.082114    0.041235    1.004940
197  198   0.337316  8.172431    0.041275    1.011025
196  197   0.335990  8.135730    0.041298    1.009036
194  195   0.332617  8.053589    0.041300    1.003957
193  194   0.332999  8.037531    0.041431    1.004534
192  193   0.333764  8.025987    0.041585    1.005687
191  192   0.335301  8.023575    0.041789    1.007999
190  191   0.336758  8.020026    0.041990    1.010188
189  190   0.338231  8.016482    0.042192    1.012395
188  189   0.338821  8.002326    0.042340    1.013277
187  188   0.339480  7.988883    0.042494    1.014262
181  182   0.329988  7.749694    0.042581    0.999981
186  187   0.340237  7.976487    0.042655    1.015392
180  181   0.329946  7.727887    0.042696    0.999918
184  185   0.338562  7

In [7]:
# Calculate potential improvement if we could match average efficiency
avg_efficiency = results_df['efficiency'].mean()
print(f'Average efficiency: {avg_efficiency:.4f}')

# If we improved worst N values to average efficiency
potential_improvement = 0
for _, row in results_df.iterrows():
    if row['efficiency'] < avg_efficiency:
        # Current score contribution
        current = row['score_new']
        # If we had average efficiency
        improved_side = avg_efficiency * row['n']
        improved_score = improved_side**2 / row['n']
        potential_improvement += current - improved_score

print(f'Potential improvement if worst N matched average: {potential_improvement:.4f}')

Average efficiency: 0.0819
Potential improvement if worst N matched average: -74.7503
