# Loop 4 Analysis: Understanding the Gap to Target

**Current best:** 70.675478
**Target:** 68.919154
**Gap:** 1.756 points (2.55% above the target)

We've achieved the first real improvement (0.000624 points), but the remaining gap is still huge by comparison. Let's analyze where further gains are possible.

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely import affinity
import matplotlib.pyplot as plt

# Tree geometry: outline of a single tree as (x, y) vertex pairs, in the
# order they are handed to the polygon constructor.
TREE_VERTICES = [
    (0, 0.8),
    (0.125, 0.5),
    (0.0625, 0.5),
    (0.2, 0.25),
    (0.1, 0.25),
    (0.35, 0),
    (0.075, 0),
    (0.075, -0.2),
    (-0.075, -0.2),
    (-0.075, 0),
    (-0.35, 0),
    (-0.1, 0.25),
    (-0.2, 0.25),
    (-0.0625, 0.5),
    (-0.125, 0.5),
]
# Per-axis coordinate lists, derived from the vertex table above.
TX = [vx for vx, _ in TREE_VERTICES]
TY = [vy for _, vy in TREE_VERTICES]

def parse_s_value(s):
    """Convert a submission field to ``float``.

    A string that starts with the letter ``'s'`` has that single leading
    character dropped before conversion; every other input is passed to
    ``float()`` as-is. Any exception raised by ``float()`` propagates.
    """
    has_prefix = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_prefix else s)

def create_tree_polygon(x, y, deg):
    """Return the tree outline rotated by ``deg`` about (0, 0), then shifted by (x, y).

    ``deg`` is passed straight to ``shapely.affinity.rotate``, which treats
    the angle as degrees unless told otherwise. The rotation is applied
    before the translation, so (x, y) is where the tree's local origin ends up.
    """
    rotated = affinity.rotate(Polygon(TREE_VERTICES), deg, origin=(0, 0))
    return affinity.translate(rotated, x, y)

def get_bounding_box_side(polygons):
    """Return the longer side of the axis-aligned box enclosing all polygons.

    Only the exterior ring coordinates of each polygon are inspected.
    An empty (falsy) ``polygons`` argument yields ``0``.
    """
    if not polygons:
        return 0

    xs, ys = [], []
    for shape in polygons:
        for point in shape.exterior.coords:
            xs.append(point[0])
            ys.append(point[1])

    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

# Load current best solution and add numeric copies of the x / y / deg
# columns (parse_s_value drops a leading 's' from string values).
submission_path = '/home/code/experiments/005_long_optimization/submission.csv'
df = pd.read_csv(submission_path)
for axis in ('x', 'y', 'deg'):
    df[f'{axis}_val'] = df[axis].apply(parse_s_value)

print(f"Loaded {len(df)} rows")

In [None]:
# Calculate per-N scores and analyze where improvement is possible.
# For every N in 1..200 the rows whose id starts with the zero-padded prefix
# "NNN_" are turned into polygons; the score is side**2 / N, where side is
# the longer side of their common axis-aligned bounding box.

# The area of a single tree does not depend on N, so compute it once.
tree_area = Polygon(TREE_VERTICES).area

results = []
for n in range(1, 201):
    prefix = f'{n:03d}_'
    group = df[df['id'].str.startswith(prefix)]
    # Read the three numeric columns in lockstep instead of building a
    # Series per row with iterrows().
    polygons = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]
    side = get_bounding_box_side(polygons)
    score = side**2 / n

    # Efficiency = fraction of the bounding square covered by tree area.
    total_tree_area = tree_area * n
    box_area = side**2
    efficiency = total_tree_area / box_area if box_area > 0 else 0

    results.append({
        'n': n,
        'side': side,
        'score': score,
        'efficiency': efficiency,
        'tree_area': total_tree_area,
        'box_area': box_area
    })

results_df = pd.DataFrame(results)
total_score = results_df['score'].sum()
print(f"Total score: {total_score:.6f}")
print(f"Target: 68.919154")
print(f"Gap: {total_score - 68.919154:.6f}")
print(f"\nTop 10 contributors to score:")
print(results_df.nlargest(10, 'score')[['n', 'side', 'score', 'efficiency']])

In [None]:
# Analyze efficiency distribution
report_columns = ['n', 'side', 'score', 'efficiency']
eff_series = results_df['efficiency']

print("\nEfficiency statistics:")
print(eff_series.describe())

# Find N values with lowest efficiency (most room for improvement)
print("\nLowest efficiency N values (most room for improvement):")
print(results_df.nsmallest(10, 'efficiency')[report_columns])

# Calculate theoretical best if all N had best efficiency
best_efficiency = eff_series.max()
print(f"\nBest efficiency: {best_efficiency:.4f}")

# Theoretical minimum score if all N had best efficiency: for each N, take
# the side a square would need at that efficiency and score it as side**2 / N.
# Terms are accumulated sequentially, in row order.
theoretical_min = sum(
    np.sqrt(area / best_efficiency) ** 2 / n_trees
    for area, n_trees in zip(results_df['tree_area'], results_df['n'])
)

score_total = results_df['score'].sum()
print(f"Theoretical minimum score (if all N had best efficiency): {theoretical_min:.6f}")
print(f"Current score: {score_total:.6f}")
print(f"Potential improvement: {score_total - theoretical_min:.6f}")

In [None]:
# Plot efficiency vs N (left) and score contribution vs N (right),
# using the Axes objects directly instead of the pyplot state machine.
fig, (ax_eff, ax_score) = plt.subplots(1, 2, figsize=(12, 5))

ax_eff.scatter(results_df['n'], results_df['efficiency'], alpha=0.5)
ax_eff.set_xlabel('N')
ax_eff.set_ylabel('Packing Efficiency')
ax_eff.set_title('Packing Efficiency vs N')
# Dashed reference line at the best efficiency reached by any N.
ax_eff.axhline(y=best_efficiency, color='r', linestyle='--', label=f'Best: {best_efficiency:.4f}')
ax_eff.legend()

ax_score.scatter(results_df['n'], results_df['score'], alpha=0.5)
ax_score.set_xlabel('N')
ax_score.set_ylabel('Score Contribution')
ax_score.set_title('Score Contribution vs N')

fig.tight_layout()
fig.savefig('/home/code/exploration/efficiency_analysis.png', dpi=100)
plt.show()
print("Saved efficiency_analysis.png")

In [None]:
# Calculate how much improvement is needed per N to reach target
score_col = results_df['score']
target = 68.919154
current_total = score_col.sum()
gap = current_total - target

# Spread the gap evenly over the 200 values of N, then express that
# per-N reduction relative to the mean per-N score.
mean_cut = gap / 200
mean_cut_pct = mean_cut / score_col.mean() * 100

print(f"Current total: {current_total:.6f}")
print(f"Target: {target:.6f}")
print(f"Gap: {gap:.6f}")
print("\nIf we improved uniformly across all N:")
print(f"  Need to reduce each N's score by: {mean_cut:.6f} on average")
print(f"  That's {mean_cut_pct:.2f}% reduction per N")

# Find which N values have the most room for improvement: weight each score
# by how far its efficiency falls short of the best efficiency seen.
relative_shortfall = 1 - results_df['efficiency'] / best_efficiency
results_df['improvement_potential'] = results_df['score'] * relative_shortfall
print("\nN values with most improvement potential:")
print(results_df.nlargest(10, 'improvement_potential')[['n', 'score', 'efficiency', 'improvement_potential']])

In [None]:
# Check what the leaderboard scores look like
# The target is 68.919154 which is the top score
# Let's understand what efficiency would be needed

print("Analysis of what's needed to reach target:")
print("="*50)

# If we could improve efficiency uniformly.
# Each N contributes (single tree area) / efficiency_N to the total, so
# scaling every efficiency by current_total / target brings the total
# down to the target. Apply that factor to the best efficiency seen.
required_efficiency = best_efficiency * (current_total / target)
print(f"\nIf uniform efficiency improvement:")
print(f"  Current best efficiency: {best_efficiency:.4f}")
print(f"  Required efficiency: {required_efficiency:.4f}")
print(f"  That's {(required_efficiency/best_efficiency - 1)*100:.2f}% higher than current best")

# Efficiency can never exceed 1.0 for non-overlapping polygons, so the target
# would only be provably out of reach if the required value were above 1.0.
# The tree area is fixed, so the only way to improve is to pack more tightly.

# Fill ratios of a single, unrotated tree.
tree_poly = Polygon(TREE_VERTICES)
tree_area = tree_poly.area
tree_bounds = tree_poly.bounds  # (minx, miny, maxx, maxy)
bbox_width = tree_bounds[2] - tree_bounds[0]
bbox_height = tree_bounds[3] - tree_bounds[1]
tree_bbox_area = bbox_width * bbox_height
max_single_tree_efficiency = tree_area / tree_bbox_area

# The score and the per-N efficiency are based on a SQUARE whose side is
# max(width, height), not on the rectangular bounding box, so report that
# ratio too. Neither ratio bounds the N=1 case: rotating the tree changes
# the square side (at 45 degrees it is about 0.813 versus 1.0 unrotated).
square_side = max(bbox_width, bbox_height)
unrotated_square_efficiency = tree_area / square_side**2

print(f"\nTree geometry analysis:")
print(f"  Tree area: {tree_area:.6f}")
print(f"  Tree bounding box area: {tree_bbox_area:.6f}")
print(f"  Single tree efficiency: {max_single_tree_efficiency:.4f}")
print(f"  This is the rectangular-bbox fill ratio of the unrotated tree")
print(f"  Unrotated square-box efficiency: {unrotated_square_efficiency:.4f}")
print(f"  Neither is a bound for N=1: rotating the tree changes the square side")

In [None]:
# The key insight: the target score of 68.919154 requires BETTER packing
# than what we currently have. Let's see what the top competitors might be doing.

# Look for patterns in the current solution
print("Analyzing current solution patterns:")
print("=" * 50)

# Rotation angles: summary statistics first
angles = df['deg_val']
print("\nRotation angle statistics:")
print(angles.describe())

# Then bucket every angle to the nearest multiple of 5 degrees and list
# the ten most frequent buckets
print("\nMost common rotation angles (rounded to 5 degrees):")
df['deg_rounded'] = (angles / 5).round() * 5
bucket_counts = df['deg_rounded'].value_counts()
print(bucket_counts.head(10))

# Extent of the tree positions along each axis
x_lo, x_hi = df['x_val'].min(), df['x_val'].max()
y_lo, y_hi = df['y_val'].min(), df['y_val'].max()
print("\nPosition statistics:")
print(f"X range: [{x_lo:.4f}, {x_hi:.4f}]")
print(f"Y range: [{y_lo:.4f}, {y_hi:.4f}]")