# Loop 5 Strategic Analysis

## Key Questions:
1. What is the gap between our best score and the target?
2. What techniques have NOT been tried?
3. What do top teams know that we don't?

In [None]:
import pandas as pd
import numpy as np
from numba import njit
import math

# Current status: best cross-validated score vs. the leaderboard target.
best_cv = 70.659437
target = 68.919154

# Absolute gap and the same gap expressed as a percentage of the target.
gap = best_cv - target
gap_pct = (gap / target) * 100

rule = "=" * 60
print(
    rule,
    "CURRENT STATUS",
    rule,
    f"Best CV Score: {best_cv:.6f}",
    f"Target Score:  {target:.6f}",
    f"Gap:           {gap:.6f} ({gap_pct:.2f}%)",
    "",
    "This gap of 1.74 points is HUGE for this competition.",
    "The target is the #1 leaderboard score (team 'shr').",
    sep="\n",
)

In [None]:
# Analyze what the gap means in terms of per-N improvements needed
# If we need to improve by 1.74 points total across 200 N values,
# that's an average of 0.0087 points per N value.

# But small N values contribute more to the score.
# Let's calculate the theoretical improvement needed per N.

@njit
def make_polygon_template():
    """Return the (x, y) vertex arrays of the 15-vertex tree outline.

    The outline is symmetric about x = 0: a tip, three tiers that widen
    towards the base, and a rectangular trunk hanging below y = 0.
    Both arrays are float64 and list the vertices in the same order.
    """
    # Widths of the trunk and of the bottom / middle / top tiers.
    trunk_w = 0.15
    trunk_h = 0.2
    bottom_w = 0.7
    middle_w = 0.4
    top_w = 0.25

    # Heights at which the outline changes direction.
    tip_y = 0.8
    tier1_y = 0.5
    tier2_y = 0.25
    base_y = 0.0
    trunk_bottom_y = -trunk_h

    x = np.array(
        [
            0.0,
            top_w / 2, top_w / 4,
            middle_w / 2, middle_w / 4,
            bottom_w / 2,
            trunk_w / 2, trunk_w / 2,
            -trunk_w / 2, -trunk_w / 2,
            -bottom_w / 2,
            -middle_w / 4, -middle_w / 2,
            -top_w / 4, -top_w / 2,
        ],
        np.float64,
    )
    y = np.array(
        [
            tip_y,
            tier1_y, tier1_y,
            tier2_y, tier2_y,
            base_y,
            base_y, trunk_bottom_y,
            trunk_bottom_y, base_y,
            base_y,
            tier2_y, tier2_y,
            tier1_y, tier1_y,
        ],
        np.float64,
    )
    return x, y

@njit
def score_group(xs,ys,degs,tx,ty):
    """Score one group of placed polygons.

    Each polygon i is the template (tx, ty) rotated by degs[i] degrees and
    translated by (xs[i], ys[i]). The score is the squared side of the
    axis-aligned bounding square around all transformed vertices, divided
    by the number of polygons in the group.
    """
    count = xs.size
    n_vertices = tx.size

    # Running bounding-box extents, seeded with sentinels any vertex beats.
    min_x = 1e300
    min_y = 1e300
    max_x = -1e300
    max_y = -1e300

    for k in range(count):
        theta = degs[k]*math.pi/180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        cx = xs[k]
        cy = ys[k]
        for v in range(n_vertices):
            # Rotate the template vertex, then translate to the placement.
            px = cos_t*tx[v]-sin_t*ty[v]+cx
            py = sin_t*tx[v]+cos_t*ty[v]+cy
            if px < min_x:
                min_x = px
            if px > max_x:
                max_x = px
            if py < min_y:
                min_y = py
            if py > max_y:
                max_y = py

    width = max_x - min_x
    height = max_y - min_y
    side = max(width, height)
    return side*side/ count

def strip(a):
    """Convert an iterable of 's'-marked values to a float64 array.

    Each element is stringified, every "s" character is removed, and the
    remainder is parsed with float().
    """
    cleaned = (str(item).replace("s", "") for item in a)
    return np.array(list(map(float, cleaned)), np.float64)

# Template vertices shared by every scoring call below.
tx, ty = make_polygon_template()

# Load current best
df = pd.read_csv('/home/submission/submission.csv')
# The group size N is the part of the id before the first underscore.
id_prefix = df["id"].astype(str).str.split("_").str[0]
df["N"] = id_prefix.astype(int)

# Score each N group separately.
scores = []
for n, g in df.groupby("N"):
    xs, ys, ds = (strip(g[col].to_numpy()) for col in ("x", "y", "deg"))
    score = score_group(xs, ys, ds, tx, ty)
    scores.append(dict(n=n, score=score))

scores_df = pd.DataFrame(scores)
ranked_by_score = scores_df.sort_values('score', ascending=False)
print("Per-N scores (top 20 by score contribution):")
print(ranked_by_score.head(20))

In [None]:
# How much would every N have to improve, if the improvement were spread
# uniformly, for the summed score to reach the target?

total_current = scores_df['score'].sum()
print(f"Total current score: {total_current:.6f}")
print(f"Target score: {target:.6f}")
print(f"Gap: {total_current - target:.6f}")

# Ratio the total score must be multiplied by to hit the target.
improvement_factor = target / total_current
# Each per-N score is proportional to side^2, so the side scales by the
# square root of the score factor.
side_factor = np.sqrt(improvement_factor)
side_reduction_pct = (1 - side_factor) * 100

print(f"\nImprovement factor needed: {improvement_factor:.6f}")
print(f"This means each side length needs to be reduced by factor: {side_factor:.6f}")
print(f"Or about {side_reduction_pct:.2f}% reduction in side length")

In [None]:
# Key insight: The gap is 2.5% in total score, which means ~1.25% reduction in side length.
# This is a VERY small improvement, but it's consistent across ALL N values.

# Let's check what the theoretical lower bound might be.
# For N=1, the optimal is a single tree at 45 degrees with side = 0.813173,
# i.e. a score of side^2 / 1 = 0.813173^2 = 0.661250.
# Our current N=1 score is 0.661250, which matches the theoretical optimal.

print("Theoretical analysis:")
print("="*60)

# N=1 theoretical optimal
n1_score = scores_df[scores_df['n'] == 1]['score'].values[0]
print(f"N=1 score: {n1_score:.6f} (theoretical optimal: 0.661250)")

# The gap suggests that for SOME N values, there are better solutions.
# But which ones?

# Packing efficiency for each N.
# score = side^2 / n is the bounding-square area available per tree, so
#   efficiency = (n * tree_area) / side^2 = tree_area / score
# is the fraction of the bounding square actually covered by trees
# (always <= 1 for a non-overlapping packing).
# Lower efficiency means more wasted space, i.e. more room for improvement.

# Exact area of the tree polygon via the shoelace formula on the template
# vertices (about 0.2456 for this outline).
tree_area = float(0.5 * abs(np.dot(tx, np.roll(ty, -1)) - np.dot(ty, np.roll(tx, -1))))
scores_df['efficiency'] = tree_area / scores_df['score']
scores_df['side'] = np.sqrt(scores_df['score'] * scores_df['n'])

print("\nN values with lowest efficiency (most room for improvement):")
print(scores_df.sort_values('efficiency').head(20))

In [None]:
# CRITICAL INSIGHT:
# The gap of 1.74 points is 2.5% of the total score.
# This is NOT achievable through incremental optimization.
# The top teams must have found fundamentally different solutions for SOME N values.

# Let's calculate what the target score implies for each N:
# If the improvement is uniform, each N needs to improve by 2.5%.
# But if the improvement is concentrated in specific N values, we need to find those.

# Hypothesis: The top teams have found better solutions for specific N values
# that are NOT in the public datasets.

# Summary report, printed one entry per line; empty entries are blank lines.
report_lines = (
    "STRATEGIC ANALYSIS:",
    "="*60,
    "",
    "1. Our current score (70.659437) is the BEST among all public datasets.",
    "2. The target (68.919154) is 1.74 points better.",
    "3. This gap is 2.5% - too large for incremental optimization.",
    "",
    "POSSIBLE EXPLANATIONS:",
    "- Top teams have proprietary solutions not shared publicly",
    "- Top teams use fundamentally different algorithms (MILP, constraint programming)",
    "- Top teams have found optimal solutions for specific N values",
    "",
    "WHAT WE'VE TRIED (ALL FAILED):",
    "- SA optimization (multiple variants)",
    "- Rebuild from corners",
    "- bbox3 C++ optimizer",
    "- Random restarts (1900 runs)",
    "- Ensemble from 39+ CSV sources",
    "",
    "WHAT WE HAVEN'T TRIED:",
    "- MILP/constraint programming for small N",
    "- Fractional translation (from jonathanchan kernel)",
    "- Asymmetric configurations for larger N",
    "- Lattice-based construction with different patterns",
)
for report_line in report_lines:
    print(report_line)