# Baseline Verification

Verify the pre-optimized baseline submission score.

In [3]:
import numpy as np
import pandas as pd
import os

# Run from the experiment directory so the relative file names used below
# ('baseline.csv', 'metrics.json') resolve inside it.
EXPERIMENT_DIR = '/home/code/experiments/001_baseline'
os.chdir(EXPERIMENT_DIR)

# Tree outline: the 15 polygon vertices (TX[j], TY[j]) of a single tree in its
# own local frame, i.e. before any rotation or translation is applied.
TX = np.array([0,0.125,0.0625,0.2,0.1,0.35,0.075,0.075,-0.075,-0.075,-0.35,-0.1,-0.2,-0.0625,-0.125])
TY = np.array([0.8,0.5,0.5,0.25,0.25,0,0,-0.2,-0.2,0,0,0.25,0.25,0.5,0.5])

def score_group(xs, ys, degs):
    """Calculate score for a single N-tree configuration.

    Every tree outline (TX, TY) is rotated counter-clockwise by its angle
    about the local origin and then translated to its (x, y) position. The
    score is the squared side length of the smallest axis-aligned square that
    covers all resulting vertices, divided by the number of trees.

    Args:
        xs: 1-D sequence of tree x positions.
        ys: 1-D sequence of tree y positions, same length as ``xs``.
        degs: 1-D sequence of tree rotations in degrees, same length as ``xs``.

    Returns:
        float: ``side ** 2 / n`` where ``n == len(xs)``.

    Raises:
        ValueError: if the inputs are not 1-D, have different lengths, or are
            empty.
    """
    xs = np.asarray(xs, dtype=float)
    ys = np.asarray(ys, dtype=float)
    degs = np.asarray(degs, dtype=float)

    # Reject mismatched inputs up front: extra entries in ys/degs would
    # otherwise be ignored silently and produce a plausible but wrong score.
    if xs.ndim != 1 or xs.shape != ys.shape or xs.shape != degs.shape:
        raise ValueError(
            "xs, ys and degs must be 1-D sequences of equal length; got shapes "
            f"{xs.shape}, {ys.shape}, {degs.shape}"
        )
    n = xs.shape[0]
    if n == 0:
        raise ValueError("score_group requires at least one tree")

    rad = np.radians(degs)
    # Column vectors so each tree (row) broadcasts against all vertices (columns).
    c = np.cos(rad)[:, None]
    s = np.sin(rad)[:, None]

    # Standard 2-D rotation followed by translation; result shape is (n, 15).
    all_x = TX * c - TY * s + xs[:, None]
    all_y = TX * s + TY * c + ys[:, None]

    # The bounding square's side is the larger of the two axis extents.
    side = max(all_x.max() - all_x.min(), all_y.max() - all_y.min())
    return float(side * side / n)

def calculate_total_score(df, max_n=200):
    """Calculate total score for a submission.

    The input frame is only read, never modified; all parsed values live in a
    private working frame.

    Args:
        df: DataFrame with columns ``id``, ``x``, ``y`` and ``deg``. ``id``
            has the form ``'<N>_<index>'`` (e.g. ``'003_1'`` belongs to N=3).
            ``x``, ``y`` and ``deg`` hold numbers, optionally written as
            strings with a leading ``'s'`` (e.g. ``'s0.25'``).
        max_n: Largest N to score; N runs over ``1..max_n`` inclusive.

    Returns:
        tuple: ``(total_score, per_n_scores)`` where ``per_n_scores`` maps each
        scored N to its ``score_group`` value and ``total_score`` is their sum.
        An N whose group does not contain exactly N rows is reported with a
        printed warning and left out of both results.
    """
    def _to_float(column):
        # Strip only a literal leading 's'. Blindly dropping the first
        # character would corrupt any value written without the prefix.
        text = df[column].astype(str).str.replace(r'^s', '', regex=True)
        return text.astype(float).to_numpy()

    # Plain arrays (not Series) so the working frame gets a fresh index and
    # cannot be affected by whatever index the caller's frame carries.
    parsed = pd.DataFrame({
        'x_val': _to_float('x'),
        'y_val': _to_float('y'),
        'deg_val': _to_float('deg'),
        # Extract N from id (e.g., '003_1' -> 3)
        'n': df['id'].astype(str).str.split('_').str[0].astype(int).to_numpy(),
    })

    # Split once by N instead of re-scanning the whole frame for every N.
    groups = {int(key): rows for key, rows in parsed.groupby('n', sort=False)}

    total_score = 0.0
    per_n_scores = {}

    for n in range(1, max_n + 1):
        group = groups.get(n)
        count = 0 if group is None else len(group)
        if count == n:
            score = score_group(
                group['x_val'].values,
                group['y_val'].values,
                group['deg_val'].values,
            )
            per_n_scores[n] = score
            total_score += score
        else:
            # Best effort: report the malformed group and keep scoring the rest.
            print(f"Warning: N={n} has {count} trees instead of {n}")

    return total_score, per_n_scores

# Read the baseline submission and preview its size and first rows.
BASELINE_CSV = 'baseline.csv'

print("Loading baseline submission...")
df = pd.read_csv(BASELINE_CSV)
print(f"Shape: {df.shape}")
print(df.head())

Loading baseline submission...
Shape: (20100, 4)
      id                       x                        y  \
0  001_0  s40.752900903586450454  s-32.002948532171380691   
1  002_0   s0.202513410337269301   s-0.028957664041420434   
2  002_1  s-0.105680728905459279   s-0.551876178651849569   
3  003_0   s1.127378112162989332    s0.792211449857787242   
4  003_1   s1.234055695842160016    s1.275999500663759001   

                       deg  
0   s45.000000000000035527  
1  s203.629377730656727863  
2   s23.629377730656813128  
3  s112.222533627590607352  
4   s66.370622269343002131  


In [4]:
# Calculate total score
total_score, per_n_scores = calculate_total_score(df)
print(f"\nTotal Score: {total_score:.6f}")
print("\nScore breakdown by N range:")

# One subtotal per block of 50 consecutive N values. Only N values that were
# actually scored are summed, so the breakdown stays consistent with the total
# (and does not raise KeyError) when an N was skipped with a warning.
for lo, hi in [(1, 50), (51, 100), (101, 150), (151, 200)]:
    label = f"N={lo}-{hi}:"
    subtotal = sum(per_n_scores[n] for n in range(lo, hi + 1) if n in per_n_scores)
    # Pad the label to 9 characters so the output layout is unchanged.
    print(f"  {label:<9} {subtotal:.4f}")


Total Score: 70.625918

Score breakdown by N range:
  N=1-50:   19.0333
  N=51-100: 17.6134
  N=101-150: 17.1363
  N=151-200: 16.8430


In [5]:
# List the 20 N values with the highest per-N score, largest first.
print("\nTop 20 worst N values (highest per-N scores):")
ranked_n = sorted(per_n_scores, key=per_n_scores.get, reverse=True)
sorted_scores = [(n, per_n_scores[n]) for n in ranked_n]
for n, score in sorted_scores[:20]:
    print(f"  N={n}: {score:.6f}")


Top 20 worst N values (highest per-N scores):
  N=1: 0.661250
  N=2: 0.450779
  N=3: 0.434745
  N=5: 0.416850
  N=4: 0.416545
  N=7: 0.399897
  N=6: 0.399610
  N=9: 0.387415
  N=8: 0.385407
  N=15: 0.376950
  N=10: 0.376630
  N=21: 0.376451
  N=20: 0.376057
  N=22: 0.375258
  N=11: 0.374924
  N=16: 0.374128
  N=26: 0.373997
  N=12: 0.372724
  N=13: 0.372294
  N=25: 0.372144


In [6]:
# Publish the baseline file as the current submission.
import shutil

SUBMISSION_PATH = '/home/submission/submission.csv'
shutil.copy('baseline.csv', SUBMISSION_PATH)
print("Copied baseline to /home/submission/submission.csv")

Copied baseline to /home/submission/submission.csv


In [7]:
# Persist the total score to metrics.json in the current working directory.
import json

metrics = {'cv_score': total_score}
with open('metrics.json', 'w') as metrics_file:
    metrics_file.write(json.dumps(metrics))
print(f"Saved metrics: {metrics}")

Saved metrics: {'cv_score': 70.62591788240462}
