# Baseline Experiment - Pre-optimized Submission

Verify the score of the pre-optimized santa-2025.csv submission.

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from shapely.strtree import STRtree
import warnings
warnings.filterwarnings('ignore')

# Outline of one tree as 15 (x, y) vertices, starting at the tip (0, 0.8).
# TX holds the x coordinates and TY the matching y coordinates.
TX = [
    0, 0.125, 0.0625, 0.2, 0.1,
    0.35, 0.075, 0.075, -0.075, -0.075,
    -0.35, -0.1, -0.2, -0.0625, -0.125,
]
TY = [
    0.8, 0.5, 0.5, 0.25, 0.25,
    0, 0, -0.2, -0.2, 0,
    0, 0.25, 0.25, 0.5, 0.5,
]
TREE_COORDS = [(vx, vy) for vx, vy in zip(TX, TY)]

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by ``deg`` about the origin, then moved by (x, y).

    The rotation is applied first, around the outline's local origin (0, 0),
    and the translation second, so (x, y) is where that local origin ends up.
    ``deg`` is handed to shapely's ``rotate`` unchanged.

    Returns the resulting shapely polygon.
    """
    rotated = rotate(Polygon(TREE_COORDS), deg, origin=(0, 0))
    return translate(rotated, xoff=x, yoff=y)

# Report the outline's extents as a quick sanity check of the constants.
tree_height = max(TY) - min(TY)
tree_width = max(TX) - min(TX)
print("Tree geometry loaded. Tree has 15 vertices.")
print(f"Tree height: {tree_height:.2f}")
print(f"Tree width: {tree_width:.2f}")

Tree geometry loaded. Tree has 15 vertices.
Tree height: 1.00
Tree width: 0.70


In [2]:
# Read the pre-optimized submission and show its size next to the expected row count
submission_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv'
df = pd.read_csv(submission_path)

# A configuration of size N contributes N rows, for N = 1..200
expected_rows = sum(range(1, 201))
print(f"Submission shape: {df.shape}")
print(f"Expected rows: {expected_rows} (1+2+...+200)")
print(f"\nFirst few rows:")
print(df.head(10))

Submission shape: (20100, 4)
Expected rows: 20100 (1+2+...+200)

First few rows:
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   
5  003_2      s0.641714640229075      s1.180458566613381   
6  004_0  s-0.324747789589372171   s0.132109978088185392   
7  004_1   s0.315354346242637695   s0.132109978063475492   
8  004_2   s0.324747789592379210  s-0.732109978069475531   
9  004_3  s-0.315354348134818330  s-0.732109978094185987   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  
5      s155.13405193710082  
6  s156.370622145636389178  
7  s156.370622269264089255

In [3]:
# Parse the submission - values are prefixed with 's'
def parse_value(val):
    """Convert a submission cell to ``float``, dropping one leading 's' if present.

    Strings that start with 's' have that first character removed before
    conversion; every other input (plain numeric strings, ints, floats) is
    passed to ``float`` as-is.
    """
    has_prefix = isinstance(val, str) and val.startswith('s')
    raw = val[1:] if has_prefix else val
    return float(raw)

# Add numeric copies of the three 's'-prefixed columns as x_val, y_val, deg_val
for column in ('x', 'y', 'deg'):
    df[f'{column}_val'] = df[column].apply(parse_value)

# Ids have the form NNN_idx: the configuration size N, then the tree's index in it
id_parts = df['id'].apply(lambda tree_id: tree_id.split('_'))
df['n'] = id_parts.apply(lambda parts: int(parts[0]))
df['tree_idx'] = id_parts.apply(lambda parts: int(parts[1]))

print(f"Parsed values:")
print(df[['id', 'x_val', 'y_val', 'deg_val', 'n', 'tree_idx']].head(10))

Parsed values:
      id      x_val      y_val     deg_val  n  tree_idx
0  001_0 -48.196086  58.770985   45.000000  1         0
1  002_0   0.154097  -0.038541  203.629378  2         0
2  002_1  -0.154097  -0.561459   23.629378  2         1
3  003_0   1.123656   0.781102  111.125132  3         0
4  003_1   1.234056   1.276000   66.370622  3         1
5  003_2   0.641715   1.180459  155.134052  3         2
6  004_0  -0.324748   0.132110  156.370622  4         0
7  004_1   0.315354   0.132110  156.370622  4         1
8  004_2   0.324748  -0.732110  336.370622  4         2
9  004_3  -0.315354  -0.732110  336.370622  4         3


In [4]:
def calculate_score_for_n(group):
    """Score one configuration: squared bounding-square side divided by N.

    ``group`` holds the rows of a single configuration and must provide the
    columns ``n``, ``x_val``, ``y_val`` and ``deg_val``. N is taken from the
    first row's ``n``. The side is the larger of the x and y extents measured
    over every exterior vertex of every placed tree.

    Returns a Series with ``n``, ``side``, ``score_contribution`` and
    ``num_trees`` (the number of rows in ``group``).
    """
    n = group['n'].iloc[0]

    # Exterior vertices of each placed tree, stacked into a single array
    vertex_arrays = [
        np.array(create_tree_polygon(x, y, deg).exterior.coords)
        for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]
    vertices = np.vstack(vertex_arrays)

    # Axis-aligned extents; the enclosing square needs the larger of the two
    lower = vertices.min(axis=0)
    upper = vertices.max(axis=0)
    side = max(upper[0] - lower[0], upper[1] - lower[1])

    return pd.Series({
        'n': n,
        'side': side,
        'score_contribution': (side ** 2) / n,
        'num_trees': len(group),
    })

print("Calculating scores for each N...")
# Iterate the groups directly instead of using groupby('n').apply(...).
# calculate_score_for_n reads group['n'], and apply() handing the grouping
# column to the function is deprecated since pandas 2.2 and no longer done in
# pandas 3.0, where this cell would fail with a KeyError. Iterating a groupby
# always yields the full sub-frame, in ascending order of n, so the resulting
# table (one row per N, default 0..k-1 index) is the same as before.
scores_by_n = pd.DataFrame(
    [calculate_score_for_n(group) for _, group in df.groupby('n')]
).reset_index(drop=True)
print(f"\nFirst 10 N values:")
print(scores_by_n.head(10))

Calculating scores for each N...



First 10 N values:
      n      side  score_contribution  num_trees
0   1.0  0.813173            0.661250        1.0
1   2.0  0.949504            0.450779        2.0
2   3.0  1.142031            0.434745        3.0
3   4.0  1.290806            0.416545        4.0
4   5.0  1.443692            0.416850        5.0
5   6.0  1.548438            0.399610        6.0
6   7.0  1.673104            0.399897        7.0
7   8.0  1.755921            0.385407        8.0
8   9.0  1.867280            0.387415        9.0
9  10.0  1.940696            0.376630       10.0


In [5]:
# Sum the per-N contributions into the overall score and compare with the expected value
total_score = scores_by_n['score_contribution'].sum()
banner = '=' * 50
print(f"\n{banner}")
print(f"TOTAL SCORE: {total_score:.6f}")
print(f"{banner}")
print(f"\nExpected score: 70.676102")
score_gap = abs(total_score - 70.676102)
print(f"Difference: {score_gap:.6f}")

# The ten N values with the largest contribution, largest first
print(f"\nWorst 10 N values (highest score contribution):")
worst_n = scores_by_n.nlargest(n=10, columns='score_contribution')
print(worst_n)


TOTAL SCORE: 70.676102

Expected score: 70.676102
Difference: 0.000000

Worst 10 N values (highest score contribution):
       n      side  score_contribution  num_trees
0    1.0  0.813173            0.661250        1.0
1    2.0  0.949504            0.450779        2.0
2    3.0  1.142031            0.434745        3.0
4    5.0  1.443692            0.416850        5.0
3    4.0  1.290806            0.416545        4.0
6    7.0  1.673104            0.399897        7.0
5    6.0  1.548438            0.399610        6.0
8    9.0  1.867280            0.387415        9.0
7    8.0  1.755921            0.385407        8.0
14  15.0  2.384962            0.379203       15.0


In [6]:
# Validate: check for overlaps in a few configurations
def has_overlap(polygons):
    """Return True if any two polygons overlap by more than a sliver.

    Polygons that merely touch are not counted, and intersections whose area
    is at most 1e-10 are treated as numerical noise.

    ``polygons`` is a sequence of shapely polygons. Candidate pairs come from
    an STRtree query, whose results are used as integer positions into
    ``polygons`` (NOTE(review): this is the Shapely 2.x behaviour of
    ``STRtree.query`` -- confirm the installed version).
    """
    tree_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        for idx in tree_index.query(poly):
            # Overlap is symmetric, so each unordered pair is tested once;
            # this also skips the polygon's own entry (idx == i).
            if idx <= i:
                continue
            other = polygons[idx]
            if not poly.intersects(other) or poly.touches(other):
                continue
            # Check if it's a real overlap (not just numerical precision)
            if poly.intersection(other).area > 1e-10:
                return True
    return False

# Spot-check a handful of configuration sizes for overlapping trees
print("Checking for overlaps in sample configurations...")
for n in (1, 10, 50, 100, 200):
    group = df[df['n'] == n]
    polygons = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]
    overlap = has_overlap(polygons)
    status = 'OVERLAP DETECTED!' if overlap else 'No overlaps'
    print(f"N={n}: {status}")

print("\nValidation complete.")

Checking for overlaps in sample configurations...
N=1: No overlaps
N=10: No overlaps
N=50: No overlaps
N=100: No overlaps
N=200: No overlaps

Validation complete.


In [7]:
# Save metrics
import json

# Headline numbers for this experiment. worst_n is ordered with the largest
# contribution first, so its first row is the single worst N.
worst_row = worst_n.iloc[0]
metrics = {
    'cv_score': total_score,
    'total_n_values': 200,
    'total_rows': len(df),
    'worst_n': int(worst_row['n']),
    'worst_n_contribution': float(worst_row['score_contribution']),
}

with open('/home/code/experiments/001_baseline/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

metrics_text = json.dumps(metrics, indent=2)
print(f"Metrics saved to experiments/001_baseline/metrics.json")
print(metrics_text)

Metrics saved to experiments/001_baseline/metrics.json
{
  "cv_score": 70.67610239809181,
  "total_n_values": 200,
  "total_rows": 20100,
  "worst_n": 1,
  "worst_n_contribution": 0.6612499999999945
}


In [1]:
# Submission cell - this experiment uses pre-optimized CSV
# (Previously this whole cell was one comment line with escaped "\n" sequences,
# so none of the code below ever ran.)
class PreoptimizedCSV:
    """Placeholder class for pre-optimized submission.

    There is nothing to train: the "model" only copies the pre-optimized CSV
    to the submission location.
    """

    def __init__(self, data='single'):
        # Stored but not used by get_submission.
        self.data = data

    def get_submission(self):
        """Copy the pre-optimized CSV to the submission path and return that path."""
        # Just copy the pre-optimized CSV
        import shutil
        shutil.copy('/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/santa-2025.csv',
                    '/home/submission/submission.csv')
        return '/home/submission/submission.csv'


model = PreoptimizedCSV(data='single')
model.get_submission()
print("Submission copied to /home/submission/submission.csv")