# Experiment 001: Baseline with Pre-optimized Solution

Goal: Establish a baseline score using the best pre-optimized solution, apply the `fix_direction` rotation optimization to each configuration, and validate that the resulting submission contains no overlapping trees.

In [1]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.strtree import STRtree
from shapely import affinity
from scipy.optimize import minimize_scalar
import warnings
warnings.filterwarnings('ignore')

# Outline of a single tree in its local frame: x and y coordinates of the
# 15 vertices, listed in the same order so they can be paired up.
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
# (x, y) tuples consumed when building polygons
TREE_VERTICES = [(vx, vy) for vx, vy in zip(TX, TY)]

# Quick sanity report on the outline
print(f"Tree has {len(TREE_VERTICES)} vertices")
print(f"Tree bounding box: x=[{min(TX)}, {max(TX)}], y=[{min(TY)}, {max(TY)}]")

Tree has 15 vertices
Tree bounding box: x=[-0.35, 0.35], y=[-0.2, 0.8]


In [2]:
def parse_s_value(s):
    """Convert a submission value to float, dropping a leading 's' if present.

    Strings that start with 's' have that first character removed before
    conversion; anything else is passed to float() unchanged.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    payload = s[1:] if has_marker else s
    return float(payload)

def format_s_value(v):
    """Render a value as a string with the 's' marker in front."""
    return 's{}'.format(v)

def create_tree_polygon(x, y, deg):
    """Build the tree outline rotated by `deg` degrees and placed at (x, y).

    The outline from TREE_VERTICES is first rotated about its local origin
    (0, 0) and only then shifted, so (x, y) is where the local origin lands.
    """
    outline = Polygon(TREE_VERTICES)
    turned = affinity.rotate(outline, deg, origin=(0, 0))
    return affinity.translate(turned, xoff=x, yoff=y)

def get_bounding_box_side(polygons):
    """Side of the smallest axis-aligned square covering every polygon.

    Only exterior-ring coordinates are inspected. Returns 0 when `polygons`
    is empty.
    """
    if not polygons:
        return 0

    xs, ys = [], []
    for shape in polygons:
        for point in shape.exterior.coords:
            xs.append(point[0])
            ys.append(point[1])

    width = max(xs) - min(xs)
    height = max(ys) - min(ys)
    return max(width, height)

def has_overlap(polygons):
    """Return True if any two polygons share interior area.

    Pairs that merely touch are not counted, and an intersection must exceed
    an area of 1e-10 to be reported. Candidate pairs come from an STRtree
    query; the results are used as positions into `polygons` (assumes the
    Shapely 2.x behaviour of returning integer indices -- TODO confirm the
    installed version).
    """
    if len(polygons) < 2:
        return False

    spatial_index = STRtree(polygons)
    for i, poly in enumerate(polygons):
        for idx in spatial_index.query(poly):
            if idx == i:
                # a polygon always matches its own bounding box
                continue
            other = polygons[idx]
            if not poly.intersects(other) or poly.touches(other):
                continue
            # Require real shared area, not just numerical noise at a boundary
            if poly.intersection(other).area > 1e-10:
                return True
    return False

# Confirmation message so the cell produces visible output once the helpers above are defined
print("Helper functions defined")

Helper functions defined


In [3]:
# Read the best pre-optimized solution from disk
baseline_path = '/home/nonroot/snapshots/santa-2025/21116303805/code/preoptimized/best_ensemble.csv'
df = pd.read_csv(baseline_path)
print(f"Loaded {len(df)} rows")
print(df.head())

# Add numeric companions (x_val, y_val, deg_val) for the 's'-prefixed string columns
for raw_col in ('x', 'y', 'deg'):
    df[f'{raw_col}_val'] = df[raw_col].apply(parse_s_value)

print(f"\nParsed values:")
print(df[['id', 'x_val', 'y_val', 'deg_val']].head())

Loaded 20100 rows
      id                       x                       y  \
0  001_0    s-48.196086194214246     s58.770984615214225   
1  002_0   s0.154097069621355887  s-0.038540742694794648   
2  002_1  s-0.154097069621372845  s-0.561459257305224058   
3  003_0      s1.123655816140301      s0.781101815992563   
4  003_1       s1.23405569584216      s1.275999500663759   

                       deg  
0                    s45.0  
1  s203.629377730656841550  
2   s23.629377730656791812  
3        s111.125132292893  
4         s66.370622269343  

Parsed values:
      id      x_val      y_val     deg_val
0  001_0 -48.196086  58.770985   45.000000
1  002_0   0.154097  -0.038541  203.629378
2  002_1  -0.154097  -0.561459   23.629378
3  003_0   1.123656   0.781102  111.125132
4  003_1   1.234056   1.276000   66.370622


In [None]:
def calculate_score(df):
    """Sum side**2 / n over configurations N = 1..200.

    Rows of configuration N are those whose 'id' starts with the zero-padded
    prefix 'NNN_'. A configuration with no rows prints a warning and adds
    nothing to the total.

    Returns:
        (total_score, scores_by_n), where scores_by_n maps n to a dict with
        keys 'side' and 'score'.
    """
    scores_by_n = {}
    total_score = 0
    ids = df['id']

    for n in range(1, 201):
        group = df[ids.str.startswith(f'{n:03d}_')]
        if group.empty:
            print(f"Warning: No trees for N={n}")
            continue

        polygons = [
            create_tree_polygon(row['x_val'], row['y_val'], row['deg_val'])
            for _, row in group.iterrows()
        ]

        side = get_bounding_box_side(polygons)
        score_n = side**2 / n
        scores_by_n[n] = {'side': side, 'score': score_n}
        total_score += score_n

    return total_score, scores_by_n

# Calculate baseline score
# `scores_by_n` (per-N side and score) is kept at module level because the
# score-contribution analysis further down reads it.
print("Calculating baseline score...")
baseline_score, scores_by_n = calculate_score(df)
print(f"\nBaseline score: {baseline_score:.6f}")
# 68.919154 is the hard-coded target this experiment compares against
print(f"Target score: 68.919154")
# Gap is baseline minus target, so a positive value means the baseline is above the target
print(f"Gap: {baseline_score - 68.919154:.6f}")

In [None]:
# Rank every N by its share of the total score and list the 20 largest
print("Top 20 N values by score contribution:")
sorted_scores = sorted(
    scores_by_n.items(),
    key=lambda entry: entry[1]['score'],
    reverse=True,
)
top_entries = sorted_scores[:20]
for n, data in top_entries:
    print(f"N={n:3d}: side={data['side']:.4f}, score={data['score']:.6f}")

In [None]:
# Spot-check a handful of configurations for overlapping trees
print("Validating overlaps for sample N values...")
test_ns = [1, 10, 50, 100, 150, 200]

for n in test_ns:
    in_config = df['id'].str.startswith(f'{n:03d}_')
    group = df[in_config]

    polygons = [
        create_tree_polygon(row['x_val'], row['y_val'], row['deg_val'])
        for _, row in group.iterrows()
    ]

    overlap = has_overlap(polygons)
    print(f"N={n:3d}: {len(polygons)} trees, overlap={overlap}")

In [None]:
# Fix direction optimization - rotate entire configuration to minimize bounding box
def fix_direction_for_n(df, n):
    """Search for a rigid rotation of configuration N that shrinks its bounding square.

    All trees of configuration N (rows whose 'id' starts with 'NNN_') are
    rotated together about the mean of their polygon centroids, and the
    rotation angle is tuned with a bounded scalar minimizer.

    Args:
        df: frame with 'id', 'x_val', 'y_val' and 'deg_val' columns.
        n: configuration number.

    Returns:
        (None, None) if the configuration has no rows;
        (0, 0) if the optimizer found no angle with a smaller side;
        otherwise (best_angle, delta), where best_angle is in degrees and
        delta = new_side - original_side, i.e. NEGATIVE when the rotation helps.
    """
    prefix = f'{n:03d}_'
    group = df[df['id'].str.startswith(prefix)]

    if len(group) == 0:
        return None, None

    # Polygons of the configuration as currently stored
    original_polygons = [
        create_tree_polygon(x, y, deg)
        for x, y, deg in zip(group['x_val'], group['y_val'], group['deg_val'])
    ]
    original_side = get_bounding_box_side(original_polygons)

    # The pivot does not depend on the trial angle, so compute it once instead
    # of once per polygon per objective evaluation.
    centroid_x = np.mean([p.centroid.x for p in original_polygons])
    centroid_y = np.mean([p.centroid.y for p in original_polygons])
    pivot = (centroid_x, centroid_y)

    def objective(angle):
        """Bounding-square side after rotating every tree by `angle` degrees about the pivot."""
        rotated_polygons = [
            affinity.rotate(poly, angle, origin=pivot)
            for poly in original_polygons
        ]
        return get_bounding_box_side(rotated_polygons)

    # Only the open interval (0, 90) is searched; presumably because the side of
    # an axis-aligned bounding square repeats every 90 degrees of rotation.
    # NOTE(review): method='bounded' is a local search, and this objective can
    # have several local minima, so the angle found is not guaranteed optimal.
    result = minimize_scalar(objective, bounds=(0.001, 89.999), method='bounded')
    best_angle = result.x
    new_side = result.fun

    if new_side < original_side:
        return best_angle, new_side - original_side
    return 0, 0

# Try the rotation search on a few larger configurations
print("Testing fix_direction optimization...")
for n in [50, 100, 150, 200]:
    angle, improvement = fix_direction_for_n(df, n)
    if not improvement:
        # covers both "no rows" (None) and "no gain" (0)
        print(f"N={n}: no improvement")
        continue
    print(f"N={n}: best_angle={angle:.4f}, improvement={improvement:.6f}")

In [None]:
# Apply fix_direction to all N values and create improved submission
def apply_fix_direction(df, n, angle):
    """Rotate every tree of configuration N by `angle` degrees as one rigid body.

    The pivot is the mean of the trees' polygon centroids. Each tree's
    position is rotated about that pivot and `angle` is added to its own
    rotation, which together is the same as rotating the whole arrangement.

    Args:
        df: frame with 'id', 'x_val', 'y_val' and 'deg_val' columns. It is
            modified IN PLACE.
        n: configuration number (rows whose 'id' starts with 'NNN_').
        angle: rotation in degrees; None or 0 leaves `df` untouched.

    Returns:
        The same `df` object that was passed in.
    """
    prefix = f'{n:03d}_'
    mask = df['id'].str.startswith(prefix)

    # Nothing to rotate, or no rotation requested
    if angle is None or angle == 0 or not mask.any():
        return df

    xs = df.loc[mask, 'x_val'].to_numpy(dtype=float)
    ys = df.loc[mask, 'y_val'].to_numpy(dtype=float)
    degs = df.loc[mask, 'deg_val'].to_numpy(dtype=float)

    # The polygons are only needed to locate the pivot
    polygons = [create_tree_polygon(x, y, deg) for x, y, deg in zip(xs, ys, degs)]
    centroid_x = np.mean([p.centroid.x for p in polygons])
    centroid_y = np.mean([p.centroid.y for p in polygons])

    # The trig values are the same for every tree, so evaluate them once
    theta = np.radians(angle)
    cos_a = np.cos(theta)
    sin_a = np.sin(theta)

    # Standard 2-D rotation of the offsets from the pivot
    dx = xs - centroid_x
    dy = ys - centroid_y

    # Writing through the boolean mask updates all rows at once and does not
    # rely on the index labels being unique.
    df.loc[mask, 'x_val'] = centroid_x + dx * cos_a - dy * sin_a
    df.loc[mask, 'y_val'] = centroid_y + dx * sin_a + dy * cos_a
    df.loc[mask, 'deg_val'] = degs + angle

    return df

print("Applying fix_direction to all N values...")
# Work on a copy so the loaded baseline frame stays untouched
improved_df = df.copy()
total_improvement = 0

for n in range(1, 201):
    angle, improvement = fix_direction_for_n(improved_df, n)
    # Skip configurations with no result, or with a side change that is not
    # below -0.0001 (improvements are reported as negative deltas)
    if not improvement or not (improvement < -0.0001):
        continue

    improved_df = apply_fix_direction(improved_df, n, angle)
    total_improvement += improvement
    if n % 50 == 0:
        print(f"N={n}: applied angle={angle:.4f}, cumulative improvement={total_improvement:.6f}")

print(f"\nTotal improvement from fix_direction: {total_improvement:.6f}")

In [None]:
# Calculate improved score
# Re-scores the rotated configurations with the same scorer used for the baseline.
print("Calculating improved score...")
improved_score, improved_scores_by_n = calculate_score(improved_df)
print(f"\nImproved score: {improved_score:.6f}")
print(f"Baseline score: {baseline_score:.6f}")
# Baseline minus improved: positive when the rotation step lowered the score
print(f"Improvement: {baseline_score - improved_score:.6f}")
# 68.919154 is the hard-coded target this experiment compares against
print(f"Target score: 68.919154")
print(f"Gap to target: {improved_score - 68.919154:.6f}")

In [None]:
# Create submission file
def create_submission(df, output_path):
    """Write the submission CSV and return the frame that was written.

    The output has the columns 'id', 'x', 'y' and 'deg'; the last three are
    the numeric 'x_val', 'y_val' and 'deg_val' columns rendered through
    format_s_value. The parent directory of `output_path` is created if it
    does not exist yet.

    Args:
        df: frame with 'id', 'x_val', 'y_val' and 'deg_val' columns.
        output_path: destination CSV path.

    Returns:
        The submission DataFrame (same index as `df`).
    """
    from pathlib import Path  # local import keeps this cell self-contained

    submission = df[['id']].copy()
    for column in ('x', 'y', 'deg'):
        submission[column] = df[f'{column}_val'].apply(format_s_value)

    # to_csv does not create missing directories, so make sure the target exists
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    submission.to_csv(output_path, index=False)
    print(f"Saved submission to {output_path}")
    return submission

# Save improved submission
# Writes the rotated configurations to disk, then previews the first formatted rows.
submission = create_submission(improved_df, '/home/submission/submission.csv')
print(submission.head())

In [None]:
# Final validation - every configuration of the improved frame is checked
print("Final validation - checking all N for overlaps...")
overlap_count = 0

for n in range(1, 201):
    in_config = improved_df['id'].str.startswith(f'{n:03d}_')
    group = improved_df[in_config]

    polygons = [
        create_tree_polygon(row['x_val'], row['y_val'], row['deg_val'])
        for _, row in group.iterrows()
    ]

    if not has_overlap(polygons):
        continue
    overlap_count += 1
    print(f"N={n}: OVERLAP DETECTED!")

if overlap_count != 0:
    print(f"\nWARNING: {overlap_count} configurations have overlaps!")
else:
    print("\nNo overlaps detected - submission is valid!")

In [None]:
# Summary
# Recaps the numbers computed by the earlier cells; nothing is recalculated here.
print("="*50)
print("EXPERIMENT 001 SUMMARY")
print("="*50)
print(f"Baseline score: {baseline_score:.6f}")
print(f"Improved score: {improved_score:.6f}")
# Baseline minus improved: positive when the rotation step lowered the score
print(f"Improvement: {baseline_score - improved_score:.6f}")
# 68.919154 is the hard-coded target this experiment compares against
print(f"Target score: 68.919154")
print(f"Gap to target: {improved_score - 68.919154:.6f}")
# Number of configurations flagged by the final overlap validation
print(f"Overlaps: {overlap_count}")
print("="*50)