# Experiment 004: Lattice Construction + SA Optimization

The evaluator correctly identified that exp_003 made an apples-to-oranges comparison:
- RAW lattice output (88.33) vs OPTIMIZED baseline (70.66)

The correct workflow is:
1. Generate lattice configurations (starting point)
2. Apply C++ SA optimizer to lattice output
3. Compare OPTIMIZED lattice to baseline
4. Create ensemble picking best per-N

In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
import os
import subprocess
import time
import shutil
from tqdm import tqdm

# Tree geometry
# Outline of one tree in its own local frame, stored as two parallel lists:
# vertex i of the outline is (TX[i], TY[i]).  create_tree_polygon() zips the
# lists in this order, so the element order defines the polygon boundary.
# The outline spans x in [-0.35, 0.35] and y in [-0.2, 0.8].
TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]

def parse_value(s):
    """Convert a submission field to ``float``.

    String values may carry a single leading ``'s'`` marker (e.g. ``"s0.35"``);
    that one character is dropped before conversion.  Anything else is handed
    to ``float()`` unchanged, so invalid input raises whatever ``float()``
    raises.
    """
    has_marker = isinstance(s, str) and s.startswith('s')
    return float(s[1:] if has_marker else s)

def create_tree_polygon(x, y, deg):
    """Build the shapely ``Polygon`` of one tree placed at ``(x, y)``.

    The local outline (``TX``/``TY``) is rotated counter-clockwise by ``deg``
    degrees about the local origin and then translated by ``(x, y)``.
    """
    theta = np.radians(deg)
    c = np.cos(theta)
    s = np.sin(theta)

    outline = []
    for px, py in zip(TX, TY):
        # Standard 2-D rotation followed by the translation to (x, y).
        outline.append((px * c - py * s + x, px * s + py * c + y))
    return Polygon(outline)

def compute_bounding_side(polygons):
    """Return the side of the smallest axis-aligned square enclosing all polygons.

    Every exterior vertex of every polygon is pooled, and the larger of the
    x-extent and y-extent is returned.  An empty (falsy) input yields ``0``.
    """
    if not polygons:
        return 0

    vertices = np.array(
        [point for shape in polygons for point in shape.exterior.coords]
    )
    # Per-axis extent: index 0 is the x range, index 1 is the y range.
    extents = vertices.max(axis=0) - vertices.min(axis=0)
    return max(extents)

def compute_score_for_n(df, n):
    """Score the ``n``-tree configuration stored in ``df``.

    Rows belonging to the configuration are those whose ``id`` starts with the
    zero-padded prefix ``"NNN_"``.  If the number of such rows is not exactly
    ``n`` the configuration is treated as missing and ``inf`` is returned.
    Otherwise the score is ``side**2 / n`` where ``side`` is the bounding
    square side of all tree polygons.
    """
    group = df[df['id'].str.startswith(f"{n:03d}_")]
    if len(group) != n:
        return float('inf')

    shapes = []
    for _, rec in group.iterrows():
        shapes.append(
            create_tree_polygon(
                parse_value(rec['x']),
                parse_value(rec['y']),
                parse_value(rec['deg']),
            )
        )
    return compute_bounding_side(shapes) ** 2 / n

def compute_total_score(df):
    """Return the sum of ``compute_score_for_n(df, n)`` for n = 1..200."""
    total = 0
    for n in range(1, 201):
        total += compute_score_for_n(df, n)
    return total

# Progress marker: printed once the helper definitions above have executed.
print("Functions defined")

In [None]:
# Step 1: Generate Zaburo-style lattice configurations for all N

def find_best_lattice_trees(n):
    """Zaburo-style lattice construction with alternating rows.

    Tries every pair of row widths ``(n_even, n_odd)`` with ``n_even`` in
    ``1..n`` and ``n_odd`` in ``{n_even, n_even - 1}``.  Rows are filled
    bottom-up until ``n`` trees are placed:

    * even rows: trees at angle 0, x starting at 0, y = ``row // 2``;
    * odd rows: trees at angle 180, x shifted by 0.35,
      y = ``0.8 + (row - 1) // 2``;
    * trees within a row are 0.7 apart (the full width of the tree outline).

    Returns:
        ``(best_score, best_trees)`` where ``best_score`` is the squared
        bounding-square side (not divided by ``n``) and ``best_trees`` is a
        list of ``{'x', 'y', 'angle'}`` dicts.  The first layout reaching the
        minimum wins ties.  If no layout is produced the result is
        ``(inf, None)``.
    """
    winner_area = float("inf")
    winner_layout = None

    for even_width in range(1, n + 1):
        for odd_width in (even_width, even_width - 1):
            if odd_width < 0:
                continue

            layout = []
            remaining = n
            row = 0
            while remaining > 0:
                on_even_row = row % 2 == 0
                count = min(remaining, even_width if on_even_row else odd_width)
                if count <= 0:
                    # A zero-width row can never absorb the remaining trees.
                    break
                remaining -= count

                if on_even_row:
                    angle, x_offset, y = 0, 0, row // 2 * 1.0
                else:
                    angle, x_offset, y = 180, 0.35, (0.8 + (row - 1) // 2 * 1.0)

                layout.extend(
                    {'x': 0.7 * col + x_offset, 'y': y, 'angle': angle}
                    for col in range(count)
                )
                row += 1

            if len(layout) != n:
                # Row filling stopped early; this width pair is not usable.
                continue

            # Area of the bounding square of this candidate layout.
            shapes = [create_tree_polygon(t['x'], t['y'], t['angle']) for t in layout]
            area = compute_bounding_side(shapes) ** 2

            if area < winner_area:
                winner_area = area
                winner_layout = layout

    return winner_area, winner_layout

print("Generating lattice configurations for N=1 to 200...")
lattice_rows = []
for n in tqdm(range(1, 201)):
    lattice_area, lattice_layout = find_best_lattice_trees(n)
    # One CSV row per tree; values carry the 's' marker that parse_value() strips.
    lattice_rows.extend(
        {
            'id': f"{n:03d}_{idx}",
            'x': f"s{placed['x']}",
            'y': f"s{placed['y']}",
            'deg': f"s{placed['angle']}",
        }
        for idx, placed in enumerate(lattice_layout)
    )

lattice_csv_path = '/home/code/experiments/004_lattice_plus_sa/lattice_initial.csv'
lattice_df = pd.DataFrame(lattice_rows)
lattice_df.to_csv(lattice_csv_path, index=False)
print(f"Saved lattice initial configuration with {len(lattice_df)} rows")

# Score of the raw (un-optimized) lattice; later cells use it as the reference
# point for the SA improvement.
initial_score = compute_total_score(lattice_df)
print(f"Lattice initial score: {initial_score:.6f}")

In [None]:
# Step 2: Apply C++ SA optimizer to lattice output

# Fail fast when the optimizer binary is missing.  Previously this only printed
# a message and then went on to copy files and call subprocess.run(), which
# failed later with a less informative error.
optimizer_path = '/home/code/experiments/002_cpp_optimizer/sa_v1_parallel'
if not os.path.exists(optimizer_path):
    raise FileNotFoundError(f"C++ optimizer not found at {optimizer_path}")
print(f"C++ optimizer found at {optimizer_path}")

# Copy lattice to working directory: the optimizer is given submission_best.csv
# as its input file.
shutil.copy('/home/code/experiments/004_lattice_plus_sa/lattice_initial.csv',
            '/home/code/experiments/004_lattice_plus_sa/submission_best.csv')

# Create solutions directory
# NOTE(review): presumably the optimizer writes intermediate results here --
# confirm against the optimizer source.
os.makedirs('/home/code/experiments/004_lattice_plus_sa/solutions', exist_ok=True)

print("\nRunning C++ SA optimizer on lattice output...")
print("This may take several minutes...")

start_time = time.time()
# NOTE(review): the meaning of '-n 10000' and '-r 5' is defined by the
# optimizer binary (presumably iteration and restart counts) -- confirm.
result = subprocess.run(
    [optimizer_path, '-i', '/home/code/experiments/004_lattice_plus_sa/submission_best.csv',
     '-n', '10000', '-r', '5'],
    cwd='/home/code/experiments/004_lattice_plus_sa',
    capture_output=True,
    text=True,
    timeout=900  # 15 minute timeout; subprocess.TimeoutExpired propagates
)
end_time = time.time()

print(f"\nExecution time: {end_time - start_time:.1f}s")
# A non-zero exit status does not raise (check=False), so surface it here;
# otherwise a failed run is only visible by reading STDERR.
if result.returncode != 0:
    print(f"WARNING: optimizer exited with return code {result.returncode}")
print("STDOUT:")
print(result.stdout[:3000] if result.stdout else "(empty)")
print("\nSTDERR:")
print(result.stderr[:500] if result.stderr else "(empty)")

In [None]:
# Step 3: Load and score the optimized lattice output

# Check for output files
output_files = [f for f in os.listdir('/home/code/experiments/004_lattice_plus_sa') if f.endswith('.csv')]
print(f"CSV files in directory: {output_files}")

# Also check solutions directory
if os.path.exists('/home/code/experiments/004_lattice_plus_sa/solutions'):
    solution_files = os.listdir('/home/code/experiments/004_lattice_plus_sa/solutions')
    print(f"Files in solutions directory: {solution_files}")

# Load the optimized output (submission.csv is expected to be the optimizer's
# default output file).
# NOTE(review): a submission.csv left over from an earlier run would be picked
# up here as well -- confirm the optimizer overwrites it on every run.
if os.path.exists('/home/code/experiments/004_lattice_plus_sa/submission.csv'):
    df_optimized = pd.read_csv('/home/code/experiments/004_lattice_plus_sa/submission.csv')
    optimized_score = compute_total_score(df_optimized)
    print(f"\nOptimized lattice score: {optimized_score:.6f}")
    print(f"Initial lattice score: {initial_score:.6f}")
    print(f"Improvement from SA: {initial_score - optimized_score:.6f}")
else:
    print("No submission.csv found - optimizer may not have produced output")
    # Fall back to the raw lattice for BOTH the frame and its score.  Without
    # df_optimized being defined here, the per-N comparison and ensemble cells
    # fail with NameError.
    df_optimized = lattice_df
    optimized_score = initial_score
    print("Falling back to the un-optimized lattice configuration")

In [None]:
# Step 4: Compare optimized lattice to baseline and create ensemble

# Baseline submission and its total score over N = 1..200.
df_baseline = pd.read_csv('/home/code/external_data/saspav/santa-2025.csv')
baseline_score = compute_total_score(df_baseline)
print(f"Baseline score: {baseline_score:.6f}")
print(f"Optimized lattice score: {optimized_score:.6f}")
print(f"Difference: {optimized_score - baseline_score:.6f}")

# Per-N table.  Only the first ten N, the last six N, and any N where the
# lattice wins by more than 1e-6 are printed.
print("\nPer-N comparison (optimized lattice vs baseline):")
print("N\tLattice\t\tBaseline\tDiff\t\tBetter")
print("-" * 70)

improved_ns = []
for n in range(1, 201):
    lattice_score_n = compute_score_for_n(df_optimized, n)
    baseline_score_n = compute_score_for_n(df_baseline, n)
    diff = lattice_score_n - baseline_score_n

    # Differences within +/-1e-9 are treated as a tie.
    if diff < -1e-9:
        better = "LATTICE"
        improved_ns.append(n)
    elif diff > 1e-9:
        better = "BASELINE"
    else:
        better = "SAME"

    show_row = n <= 10 or n >= 195 or diff < -1e-6
    if show_row:
        print(f"{n}\t{lattice_score_n:.6f}\t{baseline_score_n:.6f}\t{diff:.6f}\t{better}")

improved_count = len(improved_ns)
print(f"\nOptimized lattice is better for {improved_count} out of 200 N values")
if improved_ns:
    if len(improved_ns) > 20:
        print(f"Improved N values: {improved_ns[:20]}...")
    else:
        print(f"Improved N values: {improved_ns}")

In [None]:
# Step 5: Create ensemble picking best per-N from baseline and optimized lattice

print("Creating ensemble (best per-N from baseline and optimized lattice)...")

ensemble_rows = []
ensemble_total = 0
lattice_wins = 0
baseline_wins = 0

for n in range(1, 201):
    lattice_score_n = compute_score_for_n(df_optimized, n)
    baseline_score_n = compute_score_for_n(df_baseline, n)

    # The lattice must beat the baseline by more than 1e-9 to be chosen;
    # ties (and anything else) keep the baseline rows.
    if lattice_score_n < baseline_score_n - 1e-9:
        source_df, chosen_score = df_optimized, lattice_score_n
        lattice_wins += 1
    else:
        source_df, chosen_score = df_baseline, baseline_score_n
        baseline_wins += 1

    prefix = f"{n:03d}_"
    chosen_trees = source_df[source_df['id'].str.startswith(prefix)]
    ensemble_rows.extend(rec.to_dict() for _, rec in chosen_trees.iterrows())
    ensemble_total += chosen_score

print(f"\nEnsemble composition:")
print(f"  Lattice wins: {lattice_wins}")
print(f"  Baseline wins: {baseline_wins}")
print(f"\nEnsemble total score: {ensemble_total:.6f}")
print(f"Baseline total score: {baseline_score:.6f}")
print(f"Improvement: {baseline_score - ensemble_total:.6f}")

In [None]:
# Save ensemble submission
submission_csv_path = '/home/submission/submission.csv'
ensemble_df = pd.DataFrame(ensemble_rows)
ensemble_df.to_csv(submission_csv_path, index=False)
print(f"Saved ensemble with {len(ensemble_df)} rows")

# Verify: re-read the file from disk and re-score it, so the reported number
# reflects what was actually written.
df_verify = pd.read_csv(submission_csv_path)
verify_score = compute_total_score(df_verify)
print(f"Verified ensemble score: {verify_score:.6f}")

In [None]:
# Summary
# Collect the report lines first, then emit them in order; the printed text is
# the same as printing each line individually.
summary_rule = "=" * 60
summary_lines = [
    summary_rule,
    "EXPERIMENT 004 SUMMARY: Lattice + SA Optimization",
    summary_rule,
    f"\nWorkflow:",
    f"1. Lattice initial score: {initial_score:.6f}",
    f"2. After SA optimization: {optimized_score:.6f}",
    f"3. SA improvement: {initial_score - optimized_score:.6f}",
    f"\nComparison:",
    f"Baseline score: {baseline_score:.6f}",
    f"Optimized lattice: {optimized_score:.6f}",
    f"Ensemble score: {verify_score:.6f}",
    f"\nImprovement over baseline: {baseline_score - verify_score:.6f}",
    summary_rule,
]
for summary_line in summary_lines:
    print(summary_line)

In [None]:
# Model wrapper for submission
class LatticePlusSA:
    """Thin wrapper that exposes the saved ensemble submission as a "model".

    Args:
        data: Free-form label stored on the instance as ``self.data``; it is
            not otherwise used by this class.
        submission_path: CSV file that ``load_best`` reads.  Defaults to the
            location the notebook writes the ensemble to, so existing callers
            that pass only ``data`` behave exactly as before.
    """

    DEFAULT_SUBMISSION_PATH = '/home/submission/submission.csv'

    def __init__(self, data='single', submission_path=DEFAULT_SUBMISSION_PATH):
        self.data = data
        self.submission_path = submission_path

    def load_best(self):
        """Read and return the submission CSV as a DataFrame."""
        return pd.read_csv(self.submission_path)

    def save_submission(self, path):
        """Write the loaded submission to ``path`` (no index column) and return it."""
        df = self.load_best()
        df.to_csv(path, index=False)
        return df

# Create the notebook-level `model` instance of the wrapper (data='single').
model = LatticePlusSA(data='single')
print("Model wrapper defined")