# Evolver Loop 1 Analysis

## Current Status
- Best CV: 170.87 (greedy baseline with rotation optimization)
- Target: 68.92
- Gap: 101.95 points (the score must drop by ~60%; lower is better)

## Key Question
Can we compile and use the bbox3 C++ optimizer to achieve competitive scores?

In [1]:
import os
import json
import pandas as pd
import numpy as np

# Load session state and summarise how far the best experiment is from the target.
# The gap is reported as (best CV - target), i.e. the score has to go DOWN,
# so the "best" experiment is the one with the lowest cv_score.
TARGET_SCORE = 68.92

with open('/home/code/session_state.json', 'r') as f:
    state = json.load(f)

experiments = state['experiments']

print("Current experiments:")
for exp in experiments:
    print(f"  {exp['name']}: CV={exp['cv_score']:.2f}")

print(f"\nTarget: {TARGET_SCORE}")
if experiments:
    # Use the minimum over all experiments instead of assuming the first
    # entry is the best one; that assumption breaks as soon as a later
    # experiment improves on the baseline.
    best_cv = min(exp['cv_score'] for exp in experiments)
    print(f"Best CV: {best_cv:.2f}")
    print(f"Gap: {best_cv - TARGET_SCORE:.2f}")
else:
    # Avoid an IndexError when the session has no experiments yet.
    print("Best CV: n/a (no experiments recorded)")

Current experiments:
  001_greedy_baseline: CV=170.87

Target: 68.92
Best CV: 170.87
Gap: 101.95


In [2]:
# Check if we can compile C++ code (and whether OpenMP works)
import shutil
import subprocess

# Locate the g++ compiler on PATH (portable equivalent of `which g++`)
gxx_path = shutil.which('g++')
print(f"g++ location: {gxx_path or 'Not found'}")

# Minimal OpenMP program: one thread of the parallel region reports the
# team size, so a successful run proves both compilation and linking work.
test_code = '''
#include <omp.h>
#include <iostream>
int main() {
    #pragma omp parallel
    {
        #pragma omp single
        std::cout << "OpenMP threads: " << omp_get_num_threads() << std::endl;
    }
    return 0;
}
'''

if gxx_path is None:
    # Without this guard the subprocess calls below raise FileNotFoundError
    # and the cell dies with a traceback instead of a readable diagnosis.
    print("\nOpenMP compilation: SKIPPED (g++ not found on PATH)")
else:
    # Report the compiler version (first 200 characters of the banner)
    result = subprocess.run(['g++', '--version'], capture_output=True, text=True)
    print(f"\ng++ version:\n{result.stdout[:200]}")

    # Check if OpenMP is available by compiling and running the test program
    with open('/tmp/test_omp.cpp', 'w') as f:
        f.write(test_code)

    result = subprocess.run(
        ['g++', '-fopenmp', '/tmp/test_omp.cpp', '-o', '/tmp/test_omp'],
        capture_output=True,
        text=True,
    )
    if result.returncode == 0:
        print("\nOpenMP compilation: SUCCESS")
        result = subprocess.run(['/tmp/test_omp'], capture_output=True, text=True)
        print(f"OpenMP test: {result.stdout.strip()}")
    else:
        print("\nOpenMP compilation: FAILED")
        print(f"Error: {result.stderr}")

g++ location: /usr/bin/g++

g++ version:
g++ (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0
Copyright (C) 2019 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTAB

OpenMP compilation: SUCCESS
OpenMP test: OpenMP threads: 26


In [3]:
# Extract bbox3.cpp from the notebook
import json

with open('/home/code/research/kernels/jazivxt_why-not/why-not.ipynb', 'r') as f:
    notebook = json.load(f)

# Scan the code cells for the first one that contains the %%writefile magic
# for bbox3.cpp; everything after that cell's first line is the C++ source.
bbox3_code = None
for cell in notebook['cells']:
    if cell['cell_type'] != 'code':
        continue

    # A cell's source is stored either as a list of lines or as one string.
    source = cell['source']
    if isinstance(source, list):
        source = ''.join(source)

    if '%%writefile bbox3.cpp' not in source:
        continue

    # Drop the first line (the %%writefile magic command) and keep the rest.
    bbox3_code = source.partition('\n')[2]
    break

if not bbox3_code:
    print("Could not find bbox3.cpp in notebook")
else:
    print(f"Found bbox3.cpp code ({len(bbox3_code)} characters)")
    # Persist the extracted source so it can be compiled
    with open('/home/code/bbox3.cpp', 'w') as f:
        f.write(bbox3_code)
    print("Saved to /home/code/bbox3.cpp")

Found bbox3.cpp code (30933 characters)
Saved to /home/code/bbox3.cpp


In [4]:
# Compile bbox3.cpp
import subprocess

# Optimised C++17 build with OpenMP enabled; output binary goes next to the source.
compile_cmd = [
    'g++', '-O3', '-fopenmp', '-std=c++17',
    '/home/code/bbox3.cpp',
    '-o', '/home/code/bbox3',
]
result = subprocess.run(compile_cmd, capture_output=True, text=True)

if result.returncode != 0:
    print("Compilation FAILED:")
    # Only the first 2000 characters of the compiler diagnostics are shown.
    print(result.stderr[:2000])
else:
    print("Compilation SUCCESS!")
    # Make executable
    subprocess.run(['chmod', '+x', '/home/code/bbox3'])
    print("bbox3 binary created at /home/code/bbox3")

Compilation SUCCESS!
bbox3 binary created at /home/code/bbox3


In [5]:
# Test the bbox3 binary with a quick run
import subprocess
import shutil

# Wall-clock budget for the smoke test; used for both the message and the call.
RUN_TIMEOUT_SECONDS = 30


def _tail_text(captured, limit):
    """Return the last `limit` characters of captured process output.

    `captured` may be None (nothing captured), str, or bytes; the output
    attached to TimeoutExpired can be bytes even when text=True was
    requested, so bytes are decoded leniently.
    """
    if not captured:
        return ''
    if isinstance(captured, bytes):
        captured = captured.decode('utf-8', errors='replace')
    return captured[-limit:]


# Copy sample submission as starting point
shutil.copy('/home/data/sample_submission.csv', '/home/code/submission.csv')

# Run bbox3 with short timeout
print(f"Running bbox3 with n=100, r=42 for {RUN_TIMEOUT_SECONDS} seconds...")
try:
    result = subprocess.run(
        ['/home/code/bbox3', '-n', '100', '-r', '42'],
        capture_output=True,
        text=True,
        timeout=RUN_TIMEOUT_SECONDS,
        cwd='/home/code'
    )
    print("\nSTDOUT:")
    print(_tail_text(result.stdout, 2000))
    if result.stderr:
        print("\nSTDERR:")
        print(_tail_text(result.stderr, 500))
    if result.returncode != 0:
        # Surface failures that would otherwise look like a normal run.
        print(f"\nbbox3 exited with code {result.returncode}")
except subprocess.TimeoutExpired as exc:
    print("Timeout reached (expected for short test run)")
    # subprocess.run kills the child on timeout; whatever it printed before
    # that is attached to the exception. Show it instead of discarding it,
    # otherwise the run gives no indication of what the optimizer did.
    partial_stdout = _tail_text(exc.stdout, 2000)
    if partial_stdout:
        print("\nPartial STDOUT:")
        print(partial_stdout)
    partial_stderr = _tail_text(exc.stderr, 500)
    if partial_stderr:
        print("\nPartial STDERR:")
        print(partial_stderr)
except Exception as e:
    print(f"Error: {e}")

Running bbox3 with n=100, r=42 for 30 seconds...


Timeout reached (expected for short test run)


In [6]:
# Check the output submission
# Loads /home/code/submission.csv (if present), previews it, and sets up the
# imports and tree outline needed by the scoring helpers defined below.
# NOTE: `os` is not imported in this cell; it relies on the `import os` from
# the first cell having been executed.
import pandas as pd

if os.path.exists('/home/code/submission.csv'):
    df = pd.read_csv('/home/code/submission.csv')
    print(f"Submission rows: {len(df)}")
    print(f"\nFirst 10 rows:")
    print(df.head(10))
    
    # Calculate score
    # NOTE(review): Decimal is imported but not used anywhere in the visible
    # part of this cell.
    from decimal import Decimal
    from shapely.geometry import Polygon
    from shapely import affinity
    from shapely.ops import unary_union
    
    # Vertex coordinates (x in TX, y in TY, paired by index) of the tree
    # outline that get_tree_polygon turns into a shapely Polygon.
    TX = [0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125]
    TY = [0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5]
    
    # Unrotated, untranslated tree outline. Built once here instead of on
    # every get_tree_polygon call; the affinity functions return new
    # geometries, so this shared instance is never modified.
    _BASE_TREE_POLYGON = Polygon(zip(TX, TY))

    def get_tree_polygon(x, y, deg):
        """Return the tree outline rotated by `deg` and moved to (x, y).

        Args:
            x: Translation applied along the x axis after rotation.
            y: Translation applied along the y axis after rotation.
            deg: Rotation angle passed to shapely's affinity.rotate, applied
                about the origin (0, 0) before translating.

        Returns:
            A new shapely Polygon for the placed tree.
        """
        rotated = affinity.rotate(_BASE_TREE_POLYGON, deg, origin=(0, 0))
        return affinity.translate(rotated, xoff=x, yoff=y)
    
    def calculate_score(df, max_n=200):
        """Score a submission as the sum over n of (bounding-square side)^2 / n.

        For each n in 1..max_n the rows whose id starts with the zero-padded
        prefix 'NNN_' are placed as tree polygons; `side` is the longer edge
        of the axis-aligned bounding box enclosing all of them.

        Args:
            df: DataFrame with an 'id' column and 'x', 'y', 'deg' columns.
                Values may carry a leading 's' (e.g. 's0.202736'), which is
                stripped before parsing.
            max_n: Largest configuration size to score (default 200, the
                value that was previously hard-coded).

        Returns:
            The accumulated score. Configurations with no rows are skipped
            and contribute nothing, so a partial submission scores lower
            than a complete one would.
        """
        def _as_number(value):
            # Values are stored as strings with a leading 's'; strip it.
            return float(str(value).lstrip('s'))

        total = 0
        for n in range(1, max_n + 1):
            prefix = f'{n:03d}_'
            group = df[df['id'].str.startswith(prefix)]
            if len(group) == 0:
                continue

            # The bounding box of a union of shapes is the envelope of the
            # individual bounding boxes, so track the extremes directly
            # rather than building a (much more expensive) unary_union just
            # to read its bounds.
            min_x = min_y = float('inf')
            max_x = max_y = float('-inf')
            for raw_x, raw_y, raw_deg in zip(group['x'], group['y'], group['deg']):
                tree = get_tree_polygon(
                    _as_number(raw_x), _as_number(raw_y), _as_number(raw_deg)
                )
                left, bottom, right, top = tree.bounds
                min_x = min(min_x, left)
                min_y = min(min_y, bottom)
                max_x = max(max_x, right)
                max_y = max(max_y, top)

            side = max(max_x - min_x, max_y - min_y)
            total += (side ** 2) / n
        return total
    
    # Score the submission that was just loaded and report it to six decimals.
    score = calculate_score(df)
    print(f"\nCalculated score: {score:.6f}")
else:
    # Nothing to score: /home/code/submission.csv does not exist.
    print("No submission.csv found")

Submission rows: 20100

First 10 rows:
      id           x           y     deg
0  001_0        s0.0        s0.0   s90.0
1  002_0        s0.0        s0.0   s90.0
2  002_1   s0.202736  s-0.511271   s90.0
3  003_0        s0.0        s0.0   s90.0
4  003_1   s0.202736  s-0.511271   s90.0
5  003_2     s0.5206   s0.177413  s180.0
6  004_0        s0.0        s0.0   s90.0
7  004_1   s0.202736  s-0.511271   s90.0
8  004_2     s0.5206   s0.177413  s180.0
9  004_3  s-0.818657  s-0.228694  s180.0



Calculated score: 173.652299
