In [None]:
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate

# Tree template
# Outline of a single tree as 15 (x, y) vertices, listed from the tip at
# (0.0, 0.8) down the positive-x side to the trunk base at y = -0.2 and back up
# the negative-x side. The outline is mirror-symmetric about x = 0.
# create_tree_polygon rotates this outline about (0, 0) and then translates it,
# so the template's (0, 0) point is what ends up at a tree's (x, y) position.
TREE_TEMPLATE = [
    (0.0, 0.8), (0.125, 0.5), (0.0625, 0.5), (0.2, 0.25), (0.1, 0.25),
    (0.35, 0.0), (0.075, 0.0), (0.075, -0.2), (-0.075, -0.2), (-0.075, 0.0),
    (-0.35, 0.0), (-0.1, 0.25), (-0.2, 0.25), (-0.0625, 0.5), (-0.125, 0.5)
]

def parse_s_value(val):
    """Convert a raw cell value to ``float``, dropping a leading 's' if present.

    Args:
        val: Either a string (optionally starting with the letter 's') or
            anything else ``float()`` accepts.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the remaining text is not a valid float literal.
    """
    has_marker = isinstance(val, str) and val.startswith('s')
    # Only the first character is removed; everything after it must parse.
    payload = val[1:] if has_marker else val
    return float(payload)

def create_tree_polygon(x, y, angle):
    """Build the tree outline rotated by ``angle`` and placed at ``(x, y)``.

    Args:
        x: Horizontal offset applied after rotation.
        y: Vertical offset applied after rotation.
        angle: Rotation in degrees, applied about the template origin (0, 0).

    Returns:
        A shapely ``Polygon`` for the positioned tree.
    """
    outline = Polygon(TREE_TEMPLATE)
    # Rotate first, about the template's own origin, so the shift that follows
    # puts that origin exactly at (x, y).
    spun = rotate(outline, angle, origin=(0, 0), use_radians=False)
    return translate(spun, xoff=x, yoff=y)

def load_submission(path):
    """Read a submission CSV and convert its coordinate columns to floats.

    Args:
        path: Location of the CSV file; it must contain ``x``, ``y`` and
            ``deg`` columns.

    Returns:
        The loaded DataFrame with ``x``, ``y`` and ``deg`` passed through
        ``parse_s_value`` (which drops a leading 's' and converts to float).
    """
    frame = pd.read_csv(path)
    for column in ('x', 'y', 'deg'):
        frame[column] = frame[column].apply(parse_s_value)
    return frame

def calculate_score(df):
    """Total the bounding-square side lengths of every tree group in ``df``.

    For each N from 1 to 200, the rows whose ``id`` starts with ``"{N}_"`` are
    turned into tree polygons, and the group contributes the longer side of the
    axis-aligned box enclosing all of them. Groups with no rows are skipped.

    NOTE(review): contributions are raw side lengths (not squared, not
    normalised by N) -- confirm this is the metric you intend to compare.

    Args:
        df: Submission frame with ``id``, ``x``, ``y`` and ``deg`` columns.

    Returns:
        The sum of the per-group side lengths (``0`` when no group has rows).
    """
    side_lengths = []
    for n in range(1, 201):
        members = df[df['id'].str.startswith(f'{n}_')]
        if members.empty:
            continue

        # One (minx, miny, maxx, maxy) tuple per tree in this group.
        boxes = [
            create_tree_polygon(rec['x'], rec['y'], rec['deg']).bounds
            for _, rec in members.iterrows()
        ]
        xs = [edge for box in boxes for edge in (box[0], box[2])]
        ys = [edge for box in boxes for edge in (box[1], box[3])]

        width = max(xs) - min(xs)
        height = max(ys) - min(ys)
        side_lengths.append(max(width, height))

    return sum(side_lengths)

def check_overlaps(df, n):
    """Report whether any two trees in group ``n`` overlap.

    Args:
        df: Submission frame with ``id``, ``x``, ``y`` and ``deg`` columns.
        n: Group number; rows are selected by the ``"{n}_"`` id prefix.

    Returns:
        ``True`` as soon as some pair of trees overlaps or one contains the
        other; ``False`` otherwise, including when the group has no rows.
    """
    rows = df[df['id'].str.startswith(f'{n}_')]
    if rows.empty:
        return False

    shapes = [
        create_tree_polygon(rec['x'], rec['y'], rec['deg'])
        for _, rec in rows.iterrows()
    ]

    # Visit each unordered pair once: every tree against the ones after it.
    for idx, first in enumerate(shapes):
        for second in shapes[idx + 1:]:
            if first.overlaps(second) or first.contains(second) or second.contains(first):
                return True
    return False

def get_n_score(df, n):
    """Return the bounding-square side length for tree group ``n``.

    Every row whose ``id`` starts with ``"{n}_"`` is turned into a tree
    polygon; the result is the longer side of the axis-aligned box that
    encloses all of them.

    Args:
        df: Submission frame with ``id``, ``x``, ``y`` and ``deg`` columns.
        n: Group number to score.

    Returns:
        The larger of the enclosing box's width and height.

    Raises:
        ValueError: If ``df`` contains no rows for group ``n``.
    """
    group = df[df['id'].str.startswith(f'{n}_')]
    if group.empty:
        # Without this guard the min()/max() calls below fail with the opaque
        # "max() arg is an empty sequence"; say which group is missing instead.
        raise ValueError(f"No rows found for N={n}; cannot compute its score")

    # One (minx, miny, maxx, maxy) tuple per tree in the group.
    bounds = [
        create_tree_polygon(row['x'], row['y'], row['deg']).bounds
        for _, row in group.iterrows()
    ]
    all_x = [value for b in bounds for value in (b[0], b[2])]
    all_y = [value for b in bounds for value in (b[1], b[3])]
    return max(max(all_x) - min(all_x), max(all_y) - min(all_y))

# Marker output confirming that the helper definitions in this cell executed.
print("Functions defined")

In [None]:
# Load the C++ SA output
# load_submission converts x, y and deg to floats (dropping any leading 's').
output_df = load_submission('/home/code/experiments/019_cpp_sa/output.csv')
print(f"Output shape: {output_df.shape}")
print(output_df.head())

In [None]:
# Load our best submission for comparison
# Go through load_submission (not a bare pd.read_csv) so x / y / deg receive
# the same 's'-prefix parsing as output_df and reach the scoring and overlap
# helpers as floats rather than raw strings.
best_df = load_submission('/home/code/exploration/datasets/ensemble_best.csv')
print(f"Best shape: {best_df.shape}")
print(best_df.head())

In [None]:
# Calculate scores for both
# calculate_score sums the per-N bounding-square side lengths, and the later
# cells keep whichever total is smaller, so a lower score is the better one.
print("Calculating output score...")
output_score = calculate_score(output_df)
print(f"Output score: {output_score}")

print("\nCalculating best score...")
best_score = calculate_score(best_df)
print(f"Best score: {best_score}")

# Negative difference: the C++ SA output has the smaller total.
print(f"\nDifference: {output_score - best_score}")

In [None]:
# Check if N=63 improved
print("Checking N=63 specifically...")

# Get N=63 from both
output_63 = output_df[output_df['id'].str.startswith('63_')]
best_63 = best_df[best_df['id'].str.startswith('63_')]

# Calculate scores for N=63
# Reuse the get_n_score helper from the definitions cell instead of redefining
# it here: that helper reads the 'deg' column produced by load_submission, and
# keeping a single definition means later cells call the same implementation.
output_63_score = get_n_score(output_df, 63)
best_63_score = get_n_score(best_df, 63)

print(f"Output N=63 score: {output_63_score}")
print(f"Best N=63 score: {best_63_score}")
# Positive improvement: the output's N=63 bounding square is smaller.
print(f"Improvement: {best_63_score - output_63_score}")

In [None]:
# Check for overlaps in N=63
# check_overlaps returns True if any pair of trees in the group overlaps or one
# contains the other, and False otherwise (including for an empty group).
print("Checking overlaps in N=63...")
has_overlap = check_overlaps(output_df, 63)
print(f"N=63 has overlaps: {has_overlap}")

In [None]:
# Create ensemble: pick best valid solution for each N
print("Creating ensemble from output and best...")

ensemble_rows = []   # one DataFrame slice per N, concatenated below
improvements = []    # (n, side-length reduction) for every N taken from output

for n in range(1, 201):
    output_group = output_df[output_df['id'].str.startswith(f'{n}_')]
    best_group = best_df[best_df['id'].str.startswith(f'{n}_')]

    # Calculate scores
    output_n_score = get_n_score(output_df, n)
    best_n_score = get_n_score(best_df, n)

    # The pairwise overlap test is the expensive step, so it only runs when the
    # output group is strictly smaller; `and` short-circuits otherwise. Only
    # the output group is validated here -- best_df rows are taken as-is.
    if output_n_score < best_n_score and not check_overlaps(output_df, n):
        # Use output
        ensemble_rows.append(output_group)
        improvements.append((n, best_n_score - output_n_score))
    else:
        # Use best
        ensemble_rows.append(best_group)

ensemble_df = pd.concat(ensemble_rows, ignore_index=True)
print(f"Ensemble shape: {ensemble_df.shape}")
print(f"\nImprovements found: {len(improvements)}")
for n, imp in improvements:
    print(f"  N={n}: improved by {imp:.6f}")

In [None]:
# Calculate final ensemble score
# Uses the same calculate_score total as the earlier comparison, so the numbers
# printed here are directly comparable with best_score.
print("\nCalculating ensemble score...")
ensemble_score = calculate_score(ensemble_df)
print(f"Ensemble score: {ensemble_score}")
print(f"Best score: {best_score}")
# Positive improvement: the ensemble total is lower than the previous best.
print(f"Improvement: {best_score - ensemble_score}")

In [None]:
# Save ensemble if improved
# Either branch (re)writes /home/submission/submission.csv; the experiment-local
# ensemble_cpp_sa.csv is only written when the ensemble total is strictly lower.
# NOTE(review): rows taken from output_df hold plain floats (load_submission
# drops any leading 's'), so they are written without that prefix -- confirm
# that whatever consumes submission.csv accepts this format.
if ensemble_score < best_score:
    ensemble_df.to_csv('/home/code/experiments/019_cpp_sa/ensemble_cpp_sa.csv', index=False)
    print("Saved ensemble to ensemble_cpp_sa.csv")
    
    # Also save to submission folder
    ensemble_df.to_csv('/home/submission/submission.csv', index=False)
    print("Saved to /home/submission/submission.csv")
else:
    # No strict improvement: write the previous best back out unchanged.
    print("No improvement, keeping best submission")
    best_df.to_csv('/home/submission/submission.csv', index=False)
    print("Saved best to /home/submission/submission.csv")

In [None]:
# Save metrics
import json

# Lower totals are better: report the ensemble total only when it is strictly
# below the previous best, otherwise the previous best itself.
reported_score = min(best_score, ensemble_score)

metrics = {
    'cv_score': reported_score,
    'output_score': output_score,
    'best_score': best_score,
    'improvements': len(improvements)
}

with open('/home/code/experiments/019_cpp_sa/metrics.json', 'w') as metrics_file:
    json.dump(metrics, metrics_file)

print(f"Metrics saved: {metrics}")