# Experiment 008: BackPacking Approach

From kernel: crodoc/74-75-backpacking-christmas-trees

**Concept:** Start from N=200 and iterate backward. For each N, if the current solution is worse than the best seen so far, take the first N trees from the best larger configuration.

**Why it might work:** Good packing at N=200 often remains efficient when trees are removed. This can find hidden good solutions without introducing overlaps.

In [1]:
import json
import math
import os
import shutil

import numpy as np
import pandas as pd
from numba import njit

# Tree geometry: TX[j] / TY[j] are the x / y coordinates of the j-th of 15
# reference points of a single tree in its own unrotated frame (presumably the
# vertices of the tree outline -- confirm against the competition definition).
# score_group rotates these points by each tree's angle and shifts them by the
# tree's (x, y) position to measure the bounding box of a group.
TX = np.array([0, 0.125, 0.0625, 0.2, 0.1, 0.35, 0.075, 0.075, -0.075, -0.075, -0.35, -0.1, -0.2, -0.0625, -0.125])
TY = np.array([0.8, 0.5, 0.5, 0.25, 0.25, 0, 0, -0.2, -0.2, 0, 0, 0.25, 0.25, 0.5, 0.5])

@njit
def score_group(xs, ys, degs, tx, ty):
    """Score one group of trees.

    Every tree i is the point set (tx, ty) rotated by degs[i] degrees and
    translated by (xs[i], ys[i]). The score is the squared side of the
    axis-aligned bounding square of all transformed points, divided by the
    number of trees.
    """
    n = xs.size
    n_points = tx.size
    # Sentinels far outside any real coordinate so the first point wins.
    lo_x = 1e300
    lo_y = 1e300
    hi_x = -1e300
    hi_y = -1e300
    for tree in range(n):
        theta = degs[tree] * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        for k in range(n_points):
            px = cos_t * tx[k] - sin_t * ty[k] + xs[tree]
            py = sin_t * tx[k] + cos_t * ty[k] + ys[tree]
            lo_x = min(lo_x, px)
            hi_x = max(hi_x, px)
            lo_y = min(lo_y, py)
            hi_y = max(hi_y, py)
    width = hi_x - lo_x
    height = hi_y - lo_y
    side = max(width, height)
    return side * side / n

def get_group_data(df, n):
    """Return the coordinates of the N-tree group, ordered by tree index.

    Rows belonging to the group are those whose 'id' starts with the
    zero-padded group number followed by an underscore (e.g. '012_').

    The rows are ordered by the *numeric* tree index that follows the prefix.
    Sorting the 'id' column as plain strings would place '012_10' before
    '012_2', so any "first k trees" slice taken from the returned arrays would
    not correspond to tree indices 0..k-1.

    Parameters
    ----------
    df : DataFrame with string columns 'id', 'x', 'y', 'deg'. The 'x', 'y'
        and 'deg' values carry a one-character prefix that is stripped before
        conversion to float.
    n : group size / group number.

    Returns
    -------
    (xs, ys, degs, group) : three float arrays plus the ordered group rows.
    """
    prefix = f'{n:03d}_'
    group = df[df['id'].str.startswith(prefix)]
    # Numeric tree index; a stable argsort keeps duplicates in file order.
    tree_index = group['id'].str[len(prefix):].astype(int).to_numpy()
    group = group.iloc[np.argsort(tree_index, kind='stable')]
    xs = group['x'].str[1:].astype(float).values
    ys = group['y'].str[1:].astype(float).values
    degs = group['deg'].str[1:].astype(float).values
    return xs, ys, degs, group

def calculate_score_for_n(xs, ys, degs):
    """Score a single group using the module-level tree geometry (TX, TY)."""
    group_score = score_group(xs, ys, degs, TX, TY)
    return group_score

print("Functions defined")

Functions defined


In [2]:
# Work inside the experiment folder and read the validated candidate submission
os.chdir('/home/code/experiments/008_backpacking')
df = pd.read_csv('/home/code/submission_candidates/candidate_003.csv')

# Baseline: score of every group N = 1..200 as loaded from the file
initial_scores = {
    n: calculate_score_for_n(*get_group_data(df, n)[:3])
    for n in range(1, 201)
}

# Sum in group order; this is the figure to compare against the known score
initial_total = sum(initial_scores.values())
print(f"Initial total score: {initial_total:.6f}")
print(f"Expected: 70.622435")

Initial total score: 70.622435
Expected: 70.622435


In [3]:
# BackPacking approach:
# Walk N from 199 down to 1. For every larger group M (N < M <= 200), score the
# first N trees of M's current best layout. If the best such prefix beats N's
# own layout, adopt it for N.

print("Implementing BackPacking...")
print("="*60)

# n -> {'xs', 'ys', 'degs', 'score', 'source'}, seeded from the loaded submission
best_configs = {}
for n in range(1, 201):
    xs, ys, degs, group = get_group_data(df, n)
    best_configs[n] = {
        'xs': xs.copy(),
        'ys': ys.copy(),
        'degs': degs.copy(),
        'score': initial_scores[n],
        'source': f'original_{n}',
    }

# One record per group whose layout was replaced by a prefix of a larger group
improvements = []

# N = 199, 198, ..., 1 (N = 200 has no larger group to borrow from)
for n in reversed(range(1, 200)):
    current_score = best_configs[n]['score']
    best_score = current_score
    donor_m = None  # larger group whose prefix currently wins, if any

    for m in range(n + 1, 201):
        donor = best_configs[m]
        # A donor with fewer than n trees cannot supply an n-tree prefix
        if len(donor['xs']) < n:
            continue

        subset_score = calculate_score_for_n(
            donor['xs'][:n], donor['ys'][:n], donor['degs'][:n]
        )

        # Strict comparison: on ties the earliest (smallest) m is kept
        if subset_score < best_score:
            best_score = subset_score
            donor_m = m

    # No prefix was strictly better: keep the existing layout for this N
    if donor_m is None:
        continue

    donor = best_configs[donor_m]
    best_source = f'from_{donor_m}'
    improvement = current_score - best_score
    improvements.append({'n': n, 'improvement': improvement, 'source': best_source})
    # Copy the slices so the stored layout does not alias the donor's arrays
    best_configs[n] = {
        'xs': donor['xs'][:n].copy(),
        'ys': donor['ys'][:n].copy(),
        'degs': donor['degs'][:n].copy(),
        'score': best_score,
        'source': best_source,
    }
    if improvement > 0.0001:  # Only print significant improvements
        print(f"N={n:3d}: improved by {improvement:.6f} ({best_source})")

print(f"\nTotal improvements found: {len(improvements)}")

Implementing BackPacking...



Total improvements found: 0


In [4]:
# Collect the per-group score after BackPacking and total it
new_scores = {}
for n in range(1, 201):
    new_scores[n] = best_configs[n]['score']
new_total = sum(new_scores.values())

print(f"\nInitial total: {initial_total:.6f}")
print(f"New total: {new_total:.6f}")
print(f"Total improvement: {initial_total - new_total:.6f}")

# List the largest gains first (at most ten), only when there are any
if len(improvements) > 0:
    print(f"\nTop 10 improvements:")
    sorted_improvements = sorted(
        improvements,
        key=lambda record: record['improvement'],
        reverse=True,
    )
    for record in sorted_improvements[:10]:
        print(f"  N={record['n']:3d}: {record['improvement']:.6f} ({record['source']})")


Initial total: 70.622435
New total: 70.622435
Total improvement: 0.000000


In [5]:
# Rebuild the submission table: one row per tree, values written with the 's' prefix
new_rows = []

for n in range(1, 201):
    layout = best_configs[n]
    col_x, col_y, col_deg = layout['xs'], layout['ys'], layout['degs']
    # Index-based access on purpose: a layout with fewer than n trees raises IndexError
    new_rows.extend(
        {
            'id': f'{n:03d}_{i}',
            'x': f's{col_x[i]}',
            'y': f's{col_y[i]}',
            'deg': f's{col_deg[i]}',
        }
        for i in range(n)
    )

df_new = pd.DataFrame(new_rows)
print(f"New submission has {len(df_new)} rows")
print(df_new.head())

New submission has 20100 rows
      id                     x                      y                  deg
0  001_0  s-48.196086194214246    s58.770984615214225                s45.0
1  002_0   s0.1540970696213643  s-0.03854074269478543  s203.62937773065684
2  002_1  s-0.1540970696213643   s-0.5614592573052146  s23.629377730656792
3  003_0    s1.123655816140301     s0.781101815992563    s111.125132292893
4  003_1     s1.23405569584216     s1.275999500663759     s66.370622269343


In [6]:
# Re-score the rebuilt submission from its string form as a round-trip check
def calculate_total_score(df):
    """Return the sum of the group scores for groups 1..200 read from `df`."""
    running_total = 0.0
    for group_size in range(1, 201):
        coords = get_group_data(df, group_size)[:3]
        running_total += calculate_score_for_n(*coords)
    return running_total

final_score = calculate_total_score(df_new)
print(f"Final verified score: {final_score:.6f}")
print(f"Improvement: {initial_total - final_score:.6f}")

Final verified score: 70.622435
Improvement: 0.000000


In [7]:
# Save submission
# The file name and destination folder are named once so the local copy and
# the published copy cannot drift apart. `shutil` is imported with the other
# imports in the first code cell.
SUBMISSION_FILE = 'submission.csv'
SUBMISSION_DIR = '/home/submission'

# Written relative to the current working directory (the experiment folder)
df_new.to_csv(SUBMISSION_FILE, index=False)
print("Saved to submission.csv")

# Copy to submission folder, creating it on first use
os.makedirs(SUBMISSION_DIR, exist_ok=True)
shutil.copy(SUBMISSION_FILE, os.path.join(SUBMISSION_DIR, SUBMISSION_FILE))
print("Copied to /home/submission/submission.csv")

Saved to submission.csv
Copied to /home/submission/submission.csv


In [None]:
# Record the outcome of this experiment next to the submission
score_gain = initial_total - final_score
metrics = {
    'cv_score': final_score,
    'initial_score': initial_total,
    'improvement': score_gain,
    'n_improvements': len(improvements),
    'approach': 'BackPacking - take first N trees from larger configurations',
    'source': 'candidate_003.csv (validated LB: 70.622435)'
}

# Serialise first, then write the text in one go
metrics_text = json.dumps(metrics, indent=2)
with open('metrics.json', 'w') as metrics_file:
    metrics_file.write(metrics_text)

print(f"Metrics saved. Final CV Score: {final_score:.6f}")