# Loop 11 Analysis: Understanding the Gap and Finding New Approaches

## Current Status
- Best CV/LB: 70.659944 (crodoc ensemble)
- Target: 68.919154
- Gap: 1.74 points (2.46% of the current best score)

## Key Finding from Loop 10
The crodoc ensemble loaded 694 CSV files and improved on the baseline by only 0.000015.
This indicates that ALL public pre-optimized solutions converge to the same local optimum.

In [None]:
import pandas as pd
import numpy as np
import os
import glob

# Read the best-known baseline solution into `df`; later cells inspect its
# columns and derive a renamed copy from it.
baseline_path = '/home/code/external_data/saspav_latest/santa-2025.csv'
df = pd.read_csv(baseline_path)
n_baseline_rows = len(df)
print(f"Loaded baseline with {n_baseline_rows} rows")

In [None]:
# Check if eazy optimizer is compiled and what input it uses
import re
import subprocess

eazy_path = '/home/code/research/kernels/jazivxt_eazy-optimizer/eazy'
if os.path.exists(eazy_path):
    print(f"eazy optimizer exists at {eazy_path}")
    # Ask the `file` utility to describe the existing binary.
    result = subprocess.run(['file', eazy_path], capture_output=True, text=True)
    print(result.stdout)
else:
    print("eazy optimizer not found, need to compile")

# Check the source
eazy_cpp = '/home/code/research/kernels/jazivxt_eazy-optimizer/eazy.cpp'
if os.path.exists(eazy_cpp):
    print("\neazy.cpp exists, checking input path...")
    # Read the whole source first so the file handle is released before
    # any further processing.
    with open(eazy_cpp, 'r') as f:
        content = f.read()

    # Find the first loadCSV("...") call and report the path it loads.
    match = re.search(r'loadCSV\("([^"]+)"\)', content)
    if match:
        print(f"Input path: {match.group(1)}")
    else:
        # Make the negative result visible instead of printing nothing.
        print("No loadCSV(\"...\") call found in eazy.cpp")

In [None]:
# Report, for every entry under the experiments directory (in sorted
# order), whether it contains a submission.csv file.
print("\nExperiment history:")
exp_dirs = sorted(glob.glob('/home/code/experiments/*'))
for exp_dir in exp_dirs:
    exp_name = os.path.basename(exp_dir)
    sub_path = os.path.join(exp_dir, 'submission.csv')
    print(
        f"  {exp_name}: has submission.csv"
        if os.path.exists(sub_path)
        else f"  {exp_name}: no submission"
    )

In [None]:
# Summary of the search so far: approaches already attempted (with their
# outcome) followed by the approaches that remain untested.
approaches_tried = [
    "1. SA optimization (sa_v1_parallel, bbox3) - NO improvement",
    "2. Fractional translation - NO improvement",
    "3. Backward propagation - NO improvement",
    "4. Lattice SA (jiweiliu kernel) - WORSE results",
    "5. Row-based approach (zaburo kernel) - WORSE results",
    "6. Ensemble of all solutions (crodoc kernel) - 0.000015 improvement",
    "7. Corner extraction (chistyakov kernel) - tried in jiweiliu",
    "8. Perturbation + re-optimization - causes collisions",
]
approaches_not_tried = [
    "1. eazy-optimizer C++ code directly (uses orbital moves, square pressure)",
    "2. Very long optimization runs (hours, not minutes)",
    "3. Random restart strategy (different starting points)",
    "4. nikitakuznetsof just-luck kernel (basin hopping, gradient descent)",
    "5. hardikmakhija dimer-mosaic-solver (different approach?)",
]

print("\nApproaches tried:")
for entry in approaches_tried:
    print(entry)

print("\nApproaches NOT tried:")
for entry in approaches_not_tried:
    print(entry)

In [None]:
# The eazy-optimizer uses fundamentally different techniques:
# 1. Complex Orbital Moves - rotates positions in complex plane
# 2. Square Calculus Pressure - log-barrier gradient to push trees toward center
# 3. Elastic Pulse - periodic squeeze and relax
# 4. Multi-scale optimization - runs at scales 1e-3, 1e-5, 1e-7, 1e-9

# Let's compile and run it on our baseline
print("Compiling eazy optimizer...")
# Work from the kernel directory: the cell that patches the source opens
# 'eazy.cpp' and writes 'eazy_modified.cpp' by relative path.
os.chdir('/home/code/research/kernels/jazivxt_eazy-optimizer')

# The source itself is not patched here. An earlier version of this cell
# read eazy.cpp and rewrote the input path, but that result was discarded:
# the patching cell re-reads the file and redoes the replacement. Only the
# column check is needed at this point.

# Handle the 'angle' vs 'deg' column issue
# The eazy.cpp expects 'deg' column but our baseline might have 'angle'
print("Checking baseline column names...")
print(df.columns.tolist())

In [None]:
# eazy.cpp is expected to read a 'deg' column while the baseline may name
# it 'angle'. Build a copy of the baseline whose column is called 'deg'
# (only when 'angle' is present and 'deg' is not) and write it next to
# the original file.
needs_rename = 'angle' in df.columns and 'deg' not in df.columns
df_modified = df.rename(columns={'angle': 'deg'}) if needs_rename else df.copy()

# Save to a temp file
temp_input = '/home/code/external_data/saspav_latest/santa-2025-deg.csv'
df_modified.to_csv(temp_input, index=False)
print(f"Saved modified baseline to {temp_input}")
modified_columns = df_modified.columns.tolist()
print(f"Columns: {modified_columns}")

In [None]:
# Now modify eazy.cpp to use our input
with open('eazy.cpp', 'r') as f:
    content = f.read()

# Replace the input path
original_load_call = 'loadCSV("/kaggle/input/why-not/submission.csv")'
new_content = content.replace(original_load_call, f'loadCSV("{temp_input}")')

# str.replace is a silent no-op when the literal is absent. In that case the
# generic '/kaggle/input' rewrite below would still change the path, but to
# something other than temp_input, so flag it before compiling.
if original_load_call not in content:
    print(
        "WARNING: expected loadCSV call not found in eazy.cpp; "
        f"the compiled binary will NOT read {temp_input}"
    )

# Also replace any other kaggle paths
new_content = new_content.replace('/kaggle/input', '/home/code/external_data')
new_content = new_content.replace('/kaggle/working', '/home/code/experiments/016_eazy_optimizer')

# Save modified version
with open('eazy_modified.cpp', 'w') as f:
    f.write(new_content)

print("Modified eazy.cpp saved to eazy_modified.cpp")

In [None]:
# Build eazy_modified.cpp with g++ (-O3, OpenMP enabled) and place the
# binary in the experiment directory, creating that directory if needed.
import subprocess

os.makedirs('/home/code/experiments/016_eazy_optimizer', exist_ok=True)

compile_cmd = [
    'g++',
    '-O3',
    '-fopenmp',
    'eazy_modified.cpp',
    '-o',
    '/home/code/experiments/016_eazy_optimizer/eazy',
]
result = subprocess.run(compile_cmd, capture_output=True, text=True)

# A non-zero exit status means the compiler rejected the source; show its
# diagnostics in that case.
if result.returncode != 0:
    print("Compilation failed:")
    print(result.stderr)
else:
    print("Compilation successful!")

In [None]:
# Run the eazy optimizer (this will take a while)
import time

os.chdir('/home/code/experiments/016_eazy_optimizer')

print("Running eazy optimizer...")
print("This uses multi-scale optimization at scales: 1e-3, 1e-5, 1e-7, 1e-9")
print("Each N gets up to 20 seconds of optimization per scale")
print("Total expected time: ~4 scales * 200 N * 20 sec = ~4.4 hours")
print("\nStarting optimization (will run for a limited time)...")

start_time = time.time()

# Run with timeout. The full run is estimated to take hours, so hitting the
# 10 minute limit is the expected outcome. subprocess.run raises
# TimeoutExpired in that case; catch it so the elapsed time and whatever
# output was captured are still reported instead of aborting the cell.
timed_out = False
try:
    result = subprocess.run(
        ['./eazy'],
        capture_output=True, text=True,
        timeout=600  # 10 minute timeout for testing
    )
    run_output = result.stdout
except subprocess.TimeoutExpired as exc:
    timed_out = True
    # The captured output on the exception may be None (nothing observed)
    # or bytes even though text=True was requested; normalise to str.
    run_output = exc.stdout or ""
    if isinstance(run_output, bytes):
        run_output = run_output.decode(errors="replace")

elapsed = time.time() - start_time
print(f"\nRan for {elapsed:.1f} seconds")
if timed_out:
    print("Optimizer was stopped by the 600 second timeout")
print("\nOutput:")
# Show only the tail; slicing already returns the whole string when it is
# shorter than 2000 characters.
print(run_output[-2000:])

In [None]:
# Report whether a submission.csv exists in the current working directory.
# When it does, load it and show its row count and column names (no score
# is computed here).
if not os.path.exists('submission.csv'):
    print("No submission.csv created yet")
else:
    print("submission.csv created!")

    df_result = pd.read_csv('submission.csv')
    n_result_rows = len(df_result)
    print(f"Rows: {n_result_rows}")
    print(f"Columns: {df_result.columns.tolist()}")