# Delta Calibration Notebook

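This notebook calibrates the weights of the δ (delta) metric against the S1 and S2 task corpora, searching for the weights that maximize the correlation between each task's δ and its expected-failure incident flag.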


In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.optimize import minimize
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error

# Load task data
def load_tasks(s1_dir="corpus/s1", s2_dir="corpus/s2"):
    """Load every ``*.json`` task file found directly inside the two corpus directories.

    Files from ``s1_dir`` are loaded first, followed by files from ``s2_dir``.
    Within each directory the files are read in sorted path order so that the
    returned list is reproducible from run to run.

    Args:
        s1_dir: Directory holding the S1 task JSON files.
        s2_dir: Directory holding the S2 task JSON files.

    Returns:
        A list containing the parsed JSON content of each task file.
    """
    tasks = []

    for task_dir in (s1_dir, s2_dir):
        # Path.glob yields entries in arbitrary, filesystem-dependent order;
        # sorting keeps the task order (and any list built from it) stable.
        for task_file in sorted(Path(task_dir).glob("*.json")):
            # Read as UTF-8 explicitly instead of relying on the locale default.
            with open(task_file, 'r', encoding='utf-8') as f:
                tasks.append(json.load(f))

    return tasks

# Read both corpora from their default directories and report how many tasks were found.
tasks = load_tasks()
task_count = len(tasks)
print(f"Loaded {task_count} tasks")


In [None]:
# Calculate delta for each task
def _coverage_ratio(task, key, target):
    """Return ``len(task[key]) / target`` capped at 1.0, treating a missing or null field as empty."""
    # ``or ()`` makes an explicit JSON null behave like a missing key instead
    # of raising TypeError inside len().
    items = task.get(key) or ()
    return min(len(items) / target, 1.0)


def calculate_task_delta(task, weights):
    """Calculate delta for a single task with given weights.

    Each dimension gets a score in [0, 1]; delta is the weighted sum of the
    shortfalls ``1 - score`` over the five dimensions.

    Args:
        task: Mapping of task data. The ``constraints``, ``test_cases`` and
            ``obligations`` entries are read; each may be absent or null.
        weights: Mapping with the keys ``'H'``, ``'E'``, ``'K'``, ``'A'`` and
            ``'J'`` giving the weight of each dimension.

    Returns:
        The weighted delta value for the task.

    Raises:
        KeyError: If ``weights`` is missing one of the five dimension keys.
    """
    # A dimension counts as fully covered once it reaches its target item count.
    h_score = _coverage_ratio(task, 'constraints', 5)
    e_score = _coverage_ratio(task, 'test_cases', 3)
    k_score = _coverage_ratio(task, 'obligations', 5)
    a_score = 0.5  # Mock artifact quality
    j_score = 0.5  # Mock journal quality

    # Weighted sum of per-dimension shortfalls.
    delta = (
        (1.0 - h_score) * weights['H'] +
        (1.0 - e_score) * weights['E'] +
        (1.0 - k_score) * weights['K'] +
        (1.0 - a_score) * weights['A'] +
        (1.0 - j_score) * weights['J']
    )

    return delta

# Starting point for the calibration: one weight per dimension
initial_weights = {'H': 0.3, 'E': 0.3, 'K': 0.2, 'A': 0.1, 'J': 0.1}

# Three parallel lists, one entry per loaded task and in the same order:
# the delta under the initial weights, a 0/1 incident flag taken from the
# truthiness of 'expected_fail', and the task identifier.
deltas = [calculate_task_delta(entry, initial_weights) for entry in tasks]
incidents = [int(bool(entry.get('expected_fail'))) for entry in tasks]
task_ids = [entry['id'] for entry in tasks]

# Summary statistics for a quick sanity check
delta_count = len(deltas)
print(f"Calculated deltas for {delta_count} tasks")
print(f"Mean delta: {np.mean(deltas):.3f}")
print(f"Mean incidents: {np.mean(incidents):.3f}")


In [None]:
# Scatter each task's delta against its incident flag
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(deltas, incidents, alpha=0.7)
ax.set_xlabel('Delta (δ)')
ax.set_ylabel('Incidents')
ax.set_title('Delta vs Incidents Correlation')
ax.grid(True, alpha=0.3)

# Overlay the degree-1 least-squares fit as a dashed red line
trend = np.poly1d(np.polyfit(deltas, incidents, 1))
ax.plot(deltas, trend(deltas), "r--", alpha=0.8)

plt.show()

# Pearson correlation between delta and incidents under the initial weights
correlation, p_value = pearsonr(deltas, incidents)
print(f"Correlation: {correlation:.3f} (p-value: {p_value:.3f})")


In [None]:
# Optimize weights to maximize correlation
def objective(weights_array):
    """Objective function for the optimizer: negated absolute Pearson correlation.

    Recomputes the delta of every task in the module-level ``tasks`` list with
    the candidate weights and correlates the result with the module-level
    ``incidents`` list. Minimizing the returned value maximizes the
    *magnitude* of the correlation, regardless of its sign.

    Args:
        weights_array: Sequence of five weights in the order H, E, K, A, J.

    Returns:
        ``-abs(correlation)``, or ``0.0`` when the correlation is undefined.
    """
    weights = dict(zip(('H', 'E', 'K', 'A', 'J'), weights_array))

    # Calculate deltas with the candidate weights
    new_deltas = [calculate_task_delta(task, weights) for task in tasks]

    correlation, _ = pearsonr(new_deltas, incidents)

    # pearsonr yields NaN when one input is constant (for example when all
    # weight sits on the constant mock A/J dimensions). NaN would poison the
    # optimizer's search, so score such a point as "no correlation" instead.
    if not np.isfinite(correlation):
        return 0.0

    # Return negative |correlation| (minimize negative = maximize magnitude)
    return -abs(correlation)

# Feasible region: the five weights sum to exactly 1.0 (equality constraint)
# and each weight lies in [0, 1] (box bounds).
constraints = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1.0}]
bounds = [(0.0, 1.0)] * 5

# Starting point for the search, in H, E, K, A, J order
x0 = [0.3, 0.3, 0.2, 0.1, 0.1]

# Run the constrained minimization of the objective
result = minimize(
    objective,
    x0,
    method='SLSQP',
    bounds=bounds,
    constraints=constraints,
)

# Map the solution vector back onto the dimension names
optimized_weights = dict(zip(('H', 'E', 'K', 'A', 'J'), result.x))

print("Optimized weights:")
for dim, weight in optimized_weights.items():
    print(f"  {dim}: {weight:.3f}")

print(f"\nOptimization success: {result.success}")
# The objective value is negated here before it is reported
print(f"Final correlation: {-result.fun:.3f}")


In [None]:
# Persist the optimized weights as indented JSON in the working directory
weights_file = Path("weights.json")
with weights_file.open('w') as f:
    json.dump(optimized_weights, f, indent=2)

print(f"Optimized weights saved to: {weights_file}")

# Recompute every task's delta with the optimized weights and correlate
# the result with the incident flags
final_deltas = [calculate_task_delta(entry, optimized_weights) for entry in tasks]

final_correlation, final_p_value = pearsonr(final_deltas, incidents)
print(f"Final correlation: {final_correlation:.3f} (p-value: {final_p_value:.3f})")

# The target is a correlation magnitude of at least 0.5
target_met = abs(final_correlation) >= 0.5
print(
    "✅ Correlation target achieved (|ρ| ≥ 0.5)"
    if target_met
    else "❌ Correlation target not achieved (|ρ| < 0.5)"
)
