# Analysis of Learning Rate Scaling in Entropy Experiments

This notebook analyzes how deltaH_true scales with learning rate across multiple experimental runs.

In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd

# Notebook-wide plotting defaults: seaborn dark-grid theme, 12x8 figures, 11pt text
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 11,
})

In [None]:
# Read results.json from each expected run directory (run_01 ... run_08).
# Missing runs are reported and skipped rather than treated as an error.
results_dir = Path('..')  # parent directory, since this notebook sits in notebooks/
runs_data = {}

for run_id in range(1, 9):
    run_name = f'run_{run_id:02d}'
    run_dir = results_dir / run_name
    results_file = run_dir / 'results.json'

    if not results_file.exists():
        print(f"Warning: {run_name} not found")
        continue

    with open(results_file, 'r') as f:
        runs_data[run_name] = json.load(f)
    print(f"Loaded {run_name}")

print(f"\nTotal runs loaded: {len(runs_data)}")

In [None]:
# Extract eta values and |deltaH_true| for every loaded run.
# NOTE(review): the consecutive-ratio cells below presumably rely on each run's
# 'sweep' list already being ordered by increasing eta -- confirm against the
# code that writes results.json.
all_results = {}

for run_name, data in runs_data.items():
    sweep = data['sweep']
    all_results[run_name] = {
        'etas': np.array([item['eta'] for item in sweep]),
        # Only the magnitude is kept; the sign of deltaH_true is discarded here.
        'deltaH_true': np.array([abs(item['deltaH_true']) for item in sweep]),
    }

# Fail with an actionable message instead of an opaque IndexError on the
# sample-run lookup below when nothing was loaded.
if not all_results:
    raise RuntimeError(
        "No runs were loaded, so there is nothing to analyze. "
        "Check that the run_XX/results.json files exist relative to this notebook."
    )

# Check the eta values (should be powers of 2)
# The first loaded run serves as the representative sweep.
sample_run = next(iter(all_results.values()))
sample_etas = sample_run['etas']
print("Learning rates (eta):")
for i, eta in enumerate(sample_etas):
    if i > 0:
        ratio = eta / sample_etas[i-1]
        print(f"  {eta:.2e} (ratio to previous: {ratio:.1f})")
    else:
        print(f"  {eta:.2e}")

In [None]:
# Calculate ratios of deltaH_true between consecutive etas.
# Entry k of each list is deltaH[k+1] / deltaH[k], i.e. it belongs to the step
# from the k-th to the (k+1)-th learning rate of the sweep.
doubling_ratios = {}

for run_name, data in all_results.items():
    deltaH = data['deltaH_true']
    # A zero denominator yields a NaN placeholder instead of being skipped.
    # Skipping would shorten the list, shifting every later ratio onto the wrong
    # eta pair and making runs unequal in length (which pd.DataFrame rejects).
    doubling_ratios[run_name] = [
        deltaH[i] / deltaH[i-1] if deltaH[i-1] > 0 else np.nan
        for i in range(1, len(deltaH))
    ]

# Create DataFrame for easier visualization: one row per run, one column per eta pair
df_ratios = pd.DataFrame(doubling_ratios).T
df_ratios.columns = [f'η_{i+1}/η_{i}' for i in range(len(df_ratios.columns))]

print("Ratios of deltaH_true between consecutive learning rates:")
print("(Should be ~2 if deltaH_true scales linearly with η)\n")
print(df_ratios.round(2))
# pandas mean/std skip NaN entries by default, so placeholders do not poison the summaries
print("\nMean ratios across runs:")
print(df_ratios.mean().round(2))
print("\nStd of ratios across runs:")
print(df_ratios.std().round(2))

In [None]:
# Plot 1: Bar chart showing deviation from perfect doubling
fig, ax = plt.subplots(figsize=(14, 6))

# Calculate deviation from 2.0 for each ratio
deviations = df_ratios - 2.0

# One group of bars per consecutive-η pair, one bar per run inside each group.
x = np.arange(len(df_ratios.columns))
n_runs = len(deviations)
# Each group occupies 0.8 of the tick spacing, so groups cannot overlap however
# many runs were loaded (8 runs gives the previous bar width of 0.1).
width = 0.8 / max(n_runs, 1)

for i, (run_name, values) in enumerate(deviations.iterrows()):
    # Offsets run symmetrically from -(n_runs-1)/2 to +(n_runs-1)/2 bar widths,
    # which centres each group on its tick.
    offset = (i - (n_runs - 1) / 2) * width
    ax.bar(x + offset, values, width, label=run_name, alpha=0.8)

ax.axhline(y=0, color='black', linestyle='--', alpha=0.5, label='Perfect doubling (ratio=2)')
ax.set_xlabel('Consecutive η pairs')
ax.set_ylabel('Deviation from perfect doubling (ratio - 2.0)')
ax.set_title('How close is deltaH_true to doubling with each doubling of η?')
ax.set_xticks(x)
ax.set_xticklabels(df_ratios.columns)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Plot 2: deltaH_true against eta for every run, shown in two side-by-side panels
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
line_kwargs = dict(alpha=0.7, markersize=6)

for run_name, data in all_results.items():
    run_etas = data['etas']
    run_deltaH = data['deltaH_true']

    # Left panel: raw magnitudes on log-log axes
    ax1.loglog(run_etas, run_deltaH, 'o-', label=run_name, **line_kwargs)

    # Right panel: deltaH/eta, rescaled by its own first value
    ratio = run_deltaH / run_etas
    ax2.semilogx(run_etas, ratio / ratio[0], 'o-', label=run_name, **line_kwargs)

# Slope-1 guide on the left panel, anchored at the sample run's first point
etas_ref = sample_run['etas']
deltaH_ref = sample_run['deltaH_true'][0] * (etas_ref / etas_ref[0])
ax1.loglog(etas_ref, deltaH_ref, 'k--', alpha=0.5, label='Perfect linear scaling', linewidth=2)

# Flat guide at 1 on the right panel
ax2.axhline(y=1, color='black', linestyle='--', alpha=0.5, label='Perfect linear scaling')

ax1.set(
    xlabel='Learning rate (η)',
    ylabel='|deltaH_true|',
    title='deltaH_true vs Learning Rate (log-log scale)',
)
ax2.set(
    xlabel='Learning rate (η)',
    ylabel='(deltaH/η) normalized to first point',
    title='Normalized deltaH/η Ratio vs Learning Rate',
)

for panel in (ax1, ax2):
    panel.legend(fontsize=9)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Plot 3: Box plot of doubling ratios
fig, ax = plt.subplots(figsize=(12, 6))

# One sample per consecutive-η column. Missing values (NaN), if any, are dropped
# so the box statistics are computed from real ratios only.
box_data = [df_ratios[col].dropna().values for col in df_ratios.columns]

# Tick labels are set separately: the `labels=` keyword of boxplot is deprecated
# in recent Matplotlib releases, while set_xticklabels works on old and new ones.
bp = ax.boxplot(box_data, patch_artist=True)
ax.set_xticklabels(df_ratios.columns)

# Color the boxes
for patch in bp['boxes']:
    patch.set_facecolor('lightblue')
    patch.set_alpha(0.7)

# Add a horizontal line at y=2
ax.axhline(y=2, color='red', linestyle='--', alpha=0.7, label='Expected ratio (2.0) for linear scaling')

# Overlay the individual ratios with a small horizontal jitter. A locally seeded
# generator keeps the figure identical across re-runs, and the jitter is stored
# under its own name so the `x` tick positions from Plot 1 are not overwritten.
jitter_rng = np.random.default_rng(0)
for position, sample in enumerate(box_data, start=1):
    jitter_x = jitter_rng.normal(position, 0.04, size=len(sample))
    ax.scatter(jitter_x, sample, alpha=0.6, s=30, color='darkblue')

ax.set_xlabel('Consecutive η pairs')
ax.set_ylabel('Ratio of deltaH_true values')
ax.set_title('Distribution of deltaH Doubling Ratios Across Runs')
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

In [None]:
# Per-column descriptive statistics of the doubling ratios
print("Summary Statistics for Doubling Ratios\n" + "="*40)
print("\nExpected ratio for perfect linear scaling: 2.00\n")

for col, values in df_ratios.items():
    mean_ratio = values.mean()
    stat_rows = (
        ("Mean:", mean_ratio),
        ("Median:", values.median()),
        ("Std:", values.std()),
        ("Min:", values.min()),
        ("Max:", values.max()),
    )

    print(f"\n{col}:")
    for stat_label, stat_value in stat_rows:
        # Labels are left-padded to 8 characters so the numbers line up
        print(f"  {stat_label:<8}{stat_value:.3f}")
    # Relative distance of the column mean from the ideal ratio of 2
    print(f"  % deviation from 2.0: {abs(mean_ratio - 2.0) / 2.0 * 100:.1f}%")

In [None]:
# Report, per run, which eta steps stray from the ideal ratio of 2 and from
# which step onward the deviation persists to the end of the sweep
print("Analysis of Linear Scaling Breakdown\n" + "="*40)

threshold_deviation = 0.2  # 20% deviation from expected ratio of 2

for run_name, run_data in all_results.items():
    ratios = doubling_ratios[run_name]
    etas = run_data['etas']

    # Relative distance of every ratio from 2, and whether it exceeds the threshold
    rel_devs = [abs(r - 2.0) / 2.0 for r in ratios]
    flagged = [dev > threshold_deviation for dev in rel_devs]

    print(f"\n{run_name}:")
    for i, (ratio, deviation) in enumerate(zip(ratios, rel_devs)):
        if flagged[i]:
            print(f"  Significant deviation at η_{i+1}={etas[i+1]:.2e}: ratio={ratio:.2f} ({deviation*100:.1f}% off)")

    # Earliest step from which every remaining ratio is flagged, if there is one
    onset = next((i for i in range(len(flagged)) if all(flagged[i:])), None)
    if onset is not None:
        print(f"  Linear scaling breaks down starting at η={etas[onset+1]:.2e}")
    elif any(flagged[-2:]):
        # No persistent tail, but one of the last two steps is still off
        print("  Linear scaling degrades at higher learning rates")

In [None]:
# Linear fitting analysis - excluding η_0 and η_6, η_7 where numerical errors or quadratic effects dominate
from scipy import stats
import numpy as np

print("Linear Fitting Analysis (Excluding Problematic Regions)\n" + "="*60)
print("Excluding: η_0 (numerical precision errors) and η_6, η_7 (quadratic effects)")
print()

# Sweep positions used for the fit: indices 1-5 (η_1 through η_5); index 0 and
# everything from index 6 onward are left out.
valid_indices = slice(1, 6)

# Store fitted parameters for each run
fit_results = []

# Size the grid from the number of runs instead of assuming exactly eight:
# four panels per row, as many rows as needed (8 runs -> the previous 2x4, 16x8 layout).
n_runs = len(all_results)
n_cols = 4
n_rows = max(1, -(-n_runs // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 4 * n_rows), squeeze=False)
axes = axes.flatten()

for idx, (run_name, data) in enumerate(all_results.items()):
    ax = axes[idx]
    etas = data['etas']
    deltaH = data['deltaH_true']

    etas_fit = etas[valid_indices]
    deltaH_fit = deltaH[valid_indices]

    # A line needs at least two points; skip short sweeps with a message rather
    # than letting linregress raise and abort the whole cell.
    if len(etas_fit) < 2:
        print(f"Skipping {run_name}: only {len(etas_fit)} point(s) in the fit window")
        ax.set_visible(False)
        continue

    # Perform linear regression using scipy.stats
    res = stats.linregress(etas_fit, deltaH_fit)

    slope = res.slope
    intercept = res.intercept
    r2 = res.rvalue**2  # R-squared is the square of correlation coefficient

    fit_results.append({
        'run': run_name,
        'slope': slope,
        'intercept': intercept,
        'r2': r2,
        'stderr': res.stderr,
        'pvalue': res.pvalue
    })

    # Plot all points
    ax.scatter(etas, deltaH, alpha=0.5, color='gray', label='All data', s=30)

    # Highlight fitted points
    ax.scatter(etas_fit, deltaH_fit, color='blue', label='Fitted points', s=50)

    # Plot fitted line extended to η=0
    eta_range = np.linspace(0, etas[-1], 100)
    deltaH_pred = slope * eta_range + intercept
    ax.plot(eta_range, deltaH_pred, 'r-', alpha=0.7, label=f'Fit: y = {slope:.1f}η + {intercept:.2e}')

    # Mark the y-intercept
    ax.scatter(0, intercept, color='green', s=100, marker='*', zorder=5,
               label=f'Y-intercept: {intercept:.2e}')

    ax.set_xlabel('Learning rate (η)')
    ax.set_ylabel('|deltaH_true|')
    ax.set_title(f'{run_name}: R² = {r2:.4f}')
    ax.grid(True, alpha=0.3)
    ax.legend(fontsize=7, loc='upper left')

    # Small negative margin on the left so the intercept marker at η=0 is not clipped
    ax.set_xlim(left=-etas[-1]*0.02, right=etas[-1]*1.1)

# Hide grid cells that have no run assigned to them
for unused_ax in axes[n_runs:]:
    unused_ax.set_visible(False)

plt.suptitle('Linear Fits to deltaH_true vs η (Using η₁ through η₅)', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

# Summary table
df_fits = pd.DataFrame(fit_results)

if df_fits.empty:
    # Without any fit there are no 'slope'/'intercept' columns to summarise
    print("\nNo runs could be fitted; skipping the summary.")
else:
    print("\nFitted Parameters Summary:")
    print(df_fits.to_string(index=False))

    print("\n" + "-"*60)
    print("Statistical Summary Across Runs:")
    print(f"Mean intercept:   {df_fits['intercept'].mean():.3e} ± {df_fits['intercept'].std():.3e}")
    print(f"Mean slope:       {df_fits['slope'].mean():.1f} ± {df_fits['slope'].std():.1f}")
    print(f"Mean R²:          {df_fits['r2'].mean():.4f} ± {df_fits['r2'].std():.4f}")

    # Check how close intercepts are to zero
    print("\n" + "-"*60)
    print("Analysis of Y-Intercepts (Should be ≈0 for perfect linear scaling):")
    intercepts = df_fits['intercept'].values
    print(f"  Mean absolute intercept: {np.mean(np.abs(intercepts)):.3e}")
    print(f"  Max absolute intercept:  {np.max(np.abs(intercepts)):.3e}")
    # Compares the mean |intercept| with the fitted prediction slope*η at η = 1e-6
    print(f"  Intercept as % of typical deltaH at η=1e-6: {np.mean(np.abs(intercepts))/(1e-6 * df_fits['slope'].mean()) * 100:.1f}%")