# INITIAL TEST

In [None]:
# Notebook shell commands (the leading "!" hands the line to the shell).
# seaborn is imported further down; gitpython / PyGithub are presumably used
# by later cells -- verify before removing.
!pip install seaborn
!pip install gitpython PyGithub



# OptimizationResult, First-order Optimisers and Functions

In [None]:
class OptimizationResult:
    """Record of a single optimization run plus a few derived error metrics.

    Every field is read from keyword arguments. Scalar / array fields default
    to ``None`` when omitted; per-iteration histories default to a new empty
    list.

    Derived attributes (all ``None`` when
    ``TestFunctions.get_global_minimum`` returns ``(None, None)`` for the
    run's ``function_name``):

    * ``initial_distance_to_minimum`` -- ``||x_initial - x_min||``; needs
      ``x_initial``.
    * ``final_distance_to_minimum`` -- the last entry of
      ``distance_to_minimum_path`` when that history is non-empty, otherwise
      ``||x_final - x_min||``; needs one of the two.
    * ``f_error`` -- ``|f_final - f_min|``; needs ``f_final``.

    Each derived value is computed independently, so a missing ``x_initial``
    no longer suppresses ``f_error`` or the final distance.
    """
    def __init__(self, **kwargs):
        # Final state and run bookkeeping
        self.x_final = kwargs.get('x_final')
        self.f_final = kwargs.get('f_final')
        self.success = kwargs.get('success')
        self.iterations = kwargs.get('iterations')
        self.runtime = kwargs.get('runtime')

        # Per-iteration histories
        self.path = kwargs.get('path', [])
        self.f_path = kwargs.get('f_path', [])
        self.grad_norm_path = kwargs.get('grad_norm_path', [])
        self.grad_cosine_sim_path = kwargs.get('grad_cosine_sim_path', [])
        self.grad_angle_path = kwargs.get('grad_angle_path', [])
        self.timestamps = kwargs.get('timestamps', [])

        # Problem identification
        self.method = kwargs.get('method')
        self.dimension = kwargs.get('dimension')
        self.function_name = kwargs.get('function_name')

        # Start / end point details
        self.x_initial = kwargs.get('x_initial')
        self.f_initial = kwargs.get('f_initial')
        self.grad_initial = kwargs.get('grad_initial')
        self.grad_final = kwargs.get('grad_final')

        # Progress histories
        self.step_sizes = kwargs.get('step_sizes', [])
        self.improvements = kwargs.get('improvements', [])
        self.best_so_far = kwargs.get('best_so_far', [])
        self.relative_improvements = kwargs.get('relative_improvements', [])
        self.distance_to_minimum_path = kwargs.get('distance_to_minimum_path', [])

        # Derived metrics; they stay None unless the global minimum is known.
        self.initial_distance_to_minimum = None
        self.final_distance_to_minimum = None
        self.f_error = None

        x_min, f_min = TestFunctions.get_global_minimum(self.function_name, self.dimension)
        if x_min is None or f_min is None:
            return

        if self.x_initial is not None:
            self.initial_distance_to_minimum = np.linalg.norm(self.x_initial - x_min)

        # Prefer the logged distance; len() (not truthiness) so that a numpy
        # array history is handled as well as a list.
        distance_path = self.distance_to_minimum_path
        if distance_path is not None and len(distance_path) > 0:
            self.final_distance_to_minimum = distance_path[-1]
        elif self.x_final is not None:
            self.final_distance_to_minimum = np.linalg.norm(self.x_final - x_min)

        if self.f_final is not None:
            self.f_error = abs(self.f_final - f_min)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from dataclasses import dataclass
from typing import Callable, List, Dict, Optional, Tuple
import os
import json
import time
from datetime import datetime
import pandas as pd
import psutil
import seaborn as sns



class TestFunctions:
    """Benchmark objective functions with gradients and Hessians for any dimension.

    Every member is a static method taking a 1-D numpy array ``x``.
    Closed-form Hessians are provided for Rastrigin, Sphere and Sum Squares;
    the remaining Hessians are finite-difference approximations built from the
    analytic gradient.
    """

    @staticmethod
    def get_global_minimum(func_name: str, dimension: int = 2) -> tuple:
        """Return ``(x_min, f_min)`` for ``func_name`` in ``dimension`` dimensions.

        Returns ``(None, None)`` for an unknown name and for ``'michalewicz'``,
        whose minimum is not tabulated here because it varies with dimension.
        """
        global_minima = {
            'ackley': (np.zeros(dimension), 0.0),
            'rastrigin': (np.zeros(dimension), 0.0),
            'rosenbrock': (np.ones(dimension), 0.0),
            'sphere': (np.zeros(dimension), 0.0),
            'schwefel': (420.9687 * np.ones(dimension), 0.0),
            'sum_squares': (np.zeros(dimension), 0.0),
            'michalewicz': (None, None),  # Varies with dimension
        }
        return global_minima.get(func_name, (None, None))

    @staticmethod
    def _numerical_hessian(grad: Callable, x: np.ndarray, eps: float = 1e-6) -> np.ndarray:
        """Approximate the Hessian by central differences of ``grad``.

        Column ``j`` is ``(grad(x + eps*e_j) - grad(x - eps*e_j)) / (2*eps)``:
        only coordinate ``j`` is perturbed, so entry ``(i, j)`` approximates
        ``d grad_i / d x_j``. The result is symmetrized to remove
        finite-difference asymmetry.
        """
        n = len(x)
        H = np.zeros((n, n))
        for j in range(n):
            step = np.zeros(n)
            step[j] = eps
            H[:, j] = (grad(x + step) - grad(x - step)) / (2 * eps)
        return (H + H.T) / 2

    @staticmethod
    def ackley(x: np.ndarray) -> float:
        """Ackley function for n dimensions"""
        n = len(x)
        sum_sq = np.sum(x**2)
        sum_cos = np.sum(np.cos(2 * np.pi * x))
        return (-20 * np.exp(-0.2 * np.sqrt(sum_sq / n))
                - np.exp(sum_cos / n)
                + 20 + np.e)

    @staticmethod
    def ackley_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Ackley function.

        The function has a kink at the origin, where the radial term is
        ``0/0``; that term is taken as zero there, so the result is finite
        (all zeros) instead of NaN.
        """
        n = len(x)
        sum_sq = np.sum(x**2)
        sum_cos = np.sum(np.cos(2 * np.pi * x))

        term2 = (2 * np.pi / n) * np.exp(sum_cos / n) * np.sin(2 * np.pi * x)
        if sum_sq == 0:
            # x is the origin: sin(0) == 0, so term2 is already the zero vector.
            return term2

        term1 = (20 * 0.2 / np.sqrt(n * sum_sq)) * np.exp(-0.2 * np.sqrt(sum_sq / n)) * x
        return term1 + term2

    @staticmethod
    def ackley_hessian(x: np.ndarray) -> np.ndarray:
        """Numerical approximation of Ackley Hessian"""
        return TestFunctions._numerical_hessian(TestFunctions.ackley_gradient, x)

    @staticmethod
    def rastrigin(x: np.ndarray) -> float:
        """Rastrigin function for n dimensions"""
        n = len(x)
        return 10 * n + np.sum(x**2 - 10 * np.cos(2 * np.pi * x))

    @staticmethod
    def rastrigin_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Rastrigin function"""
        return 2 * x + 20 * np.pi * np.sin(2 * np.pi * x)

    @staticmethod
    def rastrigin_hessian(x: np.ndarray) -> np.ndarray:
        """Hessian of Rastrigin function (diagonal, closed form)"""
        n = len(x)
        return 2 * np.eye(n) + 40 * np.pi**2 * np.diag(np.cos(2 * np.pi * x))

    @staticmethod
    def schwefel(x: np.ndarray) -> float:
        """Schwefel function for n dimensions"""
        n = len(x)
        return 418.9829 * n - np.sum(x * np.sin(np.sqrt(np.abs(x))))

    @staticmethod
    def schwefel_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Schwefel function.

        With ``r = sqrt(|x|)``: ``d/dx [x sin(r)] = sin(r) + |x| cos(r) / (2 r)
        = sin(r) + r cos(r) / 2``. Writing it in terms of ``r`` is correct for
        negative coordinates and involves no division, so ``x == 0`` is safe.
        """
        root = np.sqrt(np.abs(x))
        return -(np.sin(root) + 0.5 * root * np.cos(root))

    @staticmethod
    def schwefel_hessian(x: np.ndarray) -> np.ndarray:
        """Numerical approximation of Schwefel Hessian"""
        return TestFunctions._numerical_hessian(TestFunctions.schwefel_gradient, x)

    @staticmethod
    def sphere(x: np.ndarray) -> float:
        """Sphere function for n dimensions"""
        return np.sum(x**2)

    @staticmethod
    def sphere_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Sphere function"""
        return 2 * x

    @staticmethod
    def sphere_hessian(x: np.ndarray) -> np.ndarray:
        """Hessian of Sphere function"""
        n = len(x)
        return 2 * np.eye(n)

    @staticmethod
    def sum_squares(x: np.ndarray) -> float:
        """Sum squares function for n dimensions: ``sum_i i * x_i**2`` (i from 1)"""
        return np.sum((np.arange(1, len(x) + 1) * x**2))

    @staticmethod
    def sum_squares_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Sum squares function"""
        return 2 * np.arange(1, len(x) + 1) * x

    @staticmethod
    def sum_squares_hessian(x: np.ndarray) -> np.ndarray:
        """Hessian of Sum squares function"""
        n = len(x)
        return 2 * np.diag(np.arange(1, n + 1))

    @staticmethod
    def rosenbrock(x: np.ndarray) -> float:
        """Rosenbrock function for n dimensions"""
        return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)

    @staticmethod
    def rosenbrock_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Rosenbrock function.

        For fewer than two coordinates the function is an empty sum, so the
        gradient is the zero vector.
        """
        n = len(x)
        grad = np.zeros(n)
        if n < 2:
            return grad
        grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
        grad[-1] = 200 * (x[-1] - x[-2]**2)
        if n > 2:
            # Interior coordinates appear in two consecutive summands.
            grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
        return grad

    @staticmethod
    def rosenbrock_hessian(x: np.ndarray) -> np.ndarray:
        """Numerical approximation of Rosenbrock Hessian"""
        return TestFunctions._numerical_hessian(TestFunctions.rosenbrock_gradient, x)

    @staticmethod
    def michalewicz(x: np.ndarray) -> float:
        """Michalewicz function for n dimensions (steepness m = 10)"""
        i = np.arange(1, len(x) + 1)
        return -np.sum(np.sin(x) * (np.sin(i * x**2 / np.pi))**(2 * 10))

    @staticmethod
    def michalewicz_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Michalewicz function (steepness m = 10).

        For ``f = -sum(sin(x) * s**(2m))`` with ``s = sin(i x^2 / pi)``:
        ``df/dx = -[cos(x) s**(2m) + sin(x) 2m s**(2m-1) cos(i x^2/pi) 2 i x/pi]``.
        No division occurs, so no clipping or epsilon is needed.
        """
        m = 10
        i = np.arange(1, len(x) + 1)
        arg = i * x**2 / np.pi
        sin_arg = np.sin(arg)

        # Product rule: derivative of the sin(x) factor, then of the power factor.
        d_outer = np.cos(x) * sin_arg**(2 * m)
        d_power = np.sin(x) * (2 * m) * sin_arg**(2 * m - 1) * np.cos(arg) * (2 * i * x / np.pi)

        return -(d_outer + d_power)

    @staticmethod
    def michalewicz_hessian(x: np.ndarray) -> np.ndarray:
        """Numerical approximation of Michalewicz Hessian"""
        return TestFunctions._numerical_hessian(TestFunctions.michalewicz_gradient, x)

In [None]:
class GradientDescent:
    """Fixed-step gradient descent: ``x <- x - learning_rate * grad(x)``."""

    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
            function_name: str,
            max_iter=1000, tol=1e-6, callback=None) -> dict:
        """Minimize ``f`` from ``x0`` and return the result with its logged history.

        Args:
            f: Objective, called as ``f(x)``.
            grad: Gradient of the objective, called as ``grad(x)``.
            x0: Starting point; it is copied, not modified.
            function_name: Key passed to ``TestFunctions.get_global_minimum``
                to look up the minimizer used for distance logging.
            max_iter: Maximum number of update steps.
            tol: Stop once the norm of the gradient used for the latest step
                falls below this value.
            callback: Optional callable invoked as ``callback(x)`` after each step.

        Returns:
            dict with keys ``x``, ``fun``, ``success``, ``nit``, ``grad_final``
            and the histories collected by ``StepLogger`` (``path``, ``f_path``,
            ``grad_path``, ``timestamps``, ``step_sizes``, ``improvements``,
            ``best_so_far``, ``relative_improvements``,
            ``distance_to_minimum_path``).
        """
        x = x0.copy()
        step_logger = StepLogger()

        # Get the global minimum for distance tracking
        x_min, _ = TestFunctions.get_global_minimum(function_name, len(x0))

        # Log initial point
        f_initial = f(x)
        g_initial = grad(x)
        step_logger.log_iteration(x, f_initial, g_initial, global_minimum=x_min)

        # Defined before the loop so that max_iter <= 0 still returns a valid
        # result instead of failing on unbound names.
        grad_norm = np.linalg.norm(g_initial)
        nit = 0

        for nit in range(1, max_iter + 1):
            g = grad(x)
            x = x - self.learning_rate * g

            # Log step; grad_norm belongs to the gradient taken before the update.
            f_val = f(x)
            grad_norm = np.linalg.norm(g)
            # NOTE(review): the initial log call passes the gradient vector,
            # this one passes its norm -- confirm which StepLogger expects.
            step_logger.log_iteration(x, f_val, grad_norm, global_minimum=x_min)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': nit,
            'path': step_logger.path,
            'f_path': step_logger.f_path,
            'grad_path': step_logger.grad_norm_path,
            'timestamps': step_logger.timestamps,
            'grad_final': grad(x),
            'step_sizes': step_logger.step_sizes,
            'improvements': step_logger.improvements,
            'best_so_far': step_logger.best_so_far,
            'relative_improvements': step_logger.relative_improvements,
            'distance_to_minimum_path': step_logger.distance_to_minimum_path
        }

class SGD:
    """Gradient descent on a noisy gradient.

    Each step adds zero-mean Gaussian noise with standard deviation
    ``noise_scale`` to the exact gradient before applying
    ``x <- x - learning_rate * g``.
    """
    def __init__(self, learning_rate=0.01, noise_scale=0.01):
        self.learning_rate = learning_rate
        self.noise_scale = noise_scale

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
            function_name: str,
            max_iter=1000, tol=1e-6, callback=None) -> dict:
        """Minimize ``f`` from ``x0`` and return the result with its logged history.

        Args:
            f: Objective, called as ``f(x)``.
            grad: Gradient of the objective, called as ``grad(x)``.
            x0: Starting point; it is copied, not modified.
            function_name: Key passed to ``TestFunctions.get_global_minimum``
                to look up the minimizer used for distance logging.
            max_iter: Maximum number of update steps.
            tol: Stop once the norm of the (noisy) gradient used for the latest
                step falls below this value.
            callback: Optional callable invoked as ``callback(x)`` after each step.

        Returns:
            dict with keys ``x``, ``fun``, ``success``, ``nit``, ``grad_final``
            and the histories collected by ``StepLogger``.
        """
        x = x0.copy()
        step_logger = StepLogger()

        # Get the global minimum for distance tracking
        x_min, _ = TestFunctions.get_global_minimum(function_name, len(x0))

        # Log initial point
        f_initial = f(x)
        g_initial = grad(x)
        step_logger.log_iteration(x, f_initial, g_initial, global_minimum=x_min)

        # Defined before the loop so that max_iter <= 0 still returns a valid
        # result instead of failing on unbound names.
        grad_norm = np.linalg.norm(g_initial)
        nit = 0

        for nit in range(1, max_iter + 1):
            g = grad(x)
            # Add stochastic noise
            noise = np.random.normal(0, self.noise_scale, size=x.shape)
            g = g + noise
            x = x - self.learning_rate * g

            # Log step; grad_norm is the norm of the noisy pre-update gradient.
            f_val = f(x)
            grad_norm = np.linalg.norm(g)
            # NOTE(review): the initial log call passes the gradient vector,
            # this one passes its norm -- confirm which StepLogger expects.
            step_logger.log_iteration(x, f_val, grad_norm, global_minimum=x_min)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': nit,
            'path': step_logger.path,
            'f_path': step_logger.f_path,
            'grad_path': step_logger.grad_norm_path,
            'timestamps': step_logger.timestamps,
            'grad_final': grad(x),
            'step_sizes': step_logger.step_sizes,
            'improvements': step_logger.improvements,
            'best_so_far': step_logger.best_so_far,
            'relative_improvements': step_logger.relative_improvements,
            'distance_to_minimum_path': step_logger.distance_to_minimum_path
        }

class SGDMomentum:
    """Noisy gradient descent with a momentum (velocity) term.

    Each step adds zero-mean Gaussian noise with standard deviation
    ``noise_scale`` to the gradient, then updates
    ``v <- momentum * v - learning_rate * g`` and ``x <- x + v``.
    """
    def __init__(self, learning_rate=0.01, momentum=0.9, noise_scale=0.01):
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.noise_scale = noise_scale

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
            function_name: str,
            max_iter=1000, tol=1e-6, callback=None) -> dict:
        """Minimize ``f`` from ``x0`` and return the result with its logged history.

        Args:
            f: Objective, called as ``f(x)``.
            grad: Gradient of the objective, called as ``grad(x)``.
            x0: Starting point; it is copied, not modified.
            function_name: Key passed to ``TestFunctions.get_global_minimum``
                to look up the minimizer used for distance logging.
            max_iter: Maximum number of update steps.
            tol: Stop once the norm of the (noisy) gradient used for the latest
                step falls below this value.
            callback: Optional callable invoked as ``callback(x)`` after each step.

        Returns:
            dict with keys ``x``, ``fun``, ``success``, ``nit``, ``grad_final``
            and the histories collected by ``StepLogger``.
        """
        x = x0.copy()
        v = np.zeros_like(x)  # Initialize velocity
        step_logger = StepLogger()

        # Get the global minimum for distance tracking
        x_min, _ = TestFunctions.get_global_minimum(function_name, len(x0))

        # Log initial point
        f_initial = f(x)
        g_initial = grad(x)
        step_logger.log_iteration(x, f_initial, g_initial, global_minimum=x_min)

        # Defined before the loop so that max_iter <= 0 still returns a valid
        # result instead of failing on unbound names.
        grad_norm = np.linalg.norm(g_initial)
        nit = 0

        for nit in range(1, max_iter + 1):
            g = grad(x)
            # Add stochastic noise
            noise = np.random.normal(0, self.noise_scale, size=x.shape)
            g = g + noise

            # Update with momentum
            v = self.momentum * v - self.learning_rate * g
            x = x + v

            # Log step; grad_norm is the norm of the noisy pre-update gradient.
            f_val = f(x)
            grad_norm = np.linalg.norm(g)
            # NOTE(review): the initial log call passes the gradient vector,
            # this one passes its norm -- confirm which StepLogger expects.
            step_logger.log_iteration(x, f_val, grad_norm, global_minimum=x_min)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': nit,
            'path': step_logger.path,
            'f_path': step_logger.f_path,
            'grad_path': step_logger.grad_norm_path,
            'timestamps': step_logger.timestamps,
            'grad_final': grad(x),
            'step_sizes': step_logger.step_sizes,
            'improvements': step_logger.improvements,
            'best_so_far': step_logger.best_so_far,
            'relative_improvements': step_logger.relative_improvements,
            'distance_to_minimum_path': step_logger.distance_to_minimum_path
        }

class MomentumGD:
    """Gradient descent with momentum.

    Updates ``v <- momentum * v - learning_rate * grad(x)`` and ``x <- x + v``.
    """
    def __init__(self, learning_rate=0.01, momentum=0.9):
        self.learning_rate = learning_rate
        self.momentum = momentum

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
            function_name: str,
            max_iter=1000, tol=1e-6, callback=None) -> dict:
        """Minimize ``f`` from ``x0`` and return the result with its logged history.

        Args:
            f: Objective, called as ``f(x)``.
            grad: Gradient of the objective, called as ``grad(x)``.
            x0: Starting point; it is copied, not modified.
            function_name: Key passed to ``TestFunctions.get_global_minimum``
                to look up the minimizer used for distance logging.
            max_iter: Maximum number of update steps.
            tol: Stop once the norm of the gradient used for the latest step
                falls below this value.
            callback: Optional callable invoked as ``callback(x)`` after each step.

        Returns:
            dict with keys ``x``, ``fun``, ``success``, ``nit``, ``grad_final``
            and the histories collected by ``StepLogger``.
        """
        x = x0.copy()
        v = np.zeros_like(x)
        step_logger = StepLogger()

        # Get the global minimum for distance tracking
        x_min, _ = TestFunctions.get_global_minimum(function_name, len(x0))

        # Log initial point
        f_initial = f(x)
        g_initial = grad(x)
        step_logger.log_iteration(x, f_initial, g_initial, global_minimum=x_min)

        # Defined before the loop so that max_iter <= 0 still returns a valid
        # result instead of failing on unbound names.
        grad_norm = np.linalg.norm(g_initial)
        nit = 0

        for nit in range(1, max_iter + 1):
            g = grad(x)
            v = self.momentum * v - self.learning_rate * g
            x = x + v

            # Log step; grad_norm belongs to the gradient taken before the update.
            f_val = f(x)
            grad_norm = np.linalg.norm(g)
            # NOTE(review): the initial log call passes the gradient vector,
            # this one passes its norm -- confirm which StepLogger expects.
            step_logger.log_iteration(x, f_val, grad_norm, global_minimum=x_min)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': nit,
            'path': step_logger.path,
            'f_path': step_logger.f_path,
            'grad_path': step_logger.grad_norm_path,
            'timestamps': step_logger.timestamps,
            'grad_final': grad(x),
            'step_sizes': step_logger.step_sizes,
            'improvements': step_logger.improvements,
            'best_so_far': step_logger.best_so_far,
            'relative_improvements': step_logger.relative_improvements,
            'distance_to_minimum_path': step_logger.distance_to_minimum_path
        }

class RMSprop:
    """RMSprop optimizer.

    Keeps a running average ``v <- decay_rate * v + (1 - decay_rate) * g**2``
    and steps ``x <- x - learning_rate * g / (sqrt(v) + epsilon)``.
    """
    def __init__(self, learning_rate=0.01, decay_rate=0.9, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.epsilon = epsilon

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
            function_name: str,
            max_iter=1000, tol=1e-6, callback=None) -> dict:
        """Minimize ``f`` from ``x0`` and return the result with its logged history.

        Args:
            f: Objective, called as ``f(x)``.
            grad: Gradient of the objective, called as ``grad(x)``.
            x0: Starting point; it is copied, not modified.
            function_name: Key passed to ``TestFunctions.get_global_minimum``
                to look up the minimizer used for distance logging.
            max_iter: Maximum number of update steps.
            tol: Stop once the norm of the gradient used for the latest step
                falls below this value.
            callback: Optional callable invoked as ``callback(x)`` after each step.

        Returns:
            dict with keys ``x``, ``fun``, ``success``, ``nit``, ``grad_final``
            and the histories collected by ``StepLogger``.
        """
        x = x0.copy()
        v = np.zeros_like(x)
        step_logger = StepLogger()

        # Get the global minimum for distance tracking
        x_min, _ = TestFunctions.get_global_minimum(function_name, len(x0))

        # Log initial point
        f_initial = f(x)
        g_initial = grad(x)
        step_logger.log_iteration(x, f_initial, g_initial, global_minimum=x_min)

        # Defined before the loop so that max_iter <= 0 still returns a valid
        # result instead of failing on unbound names.
        grad_norm = np.linalg.norm(g_initial)
        nit = 0

        for nit in range(1, max_iter + 1):
            g = grad(x)
            v = self.decay_rate * v + (1 - self.decay_rate) * g**2
            x = x - self.learning_rate * g / (np.sqrt(v) + self.epsilon)

            # Log step; grad_norm belongs to the gradient taken before the update.
            f_val = f(x)
            grad_norm = np.linalg.norm(g)
            # NOTE(review): the initial log call passes the gradient vector,
            # this one passes its norm -- confirm which StepLogger expects.
            step_logger.log_iteration(x, f_val, grad_norm, global_minimum=x_min)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': nit,
            'path': step_logger.path,
            'f_path': step_logger.f_path,
            'grad_path': step_logger.grad_norm_path,
            'timestamps': step_logger.timestamps,
            'grad_final': grad(x),
            'step_sizes': step_logger.step_sizes,
            'improvements': step_logger.improvements,
            'best_so_far': step_logger.best_so_far,
            'relative_improvements': step_logger.relative_improvements,
            'distance_to_minimum_path': step_logger.distance_to_minimum_path
        }

class Adam:
    """Adam optimizer.

    Tracks first- and second-moment running averages of the gradient
    (``beta1``, ``beta2``), bias-corrects both by the 1-based step number, and
    steps ``x <- x - learning_rate * m_hat / (sqrt(v_hat) + epsilon)``.
    """
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
            function_name: str,
            max_iter=1000, tol=1e-6, callback=None) -> dict:
        """Minimize ``f`` from ``x0`` and return the result with its logged history.

        Args:
            f: Objective, called as ``f(x)``.
            grad: Gradient of the objective, called as ``grad(x)``.
            x0: Starting point; it is copied, not modified.
            function_name: Key passed to ``TestFunctions.get_global_minimum``
                to look up the minimizer used for distance logging.
            max_iter: Maximum number of update steps.
            tol: Stop once the norm of the gradient used for the latest step
                falls below this value.
            callback: Optional callable invoked as ``callback(x)`` after each step.

        Returns:
            dict with keys ``x``, ``fun``, ``success``, ``nit``, ``grad_final``
            and the histories collected by ``StepLogger``.
        """
        x = x0.copy()
        m = np.zeros_like(x)
        v = np.zeros_like(x)
        step_logger = StepLogger()

        # Get the global minimum for distance tracking
        x_min, _ = TestFunctions.get_global_minimum(function_name, len(x0))

        # Log initial point
        f_initial = f(x)
        g_initial = grad(x)
        step_logger.log_iteration(x, f_initial, g_initial, global_minimum=x_min)

        # Defined before the loop so that max_iter <= 0 still returns a valid
        # result instead of failing on unbound names.
        grad_norm = np.linalg.norm(g_initial)
        nit = 0

        for nit in range(1, max_iter + 1):
            g = grad(x)
            m = self.beta1 * m + (1 - self.beta1) * g
            v = self.beta2 * v + (1 - self.beta2) * g**2

            # Bias correction (nit is the 1-based step number)
            m_hat = m / (1 - self.beta1**nit)
            v_hat = v / (1 - self.beta2**nit)

            x = x - self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

            # Log step; grad_norm belongs to the gradient taken before the update.
            f_val = f(x)
            grad_norm = np.linalg.norm(g)
            # NOTE(review): the initial log call passes the gradient vector,
            # this one passes its norm -- confirm which StepLogger expects.
            step_logger.log_iteration(x, f_val, grad_norm, global_minimum=x_min)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': nit,
            'path': step_logger.path,
            'f_path': step_logger.f_path,
            'grad_path': step_logger.grad_norm_path,
            'timestamps': step_logger.timestamps,
            'grad_final': grad(x),
            'step_sizes': step_logger.step_sizes,
            'improvements': step_logger.improvements,
            'best_so_far': step_logger.best_so_far,
            'relative_improvements': step_logger.relative_improvements,
            'distance_to_minimum_path': step_logger.distance_to_minimum_path
        }

# Visualisation

In [None]:
class Visualizer:
    @staticmethod
    def plot_with_cleanup(plot_func):
        """Decorator that closes all matplotlib figures once ``plot_func`` finishes.

        The wrapped function's return value (or exception) is passed through
        unchanged; ``plt.close('all')`` runs in either case.

        NOTE: this is applied as ``@plot_with_cleanup`` inside the class body,
        i.e. while the name is still bound to the ``staticmethod`` object.
        Calling a ``staticmethod`` object directly requires Python 3.10+.
        """
        import functools  # local import keeps this block self-contained

        # wraps() keeps the plot function's __name__ / __doc__ on the wrapper.
        @functools.wraps(plot_func)
        def wrapper(*args, **kwargs):
            try:
                return plot_func(*args, **kwargs)
            finally:
                plt.close('all')  # Ensure all figures are closed
        return wrapper

    @staticmethod
    @plot_with_cleanup
    def plot_convergence(results: Dict[str, List[OptimizationResult]], save_dir: str, function_name: str):
        """Plot convergence with error bands showing variation across runs.

        Draws two log-scale panels -- mean function value and mean gradient
        norm per iteration -- with a +/- one standard deviation band per
        method, and saves the figure to ``os.path.join(save_dir, function_name)``.

        Args:
            results: Maps a method name to its runs (a single result is also
                accepted and treated as a one-element list).
            save_dir: Output directory; created if missing.
            function_name: Used for the global-minimum lookup shown in the
                title and as the output file name.

        Plotting is best-effort: a failure for one method, or for the whole
        figure, is reported with a printed warning rather than raised.
        """

        os.makedirs(save_dir, exist_ok=True)

        try:
            # plt.subplots creates the figure itself; no separate plt.figure()
            # call is made, which would only leave an empty extra figure behind.
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

            colors = plt.cm.tab20(np.linspace(0, 1, len(results)))

            _, f_min = TestFunctions.get_global_minimum(function_name)
            f_min_text = f"(Global min: {f_min})" if f_min is not None else ""

            # Plot for each method
            for (method, method_results), color in zip(results.items(), colors):
                try:
                    if not isinstance(method_results, list):
                        method_results = [method_results]  # Convert single result to list if necessary

                    # Runs may differ in length; shorter ones are NaN-padded
                    # so the nan-aware statistics ignore the missing tail.
                    max_len = max(len(r.f_path) for r in method_results)

                    f_values = np.full((len(method_results), max_len), np.nan)
                    grad_norms = np.full((len(method_results), max_len), np.nan)

                    for i, result in enumerate(method_results):
                        f_values[i, :len(result.f_path)] = result.f_path
                        grad_norms[i, :len(result.grad_norm_path)] = result.grad_norm_path

                    # Calculate statistics
                    f_mean = np.nanmean(f_values, axis=0)
                    f_std = np.nanstd(f_values, axis=0)
                    grad_mean = np.nanmean(grad_norms, axis=0)
                    grad_std = np.nanstd(grad_norms, axis=0)

                    # Plot with error bands
                    x = np.arange(max_len)
                    ax1.semilogy(x, f_mean, label=f"{method}", color=color)
                    ax1.fill_between(x, f_mean - f_std, f_mean + f_std, alpha=0.2, color=color)

                    ax2.semilogy(x, grad_mean, label=f"{method}", color=color)
                    ax2.fill_between(x, grad_mean - grad_std, grad_mean + grad_std, alpha=0.2, color=color)

                except Exception as e:
                    print(f"Warning: Could not plot method {method}: {str(e)}")
                    continue

            # Set labels and titles
            ax1.set_xlabel('Iteration')
            ax1.set_ylabel('Function Value (log scale)')
            ax1.set_title(f'Function Value Convergence {f_min_text}')
            ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            ax1.grid(True)

            ax2.set_xlabel('Iteration')
            ax2.set_ylabel('Gradient Norm (log scale)')
            ax2.set_title('Gradient Norm Convergence')
            ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
            ax2.grid(True)

            # Leave room on the right for the legends placed outside the axes.
            plt.tight_layout(rect=[0, 0, 0.85, 1])
            plt.savefig(os.path.join(save_dir, function_name), dpi=300, bbox_inches='tight')
            plt.close(fig)

        except Exception as e:
            print(f"Warning: Could not create convergence plot: {str(e)}")

    @staticmethod
    @plot_with_cleanup
    def plot_2d_trajectory(f: Callable, result: OptimizationResult, save_dir: str,
                          experiment_num: int = None, equal_aspect: bool = True):
        """Save contour plots of ``f`` with the optimization path overlaid.

        Does nothing beyond creating ``save_dir`` when ``result.dimension`` is
        not 2. Otherwise two PNG files are written: one that also marks the
        global minimum (when known) and one showing the path only.

        Args:
            f: Objective, called as ``f(point)`` with a length-2 array.
            result: Run to plot; ``path``, ``dimension``, ``function_name``,
                ``method``, ``f_final`` and ``iterations`` are read.
            save_dir: Output directory; created if missing.
            experiment_num: Optional number appended to the file name.
            equal_aspect: Use a square plotting window with equal axis scaling.
        """

        os.makedirs(save_dir, exist_ok=True)

        if result.dimension != 2:
            return

        # Coarse grid keeps the contour evaluation cheap
        grid_size = 50
        margin = 0.1

        # Everything below is identical for both variants, so it is computed
        # once instead of once per figure.

        # Get path bounds and include (0,0)
        path = np.array(result.path)
        x_min_traj = min(float(path[:, 0].min()), 0)
        x_max_traj = max(float(path[:, 0].max()), 0)
        y_min_traj = min(float(path[:, 1].min()), 0)
        y_max_traj = max(float(path[:, 1].max()), 0)

        x_range = x_max_traj - x_min_traj
        y_range = y_max_traj - y_min_traj

        if equal_aspect:
            # Grow the shorter side so the window is square around its center.
            max_range = max(x_range, y_range)
            x_center = (x_max_traj + x_min_traj) / 2
            y_center = (y_max_traj + y_min_traj) / 2
            x_min_traj = x_center - max_range/2
            x_max_traj = x_center + max_range/2
            y_min_traj = y_center - max_range/2
            y_max_traj = y_center + max_range/2
            x_range = y_range = max_range

        plot_x_min = x_min_traj - margin * x_range
        plot_x_max = x_max_traj + margin * x_range
        plot_y_min = y_min_traj - margin * y_range
        plot_y_max = y_max_traj + margin * y_range

        # Evaluate f on the grid
        x = np.linspace(plot_x_min, plot_x_max, grid_size)
        y = np.linspace(plot_y_min, plot_y_max, grid_size)
        X, Y = np.meshgrid(x, y)
        points = np.column_stack((X.ravel(), Y.ravel()))
        Z = np.array([f(point) for point in points]).reshape(X.shape)

        # Contour levels must be strictly increasing. Starting at 0 matches the
        # non-negative test functions; the lower end drops to Z.min() when the
        # surface goes negative, and a flat surface falls back to a level count.
        z_top = float(Z.max())
        z_bottom = min(0.0, float(Z.min()))
        if z_top > z_bottom:
            contour_levels = np.linspace(z_bottom, z_top, 15)
        else:
            contour_levels = 15

        global_x_min, _ = TestFunctions.get_global_minimum(result.function_name)

        for show_global_min in [True, False]:  # Create both variants
            plt.figure(figsize=(12, 10))

            # Plot contours
            contour = plt.contour(X, Y, Z, levels=contour_levels, cmap='viridis', alpha=0.7)
            plt.colorbar(contour, label='Function Value')

            # Plot trajectory
            plt.plot(path[:, 0], path[:, 1], 'r.-', label='Optimization Path',
                    linewidth=1, markersize=2, zorder=5)
            plt.plot(path[0, 0], path[0, 1], 'go', label='Start',
                    markersize=8, zorder=6)
            plt.plot(path[-1, 0], path[-1, 1], 'ro', label='End',
                    markersize=8, zorder=6)

            # Only plot global minimum in the first variant
            if show_global_min and global_x_min is not None:
                plt.plot(global_x_min[0], global_x_min[1], 'k*', label='Global Minimum',
                        markersize=10, zorder=6)

            plt.xlim(float(plot_x_min), float(plot_x_max))
            plt.ylim(float(plot_y_min), float(plot_y_max))

            if equal_aspect:
                plt.gca().set_aspect('equal')

            plt.grid(True)
            plt.title(f'{result.function_name} - {result.method}\n'
                    f'Final value: {result.f_final:.6f}\n'
                    f'Iterations: {result.iterations}')
            plt.xlabel('x₁')
            plt.ylabel('x₂')
            plt.legend()

            # Include experiment number and variant in filename
            experiment_suffix = f'_exp{experiment_num}' if experiment_num is not None else ''
            variant_suffix = '_with_global_min' if show_global_min else '_path_only'
            aspect_suffix = '_equal_aspect' if equal_aspect else ''
            filename = f'trajectory_{result.function_name}_{result.method}{experiment_suffix}{variant_suffix}{aspect_suffix}.png'

            plt.savefig(os.path.join(save_dir, filename), dpi=150, bbox_inches='tight')
            plt.close()

    @staticmethod
    @plot_with_cleanup
    def plot_metric_matrix(results: Dict[str, List[OptimizationResult]], save_dir: str,
                          metric_name: str, function_name: str, y_label: str,
                          log_scale: bool = True):
        """
        Plot any optimization metric in a 2x2 matrix layout.

        Args:
            results: Dictionary of results by method
            save_dir: Directory to save the plot
            metric_name: Name of the metric to plot (e.g., 'grad_norm_path', 'best_so_far')
            function_name: Name of the optimization function
            y_label: Label for y-axis
            log_scale: Whether to use log scale for y-axis
        """
        os.makedirs(save_dir, exist_ok=True)

        try:
            # Create figure with 2x2 subplot layout
            fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 20))

            # Method groupings
            first_order = ['gradient_descent', 'sgd', 'sgd_momentum', 'momentum', 'rmsprop', 'adam']
            second_order = ['BFGS', 'newton-cg', 'trust-exact', 'trust-krylov']

            # Colors for each group
            first_order_colors = plt.cm.tab10(np.linspace(0, 1, len(first_order)))
            second_order_colors = plt.cm.Set2(np.linspace(0, 1, len(second_order)))

            def plot_group(methods, colors, ax_mean, ax_std, results):
                """Plot a group of methods on given axes"""
                for method, color in zip(methods, colors):
                    if method in results:
                        method_results = results[method]
                        if not isinstance(method_results, list):
                            method_results = [method_results]

                        # Get the metric data from each result
                        metric_data = [getattr(r, metric_name) for r in method_results]
                        max_len = max(len(d) for d in metric_data)

                        # Create array and fill with data
                        values = np.full((len(method_results), max_len), np.nan)
                        for i, data in enumerate(metric_data):
                            values[i, :len(data)] = data

                        # Calculate statistics
                        mean_values = np.nanmean(values, axis=0)
                        std_values = np.nanstd(values, axis=0)
                        x = np.arange(max_len)

                        # Plot on mean-only axis
                        if log_scale:
                            ax_mean.semilogy(x, mean_values, label=method, color=color, linewidth=2)
                        else:
                            ax_mean.plot(x, mean_values, label=method, color=color, linewidth=2)

                        # Plot on mean±std axis
                        if log_scale:
                            ax_std.semilogy(x, mean_values, label=method, color=color, linewidth=2)
                        else:
                            ax_std.plot(x, mean_values, label=method, color=color, linewidth=2)

                        if len(method_results) > 1:  # Only show std if we have multiple runs
                            ax_std.fill_between(x, mean_values - std_values, mean_values + std_values,
                                            alpha=0.15, color=color)

            # Plot each group
            plot_group(first_order, first_order_colors, ax1, ax3, results)
            plot_group(second_order, second_order_colors, ax2, ax4, results)

            # Titles for each subplot
            titles = [
                'First-Order Methods (Mean)',
                'Second-Order Methods (Mean)',
                'First-Order Methods (Mean ± Std)',
                'Second-Order Methods (Mean ± Std)'
            ]

            # Style all subplots
            for ax, title in zip([ax1, ax2, ax3, ax4], titles):
                ax.set_xlabel('Iteration')
                ax.set_ylabel(y_label)
                ax.set_title(title)
                ax.grid(True, alpha=0.3)
                ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

            plt.suptitle(f'{function_name} - {metric_name}', y=1.02, fontsize=16)
            plt.tight_layout(rect=[0, 0, 0.9, 1])

            # Save the plot
            filename = f'{metric_name}_{function_name}_matrix.png'
            plt.savefig(os.path.join(save_dir, filename), dpi=300, bbox_inches='tight')
            plt.close()

        except Exception as e:
            print(f"Warning: Could not create {metric_name} plot: {str(e)}")
            plt.close()

    # Updated Visualizer class method to plot all metrics
    @staticmethod
    @plot_with_cleanup
    def plot_all_metrics(results: Dict[str, List[OptimizationResult]], save_dir: str, function_name: str):
        """Render every tracked metric as its own 2x2 matrix figure.

        For each metric listed below, a sub-directory named after the metric
        attribute is created under ``save_dir`` and
        ``Visualizer.plot_metric_matrix`` is called to draw into it.

        Args:
            results: Optimization results keyed by method name.
            save_dir: Root directory for the per-metric sub-directories.
            function_name: Name of the optimized function, forwarded to the
                matrix plotter.
        """
        # result attribute -> (y-axis label, use a logarithmic y-axis)
        metric_settings = {
            'grad_norm_path': ('Gradient Norm', True),
            'best_so_far': ('Best Value So Far', True),
            'distance_to_minimum_path': ('Distance to Minimum', True),
            'improvements': ('Improvement (%)', False),
            'relative_improvements': ('Relative Improvement (%)', False),
            'step_sizes': ('Step Size', True),
        }

        for attribute, (axis_label, use_log) in metric_settings.items():
            target_dir = os.path.join(save_dir, attribute)
            os.makedirs(target_dir, exist_ok=True)
            Visualizer.plot_metric_matrix(results, target_dir, attribute, function_name, axis_label, use_log)

    @staticmethod
    @plot_with_cleanup
    def plot_step_metrics(results: Dict[str, List[OptimizationResult]], save_dir: str,
                         function_name: str, experiment_num: Optional[int] = None):
        """Draw one mean-with-std-band figure per step metric.

        For each metric a figure is created, every method contributes one mean
        curve plus a shaded band of one standard deviation, and the figure is
        saved under ``save_dir/<metric name without underscores>/``. A figure
        that ends up without any curve is closed without being saved.

        Args:
            results: Lists of optimization results keyed by method name.
            save_dir: Root directory for the per-metric output folders.
            function_name: Name used in the plot title and the file name.
            experiment_num: When given, the file name ends in
                ``_exp_<experiment_num>``; otherwise it ends in ``_summary``.
        """
        # (result attribute, y-axis label, title suffix)
        metric_specs = [
            ('step_sizes', 'Step Size', 'Step Size'),
            ('improvements', 'Improvement per Step (%)', 'Improvement'),
            ('distance_to_minimum_path', 'Distance to Global Minimum', 'Distance'),
            ('best_so_far', 'Best Value So Far', 'Value'),
            ('relative_improvements', 'Relative Improvement (%)', 'Improvement')
        ]

        # The file-name suffix does not depend on the metric, so build it once.
        file_suffix = '_summary' if experiment_num is None else f'_exp_{experiment_num}'

        for metric_name, ylabel, title_suffix in metric_specs:
            plt.figure(figsize=(12, 8))
            palette = plt.cm.tab20(np.linspace(0, 1, len(results)))

            for (method, runs), colour in zip(results.items(), palette):
                try:
                    # Keep only runs that actually recorded this metric.
                    series = [
                        recorded
                        for recorded in (getattr(run, metric_name, None) for run in runs)
                        if recorded is not None and len(recorded) > 0
                    ]
                    if not series:
                        continue

                    # Runs may differ in length: pad the short ones with NaN
                    # so the nan-aware statistics ignore the missing tail.
                    longest = max(map(len, series))
                    padded = np.full((len(series), longest), np.nan)
                    for row, recorded in enumerate(series):
                        padded[row, :len(recorded)] = recorded

                    centre = np.nanmean(padded, axis=0)
                    spread = np.nanstd(padded, axis=0)
                    steps = np.arange(longest)

                    plt.plot(steps, centre, label=method, color=colour)
                    plt.fill_between(steps, centre - spread, centre + spread,
                                     alpha=0.2, color=colour)
                except Exception as e:
                    print(f"Warning: Could not plot {metric_name} for {method}: {str(e)}")
                    continue

            # Decorate and save only when at least one curve was drawn.
            if plt.gca().get_lines():
                plt.xlabel('Iteration')
                plt.ylabel(ylabel)
                plt.title(f'{function_name} - {title_suffix}')
                plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
                plt.grid(True)

                metric_dir = os.path.join(save_dir, metric_name.replace('_', ''))
                os.makedirs(metric_dir, exist_ok=True)

                filename = f'{metric_name}_{function_name}{file_suffix}.png'
                plt.savefig(os.path.join(metric_dir, filename), dpi=300, bbox_inches='tight')

            plt.close()


# Metrics and Logging

In [None]:
def compute_gradient_metrics(grad_current: np.ndarray, grad_previous: np.ndarray) -> Tuple[float, float]:
    """Return the cosine similarity and the angle (in degrees) between two gradients.

    Args:
        grad_current: Gradient at the current iterate.
        grad_previous: Gradient at the previous iterate.

    Returns:
        ``(cosine_similarity, angle_in_degrees)``. When either gradient
        consists only of zeros, ``(0.0, 0.0)`` is returned.
    """
    # An all-zero vector has no direction, so the pair is reported as (0, 0).
    if not (np.any(grad_previous != 0) and np.any(grad_current != 0)):
        return 0.0, 0.0

    norm_product = np.linalg.norm(grad_current) * np.linalg.norm(grad_previous)
    raw_cosine = np.dot(grad_current, grad_previous) / norm_product

    # Rounding can push the ratio slightly outside [-1, 1], where arccos
    # is undefined, so clamp before converting to an angle.
    cosine = np.clip(raw_cosine, -1.0, 1.0)
    angle_degrees = np.arccos(cosine) * 180 / np.pi

    return cosine, angle_degrees

# Helper function to compute summary metrics
def compute_summary_metrics(results: List[OptimizationResult]) -> Dict:
    """Aggregate step-size, improvement, plateau and threshold statistics over runs.

    Args:
        results: Optimization results to summarize.

    Returns:
        Dictionary that may contain the keys ``mean_step_size``,
        ``std_step_size``, ``mean_improvement_per_step``,
        ``std_improvement_per_step``, ``mean_plateau_percentage``,
        ``mean_plateau_length``, ``mean_num_plateaus`` and, for each threshold
        in 10..50 reached by at least one run,
        ``mean_steps_to_<t>%_improvement`` / ``std_steps_to_<t>%_improvement``.
        A group of keys is omitted when no run provides data for it.
    """
    summary = {}

    def per_run_means(attribute):
        """Mean of ``attribute`` for every run that recorded at least one value."""
        means = []
        for run in results:
            series = getattr(run, attribute, None)
            # Skip missing/empty series: np.mean([]) would yield NaN (with a
            # RuntimeWarning) and turn the aggregate statistics into NaN.
            if series is not None and len(series) > 0:
                means.append(np.mean(series))
        return means

    # Step size statistics
    step_sizes = per_run_means('step_sizes')
    if step_sizes:
        summary.update({
            'mean_step_size': np.mean(step_sizes),
            'std_step_size': np.std(step_sizes)
        })

    # Improvement statistics
    improvements = per_run_means('improvements')
    if improvements:
        summary.update({
            'mean_improvement_per_step': np.mean(improvements),
            'std_improvement_per_step': np.std(improvements)
        })

    # Plateau statistics
    plateau_metrics = [StepMetricsCalculator.compute_plateau_metrics(r.f_path)
                      for r in results]
    if plateau_metrics:
        summary.update({
            'mean_plateau_percentage': np.mean([m['plateau_percentage'] for m in plateau_metrics]),
            'mean_plateau_length': np.mean([m['max_plateau_length'] for m in plateau_metrics]),
            'mean_num_plateaus': np.mean([m['num_plateaus'] for m in plateau_metrics])
        })

    # Improvement threshold statistics
    threshold_metrics = [StepMetricsCalculator.compute_improvement_thresholds(r.f_path)
                        for r in results]
    if threshold_metrics:
        for threshold in [10, 20, 30, 40, 50]:
            key = f"{threshold}%_improvement_steps"
            # A value of None means the run never reached this threshold.
            steps = [m[key] for m in threshold_metrics if m[key] is not None]
            if steps:
                summary.update({
                    f'mean_steps_to_{threshold}%_improvement': np.mean(steps),
                    f'std_steps_to_{threshold}%_improvement': np.std(steps)
                })

    return summary

In [None]:
# New helper class for computing step metrics
class StepMetricsCalculator:
    """Helper class for computing various step-based metrics.

    All methods are static. Relative quantities are expressed in percent and
    are measured against the absolute value of the reference point, so they
    keep their meaning for negative objective values. A reference value of
    exactly zero has no meaningful relative change and is handled explicitly
    instead of dividing by zero.
    """

    @staticmethod
    def compute_step_size(x_current: np.ndarray, x_previous: np.ndarray) -> float:
        """Compute Euclidean distance between consecutive steps."""
        return np.linalg.norm(x_current - x_previous)

    @staticmethod
    def compute_improvement(f_current: float, f_previous: float) -> float:
        """Compute the relative improvement between two steps, in percent.

        Positive values mean the objective decreased. Returns 0.0 when
        ``f_previous`` is zero, because the ratio is undefined there.
        """
        if f_previous == 0:
            return 0.0
        return (f_previous - f_current) / abs(f_previous) * 100

    @staticmethod
    def compute_plateau_metrics(f_path: List[float], threshold: float = 1.0) -> Dict:
        """Compute plateau-related metrics for a sequence of objective values.

        A step counts as a plateau step when the absolute relative change
        with respect to the previous value is below ``threshold`` percent.

        Args:
            f_path: Objective value at each iteration.
            threshold: Relative change (in percent) below which a step is
                considered part of a plateau.

        Returns:
            Dictionary with ``total_plateau_steps``, ``plateau_percentage``,
            ``max_plateau_length``, ``num_plateaus`` and
            ``plateau_sequences`` (lists of consecutive plateau step indices,
            where index ``i`` is the step from ``f_path[i]`` to
            ``f_path[i+1]``).
        """
        changes = []
        for previous, current in zip(f_path[:-1], f_path[1:]):
            if previous == 0:
                # Relative change from zero is undefined: no movement is a
                # plateau step, any movement is treated as unbounded change.
                changes.append(0.0 if current == 0 else float('inf'))
            else:
                changes.append(abs((current - previous) / previous * 100))
        improvements = np.array(changes, dtype=float)
        plateau_mask = improvements < threshold

        # Collect runs of consecutive plateau steps.
        plateau_sequences = []
        current_sequence = []
        for i, is_plateau in enumerate(plateau_mask):
            if is_plateau:
                current_sequence.append(i)
            elif current_sequence:
                plateau_sequences.append(current_sequence)
                current_sequence = []
        if current_sequence:
            plateau_sequences.append(current_sequence)

        return {
            'total_plateau_steps': np.sum(plateau_mask),
            'plateau_percentage': np.mean(plateau_mask) * 100 if len(plateau_mask) > 0 else 0,
            'max_plateau_length': max(len(seq) for seq in plateau_sequences) if plateau_sequences else 0,
            'num_plateaus': len(plateau_sequences),
            'plateau_sequences': plateau_sequences
        }

    @staticmethod
    def compute_improvement_thresholds(f_path: List[float],
                                     thresholds: Optional[List[float]] = None) -> Dict:
        """Compute the first step at which each improvement threshold is reached.

        A threshold of ``t`` percent is reached at the first index whose
        value is at most ``f_path[0] - |f_path[0]| * t / 100``.

        Args:
            f_path: Objective value at each iteration.
            thresholds: Improvement levels in percent; defaults to
                ``[10, 20, 30, 40, 50]``.

        Returns:
            Dictionary mapping ``"<t>%_improvement_steps"`` to the step index,
            or to None when the threshold is never reached, ``f_path`` is
            empty, or the initial value is zero (relative improvement is
            undefined in that case).
        """
        if thresholds is None:
            thresholds = [10, 20, 30, 40, 50]

        if len(f_path) == 0:
            return {f"{t}%_improvement_steps": None for t in thresholds}

        initial_value = f_path[0]
        results = {}

        for threshold in thresholds:
            key = f"{threshold}%_improvement_steps"
            if initial_value == 0:
                results[key] = None
                continue
            # Scale by (1 - t) for positive and (1 + t) for negative starting
            # values so the target always lies BELOW the starting value.
            if initial_value > 0:
                target_value = initial_value * (1 - threshold/100)
            else:
                target_value = initial_value * (1 + threshold/100)
            results[key] = next((i for i, v in enumerate(f_path) if v <= target_value), None)

        return results

    @staticmethod
    def compute_best_so_far(f_path: List[float]) -> Tuple[List[float], List[float]]:
        """Compute the running minimum and its relative improvement from the start.

        Args:
            f_path: Objective value at each iteration.

        Returns:
            ``(best_so_far, relative_improvement)``, two lists as long as
            ``f_path``. The relative improvement is in percent of
            ``|f_path[0]|`` and is 0.0 throughout when ``f_path[0]`` is zero.
        """
        if len(f_path) == 0:
            return [], []

        best_so_far = []
        relative_improvement = []
        current_best = float('inf')
        initial_value = f_path[0]
        scale = abs(initial_value)

        for value in f_path:
            current_best = min(current_best, value)
            best_so_far.append(current_best)
            if scale == 0:
                # Same convention as compute_improvement for a zero baseline.
                rel_imp = 0.0
            else:
                rel_imp = (initial_value - current_best) / scale * 100
            relative_improvement.append(rel_imp)

        return best_so_far, relative_improvement


# Enhance StepLogger class
class StepLogger:
    """Records per-iteration metrics of a single optimization run.

    Every call to ``log_iteration`` appends exactly one entry to each of the
    tracked lists, so all lists stay index-aligned with ``path``.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Reset all logging arrays and restart the timestamp clock."""
        # Iterates, objective values and gradient-derived metrics
        self.path = []
        self.f_path = []
        self.grad_norm_path = []
        self.grad_cosine_sim_path = []
        self.grad_angle_path = []
        self.timestamps = []
        self.start_time = time.time()
        self.previous_grad = None

        # Step-based metrics
        self.step_sizes = []
        self.improvements = []
        self.best_so_far = []
        self.relative_improvements = []
        self.distance_to_minimum_path = []

    def log_iteration(self, x: np.ndarray, f: float, grad: np.ndarray,
                    global_minimum: Optional[np.ndarray] = None):
        """Append the metrics of one iterate to every tracked list.

        Args:
            x: Current iterate.
            f: Objective value at ``x``.
            grad: Gradient at ``x``.
            global_minimum: Location of the known global minimum, if any.
                When it is None, or the distance cannot be computed, None is
                appended to ``distance_to_minimum_path`` for this step.
        """
        grad_norm = np.linalg.norm(grad)

        # Direction change relative to the previous gradient; the first
        # logged point has no predecessor and records 0.0 for both metrics.
        if self.previous_grad is not None:
            cosine_sim, angle = compute_gradient_metrics(grad, self.previous_grad)
            self.grad_cosine_sim_path.append(cosine_sim)
            self.grad_angle_path.append(angle)
        else:
            self.grad_cosine_sim_path.append(0.0)
            self.grad_angle_path.append(0.0)

        # Step length and per-step improvement relative to the previous point
        if len(self.path) > 0:
            step_size = StepMetricsCalculator.compute_step_size(x, self.path[-1])
            improvement = StepMetricsCalculator.compute_improvement(f, self.f_path[-1])
            self.step_sizes.append(step_size)
            self.improvements.append(improvement)
        else:
            self.step_sizes.append(0.0)
            self.improvements.append(0.0)

        # Running minimum and its improvement relative to the first value
        if not self.f_path:
            self.best_so_far.append(f)
            self.relative_improvements.append(0.0)
        else:
            self.best_so_far.append(min(f, self.best_so_far[-1]))
            baseline = self.f_path[0]
            if baseline == 0:
                # Relative improvement from a zero baseline is undefined;
                # record 0.0 instead of dividing by zero.
                rel_imp = 0.0
            else:
                rel_imp = (baseline - self.best_so_far[-1]) / abs(baseline) * 100
            self.relative_improvements.append(rel_imp)

        # Distance to global minimum if provided
        if global_minimum is not None and x is not None:
            try:
                dist = np.linalg.norm(x - global_minimum)
            except (TypeError, ValueError):
                # Incompatible shapes or operand types: keep the list
                # aligned with the other metrics by recording a gap.
                dist = None
            self.distance_to_minimum_path.append(dist)
        else:
            self.distance_to_minimum_path.append(None)

        # Standard logging; copies guard against the caller mutating x/grad
        # in place after this call.
        self.path.append(x.copy())
        self.f_path.append(f)
        self.grad_norm_path.append(grad_norm)
        self.timestamps.append(time.time() - self.start_time)
        self.previous_grad = grad.copy()

In [None]:
class OptimizationLogger:
    """Handles logging of complete optimization experiments.

    All output is written as CSV files beneath ``base_dir``. Every file name
    carries the timestamp captured when the logger was constructed.

    Distance-to-minimum values may be None (per step or per run); such
    entries are ignored when statistics are computed, and a statistic with
    no usable data is reported as None.
    """

    # Methods whose run logs are filed under 'first_order'; every other
    # method name is filed under 'second_order'.
    _FIRST_ORDER_METHODS = ('gradient_descent', 'sgd', 'sgd_momentum', 'momentum', 'rmsprop', 'adam')

    def __init__(self, base_dir: str):
        """Store the output root and stamp this logging session.

        Args:
            base_dir: Root directory under which all CSV files are written.
        """
        self.base_dir = base_dir
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    @staticmethod
    def _without_none(values):
        """Return the entries of ``values`` that are not None ([] for None input)."""
        if values is None:
            return []
        return [v for v in values if v is not None]

    @staticmethod
    def _mean_or_none(values):
        """Return ``np.mean(values)``, or None when ``values`` is empty."""
        return np.mean(values) if values else None

    @staticmethod
    def _std_or_none(values):
        """Return ``np.std(values)``, or None when ``values`` is empty."""
        return np.std(values) if values else None

    def log_run(self, result: OptimizationResult, experiment_num: int):
        """Write the step-by-step log and a one-row summary for a single run.

        The step log goes to ``run_<experiment_num>_steps_<timestamp>.csv``.
        The summary row is appended to ``run_summaries_<timestamp>.csv`` in
        the same directory (the header is written only when the file is
        created).

        Args:
            result: The finished optimization run.
            experiment_num: Index of the experiment, used in the file name
                and in the summary row.
        """
        order_dir = 'first_order' if result.method in self._FIRST_ORDER_METHODS else 'second_order'
        log_dir = os.path.join(
            self.base_dir,
            result.function_name,
            f"{result.dimension}D",
            order_dir,
            result.method,
            "results"
        )
        os.makedirs(log_dir, exist_ok=True)

        # The per-step columns must share one length; truncate to the shortest.
        min_length = min(
            len(result.timestamps),
            len(result.f_path),
            len(result.grad_norm_path),
            len(result.grad_cosine_sim_path),
            len(result.grad_angle_path),
            len(result.step_sizes),
            len(result.improvements),
            len(result.best_so_far),
            len(result.relative_improvements),
            len(result.distance_to_minimum_path) if result.distance_to_minimum_path is not None else float('inf')
        )

        # Create detailed step-by-step log with truncated arrays
        run_data = {
            'iteration': range(min_length),
            'timestamp': result.timestamps[:min_length],
            'function_value': result.f_path[:min_length],
            'gradient_norm': result.grad_norm_path[:min_length],
            'gradient_cosine_similarity': result.grad_cosine_sim_path[:min_length],
            'gradient_angle': result.grad_angle_path[:min_length],
            'step_size': result.step_sizes[:min_length],
            'improvement': result.improvements[:min_length],
            'best_so_far': result.best_so_far[:min_length],
            'relative_improvement': result.relative_improvements[:min_length],
            'distance_to_minimum': result.distance_to_minimum_path[:min_length] if result.distance_to_minimum_path is not None else None,
        }

        # Add parameter values for each dimension
        for i in range(result.dimension):
            run_data[f'x{i+1}'] = [p[i] for p in result.path[:min_length]]

        # Save detailed step-by-step log
        step_df = pd.DataFrame(run_data)
        step_df.to_csv(
            os.path.join(log_dir, f'run_{experiment_num}_steps_{self.timestamp}.csv'),
            index=False
        )

        # Statistics are computed over roughly ten equal-sized windows.
        window_size = max(len(result.f_path) // 10, 1)

        def windowed_means(series, start=0):
            """Mean of each ``window_size``-long slice of ``series`` from ``start``."""
            return [np.mean(series[i:i+window_size])
                    for i in range(start, len(series), window_size)]

        # Windows that hold no usable distance value are skipped rather
        # than contributing a NaN mean.
        distance_path = result.distance_to_minimum_path or []
        distance_segments = []
        for i in range(0, len(distance_path), window_size):
            usable = self._without_none(distance_path[i:i+window_size])
            if usable:
                distance_segments.append(np.mean(usable))

        segment_means = {
            'gradient_norm': windowed_means(result.grad_norm_path),
            # Index 0 of the cosine/angle paths is a placeholder logged for
            # the first point, so the windows start at index 1.
            'cosine_similarity': windowed_means(result.grad_cosine_sim_path, start=1),
            'gradient_angle': windowed_means(result.grad_angle_path, start=1),
            'best_so_far': windowed_means(result.best_so_far),
            'distance_to_minimum': distance_segments,
            'improvements': windowed_means(result.improvements),
            'relative_improvements': windowed_means(result.relative_improvements),
            'step_sizes': windowed_means(result.step_sizes)
        }

        # Key order defines the CSV column order and must stay stable,
        # because later rows are appended without a header.
        run_summary = {
            'experiment_num': experiment_num,
            'initial_value': result.f_initial,
            'final_value': result.f_final,
            'iterations': result.iterations,
            'runtime': result.runtime,
            'success': result.success,
            'initial_distance_to_minimum': result.initial_distance_to_minimum,
            'final_distance_to_minimum': result.final_distance_to_minimum,
            'f_error': result.f_error,
            'initial_gradient_norm': np.linalg.norm(result.grad_initial),
            'final_gradient_norm': np.linalg.norm(result.grad_final),

            # Calculate statistics across segments
            'mean_gradient_norm': self._mean_or_none(segment_means['gradient_norm']),
            'std_gradient_norm': self._std_or_none(segment_means['gradient_norm']),

            'mean_cosine_similarity': self._mean_or_none(segment_means['cosine_similarity']),
            'std_cosine_similarity': self._std_or_none(segment_means['cosine_similarity']),

            'mean_gradient_angle': self._mean_or_none(segment_means['gradient_angle']),
            'std_gradient_angle': self._std_or_none(segment_means['gradient_angle']),

            'mean_best_so_far': self._mean_or_none(segment_means['best_so_far']),
            'std_best_so_far': self._std_or_none(segment_means['best_so_far']),

            'mean_distance_to_minimum': self._mean_or_none(segment_means['distance_to_minimum']),
            'std_distance_to_minimum': self._std_or_none(segment_means['distance_to_minimum']),

            'mean_improvement': self._mean_or_none(segment_means['improvements']),
            'std_improvement': self._std_or_none(segment_means['improvements']),

            'mean_relative_improvement': self._mean_or_none(segment_means['relative_improvements']),
            'std_relative_improvement': self._std_or_none(segment_means['relative_improvements']),

            'mean_step_size': self._mean_or_none(segment_means['step_sizes']),
            'std_step_size': self._std_or_none(segment_means['step_sizes'])
        }

        # Save run summary
        summary_df = pd.DataFrame([run_summary])
        summary_path = os.path.join(log_dir, f'run_summaries_{self.timestamp}.csv')

        if os.path.exists(summary_path):
            summary_df.to_csv(summary_path, mode='a', header=False, index=False)
        else:
            summary_df.to_csv(summary_path, index=False)

    def create_dimension_summary(self, function_name: str, dimension: int, results: List[OptimizationResult]):
        """Write one summary row per method for a function at a given dimension.

        The file is saved as
        ``<base_dir>/<function_name>/<dimension>D/dimension_summary_<timestamp>.csv``.

        Args:
            function_name: Name of the optimized function.
            dimension: Problem dimension the results belong to.
            results: Results of all methods and experiments to aggregate.
        """
        # Group results by method first
        method_groups = {}
        for result in results:
            method_groups.setdefault(result.method, []).append(result)

        summary_data = []

        for method, method_results in method_groups.items():
            # Mean distance of each run, ignoring steps without a distance;
            # runs with no usable distance at all are left out.
            distance_run_means = []
            for r in method_results:
                usable = self._without_none(r.distance_to_minimum_path)
                if usable:
                    distance_run_means.append(np.mean(usable))

            # Calculate means for each run first
            run_means = {
                'best_so_far': [np.mean(r.best_so_far) for r in method_results if r.best_so_far],
                'distance_to_minimum_path': distance_run_means,
                'improvements': [np.mean(r.improvements) for r in method_results if r.improvements],
                'relative_improvements': [np.mean(r.relative_improvements) for r in method_results if r.relative_improvements],
                'step_sizes': [np.mean(r.step_sizes) for r in method_results if r.step_sizes],
                'gradient_norms': [np.mean(r.grad_norm_path) for r in method_results if r.grad_norm_path],
                'cosine_similarities': [np.mean(r.grad_cosine_sim_path[1:]) for r in method_results if len(r.grad_cosine_sim_path) > 1],
                'gradient_angles': [np.mean(r.grad_angle_path[1:]) for r in method_results if len(r.grad_angle_path) > 1]
            }

            # These attributes are None when no distance could be computed
            # for a run, so drop such runs before averaging.
            initial_distances = self._without_none([r.initial_distance_to_minimum for r in method_results])
            final_distances = self._without_none([r.final_distance_to_minimum for r in method_results])

            # Calculate means and stds across all experiments for this method
            summary_data.append({
                'function': function_name,
                'dimension': dimension,
                'method': method,
                'initial_value_mean': np.mean([r.f_initial for r in method_results]),
                'initial_value_std': np.std([r.f_initial for r in method_results]),
                'final_value_mean': np.mean([r.f_final for r in method_results]),
                'final_value_std': np.std([r.f_final for r in method_results]),
                'iterations_mean': np.mean([r.iterations for r in method_results]),
                'iterations_std': np.std([r.iterations for r in method_results]),
                'runtime_mean': np.mean([r.runtime for r in method_results]),
                'runtime_std': np.std([r.runtime for r in method_results]),
                'success_rate': np.mean([1 if r.success else 0 for r in method_results]),

                # Calculate statistics across runs' means
                'best_so_far_mean': self._mean_or_none(run_means['best_so_far']),
                'best_so_far_std': self._std_or_none(run_means['best_so_far']),

                'initial_distance_to_minimum_mean': self._mean_or_none(initial_distances),
                'initial_distance_to_minimum_std': self._std_or_none(initial_distances),
                'final_distance_to_minimum_mean': self._mean_or_none(final_distances),
                'final_distance_to_minimum_std': self._std_or_none(final_distances),
                'mean_distance_to_minimum': self._mean_or_none(run_means['distance_to_minimum_path']),
                'std_distance_to_minimum': self._std_or_none(run_means['distance_to_minimum_path']),

                'improvements_mean': self._mean_or_none(run_means['improvements']),
                'improvements_std': self._std_or_none(run_means['improvements']),

                'relative_improvements_mean': self._mean_or_none(run_means['relative_improvements']),
                'relative_improvements_std': self._std_or_none(run_means['relative_improvements']),

                'step_sizes_mean': self._mean_or_none(run_means['step_sizes']),
                'step_sizes_std': self._std_or_none(run_means['step_sizes']),

                'gradient_norm_mean': self._mean_or_none(run_means['gradient_norms']),
                'gradient_norm_std': self._std_or_none(run_means['gradient_norms']),

                'cosine_similarity_mean': self._mean_or_none(run_means['cosine_similarities']),
                'cosine_similarity_std': self._std_or_none(run_means['cosine_similarities']),

                'gradient_angle_mean': self._mean_or_none(run_means['gradient_angles']),
                'gradient_angle_std': self._std_or_none(run_means['gradient_angles'])
            })

        # Save dimension summary with means and stds
        summary_df = pd.DataFrame(summary_data)
        summary_dir = os.path.join(self.base_dir, function_name, f"{dimension}D")
        os.makedirs(summary_dir, exist_ok=True)
        summary_df.to_csv(
            os.path.join(summary_dir, f'dimension_summary_{self.timestamp}.csv'),
            index=False
        )

    def create_method_summary(self, results: List[OptimizationResult]):
        """Write one CSV per method with a row for every run of that method.

        Files are saved as
        ``<base_dir>/method_summaries/<method>_summary_<timestamp>.csv``.

        Args:
            results: Results of all methods, functions and dimensions.
        """
        method_dir = os.path.join(self.base_dir, 'method_summaries')

        for method in set(r.method for r in results):
            method_results = [r for r in results if r.method == method]

            summary_data = []
            for result in method_results:
                usable_distances = self._without_none(result.distance_to_minimum_path)
                # Index 0 of the cosine/angle paths is a placeholder logged
                # for the first point, so it is excluded from the means.
                cosine_tail = result.grad_cosine_sim_path[1:]
                angle_tail = result.grad_angle_path[1:]

                summary_data.append({
                    'function': result.function_name,
                    'dimension': result.dimension,
                    'final_value': result.f_final,
                    'iterations': result.iterations,
                    'runtime': result.runtime,
                    'success': result.success,
                    'initial_distance_to_minimum': result.initial_distance_to_minimum,
                    'final_distance_to_minimum': result.final_distance_to_minimum,
                    'mean_distance_to_minimum': np.mean(usable_distances) if usable_distances else None,
                    'f_error': result.f_error,
                    'mean_cosine_similarity': np.mean(cosine_tail) if len(cosine_tail) > 0 else None,
                    'mean_gradient_angle': np.mean(angle_tail) if len(angle_tail) > 0 else None,
                    'convergence_rate': (result.f_path[-1] - result.f_path[0]) / len(result.f_path) if result.f_path else None
                })

            # Save method summary
            summary_df = pd.DataFrame(summary_data)
            os.makedirs(method_dir, exist_ok=True)
            summary_df.to_csv(
                os.path.join(method_dir, f'{method}_summary_{self.timestamp}.csv'),
                index=False
            )

# Experiment Executions

In [None]:
def run_optimization(f: Callable, grad: Callable, hess: Callable, x0: np.ndarray,
                    method: str, function_name: str) -> OptimizationResult:
    """Run one ``minimize`` call and collect per-iteration metrics.

    The starting point and every iterate reported through the solver
    callback are logged with a ``StepLogger``.

    Args:
        f: Objective function.
        grad: Gradient of ``f``.
        hess: Hessian of ``f``; passed to the solver only for 'newton-cg',
            'trust-exact' and 'trust-krylov'.
        x0: Starting point.
        method: One of 'BFGS', 'newton-cg', 'trust-exact', 'trust-krylov'.
        function_name: Name of the test function, used to look up its
            global minimum and stored on the result.

    Returns:
        The populated ``OptimizationResult``, or None when the solver raised
        or ``method`` is not supported (a message is printed in that case).
    """
    start_time = time.time()
    step_logger = StepLogger()

    # Metrics at the starting point
    f_initial = f(x0)
    grad_initial = grad(x0)

    # Location of the global minimum for this function
    x_min, _ = TestFunctions.get_global_minimum(function_name, len(x0))

    step_logger.log_iteration(x0, f_initial, grad_initial, global_minimum=x_min)

    def callback(xk):
        # Called with the current iterate; re-evaluate f and grad for logging.
        step_logger.log_iteration(xk, f(xk), grad(xk), global_minimum=x_min)

    try:
        solver_kwargs = {'method': method, 'jac': grad, 'callback': callback}
        if method in ('newton-cg', 'trust-exact', 'trust-krylov'):
            # These methods additionally receive the Hessian.
            solver_kwargs['hess'] = hess
        elif method != 'BFGS':
            raise ValueError(f"Unsupported method: {method}")

        result = minimize(f, x0, **solver_kwargs)

        grad_final = grad(result.x)
        runtime = time.time() - start_time

        # Series recorded by the logger, forwarded under the same names.
        logged_series = {
            name: getattr(step_logger, name)
            for name in (
                'path', 'f_path', 'grad_norm_path', 'grad_cosine_sim_path',
                'grad_angle_path', 'timestamps', 'step_sizes', 'improvements',
                'best_so_far', 'relative_improvements', 'distance_to_minimum_path',
            )
        }

        return OptimizationResult(
            x_final=result.x,
            f_final=result.fun,
            success=result.success,
            iterations=result.nit,
            runtime=runtime,
            method=method,
            dimension=len(x0),
            function_name=function_name,
            x_initial=x0,
            f_initial=f_initial,
            grad_initial=grad_initial,
            grad_final=grad_final,
            **logged_series
        )

    except Exception as e:
        print(f"Optimization failed: {e}")
        return None

In [None]:
class ExperimentManager:
    """Manages multiple optimization experiments"""
    def __init__(self,
                 test_functions: Dict[str, Tuple[Callable, Callable, Callable]],
                 first_order_optimizers: Dict[str, object],
                 second_order_methods: List[str],
                 dimensions: List[int],
                 n_experiments: int = 50,
                 min_dist: float = 4,  # Default min distance
                 max_dist: float = 8,  # Default max distance
                 distance_constraints: Dict[str, Dict[str, float]] = None):  # New parameter
        self.test_functions = test_functions
        self.first_order_optimizers = first_order_optimizers
        self.second_order_methods = second_order_methods
        self.dimensions = dimensions
        self.n_experiments = n_experiments
        self.min_dist = min_dist
        self.max_dist = max_dist
        self.distance_constraints = distance_constraints or {}

    def generate_starting_points(self, dimension: int, function_name: str, seed: int = None) -> np.ndarray:
        """Generate random starting points with function-specific domain and distance constraints"""
        if seed is not None:
            np.random.seed(seed)

        # Define domain constraints for each function
        domain_constraints = {
            'michalewicz': (0, np.pi),  # Domain [0, π] for each dimension
            'ackley': (-32.768, 32.768),
            'rastrigin': (-5.12, 5.12),
            'schwefel': (-500, 500),
            'sphere': (-5.12, 5.12),
            'sum_squares': (-10, 10),
            'rosenbrock': (-2.048, 2.048)
        }

        # Use function-specific distance constraints if provided, otherwise use defaults
        min_dist = self.distance_constraints.get(function_name, {}).get('min', self.min_dist)
        max_dist = self.distance_constraints.get(function_name, {}).get('max', self.max_dist)

        starting_points = []

        # Get domain constraints for the specific function
        if function_name in domain_constraints:
            lower_bound, upper_bound = domain_constraints[function_name]

            for _ in range(self.n_experiments):
                if function_name == 'michalewicz':
                    # For Michalewicz, generate points uniformly within [0, π]
                    point = np.random.uniform(lower_bound, upper_bound, dimension)
                else:
                    # For other functions, use the distance-based approach
                    # but ensure it stays within domain by scaling
                    direction = np.random.randn(dimension)
                    direction = direction / np.linalg.norm(direction)
                    distance = np.random.uniform(min_dist, max_dist)
                    point = direction * distance

                    # Scale point to fit within domain if necessary
                    max_abs_coord = np.max(np.abs(point))
                    if max_abs_coord > abs(lower_bound):  # assuming symmetric bounds
                        scale_factor = abs(lower_bound) / max_abs_coord
                        point = point * scale_factor

                starting_points.append(point)
        else:
            # Default behavior for unknown functions
            for _ in range(self.n_experiments):
                direction = np.random.randn(dimension)
                direction = direction / np.linalg.norm(direction)
                distance = np.random.uniform(min_dist, max_dist)
                point = direction * distance
                starting_points.append(point)

        return np.array(starting_points)

    def run_experiments(self, base_dir: str = "optimization_results"):
        """Run every optimizer on every test function, dimension and starting point.

        For each (function, dimension) pair the method generates the starting
        points, runs all first-order optimizers and all second-order methods
        from each point, records every run with an ``OptimizationLogger`` and
        writes plots and CSV files below
        ``<base_dir>/experiment_<timestamp>/<function>/<dim>D/``.

        A first-order optimizer that raises is reported on stdout and skipped.
        A second-order run counts as failed when ``run_optimization`` returns a
        falsy value. Plotting errors are printed as warnings and never abort
        the run.

        Args:
            base_dir: Directory under which the timestamped experiment folder
                is created.

        Returns:
            None. All output goes to disk and stdout.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        experiment_dir = os.path.join(base_dir, f"experiment_{timestamp}")
        experiment_logger = OptimizationLogger(experiment_dir)

        # Every run of the whole campaign; needed for the final CSV and statistics
        final_results = []

        for func_name, (f, grad, hess) in self.test_functions.items():
            print(f"\nTesting {func_name} function:")
            function_results = []

            for dim in self.dimensions:
                print(f"\nDimension: {dim}")
                dimension_results = []

                starting_points = self.generate_starting_points(dim, func_name)

                # Directory layout for this function/dimension
                func_dir = os.path.join(experiment_dir, func_name, f"{dim}D")
                first_order_dir = os.path.join(func_dir, "first_order")
                second_order_dir = os.path.join(func_dir, "second_order")
                results_dir = os.path.join(func_dir, "results")

                os.makedirs(results_dir, exist_ok=True)
                os.makedirs(os.path.join(results_dir, "gradient_metrics"), exist_ok=True)

                method_dirs = [os.path.join(first_order_dir, method_name)
                               for method_name in self.first_order_optimizers]
                method_dirs += [os.path.join(second_order_dir, method_name)
                                for method_name in self.second_order_methods]
                for method_dir in method_dirs:
                    os.makedirs(os.path.join(method_dir, "results"), exist_ok=True)
                    os.makedirs(os.path.join(method_dir, "trajectories"), exist_ok=True)

                for exp_idx, x0 in enumerate(starting_points):
                    print(f"\nExperiment {exp_idx+1}/{self.n_experiments}")

                    # Results of this starting point only, keyed by method name
                    first_order_results = {}
                    second_order_results = {}

                    # ---- First-order methods ----
                    print("\nFirst-order methods:")
                    print("-" * 50)
                    for name, optimizer in self.first_order_optimizers.items():
                        print(f"Running {name}...")
                        try:
                            step_logger = StepLogger()
                            start_time = time.time()

                            # Log the starting point so every recorded path begins at x0
                            g_initial = grad(x0)
                            x_min, _ = TestFunctions.get_global_minimum(func_name, dim)
                            f_initial = f(x0)  # evaluated once, reused for the result below
                            step_logger.log_iteration(x0, f_initial, g_initial, global_minimum=x_min)

                            # The closure is only used inside the optimize() call
                            # below, so it always sees this iteration's logger.
                            def callback(x):
                                g = grad(x)
                                step_logger.log_iteration(x, f(x), g, global_minimum=x_min)

                            result = optimizer.optimize(f, grad, x0, func_name, callback=callback)
                            runtime = time.time() - start_time

                            opt_result = OptimizationResult(
                                x_final=result['x'],
                                f_final=result['fun'],
                                success=result['success'],
                                iterations=result['nit'],
                                runtime=runtime,
                                path=step_logger.path,
                                f_path=step_logger.f_path,
                                grad_norm_path=step_logger.grad_norm_path,
                                grad_cosine_sim_path=step_logger.grad_cosine_sim_path,
                                grad_angle_path=step_logger.grad_angle_path,
                                timestamps=step_logger.timestamps,
                                method=name,
                                dimension=dim,
                                function_name=func_name,
                                x_initial=x0,
                                f_initial=f_initial,
                                grad_initial=g_initial,
                                grad_final=grad(result['x']),
                                step_sizes=step_logger.step_sizes,
                                improvements=step_logger.improvements,
                                best_so_far=step_logger.best_so_far,
                                relative_improvements=step_logger.relative_improvements,
                                distance_to_minimum_path=step_logger.distance_to_minimum_path
                            )

                            first_order_results[name] = opt_result
                            dimension_results.append(opt_result)
                            function_results.append(opt_result)
                            final_results.append(opt_result)
                            experiment_logger.log_run(opt_result, exp_idx)

                            print(f"  Runtime: {runtime:.3f} seconds")
                            print(f"  Iterations: {result['nit']}")
                            print(f"  Final value: {result['fun']:.6f}")
                            print(f"  Success: {result['success']}")
                            print("Done")
                            print("-" * 20)
                            print()

                        except Exception as e:
                            # Best effort: one failing optimizer must not stop the campaign
                            print(f"Failed: {str(e)}")

                    for name, result in first_order_results.items():
                        method_dir = os.path.join(first_order_dir, name)
                        self._generate_plots({name: result}, method_dir, f, exp_idx, dim)

                    # ---- Second-order methods ----
                    print("\nSecond-order methods:")
                    print("-" * 50)
                    for method in self.second_order_methods:
                        print(f"Running {method}...")
                        result = run_optimization(f, grad, hess, x0, method, func_name)
                        if result:
                            second_order_results[method] = result
                            dimension_results.append(result)
                            function_results.append(result)
                            final_results.append(result)
                            experiment_logger.log_run(result, exp_idx)
                            print(f"  Runtime: {result.runtime:.3f} seconds")
                            print(f"  Iterations: {result.iterations}")
                            print(f"  Final value: {result.f_final:.6f}")
                            print("Done")
                        else:
                            print("Failed")
                        print("-" * 20)
                        print()

                    for name, result in second_order_results.items():
                        method_dir = os.path.join(second_order_dir, name)
                        self._generate_plots({name: result}, method_dir, f, exp_idx, dim)

                    # ---- Combined plots for this starting point ----
                    all_results_this_batch = {**first_order_results, **second_order_results}
                    if all_results_this_batch:
                        try:
                            Visualizer.plot_convergence(
                                all_results_this_batch,
                                results_dir,
                                f'convergence_{func_name}_exp_{exp_idx}'
                            )
                            Visualizer.plot_all_metrics(
                                all_results_this_batch,
                                results_dir,
                                f'{func_name}_exp_{exp_idx}'
                            )
                        except Exception as e:
                            print(f"Warning: Could not create plots for experiment {exp_idx}: {str(e)}")

                # ---- Per-dimension summary (only when at least one run succeeded) ----
                if dimension_results:
                    # Group once by method; the plotter receives the raw runs.
                    # (An averaged-result table used to be built here as well,
                    # but nothing ever read it, so it has been removed.)
                    dimension_results_by_method = {}
                    for result in dimension_results:
                        dimension_results_by_method.setdefault(result.method, []).append(result)

                    try:
                        Visualizer.plot_all_metrics(dimension_results_by_method, results_dir, func_name)
                    except Exception as e:
                        print(f"Warning: Could not create summary plots: {str(e)}")
                    finally:
                        plt.close('all')  # Final cleanup

                    experiment_logger.create_dimension_summary(func_name, dim, dimension_results)

                    results_df = pd.DataFrame([self._format_result(r, "dimension", row_idx)
                                               for row_idx, r in enumerate(dimension_results)])
                    results_df.to_csv(
                        os.path.join(results_dir, f'dimension_results_{timestamp}.csv'),
                        index=False
                    )

                    # The list was handed to the logger above; emptying it keeps
                    # the previous behaviour in case the logger holds on to it.
                    dimension_results.clear()

            # ---- Per-function CSV ----
            function_dir = os.path.join(experiment_dir, func_name)
            # The folder is normally created per dimension; make sure it exists
            # even when no dimension was configured.
            os.makedirs(function_dir, exist_ok=True)
            results_df = pd.DataFrame([self._format_result(r, "function", row_idx)
                                       for row_idx, r in enumerate(function_results)])
            results_df.to_csv(
                os.path.join(function_dir, f'function_results_{timestamp}.csv'),
                index=False
            )

        # ---- Campaign-wide CSV and statistics ----
        os.makedirs(experiment_dir, exist_ok=True)
        results_df = pd.DataFrame([self._format_result(r, "final", row_idx)
                                   for row_idx, r in enumerate(final_results)])
        results_df.to_csv(os.path.join(experiment_dir, "all_results.csv"), index=False)
        generate_statistics(results_df, experiment_dir)

    def _generate_plots(self, results: Dict[str, OptimizationResult],
                      base_dir: str, f: Callable, exp_num: int, dim: int):
        """Generate trajectory and convergence plots"""
        if not results:
            return

        results_dir = os.path.join(base_dir, "results")
        trajectory_dir = os.path.join(base_dir, "trajectories")

        os.makedirs(results_dir, exist_ok=True)
        os.makedirs(trajectory_dir, exist_ok=True)

        # Get function name from first result
        func_name = next(iter(results.values())).function_name

        # Convert results to the expected format (Dict[str, List[OptimizationResult]])
        results_list = {}
        for method, result in results.items():
            if result is not None:
                # Create a new list with the single result
                results_list[method] = [result]

        # Generate convergence plots if we have results to plot
        if results_list:
            try:
                Visualizer.plot_convergence(results_list, results_dir, func_name)
            except Exception as e:
                print(f"Warning: Could not create convergence plot: {str(e)}")

            try:
                Visualizer.plot_step_metrics(results_list, results_dir, func_name, exp_num)
            except Exception as e:
                print(f"Warning: Could not create step metrics plot: {str(e)}")

        # Generate 2D trajectories if applicable
        if dim == 2:
            for method, result in results.items():
                if result is not None:
                    try:
                        Visualizer.plot_2d_trajectory(f, result, trajectory_dir, exp_num)
                    except Exception as e:
                        print(f"Warning: Could not create trajectory plot for {method}: {str(e)}")

    def _format_result(self, result: OptimizationResult, opt_type: str, exp_num: int) -> dict:
        """Format OptimizationResult for DataFrame with error handling"""
        try:
            return {
                'function': result.function_name,
                'dimension': result.dimension,
                'experiment': exp_num,
                'method': result.method,
                'optimizer_type': opt_type,
                'start_distance': np.linalg.norm(result.x_initial),
                'final_value': result.f_final,
                'iterations': result.iterations,
                'runtime': result.runtime,
                'success': False if any(np.isinf(x) for x in result.f_path) else result.success,
                'distance_to_minimum': result.final_distance_to_minimum,  # Changed from distance_to_minimum
                'initial_distance_to_minimum': result.initial_distance_to_minimum,  # Add this
                'f_error': result.f_error,
                'initial_gradient_norm': np.linalg.norm(result.grad_initial),
                'final_gradient_norm': np.linalg.norm(result.grad_final),
                'computation_error': False,
                # Add new metrics
                'mean_best_so_far': np.mean(result.best_so_far) if result.best_so_far else None,
                'std_best_so_far': np.std(result.best_so_far) if result.best_so_far else None,
                'mean_distance_to_minimum': np.mean(result.distance_to_minimum_path) if result.distance_to_minimum_path is not None else None,
                'std_distance_to_minimum': np.std(result.distance_to_minimum_path) if result.distance_to_minimum_path is not None else None,
                'mean_improvement': np.mean(result.improvements) if result.improvements else None,
                'std_improvement': np.std(result.improvements) if result.improvements else None,
                'mean_relative_improvement': np.mean(result.relative_improvements) if result.relative_improvements else None,
                'std_relative_improvement': np.std(result.relative_improvements) if result.relative_improvements else None,
                'mean_step_size': np.mean(result.step_sizes) if result.step_sizes else None,
                'std_step_size': np.std(result.step_sizes) if result.step_sizes else None,
                'mean_gradient_norm': np.mean(result.grad_norm_path) if result.grad_norm_path else None,
                'std_gradient_norm': np.std(result.grad_norm_path) if result.grad_norm_path else None,
                'mean_cosine_similarity': np.mean(result.grad_cosine_sim_path[1:]) if len(result.grad_cosine_sim_path) > 1 else None,
                'std_cosine_similarity': np.std(result.grad_cosine_sim_path[1:]) if len(result.grad_cosine_sim_path) > 1 else None,
                'mean_gradient_angle': np.mean(result.grad_angle_path[1:]) if len(result.grad_angle_path) > 1 else None,
                'std_gradient_angle': np.std(result.grad_angle_path[1:]) if len(result.grad_angle_path) > 1 else None
            }
        except (OverflowError, ValueError, RuntimeError) as e:
            # Return a failed result entry
            return {
                'function': result.function_name,
                'dimension': result.dimension,
                'experiment': exp_num,
                'method': result.method,
                'optimizer_type': opt_type,
                'success': False,
                'computation_error': True,
                'error_message': str(e)
            }

def generate_statistics(results_df: pd.DataFrame, save_dir: str):
    """Aggregate per-run results into summary statistics and save them as CSV.

    Rows are grouped by ``(method, function, dimension)``. Every metric column
    listed below that is present in ``results_df`` is aggregated, the table is
    rounded to four decimals and written to
    ``<save_dir>/summary_statistics.csv``. No plots are produced.

    Metric columns are converted to floats on a copy before aggregating, so a
    column that holds only ``None`` (object dtype) turns into NaN instead of
    making the whole aggregation fail. The caller's DataFrame is not modified.

    Args:
        results_df: One row per optimization run, e.g. the rows built by
            ``ExperimentManager._format_result``.
        save_dir: Directory that receives the CSV; created if missing.

    Returns:
        The summary ``DataFrame``, or ``None`` when ``results_df`` is empty or
        the aggregation fails (the error is printed).
    """
    if len(results_df) == 0:
        return None

    print("Available columns in DataFrame:", results_df.columns.tolist())

    # Aggregations per metric column
    metrics = {
        'final_value': ['mean', 'std', 'min', 'max'],
        'runtime': ['mean', 'std'],
        'iterations': ['mean', 'std'],
        'success': 'mean',
        'distance_to_minimum': ['mean', 'std'],           # Final distance, as written by _format_result
        'final_distance_to_minimum': ['mean', 'std'],     # Used only if a column of that name exists
        'initial_distance_to_minimum': ['mean', 'std'],   # Initial distance
        'mean_step_size': ['mean', 'std'],
        'mean_improvement': ['mean', 'std'],
        'mean_relative_improvement': ['mean', 'std'],
        'mean_gradient_norm': ['mean', 'std'],
        'mean_cosine_similarity': ['mean', 'std'],
        'mean_gradient_angle': ['mean', 'std']
    }

    # Only include metrics that exist in the DataFrame
    available_metrics = {
        col: aggs for col, aggs in metrics.items() if col in results_df.columns
    }

    try:
        # Work on a copy and force numeric dtypes: all-None columns arrive as
        # object dtype, which groupby aggregation rejects.
        numeric_df = results_df.copy()
        for col in available_metrics:
            numeric_df[col] = pd.to_numeric(numeric_df[col], errors='coerce').astype(float)

        summary_stats = numeric_df.groupby(['method', 'function', 'dimension']).agg(available_metrics)
        summary_stats = summary_stats.round(4)

        os.makedirs(save_dir, exist_ok=True)
        summary_stats.to_csv(os.path.join(save_dir, 'summary_statistics.csv'))

        return summary_stats

    except Exception as e:
        # Statistics are a by-product; report the problem but do not abort the run
        print(f"Error generating statistics: {str(e)}")
        print("Available columns:", results_df.columns.tolist())
        return None

# Hyperparameter Experiments

In [None]:
class HyperparameterVisualizer:
    """Specialized visualizer for hyperparameter tuning experiments.

    All plotters are static methods that save a PNG into ``save_dir`` (created
    when missing) and close every open matplotlib figure afterwards, even when
    plotting raises. Plotters return without drawing when given no results.
    """

    # Defined as a plain function so it can be applied as a decorator inside
    # the class body on every Python version (staticmethod objects only became
    # callable in Python 3.10). It is re-exposed as a static method at the end
    # of the class, so ``HyperparameterVisualizer.plot_with_cleanup`` still works.
    def plot_with_cleanup(plot_func):
        """Decorator to ensure proper figure cleanup"""
        from functools import wraps

        @wraps(plot_func)  # keep the plotter's name and docstring
        def wrapper(*args, **kwargs):
            try:
                return plot_func(*args, **kwargs)
            finally:
                plt.close('all')
        return wrapper

    @staticmethod
    @plot_with_cleanup
    def plot_learning_curves(config_results: List[OptimizationResult], save_dir: str,
                           function_name: str, params: dict):
        """Plot the mean +/- std learning curve of one hyperparameter configuration.

        Args:
            config_results: The runs of this configuration; each must expose
                ``f_path``. Paths of different length are padded with NaN.
            save_dir: Output directory.
            function_name: Used in the plot title.
            params: The configuration; shown in the title and encoded in the
                file name as ``key_value`` pairs joined by underscores.
        """
        if not config_results:
            return

        f_paths = [r.f_path for r in config_results]
        max_len = max(len(path) for path in f_paths)

        # One row per run; NaN marks iterations a shorter run never reached
        values = np.full((len(f_paths), max_len), np.nan)
        for i, path in enumerate(f_paths):
            values[i, :len(path)] = path

        mean_values = np.nanmean(values, axis=0)
        std_values = np.nanstd(values, axis=0)

        plt.figure(figsize=(15, 10))
        plt.plot(mean_values, label='Mean value')
        plt.fill_between(range(len(mean_values)),
                        mean_values - std_values,
                        mean_values + std_values,
                        alpha=0.2)

        plt.title(f'Learning Curves for {function_name}\n{params}')
        plt.xlabel('Iteration')
        plt.ylabel('Function Value')
        plt.yscale('log')
        plt.grid(True)
        plt.legend()

        # Create clean parameter string for filename
        param_str = '_'.join(f"{k}_{v}" for k, v in params.items())
        os.makedirs(save_dir, exist_ok=True)
        plt.savefig(os.path.join(save_dir, f'learning_curves_{param_str}.png'))
        plt.close()

    @staticmethod
    @plot_with_cleanup
    def plot_hyperparameter_heatmap(results: List[dict], save_dir: str, optimizer: str,
                                  function_name: str, dimension: int):
        """Create heatmap visualizations for hyperparameter interactions.

        One heatmap is saved per parameter pair. Each cell shows the mean final
        value of the configurations sharing that pair of values.

        Args:
            results: One dict per configuration with the keys ``'params'``
                (parameter name -> value) and ``'results'`` (runs exposing
                ``f_final``).
            save_dir: Output directory.
            optimizer: ``"Adam"`` selects the Adam pairs; anything else is
                treated as SGD+Momentum.
            function_name: Used in title and file name.
            dimension: Used in title and file name.
        """
        if not results:
            return

        if optimizer == "Adam":
            param_pairs = [
                ('lr', 'beta1'),
                ('lr', 'beta2'),
                ('beta1', 'beta2')
            ]
        else:  # SGD+Momentum
            param_pairs = [
                ('lr', 'momentum'),
                ('lr', 'noise_scale'),
                ('momentum', 'noise_scale')
            ]

        # The per-configuration score does not depend on the pair being plotted
        mean_finals = [np.mean([r.f_final for r in result['results']]) for result in results]

        os.makedirs(save_dir, exist_ok=True)
        for param1, param2 in param_pairs:
            plt.figure(figsize=(12, 10))

            df = pd.DataFrame({
                param1: [result['params'][param1] for result in results],
                param2: [result['params'][param2] for result in results],
                'value': mean_finals,
            })
            # Configurations sharing both values are averaged into one cell
            pivot_table = df.groupby([param1, param2])['value'].mean().unstack()

            sns.heatmap(pivot_table, annot=True, fmt='.2e', cmap='viridis')
            plt.title(f'{optimizer} {param1} vs {param2} on {function_name} ({dimension}D)')

            filename = f'heatmap_{optimizer}_{param1}_{param2}_{function_name}_{dimension}D.png'
            plt.savefig(os.path.join(save_dir, filename))
            plt.close()

    @staticmethod
    @plot_with_cleanup
    def plot_parameter_sensitivity(results: List[dict], save_dir: str, optimizer: str,
                                 function_name: str, dimension: int):
        """Plot sensitivity analysis for each hyperparameter.

        Draws one scatter panel per hyperparameter (value vs. mean final
        function value, log-scaled y axis). The parameter names are taken from
        the first configuration.

        Args:
            results: Same structure as for ``plot_hyperparameter_heatmap``.
            save_dir: Output directory.
            optimizer: Used in title and file name.
            function_name: Used in title and file name.
            dimension: Used in title and file name.
        """
        if not results:
            return

        params = list(results[0]['params'].keys())
        n_params = len(params)
        if n_params == 0:
            return

        # Same y values in every panel, so compute them once
        final_values = [np.mean([r.f_final for r in result['results']]) for result in results]

        # squeeze=False keeps ``axes`` two-dimensional even for a single panel
        fig, axes = plt.subplots(1, n_params, figsize=(6*n_params, 5), squeeze=False)

        for ax, param in zip(axes[0], params):
            param_values = [result['params'][param] for result in results]
            ax.scatter(param_values, final_values)
            ax.set_xlabel(param)
            ax.set_ylabel('Final Value')
            ax.set_yscale('log')
            ax.grid(True)

        plt.suptitle(f'{optimizer} Parameter Sensitivity on {function_name} ({dimension}D)')
        plt.tight_layout()

        os.makedirs(save_dir, exist_ok=True)
        filename = f'sensitivity_{optimizer}_{function_name}_{dimension}D.png'
        plt.savefig(os.path.join(save_dir, filename))
        plt.close()

    @staticmethod
    @plot_with_cleanup
    def plot_convergence_distribution(results: List[dict], save_dir: str, optimizer: str,
                                    function_name: str, dimension: int):
        """Plot median and inter-quartile range of the best configuration's runs.

        The best configuration is the one with the lowest mean final value.
        Paths of different length are padded with NaN and the statistics ignore
        the padding.

        Args:
            results: Same structure as for ``plot_hyperparameter_heatmap``;
                the runs must also expose ``f_path``.
            save_dir: Output directory.
            optimizer: Used in title and file name.
            function_name: Used in title and file name.
            dimension: Used in title and file name.
        """
        if not results:
            return

        best_config = min(results, key=lambda x: np.mean([r.f_final for r in x['results']]))

        raw_paths = [r.f_path for r in best_config['results']]
        if not raw_paths:
            return
        max_len = max(len(path) for path in raw_paths)

        # One row per run; NaN marks iterations a shorter run never reached
        values = np.full((len(raw_paths), max_len), np.nan)
        for i, path in enumerate(raw_paths):
            values[i, :len(path)] = path

        median = np.nanmedian(values, axis=0)
        q25 = np.nanpercentile(values, 25, axis=0)
        q75 = np.nanpercentile(values, 75, axis=0)

        x = np.arange(len(median))

        plt.figure(figsize=(15, 10))
        plt.plot(x, median, label='Median', linewidth=2)
        plt.fill_between(x, q25, q75, alpha=0.2, label='IQR')

        plt.title(f'{optimizer} Convergence Distribution on {function_name} ({dimension}D)\n'
                f'Best configuration: {best_config["params"]}')
        plt.xlabel('Iteration')
        plt.ylabel('Function Value')
        plt.yscale('log')
        plt.grid(True)
        plt.legend()

        os.makedirs(save_dir, exist_ok=True)

        filename = f'convergence_dist_{optimizer}_{function_name}_{dimension}D.png'
        plt.savefig(os.path.join(save_dir, filename))
        plt.close()

    @staticmethod
    def create_summary_plots(adam_results: List[dict], sgd_results: List[dict],
                           save_dir: str, function_name: str, dimension: int):
        """Create comprehensive summary plots for all experiments.

        Writes heatmaps, sensitivity plots and convergence distributions for
        Adam and SGD+Momentum into ``<save_dir>/summary_plots``.

        Args:
            adam_results: Configurations tried for Adam.
            sgd_results: Configurations tried for SGD+Momentum.
            save_dir: Parent directory of the ``summary_plots`` folder.
            function_name: Forwarded to the plotters.
            dimension: Forwarded to the plotters.
        """
        plots_dir = os.path.join(save_dir, 'summary_plots')
        os.makedirs(plots_dir, exist_ok=True)

        per_optimizer = (
            (adam_results, "Adam"),
            (sgd_results, "SGD+Momentum"),
        )
        plotters = (
            HyperparameterVisualizer.plot_hyperparameter_heatmap,
            HyperparameterVisualizer.plot_parameter_sensitivity,
            HyperparameterVisualizer.plot_convergence_distribution,
        )
        # Plot type by plot type, Adam before SGD+Momentum
        for plotter in plotters:
            for optimizer_results, label in per_optimizer:
                plotter(optimizer_results, plots_dir, label, function_name, dimension)

    # Public access to the decorator stays a static method
    plot_with_cleanup = staticmethod(plot_with_cleanup)

In [None]:
class HyperparameterExperimentManager:
    """Manages hyperparameter tuning experiments with function-specific constraints.

    For every test function and dimension the manager draws random starting
    points, runs a budgeted set of Adam and SGD+Momentum configurations from
    each of them, and writes CSV summaries and plots to disk.
    """
    def __init__(self,
                 test_functions: Dict[str, Tuple[Callable, Callable, Callable]],
                 dimensions: List[int],
                 n_experiments: int = 10,
                 distance_constraints: Dict[str, Dict[str, float]] = None,
                 hyperparameter_budget: int = 100):
        """Store the experiment settings and build the hyperparameter grids.

        Args:
            test_functions: Maps a function name to a 3-tuple. Only the first
                two entries (objective, gradient) are used by this class.
            dimensions: Problem dimensions to run.
            n_experiments: Number of starting points per function/dimension.
            distance_constraints: Optional per-function ``{'min': .., 'max': ..}``
                bounds on the starting distance from the origin. Functions
                without an entry use ``{'min': 2, 'max': 5}``.
            hyperparameter_budget: Maximum number of configurations tested
                per optimizer.
        """
        self.test_functions = test_functions
        self.dimensions = dimensions
        self.n_experiments = n_experiments
        self.distance_constraints = distance_constraints or {}
        self.hyperparameter_budget = hyperparameter_budget

        # Define domain constraints for each function
        self.domain_constraints = {
            'ackley': (-32.768, 32.768),
            'rastrigin': (-5.12, 5.12),
            'schwefel': (-500, 500),
            'sphere': (-5.12, 5.12),
            'sum_squares': (-10, 10),
            'rosenbrock': (-2.048, 2.048)
        }

        # Define hyperparameter grids
        self.adam_grid = {
            'learning_rate': np.logspace(-4, -1, 10),  # 10 points between 1e-4 and 1e-1
            'beta1': [0.9, 0.95, 0.99],
            'beta2': [0.999, 0.9995, 0.9999],
            'epsilon': [1e-8, 1e-7, 1e-6]
        }

        self.sgd_momentum_grid = {
            'learning_rate': np.logspace(-4, -1, 10),  # 10 points between 1e-4 and 1e-1
            'momentum': [0.8, 0.9, 0.95, 0.99],
            'noise_scale': [0.001, 0.01, 0.1]
        }

    def generate_starting_points(self, dimension: int, function_name: str) -> np.ndarray:
        """Generate random starting points with function-specific constraints.

        Each point has a uniformly random direction and a distance from the
        origin drawn uniformly from the function's ``[min, max]`` constraint.
        A point with a coordinate outside the function's domain box is shrunk
        towards the origin until it fits, so its final distance may end up
        below the requested minimum.

        Returns:
            Array of shape ``(n_experiments, dimension)``.
        """
        # Get function-specific distance constraints
        constraints = self.distance_constraints.get(function_name, {'min': 2, 'max': 5})
        min_dist = constraints['min']
        max_dist = constraints['max']

        # Largest coordinate magnitude that is inside the box on both sides.
        # (Identical to abs(domain_min) for the symmetric domains above;
        # assumes the box contains the origin.)
        domain_min, domain_max = self.domain_constraints.get(function_name, (-5, 5))
        coord_limit = min(abs(domain_min), abs(domain_max))

        starting_points = []
        for _ in range(self.n_experiments):
            # Generate point with specified distance from origin
            direction = np.random.randn(dimension)
            direction = direction / np.linalg.norm(direction)
            distance = np.random.uniform(min_dist, max_dist)
            point = direction * distance

            # Scale point to fit within domain if necessary
            max_abs_coord = np.max(np.abs(point))
            if max_abs_coord > coord_limit:
                point = point * (coord_limit / max_abs_coord)

            starting_points.append(point)

        # Explicit reshape keeps the result 2-D even when n_experiments == 0
        return np.array(starting_points).reshape(len(starting_points), dimension)

    def _generate_hyperparameter_combinations(self, optimizer: str) -> List[Dict]:
        """Generate hyperparameter combinations within budget.

        The full Cartesian grid is returned when it fits in
        ``hyperparameter_budget``. Otherwise ``hyperparameter_budget``
        *distinct* grid points are sampled at random, so no part of the
        budget is spent on a duplicated configuration.

        Args:
            optimizer: ``"Adam"`` selects the Adam grid; any other value
                selects the SGD+Momentum grid.

        Returns:
            List of parameter dicts keyed ``lr`` plus the optimizer-specific
            names (``beta1``/``beta2``/``epsilon`` or ``momentum``/``noise_scale``).
        """
        from itertools import product  # local import keeps the block self-contained

        # (key in the emitted dict, key in the grid), outermost loop first
        if optimizer == "Adam":
            grid = self.adam_grid
            axes = (('lr', 'learning_rate'), ('beta1', 'beta1'),
                    ('beta2', 'beta2'), ('epsilon', 'epsilon'))
        else:  # SGD+Momentum
            grid = self.sgd_momentum_grid
            axes = (('lr', 'learning_rate'), ('momentum', 'momentum'),
                    ('noise_scale', 'noise_scale'))

        out_keys = [out_key for out_key, _ in axes]
        all_combinations = [
            dict(zip(out_keys, values))
            for values in product(*(grid[grid_key] for _, grid_key in axes))
        ]

        budget = max(int(self.hyperparameter_budget), 0)
        if len(all_combinations) <= budget:
            # If within budget, use all combinations
            return all_combinations

        # Random sampling within budget, without replacement
        chosen = np.random.choice(len(all_combinations), size=budget, replace=False)
        return [all_combinations[i] for i in chosen]

    def run_hyperparameter_tuning(self, base_dir: str = "hyperparameter_tuning_results"):
        """Run hyperparameter tuning experiments.

        Results go to ``<base_dir>/tuning_<timestamp>/<function>/<dim>D``.
        Both optimizers are evaluated from the same starting points.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        experiment_dir = os.path.join(base_dir, f"tuning_{timestamp}")

        for func_name, (f, grad, _) in self.test_functions.items():
            print(f"\nTesting {func_name} function:")

            for dim in self.dimensions:
                print(f"\nDimension: {dim}")

                # Generate starting points with function-specific constraints
                starting_points = self.generate_starting_points(dim, func_name)

                # Create directory structure
                func_dir = os.path.join(experiment_dir, func_name, f"{dim}D")
                adam_dir = os.path.join(func_dir, "adam")
                sgd_momentum_dir = os.path.join(func_dir, "sgd_momentum")

                for optimizer_dir in (adam_dir, sgd_momentum_dir):
                    os.makedirs(os.path.join(optimizer_dir, "results"), exist_ok=True)
                    os.makedirs(os.path.join(optimizer_dir, "plots"), exist_ok=True)

                # Test Adam configurations
                adam_results = self._test_optimizer_configs(
                    f, grad, starting_points, func_name, "Adam", adam_dir)

                # Test SGD+Momentum configurations
                sgd_results = self._test_optimizer_configs(
                    f, grad, starting_points, func_name, "SGD+Momentum", sgd_momentum_dir)

                # Save and plot results
                self._save_tuning_results(adam_results, sgd_results, func_dir, dim, func_name)

    def _test_optimizer_configs(self, f, grad, starting_points, func_name, optimizer_name, save_dir):
        """Test different optimizer configurations.

        Returns:
            One ``{'params': ..., 'results': [...]}`` dict per configuration
            that produced at least one successful run. ``save_dir`` is
            accepted for interface compatibility and is not used here.
        """
        results = []
        hyperparameter_combinations = self._generate_hyperparameter_combinations(optimizer_name)

        total_configs = len(hyperparameter_combinations)
        for config_num, params in enumerate(hyperparameter_combinations, 1):
            print(f"\nTesting {optimizer_name} configuration {config_num}/{total_configs}")
            print(f"Parameters: {params}")

            if optimizer_name == "Adam":
                optimizer = Adam(
                    learning_rate=params['lr'],
                    beta1=params['beta1'],
                    beta2=params['beta2'],
                    epsilon=params['epsilon']
                )
            else:  # SGD+Momentum
                optimizer = SGDMomentum(
                    learning_rate=params['lr'],
                    momentum=params['momentum'],
                    noise_scale=params['noise_scale']
                )

            # NOTE(review): the same optimizer instance is reused for every
            # starting point -- confirm optimize() resets any internal state.
            config_results = []
            for x0 in starting_points:
                result = self._run_single_optimization(optimizer, f, grad, x0, func_name)
                if result is not None:
                    config_results.append(result)

            if config_results:
                results.append({
                    'params': params,
                    'results': config_results
                })

        return results

    def _run_single_optimization(self, optimizer, f, grad, x0, func_name):
        """Run a single optimization trial.

        Every iterate reported through the optimizer callback is logged with
        a ``StepLogger``. The optimizer's result must expose the keys ``x``,
        ``fun``, ``success`` and ``nit``.

        Returns:
            An ``OptimizationResult``, or ``None`` when the trial raised
            (the error is printed so one bad trial does not abort the sweep).
        """
        try:
            step_logger = StepLogger()
            start_time = time.time()

            # Get global minimum for this function
            x_min, _ = TestFunctions.get_global_minimum(func_name, len(x0))

            # Log initial point (objective/gradient evaluated once and reused)
            f_initial = f(x0)
            g_initial = grad(x0)
            step_logger.log_iteration(x0, f_initial, g_initial, global_minimum=x_min)

            def callback(x):
                step_logger.log_iteration(x, f(x), grad(x), global_minimum=x_min)

            result = optimizer.optimize(f, grad, x0, func_name, callback=callback)
            runtime = time.time() - start_time

            return OptimizationResult(
                x_final=result['x'],
                f_final=result['fun'],
                success=result['success'],
                iterations=result['nit'],
                runtime=runtime,
                path=step_logger.path,
                f_path=step_logger.f_path,
                grad_norm_path=step_logger.grad_norm_path,
                grad_cosine_sim_path=step_logger.grad_cosine_sim_path,
                grad_angle_path=step_logger.grad_angle_path,
                timestamps=step_logger.timestamps,
                method=optimizer.__class__.__name__,
                dimension=len(x0),
                function_name=func_name,
                x_initial=x0,
                f_initial=f_initial,
                grad_initial=g_initial,
                grad_final=grad(result['x']),
                step_sizes=step_logger.step_sizes,
                improvements=step_logger.improvements,
                best_so_far=step_logger.best_so_far,
                relative_improvements=step_logger.relative_improvements,
                distance_to_minimum_path=step_logger.distance_to_minimum_path
            )

        except Exception as e:
            print(f"Optimization failed: {str(e)}")
            return None

    def _save_tuning_results(self, adam_results, sgd_results, save_dir, dimension, func_name):
        """Save and visualize hyperparameter tuning results.

        Writes one ``<optimizer>_tuning_results.csv`` per optimizer that has
        results into ``<save_dir>/tuning_results`` and then creates the
        summary plots in the same directory.
        """
        results_dir = os.path.join(save_dir, "tuning_results")
        os.makedirs(results_dir, exist_ok=True)

        # Process and save results
        for results, name in [
            (adam_results, "Adam"),
            (sgd_results, "SGD+Momentum")
        ]:
            if results:
                summary_df = self._process_optimizer_results(results, name)
                summary_df.to_csv(
                    os.path.join(results_dir, f"{name.lower()}_tuning_results.csv"),
                    index=False
                )

        # Create visualization plots
        HyperparameterVisualizer.create_summary_plots(
            adam_results, sgd_results, results_dir, func_name, dimension
        )

    def _process_optimizer_results(self, optimizer_results, optimizer_name):
        """Process results for a single optimizer.

        Returns:
            DataFrame with one row per configuration, in the same order as
            ``optimizer_results``: the parameters plus mean/std of the final
            value, mean iterations, success rate and mean/std of the final
            distance to the minimum. The distance statistics ignore runs
            whose distance is ``None`` and are NaN when no run has one.
        """
        records = []

        for config in optimizer_results:
            params = config['params']
            results = config['results']

            final_values = [r.f_final for r in results]
            # The distance is None when it could not be computed for a run
            distances = [r.final_distance_to_minimum for r in results
                         if r.final_distance_to_minimum is not None]

            # Calculate statistics across all runs with these parameters
            record = {
                'optimizer': optimizer_name,
                **params,
                'mean_final_value': np.mean(final_values),
                'std_final_value': np.std(final_values),
                'mean_iterations': np.mean([r.iterations for r in results]),
                'success_rate': np.mean([1 if r.success else 0 for r in results]),
                'mean_distance_to_minimum': np.mean(distances) if distances else np.nan,
                'std_distance_to_minimum': np.std(distances) if distances else np.nan
            }
            records.append(record)

        return pd.DataFrame(records)

    def _plot_hyperparameter_effects(self, adam_results, sgd_results, save_dir, dimension, func_name):
        """Create visualization plots for hyperparameter effects"""
        plots_dir = os.path.join(save_dir, "tuning_plots")
        os.makedirs(plots_dir, exist_ok=True)

        # Plot Adam hyperparameter effects
        self._plot_adam_hyperparameters(adam_results, plots_dir, dimension, func_name)

        # Plot SGD+Momentum hyperparameter effects
        self._plot_sgd_hyperparameters(sgd_results, plots_dir, dimension, func_name)

        # Plot comparison between best configurations
        self._plot_optimizer_comparison(adam_results, sgd_results, plots_dir, dimension, func_name)

    @staticmethod
    def _draw_parameter_boxplots(df, axes, panels):
        """Draw one 'mean final value vs. parameter' boxplot per (column, label) panel."""
        for ax, (column, label) in zip(axes, panels):
            sns.boxplot(x=column, y='mean_final_value', data=df, ax=ax)
            ax.set_title(f'Effect of {label}')
            ax.set_xlabel(label)
            ax.set_ylabel('Final Value')

    @staticmethod
    def _stack_f_paths(results):
        """Stack the runs' ``f_path`` histories into a 2-D array.

        Runs may stop after different numbers of iterations, so every
        history is truncated to the shortest one before stacking.
        """
        paths = [np.asarray(r.f_path, dtype=float) for r in results]
        shortest = min(len(p) for p in paths)
        return np.vstack([p[:shortest] for p in paths])

    def _plot_adam_hyperparameters(self, results, save_dir, dimension, func_name):
        """Plot effects of Adam hyperparameters.

        Saves ``adam_hyperparameters_<func>_<dim>D.png`` into ``save_dir``.
        Nothing is plotted when ``results`` is empty.
        """
        # Process results for plotting
        df = self._process_optimizer_results(results, "Adam")
        if df.empty:
            print("No Adam results to plot")
            return

        # Create subplots for each hyperparameter
        fig, axes = plt.subplots(2, 2, figsize=(20, 15))
        self._draw_parameter_boxplots(df, axes.flat, [
            ('lr', 'Learning Rate'),
            ('beta1', 'Beta1'),
            ('beta2', 'Beta2'),
            ('epsilon', 'Epsilon'),
        ])

        fig.suptitle(f'Adam Hyperparameter Effects on {func_name} ({dimension}D)')
        fig.tight_layout()
        fig.savefig(os.path.join(save_dir, f'adam_hyperparameters_{func_name}_{dimension}D.png'))
        # Close this figure explicitly so no figure is left open
        plt.close(fig)

    def _plot_sgd_hyperparameters(self, results, save_dir, dimension, func_name):
        """Plot effects of SGD+Momentum hyperparameters.

        Saves ``sgd_momentum_hyperparameters_<func>_<dim>D.png`` into
        ``save_dir``. Nothing is plotted when ``results`` is empty.
        """
        # Process results for plotting
        df = self._process_optimizer_results(results, "SGD+Momentum")
        if df.empty:
            print("No SGD+Momentum results to plot")
            return

        # Create subplots for each hyperparameter
        fig, axes = plt.subplots(1, 3, figsize=(20, 6))
        self._draw_parameter_boxplots(df, axes, [
            ('lr', 'Learning Rate'),
            ('momentum', 'Momentum'),
            ('noise_scale', 'Noise Scale'),
        ])

        fig.suptitle(f'SGD+Momentum Hyperparameter Effects on {func_name} ({dimension}D)')
        fig.tight_layout()
        fig.savefig(os.path.join(save_dir, f'sgd_momentum_hyperparameters_{func_name}_{dimension}D.png'))
        plt.close(fig)

    def _plot_optimizer_comparison(self, adam_results, sgd_results, save_dir, dimension, func_name):
        """Plot comparison between best configurations of both optimizers.

        The best configuration of each optimizer is the one with the lowest
        mean final value. Saves ``optimizer_comparison_<func>_<dim>D.png``
        and ``best_configurations_<func>_<dim>D.csv`` into ``save_dir``.
        Nothing is produced when either optimizer has no results.
        """
        if not adam_results or not sgd_results:
            print("Skipping optimizer comparison: results missing for at least one optimizer")
            return

        # Process results
        adam_df = self._process_optimizer_results(adam_results, "Adam")
        sgd_df = self._process_optimizer_results(sgd_results, "SGD+Momentum")

        # Get best configurations based on mean final value. Row i of each
        # summary frame describes entry i of the corresponding results list,
        # so the row label doubles as the list index (no float matching).
        best_adam_idx = adam_df['mean_final_value'].idxmin()
        best_sgd_idx = sgd_df['mean_final_value'].idxmin()
        best_adam = adam_df.loc[best_adam_idx]
        best_sgd = sgd_df.loc[best_sgd_idx]
        best_adam_results = adam_results[best_adam_idx]['results']
        best_sgd_results = sgd_results[best_sgd_idx]['results']

        # Create comparison plot
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

        # Plot mean convergence paths over the iterations both optimizers share
        adam_paths = self._stack_f_paths(best_adam_results)
        sgd_paths = self._stack_f_paths(best_sgd_results)
        iterations = np.arange(min(adam_paths.shape[1], sgd_paths.shape[1]))

        ax1.plot(iterations, np.mean(adam_paths[:, :len(iterations)], axis=0),
                label=f'Adam (lr={best_adam["lr"]:.4f})')
        ax1.plot(iterations, np.mean(sgd_paths[:, :len(iterations)], axis=0),
                label=f'SGD+M (lr={best_sgd["lr"]:.4f}, m={best_sgd["momentum"]:.2f})')
        ax1.set_yscale('log')
        ax1.set_xlabel('Iteration')
        ax1.set_ylabel('Function Value (log scale)')
        ax1.set_title('Convergence Comparison (Best Configurations)')
        ax1.legend()
        ax1.grid(True)

        # Plot final value distributions. Wrapping each list in a Series lets
        # the two optimizers have different numbers of successful runs.
        data = pd.DataFrame({
            'Adam': pd.Series([r.f_final for r in best_adam_results]),
            'SGD+Momentum': pd.Series([r.f_final for r in best_sgd_results])
        })
        sns.boxplot(data=data, ax=ax2)
        ax2.set_ylabel('Final Function Value')
        ax2.set_title('Final Value Distribution')
        ax2.grid(True)

        fig.suptitle(f'Best Configuration Comparison on {func_name} ({dimension}D)')
        fig.tight_layout()
        fig.savefig(os.path.join(save_dir, f'optimizer_comparison_{func_name}_{dimension}D.png'))
        plt.close(fig)

        # Save best configurations
        best_configs = pd.DataFrame({
            'Optimizer': ['Adam', 'SGD+Momentum'],
            'Best Configuration': [
                f'lr={best_adam["lr"]:.4f}, b1={best_adam["beta1"]:.3f}, b2={best_adam["beta2"]:.4f}, eps={best_adam["epsilon"]:.1e}',
                f'lr={best_sgd["lr"]:.4f}, momentum={best_sgd["momentum"]:.3f}, noise={best_sgd["noise_scale"]:.3f}'
            ],
            'Mean Final Value': [best_adam['mean_final_value'], best_sgd['mean_final_value']],
            'Success Rate': [best_adam['success_rate'], best_sgd['success_rate']],
            'Mean Iterations': [best_adam['mean_iterations'], best_sgd['mean_iterations']]
        })

        best_configs.to_csv(os.path.join(save_dir, f'best_configurations_{func_name}_{dimension}D.csv'),
                          index=False)

# GitHub and main() function

In [None]:
import os
import getpass
from github import Github
import git
import json
from pathlib import Path
import time
import random

class AuthManager:
    """Collects, verifies and optionally caches GitHub credentials.

    Credentials are a dict with the keys ``username``, ``token`` and ``repo``
    (``owner/name``). They are cached as JSON in ``~/.optimization_config``.
    """

    def __init__(self):
        self.config_file = Path.home() / '.optimization_config'
        self.credentials = self.load_credentials()

    def load_credentials(self):
        """Return the cached credentials dict, or ``None`` if unavailable.

        ``None`` is returned when the file is missing or unreadable, is not
        valid JSON, or does not hold a dict with ``token`` and ``repo`` keys.
        """
        try:
            with open(self.config_file, 'r') as f:
                stored = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file, or malformed JSON (JSONDecodeError is
            # a ValueError): behave as if nothing was cached.
            return None

        if not isinstance(stored, dict) or 'token' not in stored or 'repo' not in stored:
            return None
        return stored

    def save_credentials(self, credentials):
        """Write ``credentials`` as JSON, readable by the owner only.

        The file contains a personal access token, so it is created with
        mode 0600 and an already existing file is tightened to 0600 before
        anything is written to it.
        """
        fd = os.open(self.config_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            # The creation mode above is ignored when the file already exists
            os.chmod(self.config_file, 0o600)
            json.dump(credentials, f)

    def authenticate(self):
        """Interactively obtain verified GitHub credentials.

        Offers the cached credentials first (if any); otherwise prompts for
        username, token and repository and verifies them against the GitHub
        API. Prompts again after every failed verification.

        Returns:
            The credentials dict.
        """
        # A loop rather than recursion, so repeated failures cannot exhaust
        # the call stack.
        while True:
            if self.credentials:
                print("Found existing credentials. Would you like to use them? (y/n)")
                if input().lower() == 'y':
                    return self.credentials

            print("\n=== GitHub Authentication ===")
            print("Please provide your GitHub credentials to continue.")
            print("Note: Your personal access token needs repo and workflow permissions.")
            print("\nDon't have a token? Create one at: https://github.com/settings/tokens")

            github_username = input("\nGitHub Username: ")
            github_token = getpass.getpass("Personal Access Token: ")
            repo_name = input("Repository Name (format: username/repo): ").strip()

            # Verify credentials
            try:
                owner, _, name = repo_name.partition('/')
                if not owner or not name or '/' in name:
                    raise ValueError(
                        f"Repository must be given as username/repo, got '{repo_name}'")

                g = Github(github_token)
                # Look the repository up by its full name so repositories
                # owned by an organisation or another user work as well.
                g.get_repo(repo_name)
            except Exception as e:
                print(f"\nAuthentication failed: {str(e)}")
                print("Please try again.")
                continue

            credentials = {
                'username': github_username,
                'token': github_token,
                'repo': repo_name
            }

            print("\nAuthentication successful!")

            # Ask to save credentials
            print("Would you like to save these credentials for future use? (y/n)")
            if input().lower() == 'y':
                self.save_credentials(credentials)
                print("Credentials saved!")

            return credentials

class GitManager:
    """Publishes experiment results to GitHub on a per-run branch.

    NOTE(review): the access token is embedded in the remote URL, so it is
    written to the repository's git configuration in clear text.
    """

    def __init__(self, credentials):
        """Derive the authenticated remote URL and a random branch name.

        Args:
            credentials: Dict with at least ``token`` and ``repo``
                (``owner/name``).
        """
        self.credentials = credentials
        self.token = credentials['token']
        self.repo_url = f"https://x-access-token:{self.token}@github.com/{credentials['repo']}.git"
        self.instance_id = f"optimization_{random.randint(1000, 9999)}"
        self.repo = None

    def _redact(self, message):
        """Return ``str(message)`` with the access token masked out."""
        text = str(message)
        return text.replace(self.token, '***') if self.token else text

    def setup_repo(self, local_path):
        """Initialize or clone the repository and create a new branch.

        Opens ``local_path`` as a git repository (initialising it and adding
        the ``origin`` remote when it is not one yet), pulls ``main`` and
        checks out a new branch named ``self.instance_id`` based on
        ``origin/main``.

        Raises:
            Exception: any failure is printed (token masked) and re-raised.
        """
        try:
            print(f"Creating directory: {local_path}")
            Path(local_path).mkdir(parents=True, exist_ok=True)

            # NOTE(review): TLS certificate verification is disabled here and
            # via http.sslVerify below while a token is being sent -- confirm
            # this is really required. The environment is also set on a fresh
            # git.Git() object rather than on self.repo.git, so it presumably
            # does not reach the commands issued below; verify.
            with git.Git().custom_environment(GIT_SSL_NO_VERIFY='true'):
                try:
                    print("Checking if directory is a git repository...")
                    self.repo = git.Repo(local_path)
                    print("Existing repository found")

                    # Update remote URL with credentials
                    origin = self.repo.remote('origin')
                    origin.set_url(self.repo_url)
                    print("Remote URL updated")

                except git.exc.InvalidGitRepositoryError:
                    print("Initializing new git repository...")
                    self.repo = git.Repo.init(local_path)
                    print("Repository initialized")

                    print("Adding remote origin...")
                    origin = self.repo.create_remote('origin', self.repo_url)
                    print("Remote added")

                    # Configure git credentials; always release the writer so
                    # the config file lock is dropped even on failure
                    config_writer = self.repo.config_writer()
                    try:
                        config_writer.set_value("http", "sslVerify", "false")
                    finally:
                        config_writer.release()

                    print("Fetching from remote...")
                    origin.fetch()
                    print("Fetch completed")

                # Pull main branch to get latest changes
                print("Pulling latest changes from main...")
                origin.pull('main')
                print("Pull completed")

                # Create and checkout new branch
                print(f"Creating and checking out new branch: {self.instance_id}...")
                new_branch = self.repo.create_head(self.instance_id, origin.refs.main)
                new_branch.checkout()
                print(f"Successfully switched to branch: {self.instance_id}")

        except Exception as e:
            print(f"Error setting up repository: {self._redact(e)}")
            raise

    def push_results(self, local_path, max_retries=5, retry_delay=(1, 5)):
        """Commit all files and push the run branch, retrying failed pushes.

        The files are staged and committed exactly once; only the push is
        retried. A push counts as failed when git raises, when no push
        information is returned, or when any returned entry carries the
        ``ERROR`` flag. A pull request is opened after a successful push.

        Args:
            local_path: Unused; kept for interface compatibility.
            max_retries: Maximum number of push attempts.
            retry_delay: ``(low, high)`` bounds in seconds for the random
                wait between two attempts.
        """
        # Configure git environment for this operation
        env = {
            'GIT_SSL_NO_VERIFY': 'true',
            'GIT_TERMINAL_PROMPT': '0',
            'GIT_USERNAME': 'x-access-token',
            'GIT_PASSWORD': self.token
        }

        with self.repo.git.custom_environment(**env):
            try:
                print("Adding new files...")
                self.repo.index.add('*')
                print("Files added")

                print("Committing changes...")
                self.repo.index.commit(f"Results update from {self.instance_id}")
                print("Changes committed")
            except Exception as e:
                print(f"Unexpected error: {self._redact(e)}")
                print(f"Error type: {type(e)}")
                return

            for attempt in range(1, max_retries + 1):
                print(f"\nAttempting to push results (attempt {attempt}/{max_retries})...")
                try:
                    print("Pushing to remote...")
                    push_info = self.repo.remotes.origin.push(self.instance_id)
                except git.exc.GitCommandError as e:
                    print(f"Git error: {self._redact(e)}")
                except Exception as e:
                    print(f"Unexpected error: {self._redact(e)}")
                    print(f"Error type: {type(e)}")
                    return
                else:
                    # push() reports rejected refs through flags instead of
                    # raising, and returns nothing when it failed completely
                    rejected = [info for info in push_info if info.flags & info.ERROR]
                    if len(push_info) > 0 and not rejected:
                        print("Push completed")

                        print("Creating pull request...")
                        self.create_pull_request()

                        print(f"\nResults successfully pushed to branch: {self.instance_id}")
                        print("Pull request created for review.")
                        return

                    for info in rejected:
                        print(f"Push rejected: {self._redact(str(info.summary).strip())}")
                    if not rejected:
                        print("Push failed: remote returned no push information")

                if attempt < max_retries:
                    wait_time = random.uniform(*retry_delay)
                    print(f"Waiting {wait_time:.2f} seconds before retry...")
                    time.sleep(wait_time)

        print(f"\nFailed to push results after {max_retries} attempts.")

    def create_pull_request(self):
        """Create a pull request using GitHub API.

        Opens a pull request from the run branch into ``main``. Failures are
        printed (token masked) and not raised.
        """
        try:
            g = Github(self.token)
            repo = g.get_repo(self.credentials['repo'])

            pr = repo.create_pull(
                title=f"Results update from {self.instance_id}",
                body="Automated results update from optimization experiment",
                head=self.instance_id,
                base="main"
            )
            print(f"Created PR: {pr.html_url}")
        except Exception as e:
            print(f"Failed to create PR: {self._redact(e)}")

def setup_experiment():
    """Print the setup banner and authenticate against GitHub.

    Returns the credentials produced by ``AuthManager.authenticate()``, or
    ``None`` (after printing a failure notice) when that result is falsy.
    """
    print("\n=== Optimization Experiment Setup ===")

    # Authenticate
    credentials = AuthManager().authenticate()
    if credentials:
        return credentials

    print("Authentication failed. Cannot continue.")
    return None


In [None]:
def main():
    """Run the hyperparameter tuning experiments and push the results.

    Steps, in order:
      1. Authenticate through ``setup_experiment``; stop if it yields nothing.
      2. Create a ``GitManager`` on a ``hyperparameter_<4-digit number>``
         branch and set up the local repository; stop if that raises.
      3. Build the experiment configuration and the table of test functions.
      4. Run ``HyperparameterExperimentManager.run_hyperparameter_tuning``
         with the local repository directory as ``base_dir``.
      5. Push the results with ``git_manager.push_results``.
    """
    creds = setup_experiment()
    if not creds:
        return

    # Each run gets its own branch name with a random four-digit suffix.
    git_manager = GitManager(creds)
    git_manager.instance_id = f"hyperparameter_{random.randint(1000, 9999)}"

    # Directory that holds the local clone and the experiment output.
    results_dir = "hyperparameter_tuning_results"
    try:
        git_manager.setup_repo(results_dir)
    except Exception as err:
        print(f"Failed to setup repository: {err}")
        return

    # Configuration parameters - modify these as needed.
    # NOTE: 'rng_domain' is only echoed in the banner below; this function
    # does not pass it on to the experiment manager.
    config = {
        'dimensions': [2, 5, 10],
        'n_experiments': 20,
        'rng_domain': {
            'min_dist': 2,
            'max_dist': 10,
        },
    }

    # Map each function name to its (function, gradient, hessian) triple,
    # looked up on TestFunctions by naming convention.
    function_names = (
        'ackley',
        'rastrigin',
        'schwefel',
        'sphere',
        'sum_squares',
        'rosenbrock',
    )
    test_functions = {
        fn_name: (
            getattr(TestFunctions, fn_name),
            getattr(TestFunctions, f"{fn_name}_gradient"),
            getattr(TestFunctions, f"{fn_name}_hessian"),
        )
        for fn_name in function_names
    }

    experiment = HyperparameterExperimentManager(
        test_functions=test_functions,
        dimensions=config['dimensions'],
        n_experiments=config['n_experiments']
    )

    # Report the settings before the run starts.
    print("\nStarting hyperparameter tuning experiments...")
    print(f"Dimensions to test: {config['dimensions']}")
    print(f"Number of experiments per configuration: {config['n_experiments']}")
    print(f"Initial condition domain: [{config['rng_domain']['min_dist']}, {config['rng_domain']['max_dist']}]")
    print("\nRunning experiments...")

    experiment.run_hyperparameter_tuning(base_dir=results_dir)

    # Publish everything written under results_dir.
    print("\nPushing results to GitHub...")
    git_manager.push_results(results_dir)

# Entry point: run the experiment only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Streaming output truncated to the last 5000 lines.
Parameters: {'lr': 0.1, 'beta1': 0.99, 'beta2': 0.9995, 'epsilon': 1e-06}

Testing Adam configuration 43/100
Parameters: {'lr': 0.01, 'beta1': 0.9, 'beta2': 0.9999, 'epsilon': 1e-08}

Testing Adam configuration 44/100
Parameters: {'lr': 0.00046415888336127773, 'beta1': 0.95, 'beta2': 0.999, 'epsilon': 1e-06}

Testing Adam configuration 45/100
Parameters: {'lr': 0.01, 'beta1': 0.9, 'beta2': 0.9999, 'epsilon': 1e-08}

Testing Adam configuration 46/100
Parameters: {'lr': 0.0001, 'beta1': 0.9, 'beta2': 0.9995, 'epsilon': 1e-08}

Testing Adam configuration 47/100
Parameters: {'lr': 0.046415888336127774, 'beta1': 0.95, 'beta2': 0.9995, 'epsilon': 1e-07}

Testing Adam configuration 48/100
Parameters: {'lr': 0.021544346900318822, 'beta1': 0.99, 'beta2': 0.9999, 'epsilon': 1e-08}

Testing Adam configuration 49/100
Parameters: {'lr': 0.004641588833612777, 'beta1': 0.99, 'beta2': 0.9995, 'epsilon': 1e-08}

Testing Adam configuration

  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)
  cosine_sim = np.dot(grad_current, grad_previous) / (np.linalg.norm(grad_current) * np.linalg.norm(grad_previous))
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  grad[-1] = 200 * (x[-1] - x[-2]**2)
  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  grad[-1] = 200 * (x[-1] - x[-2]**2)



Testing SGD+Momentum configuration 2/100
Parameters: {'lr': 0.004641588833612777, 'momentum': 0.99, 'noise_scale': 0.01}


  return (f_previous - f_current) / abs(f_previous) * 100
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  v = self.momentum * v - self.learning_rate * g



Testing SGD+Momentum configuration 3/100
Parameters: {'lr': 0.00021544346900318845, 'momentum': 0.99, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 4/100
Parameters: {'lr': 0.01, 'momentum': 0.95, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 5/100
Parameters: {'lr': 0.001, 'momentum': 0.8, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 6/100
Parameters: {'lr': 0.00046415888336127773, 'momentum': 0.8, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 7/100
Parameters: {'lr': 0.1, 'momentum': 0.99, 'noise_scale': 0.01}


  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)



Testing SGD+Momentum configuration 8/100
Parameters: {'lr': 0.001, 'momentum': 0.9, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 9/100
Parameters: {'lr': 0.1, 'momentum': 0.95, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 10/100
Parameters: {'lr': 0.004641588833612777, 'momentum': 0.8, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 11/100
Parameters: {'lr': 0.00021544346900318845, 'momentum': 0.95, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 12/100
Parameters: {'lr': 0.021544346900318822, 'momentum': 0.95, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 13/100
Parameters: {'lr': 0.01, 'momentum': 0.95, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 14/100
Parameters: {'lr': 0.002154434690031882, 'momentum': 0.8, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 15/100
Parameters: {'lr': 0.004641588833612777, 'momentum': 0.8, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 16/100
Parameters: {'lr': 0.001,

  grad[-1] = 200 * (x[-1] - x[-2]**2)



Testing SGD+Momentum configuration 30/100
Parameters: {'lr': 0.004641588833612777, 'momentum': 0.9, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 31/100
Parameters: {'lr': 0.00021544346900318845, 'momentum': 0.99, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 32/100
Parameters: {'lr': 0.046415888336127774, 'momentum': 0.8, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 33/100
Parameters: {'lr': 0.00046415888336127773, 'momentum': 0.99, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 34/100
Parameters: {'lr': 0.01, 'momentum': 0.95, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 35/100
Parameters: {'lr': 0.046415888336127774, 'momentum': 0.8, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 36/100
Parameters: {'lr': 0.001, 'momentum': 0.9, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 37/100
Parameters: {'lr': 0.021544346900318822, 'momentum': 0.95, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 38/100
Paramete

  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)
  cosine_sim = np.dot(grad_current, grad_previous) / (np.linalg.norm(grad_current) * np.linalg.norm(grad_previous))
  grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
  grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
  grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  grad[-1] = 200 * (x[-1] - x[-2]**2)
  return (f_previous - f_current) / abs(f_previous) * 100
  v = self.momentum * v - self.learning_rate * g
  grad[-1] = 200 * (x[-1] - x[-2]**2)



Testing SGD+Momentum configuration 2/100
Parameters: {'lr': 0.00046415888336127773, 'momentum': 0.99, 'noise_scale': 0.1}


  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)



Testing SGD+Momentum configuration 3/100
Parameters: {'lr': 0.002154434690031882, 'momentum': 0.99, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 4/100
Parameters: {'lr': 0.002154434690031882, 'momentum': 0.99, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 5/100
Parameters: {'lr': 0.01, 'momentum': 0.99, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 6/100
Parameters: {'lr': 0.001, 'momentum': 0.95, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 7/100
Parameters: {'lr': 0.00021544346900318845, 'momentum': 0.95, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 8/100
Parameters: {'lr': 0.001, 'momentum': 0.99, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 9/100
Parameters: {'lr': 0.046415888336127774, 'momentum': 0.95, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 10/100
Parameters: {'lr': 0.1, 'momentum': 0.9, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 11/100
Parameters: {'lr': 0.046415888336127774, '

  median = np.nanmedian(values, axis=0)
  diff_b_a = subtract(b, a)
  return function_base._ureduce(a,



Dimension: 10

Testing Adam configuration 1/100
Parameters: {'lr': 0.002154434690031882, 'beta1': 0.95, 'beta2': 0.9995, 'epsilon': 1e-07}

Testing Adam configuration 2/100
Parameters: {'lr': 0.0001, 'beta1': 0.9, 'beta2': 0.9995, 'epsilon': 1e-08}

Testing Adam configuration 3/100
Parameters: {'lr': 0.00021544346900318845, 'beta1': 0.9, 'beta2': 0.9999, 'epsilon': 1e-06}

Testing Adam configuration 4/100
Parameters: {'lr': 0.046415888336127774, 'beta1': 0.99, 'beta2': 0.999, 'epsilon': 1e-08}

Testing Adam configuration 5/100
Parameters: {'lr': 0.0001, 'beta1': 0.9, 'beta2': 0.9995, 'epsilon': 1e-07}

Testing Adam configuration 6/100
Parameters: {'lr': 0.1, 'beta1': 0.95, 'beta2': 0.9999, 'epsilon': 1e-08}

Testing Adam configuration 7/100
Parameters: {'lr': 0.00046415888336127773, 'beta1': 0.99, 'beta2': 0.999, 'epsilon': 1e-08}

Testing Adam configuration 8/100
Parameters: {'lr': 0.046415888336127774, 'beta1': 0.9, 'beta2': 0.9999, 'epsilon': 1e-08}

Testing Adam configuration 9/10

  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)
  cosine_sim = np.dot(grad_current, grad_previous) / (np.linalg.norm(grad_current) * np.linalg.norm(grad_previous))
  return (f_previous - f_current) / abs(f_previous) * 100
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
  grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
  grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  grad[-1] = 200 * (x[-1] - x[-2]**2)
  grad[-1] = 200 * (x[-1] - x[-2]**2)
  grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
  v = self.momentum * v - self.learning_rate * g
  return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)



Testing SGD+Momentum configuration 2/100
Parameters: {'lr': 0.004641588833612777, 'momentum': 0.99, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 3/100
Parameters: {'lr': 0.00021544346900318845, 'momentum': 0.99, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 4/100
Parameters: {'lr': 0.004641588833612777, 'momentum': 0.95, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 5/100
Parameters: {'lr': 0.002154434690031882, 'momentum': 0.8, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 6/100
Parameters: {'lr': 0.004641588833612777, 'momentum': 0.8, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 7/100
Parameters: {'lr': 0.002154434690031882, 'momentum': 0.9, 'noise_scale': 0.001}


  grad[-1] = 200 * (x[-1] - x[-2]**2)



Testing SGD+Momentum configuration 8/100
Parameters: {'lr': 0.021544346900318822, 'momentum': 0.9, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 9/100
Parameters: {'lr': 0.1, 'momentum': 0.95, 'noise_scale': 0.001}

Testing SGD+Momentum configuration 10/100
Parameters: {'lr': 0.1, 'momentum': 0.9, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 11/100
Parameters: {'lr': 0.021544346900318822, 'momentum': 0.9, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 12/100
Parameters: {'lr': 0.046415888336127774, 'momentum': 0.95, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 13/100
Parameters: {'lr': 0.046415888336127774, 'momentum': 0.9, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 14/100
Parameters: {'lr': 0.01, 'momentum': 0.8, 'noise_scale': 0.01}

Testing SGD+Momentum configuration 15/100
Parameters: {'lr': 0.00046415888336127773, 'momentum': 0.9, 'noise_scale': 0.1}

Testing SGD+Momentum configuration 16/100
Parameters: {'lr': 0.0001, 'm

  median = np.nanmedian(values, axis=0)
  diff_b_a = subtract(b, a)
  return function_base._ureduce(a,



Pushing results to GitHub...

Attempting to push results (attempt 1/5)...
Adding new files...
Files added
Committing changes...
Changes committed
Pushing to remote...
Push completed
Creating pull request...
Created PR: https://github.com/Ice-Citron/AAH-IA/pull/18

Results successfully pushed to branch: hyperparameter_7204
Pull request created for review.
