# INITIAL TEST

In [22]:
!pip install seaborn
!pip install gitpython PyGithub



In [23]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from dataclasses import dataclass
from typing import Callable, List, Dict, Optional, Tuple
import os
import json
import time
from datetime import datetime
import pandas as pd
import psutil
import seaborn as sns




class OptimizationResult:
    """Enhanced optimization result storage"""
    def __init__(self, **kwargs):
        self.x_final = kwargs.get('x_final')
        self.f_final = kwargs.get('f_final')
        self.success = kwargs.get('success')
        self.iterations = kwargs.get('iterations')
        self.runtime = kwargs.get('runtime')
        self.path = kwargs.get('path', [])
        self.f_path = kwargs.get('f_path', [])
        self.grad_norm_path = kwargs.get('grad_norm_path', [])
        self.timestamps = kwargs.get('timestamps', [])
        self.memory_usage = kwargs.get('memory_usage', [])
        self.flops_per_step = kwargs.get('flops_per_step', [])
        self.method = kwargs.get('method')
        self.dimension = kwargs.get('dimension')
        self.function_name = kwargs.get('function_name')
        self.x_initial = kwargs.get('x_initial')
        self.f_initial = kwargs.get('f_initial')
        self.grad_initial = kwargs.get('grad_initial')
        self.grad_final = kwargs.get('grad_final')

        # Calculate distance from global minimum
        x_min, f_min = TestFunctions.get_global_minimum(self.function_name, self.dimension)
        if x_min is not None and f_min is not None:
            self.distance_to_minimum = np.linalg.norm(self.x_final - x_min)
            self.f_error = abs(self.f_final - f_min)
        else:
            self.distance_to_minimum = None
            self.f_error = None


class FLOPCounter:
    """Tracks floating point operations"""
    def __init__(self):
        self.flops = 0
        self.operation_counts = {
            'add': 0,
            'multiply': 0,
            'divide': 0,
            'sqrt': 0,
            'exp': 0,
            'log': 0,
            'trig': 0
        }

    def add_flops(self, operation: str, count: int = 1):
        self.operation_counts[operation] += count
        # Update total FLOPS based on operation weight
        weights = {
            'add': 1,
            'multiply': 1,
            'divide': 4,
            'sqrt': 8,
            'exp': 10,
            'log': 10,
            'trig': 15
        }
        self.flops += weights[operation] * count

    def get_summary(self) -> dict:
        return {
            'total_flops': self.flops,
            'operations': self.operation_counts
        }

class TestFunctions:
    """Test functions that work with any dimension"""
    @staticmethod
    def get_global_minimum(func_name: str, dimension: int = 2) -> tuple:
        """Get global minimum for a given function and dimension"""
        global_minima = {
            'ackley': (np.zeros(dimension), 0.0),
            'rastrigin': (np.zeros(dimension), 0.0),
            'rosenbrock': (np.ones(dimension), 0.0),
            'sphere': (np.zeros(dimension), 0.0),
            'michalewicz': (None, None),  # Varies with dimension
        }
        return global_minima.get(func_name, (None, None))

    @staticmethod
    def ackley(x: np.ndarray) -> float:
        """Ackley function for n dimensions"""
        n = len(x)
        sum_sq = np.sum(x**2)
        sum_cos = np.sum(np.cos(2 * np.pi * x))
        return (-20 * np.exp(-0.2 * np.sqrt(sum_sq / n))
                - np.exp(sum_cos / n)
                + 20 + np.e)

    @staticmethod
    def ackley_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Ackley function"""
        n = len(x)
        sum_sq = np.sum(x**2)
        sum_cos = np.sum(np.cos(2 * np.pi * x))

        term1 = (20 * 0.2 / np.sqrt(n * sum_sq)) * np.exp(-0.2 * np.sqrt(sum_sq / n)) * x
        term2 = (2 * np.pi / n) * np.exp(sum_cos / n) * np.sin(2 * np.pi * x)
        return term1 + term2

    @staticmethod
    def ackley_hessian(x: np.ndarray) -> np.ndarray:
        """Numerical approximation of Ackley Hessian"""
        eps = 1e-8
        n = len(x)
        H = np.zeros((n, n))
        grad = TestFunctions.ackley_gradient

        for i in range(n):
            for j in range(n):
                x_ij = x.copy()
                x_ij[i] += eps
                x_ij[j] += eps
                H[i,j] = (grad(x_ij)[i] - grad(x)[i]) / eps

        return (H + H.T) / 2  # Ensure symmetry

    @staticmethod
    def rastrigin(x: np.ndarray) -> float:
        """Rastrigin function for n dimensions"""
        n = len(x)
        return 10 * n + np.sum(x**2 - 10 * np.cos(2 * np.pi * x))

    @staticmethod
    def rastrigin_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Rastrigin function"""
        return 2 * x + 20 * np.pi * np.sin(2 * np.pi * x)

    @staticmethod
    def rastrigin_hessian(x: np.ndarray) -> np.ndarray:
        """Hessian of Rastrigin function"""
        n = len(x)
        return 2 * np.eye(n) + 40 * np.pi**2 * np.diag(np.cos(2 * np.pi * x))

    @staticmethod
    def sphere(x: np.ndarray) -> float:
        """Sphere function for n dimensions"""
        return np.sum(x**2)

    @staticmethod
    def sphere_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Sphere function"""
        return 2 * x

    @staticmethod
    def sphere_hessian(x: np.ndarray) -> np.ndarray:
        """Hessian of Sphere function"""
        n = len(x)
        return 2 * np.eye(n)

    @staticmethod
    def rosenbrock(x: np.ndarray) -> float:
        """Rosenbrock function for n dimensions"""
        return np.sum(100.0 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2)

    @staticmethod
    def rosenbrock_gradient(x: np.ndarray) -> np.ndarray:
        """Gradient of Rosenbrock function"""
        n = len(x)
        grad = np.zeros(n)
        grad[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0])
        grad[-1] = 200 * (x[-1] - x[-2]**2)
        if n > 2:
            grad[1:-1] = 200 * (x[1:-1] - x[:-2]**2) - 400 * x[1:-1] * (x[2:] - x[1:-1]**2) - 2 * (1 - x[1:-1])
        return grad

    @staticmethod
    def rosenbrock_hessian(x: np.ndarray) -> np.ndarray:
        """Numerical approximation of Rosenbrock Hessian"""
        eps = 1e-8
        n = len(x)
        H = np.zeros((n, n))
        grad = TestFunctions.rosenbrock_gradient

        for i in range(n):
            for j in range(n):
                x_ij = x.copy()
                x_ij[i] += eps
                x_ij[j] += eps
                H[i,j] = (grad(x_ij)[i] - grad(x)[i]) / eps

        return (H + H.T) / 2  # Ensure symmetry

class OptimizationLogger:
    """Handles logging of optimization progress"""
    def __init__(self, method: str, function_name: str, dimension: int):
        self.method = method
        self.function_name = function_name
        self.dimension = dimension
        self.reset()

    def reset(self):
        self.path = []
        self.f_path = []
        self.grad_norm_path = []
        self.step_sizes = []
        self.memory_usage = []
        self.timestamps = []
        self.start_time = time.time()

    def log_iteration(self, x: np.ndarray, f: float, grad_norm: float, step_size: float):
        self.path.append(x.copy())
        self.f_path.append(f)
        self.grad_norm_path.append(grad_norm)
        self.step_sizes.append(step_size)
        self.memory_usage.append(self.get_memory_usage())
        self.timestamps.append(time.time() - self.start_time)

    @staticmethod
    def get_memory_usage() -> float:
        """Get current memory usage in MB"""
        import psutil
        process = psutil.Process()
        return process.memory_info().rss / 1024 / 1024

    def save_logs(self, base_dir: str):
        """Save optimization logs to CSV"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log_dir = os.path.join(base_dir, self.function_name, str(self.dimension) + "D", self.method)
        os.makedirs(log_dir, exist_ok=True)

        log_data = {
            'iteration': range(len(self.path)),
            'function_value': self.f_path,
            'gradient_norm': self.grad_norm_path,
            'step_size': self.step_sizes,
            'memory_mb': self.memory_usage,
            'runtime_seconds': self.timestamps
        }

        # Add parameter values
        for i in range(self.dimension):
            log_data[f'x{i+1}'] = [p[i] for p in self.path]

        df = pd.DataFrame(log_data)
        df.to_csv(os.path.join(log_dir, f'optimization_log_{timestamp}.csv'), index=False)

class Visualizer:
    """Enhanced visualization capabilities"""
    @staticmethod
    def plot_optimization_summary(results: Dict[str, OptimizationResult], save_dir: str, function_name: str):
        """Plot summary comparing initial and final states"""
        if not results:
            return

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

        methods = list(results.keys())
        x = np.arange(len(methods))

        # Fix the ticks warning by setting them explicitly
        for ax in [ax1, ax2, ax3, ax4]:
            ax.set_xticks(x)
            ax.set_xticklabels(methods, rotation=45)

        # Function Values Plot
        initial_values = [result.f_initial for result in results.values()]
        final_values = [result.f_final for result in results.values()]
        width = 0.35

        ax1.bar(x - width/2, initial_values, width, label='Initial', color='lightcoral')
        ax1.bar(x + width/2, final_values, width, label='Final', color='lightgreen')
        ax1.set_ylabel('Function Value')
        ax1.set_title('Initial vs Final Function Values')
        ax1.legend()
        ax1.grid(True)

        # Add global minimum line if available
        _, f_min = TestFunctions.get_global_minimum(function_name, results[methods[0]].dimension)
        if f_min is not None:
            ax1.axhline(y=f_min, color='r', linestyle='--', label=f'Global Min ({f_min})')
            ax1.legend()

        # Gradient Norms Plot
        initial_grads = [np.linalg.norm(result.grad_initial) for result in results.values()]
        final_grads = [np.linalg.norm(result.grad_final) for result in results.values()]

        ax2.bar(x - width/2, initial_grads, width, label='Initial', color='lightcoral')
        ax2.bar(x + width/2, final_grads, width, label='Final', color='lightgreen')
        ax2.set_ylabel('Gradient Norm')
        ax2.set_title('Initial vs Final Gradient Norms')
        ax2.legend()
        ax2.grid(True)

        # Runtime Comparison
        runtimes = [result.runtime for result in results.values()]
        ax3.bar(methods, runtimes, color='skyblue')
        ax3.set_ylabel('Runtime (seconds)')
        ax3.set_title('Total Runtime by Method')
        ax3.grid(True)

        # Iterations Comparison
        iterations = [result.iterations for result in results.values()]
        ax4.bar(methods, iterations, color='lightgreen')
        ax4.set_ylabel('Number of Iterations')
        ax4.set_title('Total Iterations by Method')
        ax4.grid(True)

        plt.tight_layout()
        plt.savefig(os.path.join(save_dir, f'optimization_summary_{function_name}.png'), dpi=300, bbox_inches='tight')
        plt.close()

    @staticmethod
    def plot_convergence(results: Dict[str, OptimizationResult], save_dir: str, function_name: str):
        """Plot convergence with enhanced information"""
        if not results:  # Skip if no results
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        plotted_something = False

        # Get global minimum if available
        _, f_min = TestFunctions.get_global_minimum(function_name)
        f_min_text = f"(Global min: {f_min})" if f_min is not None else ""

        # Function value convergence
        for method, result in results.items():
            if result and hasattr(result, 'f_path') and result.f_path:  # Check if result and f_path exist
                ax1.semilogy(result.f_path, label=f"{method}")
                plotted_something = True

        if plotted_something:
            ax1.set_xlabel('Iteration')
            ax1.set_ylabel('Function Value (log scale)')
            ax1.set_title(f'Function Value Convergence {f_min_text}')
            ax1.legend()
            ax1.grid(True)

        plotted_something = False
        # Gradient norm convergence
        for method, result in results.items():
            if result and hasattr(result, 'grad_norm_path') and result.grad_norm_path:
                ax2.semilogy(result.grad_norm_path, label=f"{method}")
                plotted_something = True

        if plotted_something:
            ax2.set_xlabel('Iteration')
            ax2.set_ylabel('Gradient Norm (log scale)')
            ax2.set_title('Gradient Norm Convergence')
            ax2.legend()
            ax2.grid(True)

        plt.tight_layout()
        plt.savefig(os.path.join(save_dir, f'convergence_{function_name}.png'), dpi=300)
        plt.close()

    @staticmethod
    def plot_2d_trajectory(f: Callable, result: OptimizationResult, save_dir: str,
                          experiment_num: int = None, equal_aspect: bool = True):
        """Plot optimization trajectory for 2D problems with two variants - with and without global minimum"""
        if result.dimension != 2:
            return

        # Even more reduced resolution for better performance
        grid_size = 50  # Reduced from 100 to 50

        for show_global_min in [True, False]:  # Create both variants
            plt.figure(figsize=(12, 10))

            # Get path bounds and include (0,0)
            path = np.array(result.path)
            x_min_traj = min(float(path[:, 0].min()), 0)
            x_max_traj = max(float(path[:, 0].max()), 0)
            y_min_traj = min(float(path[:, 1].min()), 0)
            y_max_traj = max(float(path[:, 1].max()), 0)

            # Add margin to bounds
            margin = 0.1
            x_range = x_max_traj - x_min_traj
            y_range = y_max_traj - y_min_traj

            if equal_aspect:
                # Make ranges equal by expanding the smaller one
                max_range = max(x_range, y_range)
                x_center = (x_max_traj + x_min_traj) / 2
                y_center = (y_max_traj + y_min_traj) / 2
                x_min_traj = x_center - max_range/2
                x_max_traj = x_center + max_range/2
                y_min_traj = y_center - max_range/2
                y_max_traj = y_center + max_range/2
                x_range = y_range = max_range

            plot_x_min = x_min_traj - margin * x_range
            plot_x_max = x_max_traj + margin * x_range
            plot_y_min = y_min_traj - margin * y_range
            plot_y_max = y_max_traj + margin * y_range

            # Create contour plot with reduced resolution
            x = np.linspace(plot_x_min, plot_x_max, grid_size)
            y = np.linspace(plot_y_min, plot_y_max, grid_size)
            X, Y = np.meshgrid(x, y)

            # Vectorized computation of Z values
            points = np.column_stack((X.ravel(), Y.ravel()))
            Z = np.array([f(point) for point in points]).reshape(X.shape)

            # Reduced number of contour levels
            global_max = float(Z.max())
            global_levels = np.linspace(0, global_max, 15)  # Further reduced from 20 to 15

            # Plot contours with reduced detail
            contour = plt.contour(X, Y, Z, levels=global_levels, cmap='viridis', alpha=0.7)
            plt.colorbar(contour, label='Function Value')

            # Plot trajectory
            plt.plot(path[:, 0], path[:, 1], 'r.-', label='Optimization Path',
                    linewidth=1, markersize=2, zorder=5)
            plt.plot(path[0, 0], path[0, 1], 'go', label='Start',
                    markersize=8, zorder=6)
            plt.plot(path[-1, 0], path[-1, 1], 'ro', label='End',
                    markersize=8, zorder=6)

            # Only plot global minimum in the first variant
            if show_global_min:
                x_min, f_min = TestFunctions.get_global_minimum(result.function_name)
                if x_min is not None:
                    plt.plot(x_min[0], x_min[1], 'k*', label='Global Minimum',
                            markersize=10, zorder=6)

            plt.xlim(float(plot_x_min), float(plot_x_max))
            plt.ylim(float(plot_y_min), float(plot_y_max))

            if equal_aspect:
                plt.gca().set_aspect('equal')

            plt.grid(True)

            plt.title(f'{result.function_name} - {result.method}\n'
                    f'Final value: {result.f_final:.6f}\n'
                    f'Iterations: {result.iterations}')
            plt.xlabel('x₁')
            plt.ylabel('x₂')
            plt.legend()

            # Include experiment number and variant in filename
            experiment_suffix = f'_exp{experiment_num}' if experiment_num is not None else ''
            variant_suffix = '_with_global_min' if show_global_min else '_path_only'
            aspect_suffix = '_equal_aspect' if equal_aspect else ''
            filename = f'trajectory_{result.function_name}_{result.method}{experiment_suffix}{variant_suffix}{aspect_suffix}.png'

            # Save with reduced DPI
            plt.savefig(os.path.join(save_dir, filename), dpi=150, bbox_inches='tight')
            plt.close()

    @staticmethod
    def plot_computational_metrics(results: Dict[str, OptimizationResult], save_dir: str):
        """Plot computational metrics over time"""
        if not results:  # Skip if no results
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        plotted_memory = False
        plotted_flops = False

        # Memory usage over time
        for method, result in results.items():
            if result and hasattr(result, 'timestamps') and hasattr(result, 'memory_usage'):
                if result.timestamps and result.memory_usage:  # Check if data exists
                    ax1.plot(result.timestamps, result.memory_usage, label=method)
                    plotted_memory = True

        ax1.set_xlabel('Time (seconds)')
        ax1.set_ylabel('Memory Usage (MB)')
        ax1.set_title('Memory Usage Over Time')
        if plotted_memory:  # Only create legend if we plotted something
            ax1.legend()
        ax1.grid(True)

        # FLOPS over time
        for method, result in results.items():
            if result and hasattr(result, 'timestamps') and hasattr(result, 'flops_per_step'):
                if result.timestamps and result.flops_per_step:  # Check if data exists
                    cumulative_flops = np.cumsum(result.flops_per_step)
                    ax2.plot(result.timestamps, cumulative_flops, label=method)
                    plotted_flops = True

        ax2.set_xlabel('Time (seconds)')
        ax2.set_ylabel('Cumulative FLOPS')
        ax2.set_title('Computational Cost Over Time')
        if plotted_flops:  # Only create legend if we plotted something
            ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        plt.savefig(os.path.join(save_dir, 'computational_metrics.png'), dpi=300)
        plt.close()

def get_memory_usage() -> float:
    """Get current memory usage in MB"""
    process = psutil.Process()
    return process.memory_info().rss / 1024 / 1024

def run_optimization(f: Callable,
                    grad: Callable,
                    hess: Callable,
                    x0: np.ndarray,
                    method: str,
                    function_name: str) -> OptimizationResult:
    """Enhanced optimization runner with detailed metrics"""
    start_time = time.time()
    flop_counter = FLOPCounter()

    # Calculate initial metrics
    f_initial = f(x0)
    grad_initial = grad(x0)

    # Storage for metrics
    path = [x0.copy()]  # Start with initial point
    f_path = [f_initial]
    grad_norm_path = [np.linalg.norm(grad_initial)]
    timestamps = [0.0]
    memory_usage = [get_memory_usage()]
    flops_per_step = [0]

    def callback(xk):
        current_time = time.time() - start_time

        # Calculate metrics
        f_val = f(xk)
        grad_val = grad(xk)
        grad_norm = np.linalg.norm(grad_val)

        # Store metrics
        path.append(xk.copy())
        f_path.append(f_val)
        grad_norm_path.append(grad_norm)
        timestamps.append(current_time)
        memory_usage.append(get_memory_usage())
        flops_per_step.append(flop_counter.flops)

    try:
        # Run optimization with method-specific settings
        if method == 'BFGS':
            result = minimize(f, x0, method=method, jac=grad, callback=callback)
        elif method == 'newton-cg':
            result = minimize(f, x0, method=method, jac=grad, hess=hess, callback=callback)
        elif method in ['trust-exact', 'trust-krylov']:
            result = minimize(f, x0, method=method, jac=grad, hess=hess, callback=callback)
        else:
            raise ValueError(f"Unsupported method: {method}")

        # Calculate final gradient
        grad_final = grad(result.x)
        runtime = time.time() - start_time

        """
        # Print optimization results with better formatting
        print(f"\n{method} Results:")
        print("-" * 50)
        print(f"{'Initial function value:':<25} {f_initial:>12.6f}")
        print(f"{'Final function value:':<25} {result.fun:>12.6f}")
        print(f"{'Number of iterations:':<25} {result.nit:>12d}")
        print(f"{'Runtime:':<25} {runtime:>12.4f} seconds")
        print(f"{'Initial gradient norm:':<25} {np.linalg.norm(grad_initial):>12.6f}")
        print(f"{'Final gradient norm:':<25} {np.linalg.norm(grad_final):>12.6f}")
        print("-" * 50)
        print()
        """

        return OptimizationResult(
            x_final=result.x,
            f_final=result.fun,
            success=result.success,
            iterations=result.nit,
            runtime=runtime,
            path=path,
            f_path=f_path,
            grad_norm_path=grad_norm_path,
            timestamps=timestamps,
            memory_usage=memory_usage,
            flops_per_step=flops_per_step,
            method=method,
            dimension=len(x0),
            function_name=function_name,
            x_initial=x0,
            f_initial=f_initial,
            grad_initial=grad_initial,
            grad_final=grad_final
        )

    except Exception as e:
        print(f"Optimization failed: {e}")
        return None

In [24]:
class GradientDescent:
    """Basic gradient descent optimizer"""
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
                max_iter=1000, tol=1e-6, callback=None) -> dict:
        x = x0.copy()
        path = [x.copy()]
        f_path = [f(x)]
        grad_path = [np.linalg.norm(grad(x))]

        for i in range(max_iter):
            g = grad(x)
            x = x - self.learning_rate * g

            path.append(x.copy())
            f_path.append(f(x))
            grad_norm = np.linalg.norm(g)
            grad_path.append(grad_norm)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': i + 1,
            'path': path,
            'f_path': f_path,
            'grad_path': grad_path,
            'grad_final': grad(x)
        }

class SGD:
    """Stochastic Gradient Descent optimizer"""
    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
                max_iter=1000, tol=1e-6, callback=None) -> dict:
        x = x0.copy()
        path = [x.copy()]
        f_path = [f(x)]
        grad_path = [np.linalg.norm(grad(x))]

        for i in range(max_iter):
            g = grad(x)
            # Add small random noise to simulate stochasticity
            noise = np.random.normal(0, 0.01, size=x.shape)
            g = g + noise
            x = x - self.learning_rate * g

            path.append(x.copy())
            f_path.append(f(x))
            grad_norm = np.linalg.norm(g)
            grad_path.append(grad_norm)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': i + 1,
            'path': path,
            'f_path': f_path,
            'grad_path': grad_path,
            'grad_final': grad(x)
        }

class MomentumGD:
    """Gradient Descent with Momentum"""
    def __init__(self, learning_rate=0.01, momentum=0.9):
        self.learning_rate = learning_rate
        self.momentum = momentum

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
                max_iter=1000, tol=1e-6, callback=None) -> dict:
        x = x0.copy()
        v = np.zeros_like(x)
        path = [x.copy()]
        f_path = [f(x)]
        grad_path = [np.linalg.norm(grad(x))]

        for i in range(max_iter):
            g = grad(x)
            v = self.momentum * v - self.learning_rate * g
            x = x + v

            path.append(x.copy())
            f_path.append(f(x))
            grad_norm = np.linalg.norm(g)
            grad_path.append(grad_norm)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': i + 1,
            'path': path,
            'f_path': f_path,
            'grad_path': grad_path,
            'grad_final': grad(x)
        }

class RMSprop:
    """RMSprop optimizer"""
    def __init__(self, learning_rate=0.01, decay_rate=0.9, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.decay_rate = decay_rate
        self.epsilon = epsilon

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
                max_iter=1000, tol=1e-6, callback=None) -> dict:
        x = x0.copy()
        v = np.zeros_like(x)
        path = [x.copy()]
        f_path = [f(x)]
        grad_path = [np.linalg.norm(grad(x))]

        for i in range(max_iter):
            g = grad(x)
            v = self.decay_rate * v + (1 - self.decay_rate) * g**2
            x = x - self.learning_rate * g / (np.sqrt(v) + self.epsilon)

            path.append(x.copy())
            f_path.append(f(x))
            grad_norm = np.linalg.norm(g)
            grad_path.append(grad_norm)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': i + 1,
            'path': path,
            'f_path': f_path,
            'grad_path': grad_path,
            'grad_final': grad(x)
        }

class Adam:
    """Adam optimizer"""
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon

    def optimize(self, f: Callable, grad: Callable, x0: np.ndarray,
                max_iter=1000, tol=1e-6, callback=None) -> dict:
        x = x0.copy()
        m = np.zeros_like(x)
        v = np.zeros_like(x)
        path = [x.copy()]
        f_path = [f(x)]
        grad_path = [np.linalg.norm(grad(x))]

        for i in range(max_iter):
            g = grad(x)
            m = self.beta1 * m + (1 - self.beta1) * g
            v = self.beta2 * v + (1 - self.beta2) * g**2

            # Bias correction
            m_hat = m / (1 - self.beta1**(i + 1))
            v_hat = v / (1 - self.beta2**(i + 1))

            x = x - self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

            path.append(x.copy())
            f_path.append(f(x))
            grad_norm = np.linalg.norm(g)
            grad_path.append(grad_norm)

            if callback:
                callback(x)

            if grad_norm < tol:
                break

        return {
            'x': x,
            'fun': f(x),
            'success': grad_norm < tol,
            'nit': i + 1,
            'path': path,
            'f_path': f_path,
            'grad_path': grad_path,
            'grad_final': grad(x)
        }

In [25]:
class ExperimentManager:
    """Manages multiple optimization experiments"""
    def __init__(self,
                 test_functions: Dict[str, Tuple[Callable, Callable, Callable]],
                 first_order_optimizers: Dict[str, object],
                 second_order_methods: List[str],
                 dimensions: List[int],
                 n_experiments: int = 50,
                 min_dist: float = 100,
                 max_dist: float = 1000):
        self.test_functions = test_functions
        self.first_order_optimizers = first_order_optimizers
        self.second_order_methods = second_order_methods
        self.dimensions = dimensions
        self.n_experiments = n_experiments
        self.min_dist = min_dist
        self.max_dist = max_dist

    def generate_starting_points(self, dimension: int, seed: int = None) -> np.ndarray:
        """Generate random starting points with specified distance from origin"""
        if seed is not None:
            np.random.seed(seed)

        starting_points = []

        for _ in range(self.n_experiments):
            direction = np.random.randn(dimension)
            direction = direction / np.linalg.norm(direction)
            distance = np.random.uniform(self.min_dist, self.max_dist)
            point = direction * distance
            starting_points.append(point)

        return np.array(starting_points)

    def run_experiments(self, base_dir: str = "optimization_results"):
        """Run all experiments with proper directory structure"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        experiment_dir = os.path.join(base_dir, f"experiment_{timestamp}")
        all_results = []

        for func_name, (f, grad, hess) in self.test_functions.items():
            print(f"\nTesting {func_name} function:")

            for dim in self.dimensions:
                print(f"\nDimension: {dim}")

                # Generate starting points for this dimension
                starting_points = self.generate_starting_points(dim)

                # Create directory structure
                func_dir = os.path.join(experiment_dir, func_name, f"{dim}D")
                first_order_dir = os.path.join(func_dir, "first_order")
                second_order_dir = os.path.join(func_dir, "second_order")

                for directory in [first_order_dir, second_order_dir]:
                    os.makedirs(os.path.join(directory, "results"), exist_ok=True)
                    os.makedirs(os.path.join(directory, "trajectories"), exist_ok=True)

                # Run experiments for each starting point
                for i, x0 in enumerate(starting_points):
                    print(f"\nExperiment {i+1}/{self.n_experiments}")

                    # First order methods
                    first_order_results = {}
                    print("\nFirst-order methods:")
                    print("-" * 50)
                    for name, optimizer in self.first_order_optimizers.items():
                        print(f"Running {name}...")
                        try:
                            # Create logger for this optimization run
                            logger = OptimizationLogger(name, func_name, dim)

                            # Create wrapper function for callback
                            def callback(xk):
                                f_val = f(xk)
                                grad_val = grad(xk)
                                grad_norm = np.linalg.norm(grad_val)
                                step_size = 0.0  # We don't track step size for these methods
                                logger.log_iteration(xk, f_val, grad_norm, step_size)

                            start_time = time.time()
                            result = optimizer.optimize(f, grad, x0, callback=callback)
                            runtime = time.time() - start_time

                            print(f"  Runtime: {runtime:.3f} seconds")
                            print(f"  Iterations: {result['nit']}")
                            print(f"  Final value: {result['fun']:.6f}")
                            print(f"  Success: {result['success']}")
                            print(f"  Final gradient norm: {np.linalg.norm(result['grad_final']):.6f}")

                            opt_result = OptimizationResult(
                                x_final=result['x'],
                                f_final=result['fun'],
                                success=result['success'],
                                iterations=result['nit'],
                                runtime=time.time() - logger.start_time,
                                path=result['path'],
                                f_path=result['f_path'],
                                grad_norm_path=result['grad_path'],
                                timestamps=logger.timestamps,
                                memory_usage=logger.memory_usage,
                                method=name,
                                dimension=dim,
                                function_name=func_name,
                                x_initial=x0,
                                f_initial=f(x0),
                                grad_initial=grad(x0),
                                grad_final=result['grad_final']
                            )

                            first_order_results[name] = opt_result
                            all_results.append(self._format_result(opt_result, "first_order", i))
                            print("Done")

                            print("-" * 20)
                            print()
                        except Exception as e:
                            print(f"Failed: {str(e)}")

                    # Second order methods
                    second_order_results = {}
                    print("\nSecond-order methods:")
                    print("-" * 50)
                    for method in self.second_order_methods:
                        print(f"Running {method}...")
                        try:
                            result = run_optimization(f, grad, hess, x0, method, func_name)

                            print(f"  Runtime: {result.runtime:.3f} seconds")
                            print(f"  Iterations: {result.iterations}")
                            print(f"  Final value: {result.f_final:.6f}")
                            print(f"  Success: {result.success}")
                            print(f"  Final gradient norm: {np.linalg.norm(result.grad_final):.6f}")

                            if result is not None:
                                second_order_results[method] = result
                                all_results.append(self._format_result(result, "second_order", i))
                                print("Done")
                            else:
                                print("Failed")

                            print("-" * 20)
                            print()
                        except Exception as e:
                            print(f"Failed: {str(e)}")

                    # Generate plots
                    self._generate_plots(first_order_results, first_order_dir, f, i, dim)
                    self._generate_plots(second_order_results, second_order_dir, f, i, dim)

                # Generate statistical plots for this dimension
                for results, dir_path in [(first_order_results, first_order_dir),
                                        (second_order_results, second_order_dir)]:
                    if results:
                        results_df = pd.DataFrame([r for r in all_results
                                                if r['function'] == func_name and
                                                r['dimension'] == dim and
                                                r['method'] in results.keys()])
                        generate_statistics(results_df, dir_path)

        # Save complete results and generate overall statistics
        results_df = pd.DataFrame(all_results)
        results_df.to_csv(os.path.join(experiment_dir, "all_results.csv"), index=False)
        generate_statistics(results_df, experiment_dir)

    def _generate_plots(self, results: Dict[str, OptimizationResult],
                   base_dir: str, f: Callable, exp_num: int, dim: int):
        """Generate all plots for a set of results"""
        if not results:
            return

        results_dir = os.path.join(base_dir, "results")
        trajectory_dir = os.path.join(base_dir, "trajectories")

        # Only generate plots if we have results
        if results:
            # Get function name from first result
            func_name = next(iter(results.values())).function_name

            # Generate convergence plots
            if any(hasattr(r, 'f_path') and r.f_path for r in results.values()):
                Visualizer.plot_convergence(results, results_dir, f"{func_name}_exp{exp_num}")

            # Generate computational metrics
            if any(hasattr(r, 'timestamps') and r.timestamps for r in results.values()):
                Visualizer.plot_computational_metrics(results, results_dir)

            # Generate optimization summary
            if results:
                Visualizer.plot_optimization_summary(results, results_dir, f"{func_name}_exp{exp_num}")

            # Generate 2D trajectories if applicable
            if dim == 2:
                for method, result in results.items():
                    if result is not None:
                        try:
                            Visualizer.plot_2d_trajectory(f, result, trajectory_dir, exp_num)
                        except Exception as e:
                            print(f"Error plotting trajectory for {method}: {e}")

    def _format_result(self, result: OptimizationResult, opt_type: str, exp_num: int) -> dict:
        """Format OptimizationResult for DataFrame"""
        return {
            'function': result.function_name,
            'dimension': result.dimension,
            'experiment': exp_num,
            'method': result.method,
            'optimizer_type': opt_type,
            'start_distance': np.linalg.norm(result.x_initial),
            'final_value': result.f_final,
            'iterations': result.iterations,
            'runtime': result.runtime,
            'success': result.success,
            'distance_to_minimum': result.distance_to_minimum,
            'f_error': result.f_error,
            'initial_gradient_norm': np.linalg.norm(result.grad_initial),
            'final_gradient_norm': np.linalg.norm(result.grad_final)
        }


import warnings

def generate_statistics(results_df: pd.DataFrame, save_dir: str):
    """Generate comprehensive statistical visualizations for optimization results"""
    # Set style and color palette
    plt.style.use('default')
    colors = ['#FF9999', '#66B2FF', '#99FF99', '#FFCC99', '#FF99CC', '#99CCFF']

    if len(results_df) == 0:
        return

    # 1. Distribution of final values (Violin Plot)
    plt.figure(figsize=(12, 6))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        sns.violinplot(data=results_df, x='method', y='final_value', color=colors[0])
    plt.title('Distribution of Final Values by Method')
    plt.xlabel('Optimization Method')
    plt.ylabel('Final Function Value')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'final_values_distribution.png'), dpi=300, bbox_inches='tight')
    plt.close()

    # 2. Success rates with confidence intervals
    plt.figure(figsize=(10, 6))
    success_data = results_df.groupby('method')['success'].agg(['mean', 'count'])
    success_data['ci'] = 1.96 * np.sqrt(success_data['mean'] * (1 - success_data['mean']) / success_data['count'])

    # Use bar plot instead of success_data['mean'].plot
    plt.bar(range(len(success_data)), success_data['mean'], yerr=success_data['ci'], capsize=5, color=colors[1])
    plt.xticks(range(len(success_data)), success_data.index, rotation=45)
    plt.title('Success Rates by Method with 95% Confidence Intervals')
    plt.ylabel('Success Rate')
    plt.xlabel('Method')
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'success_rates.png'), dpi=300, bbox_inches='tight')
    plt.close()

    # 3. Runtime comparison (Box Plot)
    plt.figure(figsize=(12, 6))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        sns.boxplot(data=results_df, x='method', y='runtime', color=colors[2])
    plt.title('Runtime Distribution by Method')
    plt.xlabel('Method')
    plt.ylabel('Runtime (seconds)')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'runtime_distribution.png'), dpi=300, bbox_inches='tight')
    plt.close()

    # 4. Convergence efficiency (Iterations vs Final Value)
    plt.figure(figsize=(10, 6))
    methods = results_df['method'].unique()
    for i, method in enumerate(methods):
        mask = results_df['method'] == method
        plt.scatter(results_df[mask]['iterations'],
                   results_df[mask]['final_value'],
                   label=method,
                   color=colors[i % len(colors)],
                   alpha=0.7)
    plt.title('Convergence Efficiency')
    plt.xlabel('Number of Iterations')
    plt.ylabel('Final Function Value')
    plt.yscale('log')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'convergence_efficiency.png'), dpi=300, bbox_inches='tight')
    plt.close()

    # 5. Distance to minimum vs Runtime
    if 'distance_to_minimum' in results_df.columns:
        plt.figure(figsize=(10, 6))
        for i, method in enumerate(methods):
            mask = results_df['method'] == method
            plt.scatter(results_df[mask]['runtime'],
                       results_df[mask]['distance_to_minimum'],
                       label=method,
                       color=colors[i % len(colors)],
                       alpha=0.7)
        plt.title('Distance to Minimum vs Runtime')
        plt.xlabel('Runtime (seconds)')
        plt.ylabel('Distance to Global Minimum')
        plt.yscale('log')
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        plt.savefig(os.path.join(save_dir, 'distance_vs_runtime.png'), dpi=300, bbox_inches='tight')
        plt.close()

    # Generate summary statistics table
    summary_stats = results_df.groupby('method').agg({
        'final_value': ['mean', 'std', 'min', 'max'],
        'runtime': ['mean', 'std'],
        'iterations': ['mean', 'std'],
        'success': 'mean',
        'distance_to_minimum': ['mean', 'std'] if 'distance_to_minimum' in results_df.columns else None
    }).round(4)

    # Save summary statistics
    summary_stats.to_csv(os.path.join(save_dir, 'summary_statistics.csv'))

    return summary_stats

# Github and main() function

In [26]:
import os
import getpass
from github import Github
import git
import json
from pathlib import Path
import time
import random

class AuthManager:
    def __init__(self):
        self.config_file = Path.home() / '.optimization_config'
        self.credentials = self.load_credentials()

    def load_credentials(self):
        if self.config_file.exists():
            try:
                with open(self.config_file, 'r') as f:
                    return json.load(f)
            except:
                return None
        return None

    def save_credentials(self, credentials):
        with open(self.config_file, 'w') as f:
            json.dump(credentials, f)

    def authenticate(self):
        if self.credentials:
            print("Found existing credentials. Would you like to use them? (y/n)")
            if input().lower() == 'y':
                return self.credentials

        print("\n=== GitHub Authentication ===")
        print("Please provide your GitHub credentials to continue.")
        print("Note: Your personal access token needs repo and workflow permissions.")
        print("\nDon't have a token? Create one at: https://github.com/settings/tokens")

        github_username = input("\nGitHub Username: ")
        github_token = getpass.getpass("Personal Access Token: ")
        repo_name = input("Repository Name (format: username/repo): ")

        # Verify credentials
        try:
            g = Github(github_token)
            user = g.get_user()
            _ = user.get_repo(repo_name.split('/')[1])

            credentials = {
                'username': github_username,
                'token': github_token,
                'repo': repo_name
            }

            print("\nAuthentication successful!")

            # Ask to save credentials
            print("Would you like to save these credentials for future use? (y/n)")
            if input().lower() == 'y':
                self.save_credentials(credentials)
                print("Credentials saved!")

            return credentials

        except Exception as e:
            print(f"\nAuthentication failed: {str(e)}")
            print("Please try again.")
            return self.authenticate()

class GitManager:
    def __init__(self, credentials):
        self.credentials = credentials
        # Format the URL with the token in a different way
        self.token = credentials['token']
        self.repo_url = f"https://x-access-token:{self.token}@github.com/{credentials['repo']}.git"
        self.instance_id = f"optimization_{random.randint(1000, 9999)}"
        self.repo = None

    def setup_repo(self, local_path):
        """Initialize or clone the repository"""
        try:
            print(f"Creating directory: {local_path}")
            Path(local_path).mkdir(parents=True, exist_ok=True)

            # Configure git globally
            with git.Git().custom_environment(GIT_SSL_NO_VERIFY='true'):
                try:
                    print("Checking if directory is a git repository...")
                    self.repo = git.Repo(local_path)
                    print("Existing repository found")

                    # Update remote URL with credentials
                    origin = self.repo.remote('origin')
                    origin.set_url(self.repo_url)
                    print("Remote URL updated")

                except git.exc.InvalidGitRepositoryError:
                    print("Initializing new git repository...")
                    self.repo = git.Repo.init(local_path)
                    print("Repository initialized")

                    print("Adding remote origin...")
                    origin = self.repo.create_remote('origin', self.repo_url)
                    print("Remote added")

                    # Configure git credentials
                    config_writer = self.repo.config_writer()
                    config_writer.set_value("http", "sslVerify", "false")
                    config_writer.release()

                    print("Fetching from remote...")
                    origin.fetch()
                    print("Fetch completed")

                    print("Setting up main branch...")
                    if 'main' not in self.repo.refs:
                        self.repo.create_head('main', origin.refs.main)
                    self.repo.heads.main.set_tracking_branch(origin.refs.main)
                    self.repo.heads.main.checkout()
                    print("Main branch setup completed")

                    print("Pulling latest changes...")
                    origin.pull('main')
                    print("Pull completed")

        except Exception as e:
            print(f"Error setting up repository: {str(e)}")
            raise

    def push_results(self, local_path):
        """Push results to GitHub with conflict resolution"""
        max_retries = 5
        retry_count = 0

        while retry_count < max_retries:
            try:
                print(f"\nAttempting to push results (attempt {retry_count + 1}/{max_retries})...")

                # Configure git environment for this operation
                env = {
                    'GIT_SSL_NO_VERIFY': 'true',
                    'GIT_TERMINAL_PROMPT': '0',
                    'GIT_USERNAME': 'x-access-token',
                    'GIT_PASSWORD': self.token
                }

                with self.repo.git.custom_environment(**env):
                    print("Creating new branch...")
                    current = self.repo.create_head(self.instance_id)
                    current.checkout()
                    print(f"Created and checked out branch: {self.instance_id}")

                    print("Pulling latest changes from main...")
                    self.repo.remotes.origin.pull('main')
                    print("Pull completed")

                    print("Adding new files...")
                    self.repo.index.add('*')
                    print("Files added")

                    print("Committing changes...")
                    self.repo.index.commit(f"Results update from {self.instance_id}")
                    print("Changes committed")

                    print("Pushing to remote...")
                    push_info = self.repo.remotes.origin.push(self.instance_id)
                    print("Push completed")

                    print("Creating pull request...")
                    self.create_pull_request()

                    print(f"\nResults successfully pushed to branch: {self.instance_id}")
                    print("Pull request created for review.")
                    break

            except git.exc.GitCommandError as e:
                print(f"Git error: {e}")
                retry_count += 1
                wait_time = random.uniform(1, 5)
                print(f"Waiting {wait_time:.2f} seconds before retry...")
                time.sleep(wait_time)

            except Exception as e:
                print(f"Unexpected error: {e}")
                print(f"Error type: {type(e)}")
                break

    def create_pull_request(self):
        """Create a pull request using GitHub API"""
        try:
            g = Github(self.token)
            repo = g.get_repo(self.credentials['repo'])

            pr = repo.create_pull(
                title=f"Results update from {self.instance_id}",
                body="Automated results update from optimization experiment",
                head=self.instance_id,
                base="main"
            )
            print(f"Created PR: {pr.html_url}")
        except Exception as e:
            print(f"Failed to create PR: {e}")

def setup_experiment():
    """Initial setup and authentication"""
    print("\n=== Optimization Experiment Setup ===")

    # Authenticate
    auth_manager = AuthManager()
    credentials = auth_manager.authenticate()

    if not credentials:
        print("Authentication failed. Cannot continue.")
        return None

    return credentials


In [None]:
def main():
    # Get credentials
    credentials = setup_experiment()
    if not credentials:
        return

    # Initialize Git manager
    git_manager = GitManager(credentials)

    # Setup local repository
    local_path = "optimization_results"
    try:
        git_manager.setup_repo(local_path)
    except Exception as e:
        print(f"Failed to setup repository: {e}")
        return

    test_functions = {
        'ackley': (
            TestFunctions.ackley,
            TestFunctions.ackley_gradient,
            TestFunctions.ackley_hessian
        )

    }
    """,
        'rastrigin': (
            TestFunctions.rastrigin,
            TestFunctions.rastrigin_gradient,
            TestFunctions.rastrigin_hessian
        ),
        'sphere': (
            TestFunctions.sphere,
            TestFunctions.sphere_gradient,
            TestFunctions.sphere_hessian
        ),
        'rosenbrock': (
            TestFunctions.rosenbrock,
            TestFunctions.rosenbrock_gradient,
            TestFunctions.rosenbrock_hessian
        )"""

    # First-order optimizers with their configurations
    first_order_optimizers = {
        'gradient_descent': GradientDescent(learning_rate=0.01),
        'sgd': SGD(learning_rate=0.01),
        'momentum': MomentumGD(learning_rate=0.01, momentum=0.9),
        'rmsprop': RMSprop(learning_rate=0.01, decay_rate=0.9),
        'adam': Adam(learning_rate=0.001)
    }

    # Second-order methods
    # second_order_methods = ['BFGS', 'newton-cg', 'trust-exact', 'trust-krylov']
    second_order_methods = ["trust-exact", "trust-krylov"]

    dimensions = [32, 128] # [2, 4, 8, 16, 32, 64, 128]

    # Create experiment manager
    experiment = ExperimentManager(
        test_functions=test_functions,
        first_order_optimizers=first_order_optimizers,
        second_order_methods=second_order_methods,
        dimensions=dimensions,
        n_experiments=1,
        min_dist=5,
        max_dist=64
    )

    # Run experiments
    experiment.run_experiments()

    # Push results to GitHub
    print("\nPushing results to GitHub...")
    git_manager.push_results(local_path)

if __name__ == "__main__":
    main()


=== Optimization Experiment Setup ===
Found existing credentials. Would you like to use them? (y/n)
y
Creating directory: optimization_results
Checking if directory is a git repository...
Existing repository found
Remote URL updated

Testing ackley function:

Dimension: 32

Experiment 1/1

First-order methods:
--------------------------------------------------
Running gradient_descent...
  Runtime: 0.102 seconds
  Iterations: 553
  Final value: 3.186968
  Success: True
  Final gradient norm: 0.000001
Done
--------------------

Running sgd...
  Runtime: 0.190 seconds
  Iterations: 1000
  Final value: 3.186976
  Success: False
  Final gradient norm: 0.007089
Done
--------------------

Running momentum...
  Runtime: 0.056 seconds
  Iterations: 295
  Final value: 3.186968
  Success: True
  Final gradient norm: 0.000001
Done
--------------------

Running rmsprop...
  Runtime: 0.190 seconds
  Iterations: 1000
  Final value: 3.188302
  Success: False
  Final gradient norm: 0.094410
Done
----