# Example Optimiser

In [34]:
import os 
import sys
import json
sys.path.append(os.path.abspath(".."))

from velopix_wrappers.parameter_optimisers import optimiserBase
from velopix_wrappers.velopix_pipeline import TrackFollowingPipeline, GraphDFSPipeline, SearchByTripletTriePipeline
from typing import Any, Dict
import random
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C


## Implement the optimiser child class

In [35]:
from typing import Any, Dict
import random
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

class ExampleOptimiser(optimiserBase):
    def __init__(self, learning_rate, max_iterations=100, target_score=0.3):
        super().__init__()
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.target_score = target_score
        self.best_score = float("inf")
        self.current_iteration = 0

        # To store previous evaluations
        self.X = []  # Parameter sets (input)
        self.Y = []  # Objective function results (output)

        # Gaussian Process initialization
        kernel = C(1.0, (1e-4, 1e1)) * RBF(1.0, (1e-4, 1e1))
        self.gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)

    def init(self) -> Dict[str, Any]:
        """
        Initializes the optimization process by setting an initial parameter map.
        """
        pMap = self._algorithm.get_config()  # Get a copy of the parameter schema

        initial_param_set = {}

        for key, (expected_type, _) in pMap.items():
            if expected_type == float:
                initial_param_set[key] = random.uniform(0, 1)  # Random float between 0 and 1
            elif expected_type == int:
                initial_param_set[key] = random.randint(0, 10)  # Random integer between 0 and 10
            elif expected_type == bool:
                initial_param_set[key] = random.choice([True, False])  # Random boolean
            elif expected_type == list:
                initial_param_set[key] = []  # Assign an empty list (or populate it if needed)

        # Evaluate the initial point
        self.X.append(list(initial_param_set.values()))
        self.Y.append(self.objective_func(initial_param_set))

        print(f"Initial parameter set: {initial_param_set}")

        return initial_param_set

    def next(self) -> Dict[str, Any]:
        """
        Uses Bayesian Optimization to generate the next parameter map by predicting the next best set.
        """
        if len(self.X) > 1:
            # Fit Gaussian Process model
            self.gpr.fit(self.X, self.Y)

        # Generate the next set of parameters using Bayesian Optimization
        next_param_set = self._predict_next()

        self.X.append(list(next_param_set.values()))  # Add the new point to X
        self.Y.append(self.objective_func(next_param_set))  # Evaluate the next point and add to Y

        return next_param_set

    def _predict_next(self) -> Dict[str, Any]:
        """
        Predicts the next parameter set using the Gaussian Process model.
        """
        # For simplicity, we'll randomly sample a set of points to evaluate and pick the one with the best acquisition value.
        pMap = self._algorithm.get_config()
        param_space = []

        for key, (expected_type, _) in pMap.items():
            if expected_type == float:
                param_space.append(np.linspace(0, 1, 10))  # 10 candidates for floats
            elif expected_type == int:
                param_space.append(np.arange(0, 11))  # 10 candidates for integers
            elif expected_type == bool:
                param_space.append([True, False])  # 2 candidates for boolean
            elif expected_type == list:
                param_space.append([[]])  # Simplified for now

        # Now you can use these candidates to query the acquisition function (simplified)
        best_candidate = None
        best_acquisition_value = float('inf')
        
        for candidate in self._generate_candidates(param_space):
            X_candidate = np.array(candidate).reshape(1, -1)
            acquisition_value = self._acquisition_function(X_candidate)
            if acquisition_value < best_acquisition_value:
                best_candidate = candidate
                best_acquisition_value = acquisition_value

        # Convert the best candidate back into the parameter set format
        pMap = self._algorithm.get_config()
        next_param_set = {key: value for key, value in zip(pMap.keys(), best_candidate)}

        print(f"Next parameter set: {next_param_set}")

        return next_param_set

    def _generate_candidates(self, param_space):
        """
        Generates candidates from the parameter space.
        """
        # In this case, we're just randomly sampling from the parameter space for simplicity
        candidates = list(np.array(np.meshgrid(*param_space)).T.reshape(-1, len(param_space)))
        return candidates

    def _acquisition_function(self, X_candidate):
        """
        Acquisition function to guide the optimization.
        For simplicity, using a simple negative expected improvement here.
        """
        mean, std = self.gpr.predict(X_candidate, return_std=True)
        return -mean  # Expected improvement simplification: pick the most uncertain area

    def objective_func(self, param_set: Dict[str, Any]) -> float:
        """
        Converts the results of an experiment into a numeric score.
        In this example, we simulate a loss function that we aim to minimize.
        """
        # Fake evaluation function
        # Use the actual parameters and compute the score from the experiment
        return abs(self.learning_rate - 0.05) + random.uniform(0, 0.01)

    def is_finished(self) -> bool:
        """
        Determines if the optimization process is finished.
        In this case, it stops after `max_iterations` iterations or the target score is reached.
        """
        return self.best_score < self.target_score or self.current_iteration >= self.max_iterations


**Load event data**

In [36]:
events = []
n_files = 100

for i in range(0, n_files):
    if i == 51:
        """
        There's an issue with event 51 -> module_prefix_sum contains value 79 twice resulting in and indexing error when loading the event
        """
        print(f"Skipping problematic file: velo_event_{i}.json")
    else:    
        print(f"Loading file: velo_event_{i}.json")
        event_file = open(os.path.join("../DB/raw", f"velo_event_{i}.json"))
        json_data = json.loads(event_file.read())
        events.append(json_data)
        event_file.close()

Loading file: velo_event_0.json
Loading file: velo_event_1.json
Loading file: velo_event_2.json
Loading file: velo_event_3.json
Loading file: velo_event_4.json
Loading file: velo_event_5.json
Loading file: velo_event_6.json
Loading file: velo_event_7.json
Loading file: velo_event_8.json
Loading file: velo_event_9.json
Loading file: velo_event_10.json
Loading file: velo_event_11.json
Loading file: velo_event_12.json
Loading file: velo_event_13.json
Loading file: velo_event_14.json
Loading file: velo_event_15.json
Loading file: velo_event_16.json
Loading file: velo_event_17.json
Loading file: velo_event_18.json
Loading file: velo_event_19.json
Loading file: velo_event_20.json
Loading file: velo_event_21.json
Loading file: velo_event_22.json
Loading file: velo_event_23.json
Loading file: velo_event_24.json
Loading file: velo_event_25.json
Loading file: velo_event_26.json
Loading file: velo_event_27.json
Loading file: velo_event_28.json
Loading file: velo_event_29.json
Loading file: velo_e

In [37]:
pipeline = TrackFollowingPipeline(events=events, intra_node=False)

In [38]:
Optimiser = ExampleOptimiser(learning_rate=0.05)
optimal_parameters = pipeline.optimise_parameters(Optimiser, max_runs=10) # DO NOT remove max_runs, chances are that this will run forever

Initial parameter set: {'x_slope': 0.2060155271473174, 'y_slope': 0.7105788288748318, 'x_tol': 0.40135974686221565, 'y_tol': 0.6578517963786719, 'scatter': 0.5910565458299496}


Optimising:  10%|█         | 1/10 [00:01<00:14,  1.63s/it]

Next parameter set: {'x_slope': np.float64(0.0), 'y_slope': np.float64(0.0), 'x_tol': np.float64(0.0), 'y_tol': np.float64(0.0), 'scatter': np.float64(0.0)}




Next parameter set: {'x_slope': np.float64(0.0), 'y_slope': np.float64(0.0), 'x_tol': np.float64(0.0), 'y_tol': np.float64(0.0), 'scatter': np.float64(0.0)}


Optimising:  30%|███       | 3/10 [00:18<00:43,  6.27s/it]


KeyboardInterrupt: 

In [None]:
print(optimal_parameters) # Note these are just here for example...

{}
