# Example Optimiser

In [3]:
import os
import sys
import json
sys.path.append(os.path.abspath(".."))
from velopix_wrappers.parameter_optimisers import optimiserBase
from velopix_wrappers.velopix_pipeline import TrackFollowingPipeline, GraphDFSPipeline, SearchByTripletTriePipeline
from typing import Any, Dict
import numpy as np
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

In [None]:
from typing import Any, Dict
import random
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from scipy.stats import norm 

class BayesianOptimiser(optimiserBase):
    """Bayesian optimiser that proposes parameter maps with a Gaussian-process surrogate.

    Each call to ``next()`` records the previously proposed parameter map together
    with the latest entry of ``self.score_history``, refits the surrogate, draws
    ``n_candidates`` random parameter maps and returns the one with the highest
    Expected Improvement (minimisation form).

    The attributes ``_algorithm``, ``auto_evaluate``, ``weights``, ``nested``,
    ``score_history``, ``best_score``, ``_evaluate_run``, ``event_objective`` and
    ``intra_event_objective`` are not defined in this class; they are presumably
    provided by ``optimiserBase`` -- TODO confirm against the base class.
    """

    def __init__(self, learning_rate, n_candidates, acq_xi, acq_kappa, max_iterations=100, n_initial=5, seed=42):
        """Configure the optimiser and its Gaussian-process surrogate.

        Args:
            learning_rate: Stored on the instance; not read by any method of this class.
            n_candidates: Number of random candidates scored per ``next()`` call.
            acq_xi: Exploration margin subtracted from the improvement in the
                Expected Improvement acquisition (``None`` is treated as 0).
            acq_kappa: Stored on the instance; not read by any method of this class.
            max_iterations: ``is_finished()`` reports True once ``next()`` has been
                called this many times.
            n_initial: Stored on the instance; not read by any method of this class
                (the random bootstrap in ``next()`` lasts until two points exist).
            seed: Seed for the ``numpy.random.RandomState`` used for sampling and
                passed to the regressor; ``None`` gives a non-reproducible run.
        """
        super().__init__(Objective="min", auto_eval={"autoEval": True, "nested": False, "weights": [3,5,10]})
        self.learning_rate = learning_rate
        self.n_initial  = n_initial
        self.n_candidates = n_candidates
        self.current_iteration = 0
        self.max_iterations = max_iterations
        self.random_state = np.random.RandomState(seed)

        self.X = []  # Parameter sets (input), one (1, n_params) row vector per run
        self.Y = []  # Objective function results (output), one score per run

        # Gaussian Process initialization: constant * RBF kernel, targets normalised,
        # small jitter (alpha) on the diagonal, 10 optimiser restarts.
        kernel = C(1.0, (1e-4,1e1)) * RBF(1.0, (1e-4,1e1))
        self.gpr = GaussianProcessRegressor(
            kernel=kernel,
            normalize_y=True,
            alpha=1e-6,
            n_restarts_optimizer=10,
            random_state=self.random_state
        )

        self.acq_xi = acq_xi
        self.acq_kappa = acq_kappa

    def init(self) -> Dict[str, Any]:
        """Return the first parameter map to evaluate, sampled randomly within the bounds.

        The map is remembered in ``self.prev_config`` so that ``next()`` can pair
        it with the score of the run it produced.

        Raises:
            NotImplementedError: If the algorithm schema declares a parameter type
                other than float, int, bool or list.
        """
        param_map = self._sample_random_map(strict=True)
        print(f"Initial parameters: {param_map}")
        self.prev_config = param_map
        return param_map

    def add_run(self, results) -> None:
        """Store the results of the latest run and, if enabled, evaluate them.

        ``results`` is kept in ``self.run``; when ``self.auto_evaluate`` is truthy
        ``self._evaluate_run`` is called with the configured weights and nesting
        flag (presumably this is what appends to ``self.score_history`` -- TODO confirm).
        """
        self.run = results
        if self.auto_evaluate:
            self._evaluate_run(weight=self.weights, nested=self.nested)
        print(f"Results: {results}")

    def is_finished(self) -> bool:
        """Return True once ``max_iterations`` calls to ``next()`` have been made, else False."""
        if self.current_iteration >= self.max_iterations:
            print("Max iterations reached. Stopping optimization.")
            return True
        # Explicit False: previously this fell through and returned None.
        return False

    def next(self) -> Dict[str, Any]:
        """Propose the next parameter map.

        1) Record the last run's data into X, Y
        2) If <2 points, just sample a random map
        3) Otherwise fit GP, score n_candidates by EI, pick best
        4) Return the next param_map
        """
        # 1) record last point
        x_prev = self._dict_to_vector(self.prev_config)
        print(f"Previous parameters: {x_prev}")
        self.X.append(x_prev)
        y_prev = self.score_history[-1]    # latest score; presumably recorded via add_run()
        self.Y.append(y_prev)

        # 2) bootstrap with one more random draw
        if len(self.X) < 2:
            next_map = self._sample_random_map()
        else:
            # 3) fit GP on every (parameters, score) pair seen so far
            self.gpr.fit(np.vstack(self.X), np.array(self.Y))

            # generate candidates and score them all with a single predict call
            candidates = [self._sample_random_map()
                          for _ in range(self.n_candidates)]
            candidate_matrix = np.vstack([self._dict_to_vector(pm) for pm in candidates])
            mu, sigma = self.gpr.predict(candidate_matrix, return_std=True)
            acq_vals = self._expected_improvement(mu, sigma)

            # 4) pick best (first index wins on ties)
            best_idx = int(np.argmax(acq_vals))
            next_map = candidates[best_idx]

        # 5) update iteration count and prev_config
        self.current_iteration += 1
        self.prev_config = next_map
        print(f"Next parameters: {next_map}")
        return next_map

    # ------------------------------------------------------------------ helpers

    def _expected_improvement(self, mu, sigma) -> np.ndarray:
        """Expected Improvement (minimisation) for predicted means and std deviations.

        With ``imp = best_score - mu - xi`` and ``z = imp / sigma`` this returns
        ``imp * Phi(z) + sigma * phi(z)``. Where ``sigma`` is not positive the
        prediction is treated as deterministic and ``max(imp, 0)`` is returned,
        which avoids the division by zero that would otherwise yield NaN/inf.
        """
        mu = np.ravel(np.asarray(mu, dtype=float))
        sigma = np.ravel(np.asarray(sigma, dtype=float))
        xi = 0.0 if self.acq_xi is None else float(self.acq_xi)

        improvement = np.asarray(self.best_score - mu - xi, dtype=float)
        ei = np.zeros_like(mu)

        uncertain = sigma > 0
        z = improvement[uncertain] / sigma[uncertain]
        ei[uncertain] = improvement[uncertain] * norm.cdf(z) + sigma[uncertain] * norm.pdf(z)
        ei[~uncertain] = np.maximum(improvement[~uncertain], 0.0)
        return ei

    def _sample_random_map(self, strict: bool = False) -> Dict[str,Any]:
        """Draw one random parameter map from the algorithm's schema and bounds.

        Floats are drawn uniformly from ``[low, high)``, ints uniformly from
        ``low..high`` inclusive, bools uniformly from {False, True}; list-typed
        parameters are set to an empty list.

        Args:
            strict: When False (default) any other declared type also maps to an
                empty list; when True such a type raises ``NotImplementedError``.
        """
        schema = self._algorithm.get_config()
        bounds = self._algorithm.get_bounds()
        pm = {}
        for key, (typ, _) in schema.items():
            low, high = bounds[key]
            if typ is float:
                pm[key] = float(self.random_state.uniform(low,high))
            elif typ is int:
                # randint's upper bound is exclusive, hence the +1
                pm[key] = int(self.random_state.randint(low, high+1))
            elif typ is bool:
                pm[key] = bool(self.random_state.choice([False,True]))
            elif typ is list or not strict:
                pm[key] = []  # or your own logic for lists/categoricals
            else:
                raise NotImplementedError(f"Unsupported type: {typ}")
        return pm

    def _dict_to_vector(self, pm: Dict[str,Any]) -> np.ndarray:
        """Encode a parameter map as a ``(1, n_params)`` float array in schema order.

        Bools become 0.0/1.0, ints and floats are cast to float, and any other
        value is represented by its length.
        """
        schema = self._algorithm.get_config()
        xs = []
        for key, (typ, _) in schema.items():
            v = pm[key]
            if typ is bool:
                xs.append(1.0 if v else 0.0)
            elif typ in (int, float):
                xs.append(float(v))
            else:
                xs.append(float(len(v)))  # e.g. length for lists
        return np.array(xs).reshape(1,-1)

    def objective_func(self, w: list[float], nested: bool = False) -> float:
        """Return the intra-event objective for weights ``w`` when ``nested``, else the event objective."""
        if nested:
            return self.intra_event_objective(w)
        return self.event_objective(w)
                

In [5]:
# Load the raw event JSON files velo_event_0.json .. velo_event_{n_files-1}.json
# from ../DB/raw into the `events` list, skipping the one known-bad file.
events = []
n_files = 100

for i in range(0, n_files):
    if i == 51:
        # There's an issue with event 51 -> module_prefix_sum contains the value 79 twice,
        # resulting in an indexing error when loading the event.
        print(f"Skipping problematic file: velo_event_{i}.json")
    else:
        print(f"Loading file: velo_event_{i}.json")
        # `with` guarantees the handle is closed even if parsing raises.
        with open(os.path.join("../DB/raw", f"velo_event_{i}.json"), encoding="utf-8") as event_file:
            json_data = json.load(event_file)
        events.append(json_data)

Loading file: velo_event_0.json
Loading file: velo_event_1.json
Loading file: velo_event_2.json
Loading file: velo_event_3.json
Loading file: velo_event_4.json
Loading file: velo_event_5.json
Loading file: velo_event_6.json
Loading file: velo_event_7.json
Loading file: velo_event_8.json
Loading file: velo_event_9.json
Loading file: velo_event_10.json
Loading file: velo_event_11.json
Loading file: velo_event_12.json
Loading file: velo_event_13.json
Loading file: velo_event_14.json
Loading file: velo_event_15.json
Loading file: velo_event_16.json
Loading file: velo_event_17.json
Loading file: velo_event_18.json
Loading file: velo_event_19.json
Loading file: velo_event_20.json
Loading file: velo_event_21.json
Loading file: velo_event_22.json
Loading file: velo_event_23.json
Loading file: velo_event_24.json
Loading file: velo_event_25.json
Loading file: velo_event_26.json
Loading file: velo_event_27.json
Loading file: velo_event_28.json
Loading file: velo_event_29.json
Loading file: velo_e

**Load event data**

In [6]:
# Build the track-following pipeline over the loaded events.
pipeline = TrackFollowingPipeline(
    events=events,
    intra_node=False,
)

# Configure the Bayesian optimiser. seed=None leaves the random state unseeded,
# so the sampled parameter maps differ from run to run.
Optimiser = BayesianOptimiser(
    learning_rate=0.05,
    max_iterations=100,
    n_candidates=5,
    acq_xi=0.01,
    acq_kappa=1.96,
    n_initial=5,
    seed=None,
)

# Keep max_runs set: without it the optimisation may run for a very long time.
optimal_parameters = pipeline.optimise_parameters(
    Optimiser,
    max_runs=10,
)

Initial parameters: {'x_slope': 0.3191210094715857, 'y_slope': 0.9150354205589739, 'x_tol': 0.5810088531405578, 'y_tol': 0.6371303162093983, 'scatter': 0.335646984499045}


Optimising:   0%|          | 0/10 [00:06<?, ?it/s]


AttributeError: 'BayesianOptimiser' object has no attribute 'run'

In [None]:
# Ask the optimiser for its optimised parameter map.
# NOTE(review): this binding is overwritten by `best_config` on the next statement;
# the call is kept in case it has side effects -- confirm and drop if it has none.
best_params = Optimiser.get_optimised_pMap()

# Read the best configuration and its objective score straight off the optimiser.
best_params, best_score = Optimiser.best_config, Optimiser.best_score

# Report both values, one per line.
for label, value in (
    ("Best parameters found:", best_params),
    ("Best objective score:", best_score),
):
    print(label, value)

Best parameters found: {'x_slope': 0.5700103267037683, 'y_slope': 0.24124649990096503, 'x_tol': 0.42154328464175794, 'y_tol': 0.4524089914293094, 'scatter': 0.24767414959260253}
Best objective score: 63.84538962043942
