In [9]:
import os 
import sys
import json
sys.path.append(os.path.abspath(".."))
from velopix_wrappers.optimizers import BaseOptimizer
from velopix_wrappers.velopix_pipeline import TrackFollowingPipeline, GraphDFSPipeline, SearchByTripletTriePipeline
from typing import Any, Dict
import numpy as np

from typing import Any, Dict, Literal, List
import random
import numpy as np
from velopix_wrappers.optimizers import BaseOptimizer, pMap
from copy import deepcopy

In [10]:
class Node(object):
    """One cell of the search tree built by the optimiser.

    Attributes:
        bounds: the region of parameter space this node stands for, stored
            exactly as passed in.
        parent: the node this one hangs under, or ``None`` for a root.
        children: child nodes, in the order they were attached.
        visited: visit counter, starts at 0 (incremented by the caller).
        sum_reward: accumulated reward, starts at 0 (incremented by the caller).
    """

    def __init__(self, bounds, parent: Any = None):
        # Tree links first, statistics afterwards.
        self.bounds = bounds
        self.parent = parent
        self.children = []
        self.visited = 0
        self.sum_reward = 0

    def add_child(self, child: Any):
        """Attach ``child`` at the end of this node's child list."""
        self.children += [child]

In [11]:
class PolyHoot(BaseOptimizer):
    """Tree-search optimiser that repeatedly bisects the parameter bounds.

    Every tree node owns a bounds dictionary. One iteration walks from the root
    to a leaf, splits that leaf into two children along its relatively widest
    non-boolean axis, and proposes the midpoint of the leaf's bounds as the next
    parameter map. The score of a proposal is read from ``self.score_history``
    on the following call and added to ``sum_reward`` of the leaf and all of its
    ancestors.

    Args:
        alfa, eta, epsilon: constants of the exploration bonus used when
            choosing between two already-visited children (see
            ``_selection_score``). Presumably the alpha / eta / xi constants of
            the PolyHOOT algorithm -- TODO confirm.
        max_iterations: ``is_finished`` reports True once this many iterations
            (``init`` plus ``next`` calls) have been started.
        objective: forwarded to ``BaseOptimizer``. NOTE(review): selection and
            ``best_score`` in this class treat lower scores as better
            regardless of this value -- confirm how the base class handles
            "max".
        weights: forwarded to ``BaseOptimizer`` inside ``auto_eval``.
    """

    def __init__(
        self,
        alfa,
        eta,
        epsilon,
        max_iterations: int = 100,
        objective: Literal["min", "max"] = "min",
        weights: list[float]=[1.0, 1.0, 1.0, -10.0]
    ):
        # Hand the base class a copy so the shared default list object can never
        # be mutated through it.
        super().__init__(
            objective=objective,
            auto_eval={"autoEval": True, "nested": True, "weights": list(weights)},
        )
        self.max_iterations = max_iterations
        self.current_iteration = 0
        self.alfa = alfa
        self.epsilon = epsilon
        self.eta = eta

    def init(self) -> pMap:
        """Build the initial tree and return the first parameter map.

        The root covers the full bounds reported by the algorithm. For every
        boolean parameter each current leaf is split into a False child and a
        True child, so numeric bisection only ever happens below a fixed
        boolean assignment. The first proposal is the midpoint of the leaf
        reached by always following the first (False) child.
        """
        # No score seen yet; any real score compares lower than +inf.
        self.best_score = float("inf")

        self.cfg = self._algorithm.get_config()
        self.bounds = self._algorithm.get_bounds()

        # The root node carries the complete, unsplit bounds.
        self.root = Node(bounds=self.bounds)
        self.nodes = [self.root]
        self.current_node = self.root

        # Number of non-boolean (splittable) parameters.
        self.param_num = sum(1 for typ, _ in self.cfg.values() if typ is not bool)

        # Guard against a configuration without any non-boolean parameter,
        # which would otherwise divide by zero below.
        dims = max(self.param_num, 1)
        self.nu = 4 * dims
        self.ro = 1 / (4 * dims)

        self.current_iteration += 1

        # Pre-generate one tree level per boolean parameter.
        for key, (typ, _) in self.cfg.items():
            if typ is not bool:
                continue
            # Snapshot of the leaves: nodes appended below must not be
            # re-processed for the same key.
            leaves = [candidate for candidate in self.nodes if not candidate.children]
            for leaf in leaves:
                for value in (False, True):
                    child_bounds = deepcopy(leaf.bounds)
                    child_bounds[key] = value
                    child = Node(bounds=child_bounds, parent=leaf)
                    leaf.add_child(child)
                    self.nodes.append(child)

        # Walk down the first-child path, counting a visit on every node passed.
        # (Previously this stepped through a stale loop variable instead of the
        # node currently being walked.)
        self.current_node = self.root
        while self.current_node.children:
            self.current_node.visited += 1
            self.current_node = self.current_node.children[0]
        self.current_node.visited += 1

        return self._expand(self.current_node)

    def next(self) -> pMap:
        """Record the latest score and return the next parameter map.

        The most recent entry of ``self.score_history`` (if any) is credited to
        the node proposed by the previous ``init``/``next`` call and to its
        ancestors. Then a new leaf is selected from the root, expanded, and its
        midpoint is returned.
        """
        self.current_iteration += 1

        score = self._backpropagate_latest_score()
        if score is not None and score < self.best_score:
            self.best_score = score

        node = self.root
        depth = 0
        while node.children:
            node.visited += 1
            first, second = node.children[0], node.children[1]
            # Unvisited children are always tried before any comparison.
            if first.visited == 0:
                node = first
            elif second.visited == 0:
                node = second
            elif self._selection_score(first, depth) > self._selection_score(second, depth):
                node = first
            else:
                node = second
            depth += 1

        node.visited += 1
        pmap = self._expand(node)

        # Remember which node was proposed so the next call can credit it.
        self.current_node = node
        return pmap

    def is_finished(self) -> bool:
        """Return True once ``max_iterations`` iterations have been started.

        When finished, the latest score is back-propagated here because no
        further ``next`` call is expected to do it. The best score seen so far
        is printed on every call.
        """
        # TODO: possibly add a check for reaching a target score.
        # NOTE(review): if the driver calls ``next`` after the final evaluation
        # and before this method, the last score is credited a second time, to
        # the newly selected node -- confirm the call order used by the pipeline.
        finished = self.current_iteration >= self.max_iterations
        if finished:
            self._backpropagate_latest_score()

        print(f"score: {self.best_score}\n")

        return finished

    def _backpropagate_latest_score(self):
        """Add the newest score to ``current_node`` and every ancestor.

        Returns the score that was propagated, or None when ``score_history``
        is missing or empty.
        """
        history = getattr(self, "score_history", None)
        if history is None or len(history) == 0:
            return None

        score = history[-1]
        current = self.current_node
        while current is not None:
            current.sum_reward += score
            current = current.parent
        return score

    def _selection_score(self, child: "Node", depth: int) -> float:
        """Score used to choose between two visited children (higher wins).

        Sum of the negated mean reward of ``child``, an exploration bonus
        ``iteration ** (alfa / epsilon) * visited ** (eta - 1)`` and the depth
        term ``nu * ro ** depth``.
        """
        mean_term = -child.sum_reward / child.visited
        exploration = (self.current_iteration ** (self.alfa / self.epsilon)) * (
            child.visited ** (self.eta - 1)
        )
        depth_term = self.nu * (self.ro ** depth)
        return mean_term + exploration + depth_term

    def _expand(self, node: "Node") -> Dict[str, Any]:
        """Give ``node`` two children covering its halves; return its midpoint map."""
        lower_half, upper_half = self.returnBounds(node.bounds)
        node.add_child(Node(bounds=lower_half, parent=node))
        node.add_child(Node(bounds=upper_half, parent=node))
        return self.returnPmap(node.bounds)

    def returnPmap(self, bounds: Dict[str, Any]) -> Dict[str, Any]:
        """Return the parameter map at the centre of ``bounds`` (rollout step).

        Floats get the arithmetic midpoint of their ``(low, high)`` pair, ints
        the midpoint truncated by ``int()``, and booleans the fixed value stored
        in ``bounds``. Parameters of any other type are left out.
        """
        new_pmap = {}

        for key, (typ, _) in self.cfg.items():
            if typ is float:
                new_pmap[key] = (bounds[key][0] + bounds[key][1]) / 2
            elif typ is int:
                new_pmap[key] = int((bounds[key][0] + bounds[key][1]) / 2)
            elif typ is bool:
                new_pmap[key] = bounds[key]

        return new_pmap

    def returnBounds(self, bounds: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Split ``bounds`` in two along its relatively widest non-boolean axis.

        Widths are normalised by the full range in ``self.bounds`` so that
        parameters with different scales are comparable; a parameter whose full
        range is zero counts as width 0. Boolean parameters are never split.

        Returns:
            ``[lower, upper]``: two deep copies of ``bounds`` in which the
            chosen key is ``(low, mid)`` and ``(mid, high)`` respectively, with
            ``mid`` taken from ``returnPmap``. If there is no non-boolean
            parameter both copies are returned unchanged.
        """
        midpoints = self.returnPmap(bounds)
        new_bounds = [deepcopy(bounds), deepcopy(bounds)]

        # Find the non-boolean key with the largest relative width.
        max_key = None
        max_relative_diff = -1

        for key, (typ, _) in self.cfg.items():
            if typ is bool:
                continue
            original_range = self.bounds[key][1] - self.bounds[key][0]
            current_range = bounds[key][1] - bounds[key][0]
            relative_diff = current_range / original_range if original_range != 0 else 0

            # Strict comparison: on ties the first key in config order wins.
            if relative_diff > max_relative_diff:
                max_relative_diff = relative_diff
                max_key = key

        # Split only that key, at its midpoint.
        if max_key is not None:
            low, high = bounds[max_key]
            mid = midpoints[max_key]
            new_bounds[0][max_key] = (low, mid)
            new_bounds[1][max_key] = (mid, high)

        return new_bounds
            

In [12]:
# Load the raw event JSON files velo_event_0.json .. velo_event_{n_files-1}.json
# from ../DB/raw into `events`, one parsed object per file.
events = []
n_files = 150

for i in range(0, n_files):
    if i == 51:
        # There's an issue with event 51 -> module_prefix_sum contains value 79
        # twice, resulting in an indexing error when loading the event.
        print(f"Skipping problematic file: velo_event_{i}.json")
        continue

    print(f"Loading file: velo_event_{i}.json")
    # The context manager closes the file even if parsing raises.
    with open(os.path.join("../DB/raw", f"velo_event_{i}.json")) as event_file:
        json_data = json.load(event_file)
    events.append(json_data) # type: ignore

Loading file: velo_event_0.json
Loading file: velo_event_1.json
Loading file: velo_event_2.json
Loading file: velo_event_3.json
Loading file: velo_event_4.json
Loading file: velo_event_5.json
Loading file: velo_event_6.json
Loading file: velo_event_7.json
Loading file: velo_event_8.json
Loading file: velo_event_9.json
Loading file: velo_event_10.json
Loading file: velo_event_11.json
Loading file: velo_event_12.json
Loading file: velo_event_13.json
Loading file: velo_event_14.json
Loading file: velo_event_15.json
Loading file: velo_event_16.json
Loading file: velo_event_17.json
Loading file: velo_event_18.json
Loading file: velo_event_19.json
Loading file: velo_event_20.json
Loading file: velo_event_21.json
Loading file: velo_event_22.json
Loading file: velo_event_23.json
Loading file: velo_event_24.json
Loading file: velo_event_25.json
Loading file: velo_event_26.json
Loading file: velo_event_27.json
Loading file: velo_event_28.json
Loading file: velo_event_29.json
Loading file: velo_e

In [13]:
# Instantiate the SearchByTripletTrie pipeline on the loaded events.
# NOTE(review): the meaning of `intra_node` is defined in velopix_wrappers and
# is not visible in this notebook.
pipeline = SearchByTripletTriePipeline(events=events, intra_node=True) # type: ignore 

In [14]:
# Configure the PolyHoot optimiser, then let the pipeline drive it.
Optimiser = PolyHoot(  # type: ignore
    alfa=6,
    eta=3,
    epsilon=10,
    max_iterations=3,
    objective="min",
    weights=[0.4, 0.2, 0.5, -7.0],
)
# DO NOT remove max_runs, chances are that this will run forever
optimal_parameters = pipeline.optimise_parameters(Optimiser, max_runs=1000)

Optimising:   0%|          | 1/1000 [00:01<27:55,  1.68s/it]

=== Objective Function Debug Info ===
time_rate: 0.6982319355010986
ghost_rate: 46.264244060852256
num_tracks: 85913
penalty: 0
weights: [0.4, 0.2, 0.5, -7.0]
nested: True
clone_rate: 2.5294441899266245
terms (used in weighted sum): (0.6982319355010986, np.float64(2.5294441899266245), 46.264244060852256, 85913)
score (with penalty): -601367.0826963574
score: -601367.0826963574

=== Objective Function Debug Info ===
time_rate: 0.6964550018310547
ghost_rate: 37.43027197266928
num_tracks: 74934
penalty: 0
weights: [0.4, 0.2, 0.5, -7.0]
nested: True


Optimising:   0%|          | 2/1000 [00:03<28:07,  1.69s/it]

clone_rate: 2.6589630965404
terms (used in weighted sum): (0.6964550018310547, np.float64(2.6589630965404), 37.43027197266928, 74934)
score (with penalty): -524518.4744893936
score: -601367.0826963574

=== Objective Function Debug Info ===
time_rate: 0.8743531703948975
ghost_rate: 50.66330924272649
num_tracks: 91737
penalty: 0
weights: [0.4, 0.2, 0.5, -7.0]
nested: True


Optimising:   0%|          | 3/1000 [00:05<31:16,  1.88s/it]

clone_rate: 2.533860259331406
terms (used in weighted sum): (0.8743531703948975, np.float64(2.533860259331406), 50.66330924272649, 91737)
score (with penalty): -642132.8118320586
score: -642132.8118320586

Finsihed condition met, exiting...





In [None]:
# Display the optimiser's run history as the cell's rich output
# (the captured output shows one entry per run with 'params', 'score' and 'meta').
Optimiser.history

{'7565c058-3c28-45f1-8614-69422701c214': {'params': {},
  'score': np.float64(-601367.0826963574),
  'meta': {'total_tracks': 85913,
   'total_ghosts': 39747,
   'overall_ghost_rate': 46.264244060852256,
   'event_avg_ghost_rate': 40.49136947750126,
   'categories': [{'label': 'velo',
     'n_reco': 39696,
     'n_particles': 42418,
     'recoeffT': 93.58291291432882,
     'avg_recoeff': 93.58291291432882,
     'n_clones': 1410,
     'clone_percentage': 3.551995163240629,
     'purityT': 98.37139780657667,
     'avg_purity': 98.1540991902834,
     'avg_hiteff': 97.6368404986826,
     'hit_eff_percentage': 96.46731007717716},
    {'label': 'long>5GeV',
     'n_reco': 7631,
     'n_particles': 7746,
     'recoeffT': 98.51536276788019,
     'avg_recoeff': 98.5153627678802,
     'n_clones': 242,
     'clone_percentage': 3.1712750622461012,
     'purityT': 98.23008075092194,
     'avg_purity': 99.10714285714286,
     'avg_hiteff': 98.1436420722135,
     'hit_eff_percentage': 96.628175556304

In [15]:
# Show the parameter map returned by pipeline.optimise_parameters.
print(optimal_parameters)

{'scatter': 0.75, 'min_strong_track_length': 10, 'allowed_missed_modules': 2}


In [16]:
# NOTE(review): this cell repeats the print of `optimal_parameters` from the
# cell before it and produces the same output; consider removing one of them.
print(optimal_parameters)

{'scatter': 0.75, 'min_strong_track_length': 10, 'allowed_missed_modules': 2}
