diff --git a/logicnet/validator/__init__.py b/logicnet/validator/__init__.py
index 8c151a08..e50eb322 100644
--- a/logicnet/validator/__init__.py
+++ b/logicnet/validator/__init__.py
@@ -1,9 +1,10 @@
 from .challenger.challenger import LogicChallenger
-from .miner_manager import MinerManager
+from .miner_manager import MinerManager, MinerInfo
 from .rewarder import LogicRewarder

 __all__ = [
     "MinerManager",
     "LogicChallenger",
     "LogicRewarder",
+    "MinerInfo",
 ]
diff --git a/logicnet/validator/miner_manager.py b/logicnet/validator/miner_manager.py
index 9e941887..27e0c035 100644
--- a/logicnet/validator/miner_manager.py
+++ b/logicnet/validator/miner_manager.py
@@ -7,7 +7,7 @@
 )
 import traceback

-NO_OF_RECENT_SCORES = 10
+NO_OF_RECENT_SCORES = 20


 class MinerInfo:
@@ -17,6 +17,7 @@ def __init__(
         scores: list[float] = [],
         epoch_volume: int = 42,
         reward_scale: float = 0.0,
+        reward_logs: list[dict] = [],
         *args,
         **kwargs,
     ):
@@ -33,6 +34,8 @@ def __init__(
         self.rate_limit = 0
         self.category: str = category
         self.reward_scale: float = reward_scale
+        # Copy: keep logs restored from saved state, without sharing the default list
+        self.reward_logs = list(reward_logs)

     def __str__(self):
         return str(self.to_dict()) + "\n"
@@ -49,6 +52,7 @@ def to_dict(self):
             "epoch_volume": self.epoch_volume,
             "rate_limit": self.rate_limit,
             "reward_scale": self.reward_scale,
+            "reward_logs": self.reward_logs,
         }


@@ -132,15 +136,19 @@ def get_miner_uids(self, category: str):
         ]
         return available_uids

-    def update_scores(self, uids, rewards):
+    def update_scores(self, uids, rewards, reward_logs):
         """
         Update miner's scores with new rewards
         """
-        for uid, reward in zip(uids, rewards):
+        for uid, reward, reward_log in zip(uids, rewards, reward_logs):
             self.all_uids_info[uid].scores.append(reward)
             self.all_uids_info[uid].scores = self.all_uids_info[uid].scores[
                 -NO_OF_RECENT_SCORES:
             ]
+            self.all_uids_info[uid].reward_logs.append(reward_log)
+            self.all_uids_info[uid].reward_logs = self.all_uids_info[uid].reward_logs[
+                -NO_OF_RECENT_SCORES:
+            ]

     def get_on_chain_weights(self, category) -> torch.Tensor:
         """
@@ -163,10 +171,9 @@ def get_model_specific_weights(self, category, normalize=True):
         """
         model_specific_weights = torch.zeros(len(self.all_uids))
         for uid in self.get_miner_uids(category):
-            num_past_to_check = 10
             model_specific_weights[int(uid)] = (
-                sum(self.all_uids_info[uid].scores[-num_past_to_check:])
-                / num_past_to_check
+                sum(self.all_uids_info[uid].scores[-NO_OF_RECENT_SCORES:])
+                / NO_OF_RECENT_SCORES
             )
         model_specific_weights = torch.clamp(model_specific_weights, 0, 1)
         if normalize:
diff --git a/logicnet/validator/rewarder.py b/logicnet/validator/rewarder.py
index 63f412a2..04c7e384 100644
--- a/logicnet/validator/rewarder.py
+++ b/logicnet/validator/rewarder.py
@@ -44,6 +44,7 @@ def __call__(self, uids, responses: list[LogicSynapse], base_synapse: LogicSynap
             uid for uid, response in zip(uids, responses) if not response.is_success
         ]
         invalid_rewards = [0 for _ in invalid_uids]
+        reward_logs = []
         valid_rewards = []
         if valid_uids:
             ref_ground_truth: str = self._get_ground_truth(
@@ -63,6 +64,13 @@ def __call__(self, uids, responses: list[LogicSynapse], base_synapse: LogicSynap
                 + CORRECTNESS_WEIGHT * correctness[i]
                 + PROCESSING_TIME_WEIGHT * min(process_times[i] / timeout, 1)
             )
+            reward_logs.append(
+                {
+                    "similarity": similarities[i],
+                    "correctness": correctness[i],
+                    "process_time": process_times[i],
+                }
+            )
             # Scale up the reward
             reward = reward / 2 + 0.5
             bt.logging.debug(
@@ -72,7 +80,10 @@ def __call__(self, uids, responses: list[LogicSynapse], base_synapse: LogicSynap

         total_uids = valid_uids + invalid_uids
         rewards = valid_rewards + invalid_rewards
-        return total_uids, rewards
+        # Pad logs for invalid UIDs so update_scores, which zips uids, rewards,
+        # and reward_logs together, still appends their zero scores.
+        reward_logs = reward_logs + [{} for _ in invalid_uids]
+        return total_uids, rewards, reward_logs

     def _get_correctness(
         self, base_synapse: LogicSynapse, responses: list[LogicSynapse]
diff --git a/neurons/validator/validator.py b/neurons/validator/validator.py
index adea7e52..5b4f8343 100644
--- a/neurons/validator/validator.py
+++ b/neurons/validator/validator.py
@@ -5,7 +5,7 @@
 from logicnet.base.validator import BaseValidatorNeuron
 from neurons.validator.validator_proxy import ValidatorProxy
 import logicnet as ln
-from logicnet.validator import MinerManager, LogicChallenger, LogicRewarder
+from logicnet.validator import MinerManager, LogicChallenger, LogicRewarder, MinerInfo
 import traceback
 import threading
 from neurons.validator.core.serving_queue import QueryQueue
@@ -152,7 +152,7 @@ def async_query_and_reward(
             )

             if reward_uids:
-                uids, rewards = self.categories[category]["rewarder"](
+                uids, rewards, reward_logs = self.categories[category]["rewarder"](
                     reward_uids, reward_responses, base_synapse
                 )

@@ -165,7 +165,7 @@

             bt.logging.info(f"Scored responses: {rewards}")

-            self.miner_manager.update_scores(uids, rewards)
+            self.miner_manager.update_scores(uids, rewards, reward_logs)

     def prepare_challenge(self, uids_should_rewards, category):
         """
@@ -241,7 +241,9 @@ def load_state(self):
             bt.logging.info("Loading validator state from: " + path)
             state = torch.load(path)
             self.step = state["step"]
-            self.miner_manager.all_uids_info = state["all_uids_info"]
+            all_uids_info = state["all_uids_info"]
+            for k, v in all_uids_info.items():
+                self.miner_manager.all_uids_info[k] = MinerInfo(**v)
             bt.logging.info("Successfully loaded state")
         except Exception as e:
             self.step = 0
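Each reward_logs entry mirrors the three weighted terms that make up a reward, so every stored score can be traced back to its similarity, correctness, and latency components. A minimal sketch of that computation, assuming placeholder values for SIMILARITY_WEIGHT, CORRECTNESS_WEIGHT, PROCESSING_TIME_WEIGHT, and the timeout (their real definitions live in rewarder.py and are not part of this diff):

# Sketch of LogicRewarder's per-response reward and its matching log entry.
# The three weights and TIMEOUT are assumed placeholders, not the
# repository's actual constants.
SIMILARITY_WEIGHT = 0.2
CORRECTNESS_WEIGHT = 0.7
PROCESSING_TIME_WEIGHT = 0.1
TIMEOUT = 64.0


def score_and_log(similarity: float, correctness: float, process_time: float):
    reward = (
        SIMILARITY_WEIGHT * similarity
        + CORRECTNESS_WEIGHT * correctness
        + PROCESSING_TIME_WEIGHT * min(process_time / TIMEOUT, 1)
    )
    log = {
        "similarity": similarity,
        "correctness": correctness,
        "process_time": process_time,
    }
    # Same rescaling as the diff ("Scale up the reward").
    return reward / 2 + 0.5, log


print(score_and_log(0.91, 1.0, 3.2))  # -> (~0.94, {...})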
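Because to_dict() now includes reward_logs and the patched load_state rebuilds each entry as MinerInfo(**v), the logs survive a validator restart, given that the constructor keeps the reward_logs it is passed (see the miner_manager.py hunk above). A round-trip sketch; the category value and the "state.pt" path are illustrative, and MinerInfo is assumed to accept these keyword arguments, as the MinerInfo(**v) call implies:

# Save all_uids_info the way the validator persists it (via to_dict), then
# restore it the way the patched load_state does (via MinerInfo(**v)).
import torch

from logicnet.validator import MinerInfo

info = MinerInfo(category="Logic", scores=[0.7])  # illustrative arguments
info.reward_logs.append(
    {"similarity": 0.91, "correctness": 1.0, "process_time": 3.2}
)

torch.save({"step": 1, "all_uids_info": {"0": info.to_dict()}}, "state.pt")

state = torch.load("state.pt")
restored = {k: MinerInfo(**v) for k, v in state["all_uids_info"].items()}
assert restored["0"].reward_logs == info.reward_logs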