diff --git a/docs/VALIDATOR.md b/docs/VALIDATOR.md
index 3df37aa4..9bda942e 100644
--- a/docs/VALIDATOR.md
+++ b/docs/VALIDATOR.md
@@ -37,7 +37,6 @@ We recommend using Together.AI to run the Validator, as it simplifies setup and
 
 #### Prerequisites:
 - **Account on Together.AI**: [Sign up here](https://together.ai/).
-- **Account on Hugging Face**: [Sign up here](https://huggingface.co/).
 - **API Key**: Obtain from the Together.AI dashboard.
 - **Python 3.10**
 - **PM2 Process Manager**: For running and managing the Validator process. *OPTIONAL*
@@ -71,7 +70,6 @@ We recommend using Together.AI to run the Validator, as it simplifies setup and
 4. **Set Up the `.env` File**
    ```bash
    echo "TOGETHER_API_KEY=your_together_ai_api_key" > .env
-   echo "HF_TOKEN=your_hugging_face_token" >> .env
    ```
 
 5. **Select a Model**
@@ -133,9 +131,6 @@ We recommend using Together.AI to run the Validator, as it simplifies setup and
 - Ensure your `TOGETHER_API_KEY` is correctly set and sourced:
   - Check the `.env` file: `cat .env`
   - Verify the API key is loaded: `echo $TOGETHER_API_KEY`
-- Ensure your `HF_TOKEN` is correctly set and sourced:
-  - Check the `.env` file: `cat .env`
-  - Verify the API key is loaded: `echo $HF_TOKEN`
 
 - The `--llm_client.base_url` should be `https://api.together.xyz/v1`.
 - Match `--llm_client.model` with the **Model ID** from Together.AI.
@@ -182,12 +177,7 @@ This method involves self-hosting a vLLM server to run the Validator locally. It requ
    ```
    *Adjust the model, port, and host as needed.*
 
-5. **Set Up the `.env` File**
-   ```bash
-   echo "HF_TOKEN=your_hugging_face_token" > .env
-   ```
-
-6. **Run the Validator with Self-Hosted LLM**
+5. **Run the Validator with Self-Hosted LLM**
    - **Activate Virtual Environment**:
      ```bash
      . main/bin/activate
@@ -204,7 +194,7 @@ This method involves self-hosting a vLLM server to run the Validator locally. It requ
      --logging.debug
      ```
 
-7. **(Optional) Enable Public Access**
+6. **(Optional) Enable Public Access**
    ```bash
    --axon.port "your-public-open-port"
    ```
@@ -236,7 +226,6 @@ This method involves self-hosting a vLLM server to run the Validator locally. It requ
 
 - **Common Issues**:
   - **API Key Not Found**: Ensure `.env` is sourced and `TOGETHER_API_KEY` is set.
-  - **HF Token Not Found**: Ensure `.env` is sourced and `HF_TOKEN` is set.
  - **Model ID Incorrect**: Verify the `--llm_client.model` matches the Together.AI Model ID.
  - **Connection Errors**: Check internet connectivity and Together.AI service status.
 
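Reviewer note: the docs change above leaves `TOGETHER_API_KEY` as the only secret in `.env`. A minimal pre-flight check, sketched as a hypothetical helper that is not part of this PR, using the `python-dotenv` package already pinned in `requirements.txt`:

```python
# check_env.py -- hypothetical pre-flight helper, not included in this PR.
import os
import sys

from dotenv import load_dotenv  # python-dotenv==1.0.1 per requirements.txt

load_dotenv()  # read .env from the current working directory

# HF_TOKEN is intentionally not checked: this PR removes the Hugging Face dependency.
if not os.environ.get("TOGETHER_API_KEY"):
    sys.exit("TOGETHER_API_KEY is missing; add it to .env as shown in docs/VALIDATOR.md")

print("TOGETHER_API_KEY loaded; the Together.AI client can be configured.")
```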
diff --git a/logicnet/utils/config.py b/logicnet/utils/config.py
index 00a4ccf4..f98013bc 100644
--- a/logicnet/utils/config.py
+++ b/logicnet/utils/config.py
@@ -189,13 +189,6 @@ def add_args(cls, parser):
             default="xyz",
         )
 
-        parser.add_argument(
-            "--dataset_weight",
-            type=str,
-            help="The weight of the dataset",
-            default="40,10,10,10,10,10,10",
-        )
-
     else:
         parser.add_argument(
             "--miner.category",
diff --git a/logicnet/validator/challenger/challenger.py b/logicnet/validator/challenger/challenger.py
index 3cc89495..f4e592cc 100644
--- a/logicnet/validator/challenger/challenger.py
+++ b/logicnet/validator/challenger/challenger.py
@@ -1,5 +1,4 @@
 # Challenge for Synthetic Request
-import os
 import openai
 import random
 from logicnet.protocol import LogicSynapse
@@ -7,147 +6,48 @@ from .human_noise import get_condition
 from .math_generator.topics import TOPICS as topics
 import mathgenerator
-from datasets import load_dataset
 
-DATASET_WEIGHT = [40,10,10,10,10,10,10]
 
 class LogicChallenger:
-    def __init__(self, base_url: str, api_key: str, model: str, dataset_weight: list):
+    def __init__(self, base_url: str, api_key: str, model: str):
         bt.logging.info(
-            f"Initializing Logic Challenger with model: {model}, base URL: {base_url}."
+            f"Logic Challenger initialized with model: {model}, base_url: {base_url}"
         )
         self.model = model
         self.openai_client = openai.OpenAI(base_url=base_url, api_key=api_key)
-        self.dataset_weight = [float(weight) for weight in dataset_weight.split(',')]
 
     def __call__(self, synapse: LogicSynapse) -> LogicSynapse:
         self.get_challenge(synapse)
         return synapse
 
     def get_challenge(self, synapse: LogicSynapse):
-        # Get an atom logic problem
-        atom_logic_question, atom_logic_answer = self.get_atom_logic_problem()
-        if atom_logic_question is None or atom_logic_answer is None:
-            bt.logging.error("Unable to retrieve atom logic problem. Retrying...")
-            atom_logic_question, atom_logic_answer = self.get_atom_logic_problem()
-
-        # Revise the problem
+        logic_problem = self.get_atom_math_problem(synapse)
         conditions: dict = get_condition()
-        revised_logic_question: str = self.get_revised_logic_question(
-            atom_logic_question, conditions
+        revised_logic_question: str = self.get_revised_math_question(
+            logic_problem, conditions
         )
-
-        # Set the synapse with the atom problem
-        synapse.raw_logic_question = atom_logic_question
-        synapse.ground_truth_answer = str(atom_logic_answer).replace("$", "").strip()
         synapse.logic_question = revised_logic_question
 
-    def get_atom_logic_problem(self) -> str:
-        resources = ['mathgenerator', 'zebralogicbench-grid', 'zebralogicbench-mc', 'ultrainteract', 'gsm8k', 'mmlustem', 'satmath']
-        if len(self.dataset_weight) == 7:
-            selected_resource = random.choices(resources, weights=self.dataset_weight, k=1)[0]
-        else:
-            bt.logging.warning("Invalid dataset weight configuration provided. Using default weights.")
-            selected_resource = random.choices(resources, weights=DATASET_WEIGHT, k=1)[0]
-
-        try:
-            # Select an atom question and answer from the Mathgenerator
-            if selected_resource == 'mathgenerator':
-                selected_topic = random.choice(topics)
-                subtopic = selected_topic["subtopic"]
-                topic = selected_topic["topic"]
-                atom_question, atom_answer = eval(f"mathgenerator.{topic}.{subtopic}()")
-                if atom_question is None or atom_answer is None:
-                    raise ValueError("Failed to get atom logic problem")
-                bt.logging.debug("Generating math problem using Mathgenerator.")
-                subtopic = subtopic.replace("_", " ").capitalize()
-                topic = topic.replace("_", " ").capitalize()
-                atom_question = atom_question.replace("$", "").strip()
-                atom_question = f"Find the solution of this math problem:\n---\nTopic: {topic}, Subtopic: {subtopic}.\n{atom_question}\n---\n"
-
-            # Select an atom question and answer from the ZebraLogicBench grid_mode
-            elif selected_resource == 'zebralogicbench-grid':
-                ds_grid = load_dataset("allenai/ZebraLogicBench-private", "grid_mode", token=os.environ.get('HF_TOKEN'))
-                bt.logging.debug("Generating problem using ZebraLogicBench (grid mode).")
-                data_set_grid = ds_grid['test']
-                bt.logging.info(f"Loaded ZebraLogicBench (grid_mode) dataset with {len(data_set_grid['puzzle'])} entries")
-                random_index = random.randint(0, len(data_set_grid['puzzle']) - 1)
-                puzzle = data_set_grid['puzzle'][random_index]
-                answer = data_set_grid['solution'][random_index]
-                atom_question = f"Find the solution of this problem:\n---\n{puzzle}\n---\n"
-                atom_answer = answer
-
-            # Select an atom question and answer from the ZebraLogicBench mc_mode
-            elif selected_resource == 'zebralogicbench-mc':
-                ds_mc = load_dataset("allenai/ZebraLogicBench-private", "mc_mode", token=os.environ.get('HF_TOKEN'))
-                bt.logging.debug("Generating problem using ZebraLogicBench (multiple choice mode).")
-                data_set_mc = ds_mc['test']
-                bt.logging.info(f"Loaded ZebraLogicBench (mc_mode) dataset with {len(data_set_mc['puzzle'])} entries")
-                random_index = random.randint(0, len(data_set_mc['puzzle']) - 1)
-                puzzle = data_set_mc['puzzle'][random_index]
-                question = data_set_mc['question'][random_index]
-                answer = data_set_mc['answer'][random_index]
-                atom_question = f"Find the solution of this problem:\n---\n{puzzle}\n---\n{question}\n---\n"
-                atom_answer = answer
+    def get_atom_math_problem(self, synapse: LogicSynapse) -> str:
+        selected_topic = random.choice(topics)
+        subtopic = selected_topic["subtopic"]
+        topic = selected_topic["topic"]
+        bt.logging.debug(f"Using {mathgenerator.__name__} to generate math problem")
+        atom_problem, atom_answer = eval(f"mathgenerator.{topic}.{subtopic}()")
+        subtopic = subtopic.replace("_", " ").capitalize()
+        topic = topic.replace("_", " ").capitalize()
+        atom_problem = atom_problem.replace("$", "").strip()
+        atom_problem = f"Find the solution of this math problem:\n---\nTopic: {topic}, Subtopic: {subtopic}.\n{atom_problem}\n---\n"
+        bt.logging.debug(f"Generated atom math problem: {atom_problem}")
+        synapse.raw_logic_question = atom_problem
 
-            # Select an atom question and answer from the UltraInteract
-            elif selected_resource == 'ultrainteract':
-                ds = load_dataset("openbmb/UltraInteract_sft")
-                bt.logging.debug("Generating problem using UltraInteract dataset.")
-                data_set = ds['train']
-                bt.logging.info(f"Loaded UltraInteract dataset with {len(data_set['instruction'])} entries")
-                random_index = random.randint(0, len(data_set['instruction']) - 1)
-                instruction = data_set['instruction'][random_index]
-                response = data_set['response'][random_index]
-                atom_question = f"Find the solution of this instruction:\n---\n{instruction}\n---\n"
-                atom_answer = response
-
-            # Select an atom question and answer from the GSM8K
-            elif selected_resource == 'gsm8k':
-                ds = load_dataset("openai/gsm8k", "main")
-                bt.logging.debug("Generating problem using GSM8K dataset.")
-                data_set = ds['train']
-                bt.logging.info(f"Loaded GSM8K dataset with {len(data_set['question'])} entries")
-                random_index = random.randint(0, len(data_set['question']) - 1)
-                question = data_set['question'][random_index]
-                answer = data_set['answer'][random_index]
-                atom_question = f"Find the solution of this question:\n---\n{question}\n---\n"
-                atom_answer = answer
+        synapse.ground_truth_answer = str(atom_answer).replace("$", "").strip()
 
-            # Select an atom question and answer from the MMLU-STEM
-            elif selected_resource == 'mmlustem':
-                ds = load_dataset("TIGER-Lab/MMLU-STEM")
-                bt.logging.debug("Generating problem using MMLU-STEM dataset.")
-                data_set = ds['test']
-                bt.logging.info(f"Loaded MMLU-STEM dataset with {len(data_set['question'])} entries")
-                random_index = random.randint(0, len(data_set['question']) - 1)
-                question = data_set['question'][random_index]
-                answer_id = data_set['answer'][random_index]
-                answer_choice = data_set['choices'][random_index]
-                atom_question = f"Find the solution of this question:\n---\n{question}\n---\n"
-                atom_answer = answer_choice[answer_id]
+        bt.logging.debug(f"Generated atom math answer: {atom_answer}")
 
-            # Select an atom question and answer from the SAT Math
-            elif selected_resource == 'satmath':
-                ds = load_dataset("mcaleste/sat_multiple_choice_math_may_23")
-                bt.logging.debug("Generating problem using SAT Math dataset.")
-                data_set = ds['train']
-                bt.logging.info(f"Loaded SAT Math dataset with {len(data_set['Question'])} entries")
-                random_index = random.randint(0, len(data_set['Question']) - 1)
-                question = data_set['Question'][random_index]
-                possible_answers = data_set['Possible Answers'][random_index]
-                answer_id = data_set['Answer'][random_index]
-                atom_question = f"Find the solution of this question:\n---\n{question}\n---\n"
-                atom_answer = self.get_answer_value(possible_answers, answer_id)
+        return atom_problem
 
-        except Exception as e:
-            bt.logging.error(f"Error accessing dataset {selected_resource}: {e}. Attempting to load an alternative dataset.")
-            # Retry with a different dataset
-            return self.get_atom_logic_problem()
-
-        return atom_question, atom_answer
-
-    def get_revised_logic_question(self, logic_question: str, conditions: dict) -> str:
+    def get_revised_math_question(self, math_problem: str, conditions: dict) -> str:
         # prompt = "Please paraphrase by adding word or expression to this question as if you were a {profile} who is {mood} and write in a {tone} tone. You can use incorrect grammar, typo or add more context! Don't add your solution! Just say the revised version, you don't need to be polite.".format(
         #     **conditions
         # )
@@ -158,6 +58,7 @@ def get_revised_logic_question(self, logic_question: str, conditions: dict) -> s
 
             "Do not include the solution or add unnecessary context."
         ).format(**conditions)
+        bt.logging.debug(f"Revising prompt: {prompt}")
 
         # messages = [
         #     {
@@ -180,7 +81,7 @@ def get_revised_logic_question(self, logic_question: str, conditions: dict) -> s
                 "ensuring the question sounds natural and appropriate for that individual."
             ),
         },
-        {"role": "assistant", "content": logic_question},
+        {"role": "assistant", "content": math_problem},
         {"role": "user", "content": prompt},
     ]
 
@@ -192,13 +93,6 @@ def get_revised_logic_question(self, logic_question: str, conditions: dict) -> s
         )
         response = response.choices[0].message.content.strip()
+        bt.logging.debug(f"Generated revised math question: {response}")
         return response
-
-
-    def get_answer_value(self, possible_answers, answer):
-        # Get the value of the answer from the possible answers
-        options = possible_answers.split()
-        for i, option in enumerate(options):
-            if option.startswith(answer + ")"):
-                return options[i + 1]
-        return None # Return None if the answer is not found
\ No newline at end of file
+ 
\ No newline at end of file
diff --git a/logicnet/validator/miner_manager.py b/logicnet/validator/miner_manager.py
index 06630ad3..9dfa6edf 100644
--- a/logicnet/validator/miner_manager.py
+++ b/logicnet/validator/miner_manager.py
@@ -128,6 +128,7 @@ def get_miner_uids(self, category: str):
         """
         Get miner uids based on category, useful if subnet has multiple categories
         """
+        print(self.all_uids_info)
         available_uids = [
             int(uid)
             for uid in self.all_uids_info.keys()
diff --git a/neurons/validator/validator.py b/neurons/validator/validator.py
index ebf13d06..f929f0a3 100644
--- a/neurons/validator/validator.py
+++ b/neurons/validator/validator.py
@@ -23,7 +23,6 @@ def init_category(config=None):
             config.llm_client.base_url,
             config.llm_client.key,
             config.llm_client.model,
-            config.dataset_weight,
         ),
         "rewarder": LogicRewarder(
             config.llm_client.base_url,
@@ -127,6 +126,11 @@ def forward(self):
         # Update scores on chain
         self.update_scores_on_chain()
         self.save_state()
+        bt.logging.info(
+            "\033[1;32m✅ Loop completed, uids info:\n"
+            + str(self.miner_manager.all_uids_info).replace("},", "},\n")
+            + "\033[0m"
+        )
         self.store_miner_infomation()
 
         actual_time_taken = time.time() - loop_start
@@ -158,7 +162,6 @@ def async_query_and_reward(
                 continue
             base_synapse = synapse.copy()
             synapse = synapse.miner_synapse()
-            bt.logging.info(f"\033[1;34m🧠 Synapse to be sent to miners: {synapse}\033[0m")
             axons = [self.metagraph.axons[int(uid)] for uid in uids]
             bt.logging.debug(f"\033[1;34m🧠 Axon: {axons}\033[0m")
             responses = dendrite.query(
@@ -167,10 +170,9 @@ def async_query_and_reward(
                 deserialize=False,
                 timeout=self.categories[category]["timeout"],
             )
-            # for response, uid in zip(responses, uids):
-            #     bt.logging.debug(
-            #         f"\033[1;34m🧠 Miner response for {uid}: {response.logic_answer}\033[0m"
-            #     )
+            bt.logging.debug(
+                f"\033[1;34m🧠 Miner response: {responses[0].logic_answer}\033[0m"
+            )
             reward_responses = [
                 response
                 for response, should_reward in zip(responses, should_rewards)
diff --git a/requirements.txt b/requirements.txt
index 54752a79..55e44089 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,5 +11,4 @@ sentence-transformers==3.0.1
 python-dotenv==1.0.1
 sympy
 wandb
-datasets
 git+https://github.com/lukew3/mathgenerator.git
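Reviewer note: with `dataset_weight` removed, `LogicChallenger` is constructed from the LLM client settings alone, matching the `init_category` change in `neurons/validator/validator.py`. A minimal usage sketch under stated assumptions: the import path is inferred from the file location, and the endpoint, key, and model values are placeholders rather than repo defaults.

```python
# Sketch of the slimmed-down challenger API after this PR; placeholder values only.
# Assumes the logicnet package is installed and the class is importable by file path.
from logicnet.protocol import LogicSynapse
from logicnet.validator.challenger.challenger import LogicChallenger

challenger = LogicChallenger(
    base_url="https://api.together.xyz/v1",  # Together.AI endpoint from docs/VALIDATOR.md
    api_key="your_together_ai_api_key",      # read from TOGETHER_API_KEY in .env
    model="your-together-model-id",          # must match --llm_client.model
)

# __call__ fills raw_logic_question, ground_truth_answer, and logic_question on the
# synapse; uncommenting performs a live LLM request, so it needs a valid key:
# synapse = challenger(LogicSynapse())
```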