17 changes: 14 additions & 3 deletions docs/VALIDATOR.md
@@ -37,6 +37,7 @@ We recommend using Together.AI to run the Validator, as it simplifies setup and
#### Prerequisites:

- **Account on Together.AI**: [Sign up here](https://together.ai/).
- **Account on Hugging Face**: [Sign up here](https://huggingface.co/).
- **API Key**: Obtain from the Together.AI dashboard.
- **Python 3.10**
- **PM2 Process Manager**: For running and managing the Validator process. *OPTIONAL*
@@ -70,6 +71,7 @@ We recommend using Together.AI to run the Validator, as it simplifies setup and
4. **Set Up the `.env` File**
```bash
echo "TOGETHER_API_KEY=your_together_ai_api_key" > .env
echo "HF_TOKEN=your_hugging_face_token" >> .env
```

5. **Select a Model**
@@ -131,6 +133,9 @@ We recommend using Together.AI to run the Validator, as it simplifies setup and
- Ensure your `TOGETHER_API_KEY` is correctly set and sourced:
- Check the `.env` file: `cat .env`
- Verify the API key is loaded: `echo $TOGETHER_API_KEY`
- Ensure your `HF_TOKEN` is correctly set and sourced:
- Check the `.env` file: `cat .env`
- Verify the token is loaded: `echo $HF_TOKEN`
- The `--llm_client.base_url` should be `https://api.together.xyz/v1`.
- Match `--llm_client.model` with the **Model ID** from Together.AI.

@@ -177,7 +182,12 @@ This method involves self-hosting a vLLM server to run the Validator locally. It requ
```
*Adjust the model, port, and host as needed.*

5. **Run the Validator with Self-Hosted LLM**
5. **Set Up the `.env` File**
```bash
echo "HF_TOKEN=your_hugging_face_token" > .env
```

6. **Run the Validator with Self-Hosted LLM**
- **Activate Virtual Environment**:
```bash
. main/bin/activate
@@ -194,7 +204,7 @@ This method involves self-hosting a vLLM server to run the Validator locally. It requ
--logging.debug
```

6. **(Optional) Enable Public Access**
7. **(Optional) Enable Public Access**
```bash
--axon.port "your-public-open-port"
```
@@ -226,11 +236,12 @@ This method involves self-hosting a vLLM server to run the Validator locally. It requ

- **Common Issues**:
- **API Key Not Found**: Ensure `.env` is sourced and `TOGETHER_API_KEY` is set.
- **HF Token Not Found**: Ensure `.env` is sourced and `HF_TOKEN` is set.
- **Model ID Incorrect**: Verify the `--llm_client.model` matches the Together.AI Model ID.
- **Connection Errors**: Check internet connectivity and Together.AI service status.

- **Contact Support**: Reach out to the LogicNet support team for assistance.

---

Happy Validating!
Happy Validating!
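Why the new `HF_TOKEN` matters: the challenger pulls the gated ZebraLogicBench dataset from Hugging Face, so the token has to be visible in the environment when the Validator runs. A minimal sketch of that access pattern, assuming the `.env` file is loaded with `python-dotenv` (already pinned in `requirements.txt`):

```python
import os

from datasets import load_dataset
from dotenv import load_dotenv  # python-dotenv, from requirements.txt

load_dotenv()  # loads HF_TOKEN (and any other keys) from the .env file
hf_token = os.environ.get("HF_TOKEN")

# ZebraLogicBench is a gated dataset, so the token is required; the other
# sources used by the challenger (GSM8K, UltraInteract, MMLU-STEM, SAT Math)
# are public.
ds = load_dataset("allenai/ZebraLogicBench-private", "grid_mode", token=hf_token)
print(f"grid_mode test split has {len(ds['test']['puzzle'])} puzzles")
```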
9 changes: 8 additions & 1 deletion logicnet/utils/config.py
@@ -189,6 +189,13 @@ def add_args(cls, parser):
default="xyz",
)

parser.add_argument(
"--dataset_weight",
type=str,
help="The weight of the dataset",
default="40,10,10,10,10,10,10",
)

else:
parser.add_argument(
"--miner.category",
@@ -250,4 +257,4 @@ def config(cls):
bt.logging.add_args(parser)
bt.axon.add_args(parser)
cls.add_args(parser)
return bt.config(parser)
return bt.config(parser)
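The new `--dataset_weight` flag is a single comma-separated string of seven weights, one per data source, which the challenger splits into floats and passes to `random.choices`. A small sketch of that parsing and sampling logic (the function name is illustrative, not part of the repo):

```python
import random

# Data sources in the same order as the default "40,10,10,10,10,10,10" weights.
RESOURCES = [
    "mathgenerator", "zebralogicbench-grid", "zebralogicbench-mc",
    "ultrainteract", "gsm8k", "mmlustem", "satmath",
]
DEFAULT_WEIGHTS = [40, 10, 10, 10, 10, 10, 10]


def pick_resource(dataset_weight: str) -> str:
    """Parse the comma-separated weight string and sample one source by weight."""
    weights = [float(w) for w in dataset_weight.split(",")]
    if len(weights) != len(RESOURCES):
        # Mirrors the challenger's fallback when the flag is misconfigured.
        weights = DEFAULT_WEIGHTS
    return random.choices(RESOURCES, weights=weights, k=1)[0]


print(pick_resource("40,10,10,10,10,10,10"))  # most often "mathgenerator"
```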
156 changes: 131 additions & 25 deletions logicnet/validator/challenger/challenger.py
@@ -1,53 +1,153 @@
# Challenge for Synthetic Request
import os
import openai
import random
from logicnet.protocol import LogicSynapse
import bittensor as bt
from .human_noise import get_condition
from .math_generator.topics import TOPICS as topics
import mathgenerator
from datasets import load_dataset

DATASET_WEIGHT = [40,10,10,10,10,10,10]

class LogicChallenger:
def __init__(self, base_url: str, api_key: str, model: str):
def __init__(self, base_url: str, api_key: str, model: str, dataset_weight: list):
bt.logging.info(
f"Logic Challenger initialized with model: {model}, base_url: {base_url}"
f"Initializing Logic Challenger with model: {model}, base URL: {base_url}."
)
self.model = model
self.openai_client = openai.OpenAI(base_url=base_url, api_key=api_key)
self.dataset_weight = [float(weight) for weight in dataset_weight.split(',')]

def __call__(self, synapse: LogicSynapse) -> LogicSynapse:
self.get_challenge(synapse)
return synapse

def get_challenge(self, synapse: LogicSynapse):
logic_problem = self.get_atom_math_problem(synapse)
# Get an atom logic problem
atom_logic_question, atom_logic_answer = self.get_atom_logic_problem()
if atom_logic_question is None or atom_logic_answer is None:
bt.logging.error("Unable to retrieve atom logic problem. Retrying...")
atom_logic_question, atom_logic_answer = self.get_atom_logic_problem()

# Revise the problem
conditions: dict = get_condition()
revised_logic_question: str = self.get_revised_math_question(
logic_problem, conditions
revised_logic_question: str = self.get_revised_logic_question(
atom_logic_question, conditions
)

# Set the synapse with the atom problem
synapse.raw_logic_question = atom_logic_question
synapse.ground_truth_answer = str(atom_logic_answer).replace("$", "").strip()
synapse.logic_question = revised_logic_question

def get_atom_math_problem(self, synapse: LogicSynapse) -> str:
selected_topic = random.choice(topics)
subtopic = selected_topic["subtopic"]
topic = selected_topic["topic"]
bt.logging.debug(f"Using {mathgenerator.__name__} to generate math problem")
atom_problem, atom_answer = eval(f"mathgenerator.{topic}.{subtopic}()")
subtopic = subtopic.replace("_", " ").capitalize()
topic = topic.replace("_", " ").capitalize()
atom_problem = atom_problem.replace("$", "").strip()
atom_problem = f"Find the solution of this math problem:\n---\nTopic: {topic}, Subtopic: {subtopic}.\n{atom_problem}\n---\n"
bt.logging.debug(f"Generated atom math problem: {atom_problem}")
synapse.raw_logic_question = atom_problem
def get_atom_logic_problem(self) -> str:
resources = ['mathgenerator', 'zebralogicbench-grid', 'zebralogicbench-mc', 'ultrainteract', 'gsm8k', 'mmlustem', 'satmath']
if len(self.dataset_weight) == 7:
selected_resource = random.choices(resources, weights=self.dataset_weight, k=1)[0]
else:
bt.logging.warning("Invalid dataset weight configuration provided. Using default weights.")
selected_resource = random.choices(resources, weights=DATASET_WEIGHT, k=1)[0]

try:
# Select an atom question and answer from the Mathgenerator
if selected_resource == 'mathgenerator':
selected_topic = random.choice(topics)
subtopic = selected_topic["subtopic"]
topic = selected_topic["topic"]
atom_question, atom_answer = eval(f"mathgenerator.{topic}.{subtopic}()")
if atom_question is None or atom_answer is None:
raise ValueError("Failed to get atom logic problem")
bt.logging.debug("Generating math problem using Mathgenerator.")
subtopic = subtopic.replace("_", " ").capitalize()
topic = topic.replace("_", " ").capitalize()
atom_question = atom_question.replace("$", "").strip()
atom_question = f"Find the solution of this math problem:\n---\nTopic: {topic}, Subtopic: {subtopic}.\n{atom_question}\n---\n"

# Select an atom question and answer from the ZebraLogicBench grid_mode
elif selected_resource == 'zebralogicbench-grid':
ds_grid = load_dataset("allenai/ZebraLogicBench-private", "grid_mode", token=os.environ.get('HF_TOKEN'))
bt.logging.debug("Generating problem using ZebraLogicBench (grid mode).")
data_set_grid = ds_grid['test']
bt.logging.info(f"Loaded ZebraLogicBench (grid_mode) dataset with {len(data_set_grid['puzzle'])} entries")
random_index = random.randint(0, len(data_set_grid['puzzle']) - 1)
puzzle = data_set_grid['puzzle'][random_index]
answer = data_set_grid['solution'][random_index]
atom_question = f"Find the solution of this problem:\n---\n{puzzle}\n---\n"
atom_answer = answer

# Select an atom question and answer from the ZebraLogicBench mc_mode
elif selected_resource == 'zebralogicbench-mc':
ds_mc = load_dataset("allenai/ZebraLogicBench-private", "mc_mode", token=os.environ.get('HF_TOKEN'))
bt.logging.debug("Generating problem using ZebraLogicBench (multiple choice mode).")
data_set_mc = ds_mc['test']
bt.logging.info(f"Loaded ZebraLogicBench (mc_mode) dataset with {len(data_set_mc['puzzle'])} entries")
random_index = random.randint(0, len(data_set_mc['puzzle']) - 1)
puzzle = data_set_mc['puzzle'][random_index]
question = data_set_mc['question'][random_index]
answer = data_set_mc['answer'][random_index]
atom_question = f"Find the solution of this problem:\n---\n{puzzle}\n---\n{question}\n---\n"
atom_answer = answer

synapse.ground_truth_answer = str(atom_answer).replace("$", "").strip()
# Select an atom question and answer from the UltraInteract
elif selected_resource == 'ultrainteract':
ds = load_dataset("openbmb/UltraInteract_sft")
bt.logging.debug("Generating problem using UltraInteract dataset.")
data_set = ds['train']
bt.logging.info(f"Loaded UltraInteract dataset with {len(data_set['instruction'])} entries")
random_index = random.randint(0, len(data_set['instruction']) - 1)
instruction = data_set['instruction'][random_index]
response = data_set['response'][random_index]
atom_question = f"Find the solution of this instruction:\n---\n{instruction}\n---\n"
atom_answer = response

# Select an atom question and answer from the GSM8K
elif selected_resource == 'gsm8k':
ds = load_dataset("openai/gsm8k", "main")
bt.logging.debug("Generating problem using GSM8K dataset.")
data_set = ds['train']
bt.logging.info(f"Loaded GSM8K dataset with {len(data_set['question'])} entries")
random_index = random.randint(0, len(data_set['question']) - 1)
question = data_set['question'][random_index]
answer = data_set['answer'][random_index]
atom_question = f"Find the solution of this question:\n---\n{question}\n---\n"
atom_answer = answer

bt.logging.debug(f"Generated atom math answer: {atom_answer}")
# Select an atom question and answer from the MMLU-STEM
elif selected_resource == 'mmlustem':
ds = load_dataset("TIGER-Lab/MMLU-STEM")
bt.logging.debug("Generating problem using MMLU-STEM dataset.")
data_set = ds['test']
bt.logging.info(f"Loaded MMLU-STEM dataset with {len(data_set['question'])} entries")
random_index = random.randint(0, len(data_set['question']) - 1)
question = data_set['question'][random_index]
answer_id = data_set['answer'][random_index]
answer_choice = data_set['choices'][random_index]
atom_question = f"Find the solution of this question:\n---\n{question}\n---\n"
atom_answer = answer_choice[answer_id]

return atom_problem
# Select an atom question and answer from the SAT Math
elif selected_resource == 'satmath':
ds = load_dataset("mcaleste/sat_multiple_choice_math_may_23")
bt.logging.debug("Generating problem using SAT Math dataset.")
data_set = ds['train']
bt.logging.info(f"Loaded SAT Math dataset with {len(data_set['Question'])} entries")
random_index = random.randint(0, len(data_set['Question']) - 1)
question = data_set['Question'][random_index]
possible_answers = data_set['Possible Answers'][random_index]
answer_id = data_set['Answer'][random_index]
atom_question = f"Find the solution of this question:\n---\n{question}\n---\n"
atom_answer = self.get_answer_value(possible_answers, answer_id)

def get_revised_math_question(self, math_problem: str, conditions: dict) -> str:
except Exception as e:
bt.logging.error(f"Error accessing dataset {selected_resource}: {e}. Attempting to load an alternative dataset.")
# Retry with a different dataset
return self.get_atom_logic_problem()

return atom_question, atom_answer

def get_revised_logic_question(self, logic_question: str, conditions: dict) -> str:
# prompt = "Please paraphrase by adding word or expression to this question as if you were a {profile} who is {mood} and write in a {tone} tone. You can use incorrect grammar, typo or add more context! Don't add your solution! Just say the revised version, you don't need to be polite.".format(
# **conditions
# )
@@ -58,7 +158,6 @@ def get_revised_math_question(self, math_problem: str, conditions: dict) -> str:
"Do not include the solution or add unnecessary context."
).format(**conditions)

bt.logging.debug(f"Revising prompt: {prompt}")

# messages = [
# {
@@ -81,7 +180,7 @@ def get_revised_math_question(self, math_problem: str, conditions: dict) -> str:
"ensuring the question sounds natural and appropriate for that individual."
),
},
{"role": "assistant", "content": math_problem},
{"role": "assistant", "content": logic_question},
{"role": "user", "content": prompt},
]

@@ -93,6 +192,13 @@ def get_revised_math_question(self, math_problem: str, conditions: dict) -> str:
)

response = response.choices[0].message.content.strip()
bt.logging.debug(f"Generated revised math question: {response}")
return response



def get_answer_value(self, possible_answers, answer):
# Get the value of the answer from the possible answers
options = possible_answers.split()
for i, option in enumerate(options):
if option.startswith(answer + ")"):
return options[i + 1]
return None # Return None if the answer is not found
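A note on `get_answer_value`: it assumes the SAT Math `Possible Answers` field is whitespace-separated, e.g. `A) 5 B) 7 C) 9 D) 11`, so the value it returns is the single token that follows the matching letter (a multi-word answer would be truncated to its first token). A usage sketch with made-up strings, not actual dataset rows:

```python
def get_answer_value(possible_answers: str, answer: str):
    """Return the token following 'X)', or None if the letter is not present."""
    options = possible_answers.split()
    for i, option in enumerate(options):
        if option.startswith(answer + ")"):
            return options[i + 1]
    return None


assert get_answer_value("A) 5 B) 7 C) 9 D) 11", "C") == "9"
assert get_answer_value("A) 5 B) 7 C) 9 D) 11", "E") is None
```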
3 changes: 1 addition & 2 deletions logicnet/validator/miner_manager.py
@@ -128,7 +128,6 @@ def get_miner_uids(self, category: str):
"""
Get miner uids based on category, useful if subnet has multiple categories
"""
print(self.all_uids_info)
available_uids = [
int(uid)
for uid in self.all_uids_info.keys()
@@ -181,4 +180,4 @@ def get_model_specific_weights(self, category, normalize=True):
# Normalizing the tensor
if tensor_sum > 0:
model_specific_weights = model_specific_weights / tensor_sum
return model_specific_weights
return model_specific_weights
16 changes: 7 additions & 9 deletions neurons/validator/validator.py
@@ -23,6 +23,7 @@ def init_category(config=None):
config.llm_client.base_url,
config.llm_client.key,
config.llm_client.model,
config.dataset_weight,
),
"rewarder": LogicRewarder(
config.llm_client.base_url,
@@ -126,11 +127,6 @@ def forward(self):
# Update scores on chain
self.update_scores_on_chain()
self.save_state()
bt.logging.info(
"\033[1;32m✅ Loop completed, uids info:\n"
+ str(self.miner_manager.all_uids_info).replace("},", "},\n")
+ "\033[0m"
)
self.store_miner_infomation()

actual_time_taken = time.time() - loop_start
@@ -162,6 +158,7 @@ def async_query_and_reward(
continue
base_synapse = synapse.copy()
synapse = synapse.miner_synapse()
bt.logging.info(f"\033[1;34m🧠 Synapse to be sent to miners: {synapse}\033[0m")
axons = [self.metagraph.axons[int(uid)] for uid in uids]
bt.logging.debug(f"\033[1;34m🧠 Axon: {axons}\033[0m")
responses = dendrite.query(
@@ -170,9 +167,10 @@
deserialize=False,
timeout=self.categories[category]["timeout"],
)
bt.logging.debug(
f"\033[1;34m🧠 Miner response: {responses[0].logic_answer}\033[0m"
)
# for response, uid in zip(responses, uids):
# bt.logging.debug(
# f"\033[1;34m🧠 Miner response for {uid}: {response.logic_answer}\033[0m"
# )
reward_responses = [
response
for response, should_reward in zip(responses, should_rewards)
@@ -391,4 +389,4 @@ def convert_to_serializable(data):
with Validator() as validator:
while True:
bt.logging.info("\033[1;32m🟢 Validator running...\033[0m", time.time())
time.sleep(360)
time.sleep(360)
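With `config.dataset_weight` threaded through `init_category`, the challenger is now constructed with four arguments. A rough wiring sketch with placeholder values (the module path is inferred from the file layout; the credentials and model ID are stand-ins, and note that despite the `dataset_weight: list` annotation the constructor expects the comma-separated string, since it calls `.split(',')`):

```python
from logicnet.validator.challenger.challenger import LogicChallenger

challenger = LogicChallenger(
    base_url="https://api.together.xyz/v1",   # --llm_client.base_url
    api_key="your_together_ai_api_key",       # --llm_client.key
    model="your_together_model_id",           # --llm_client.model
    dataset_weight="40,10,10,10,10,10,10",    # --dataset_weight (string, not list)
)
```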
1 change: 1 addition & 0 deletions requirements.txt
@@ -11,4 +11,5 @@ sentence-transformers==3.0.1
python-dotenv==1.0.1
sympy
wandb
datasets
git+https://github.com/lukew3/mathgenerator.git