In [12]:
%%writefile agents/question_model.py
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# ===============================
# CHANGE MODEL PATH IF NEEDED
# ===============================
MODEL_PATH = ""


class QuestionModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        self.model.eval()

    # ===============================
    # üî• EDITABLE PROMPT SECTION
    # ===============================
    def build_prompt(self, topic):

        return f"""
You are an expert puzzle creator.

Generate ONE highly challenging puzzle-based multiple choice question 
based on the topic: {topic}

STRICT RULES:
- Must be logical or reasoning based
- Exactly 4 options
- Only one correct answer
- Return valid JSON only
- No extra explanation outside JSON

Required JSON format:

{{
    "topic": "{topic}",
    "question": "Write the full question here",
    "choices": {{
        "A": "Option A",
        "B": "Option B",
        "C": "Option C",
        "D": "Option D"
    }},
    "answer": "A",
    "explanation": "Short reasoning"
}}
"""

    # ===============================
    # DO NOT MODIFY BELOW UNLESS NEEDED
    # ===============================
    def generate_question(self, topic):

        prompt = self.build_prompt(topic)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3000
        ).to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=250,
                temperature=0.7,
                top_p=0.9,
                do_sample=True
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        try:
            start = response.index("{")
            end = response.rindex("}") + 1
            parsed = json.loads(response[start:end])

            if parsed.get("answer") in ["A", "B", "C", "D"]:
                return parsed

        except:
            pass

        # Fallback (competition safe)
        return {
            "topic": topic,
            "question": "Fallback question.",
            "choices": {
                "A": "1",
                "B": "2",
                "C": "3",
                "D": "4"
            },
            "answer": "A",
            "explanation": "Fallback"
        }


Overwriting agents/question_model.py


In [18]:
cp -r /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/* /workspace/AAIPL/hf_models/

In [23]:
%%writefile agents/answer_model.py
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# ===============================
# CHANGE MODEL PATH IF NEEDED
# ===============================
MODEL_PATH = "hf_models/YOUR_MODEL_FOLDER_NAME"


class AnswerModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        self.model.eval()

    # ===============================
    # üî• EDITABLE PROMPT SECTION
    # ===============================
    def build_prompt(self, question_text, choices):

        return f"""
You are a highly accurate reasoning engine.

Solve the following question carefully.

Question:
{question_text}

Options:
A. {choices["A"]}
B. {choices["B"]}
C. {choices["C"]}
D. {choices["D"]}

Rules:
- Think internally
- Return ONLY JSON
- Do not return explanation

Required format:
{{"answer": "A"}}
"""

    # ===============================
    # DO NOT MODIFY BELOW UNLESS NEEDED
    # ===============================
    def solve(self, question_text, choices):

        prompt = self.build_prompt(question_text, choices)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3000
        ).to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=100,
                temperature=0.2
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        try:
            start = response.index("{")
            end = response.rindex("}") + 1
            parsed = json.loads(response[start:end])

            if parsed.get("answer") in ["A", "B", "C", "D"]:
                return parsed

        except:
            pass

        return {"answer": "A"}


Overwriting agents/answer_model.py


In [28]:
%%writefile agents/answer_model.py
import json
import torch
import re
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "hf_models/YOUR_MODEL_FOLDER_NAME"

A_VOTES = 3   # üî• You can change this (3 recommended for speed)


class AnswerModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        self.model.eval()

    # ===============================
    # EDITABLE PROMPT
    # ===============================
    def build_prompt(self, question_text, choices):
        return f"""
Solve the following MCQ carefully.

Question:
{question_text}

Options:
A. {choices["A"]}
B. {choices["B"]}
C. {choices["C"]}
D. {choices["D"]}

Return ONLY valid JSON:
{{"answer":"A"}}
"""

    # ===============================
    # Helper: Extract letter
    # ===============================
    def extract_letter(self, text):
        match = re.search(r'"answer"\s*:\s*"([ABCD])"', text)
        if match:
            return match.group(1)

        # fallback: detect standalone letter
        for letter in ["A", "B", "C", "D"]:
            if letter in text:
                return letter

        return None

    # ===============================
    # Multi-Vote Solver
    # ===============================
    def solve(self, question_text, choices):

        prompt = self.build_prompt(question_text, choices)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3000
        ).to(self.model.device)

        vote_count = {"A": 0, "B": 0, "C": 0, "D": 0}

        for _ in range(A_VOTES):

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=100,
                    temperature=0.6,   # small randomness for diversity
                    top_p=0.9,
                    do_sample=True
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            letter = self.extract_letter(response)

            if letter in vote_count:
                vote_count[letter] += 1

        # majority selection
        final_answer = max(vote_count, key=vote_count.get)

        return {"answer": final_answer}


Overwriting agents/answer_model.py


In [35]:
%%writefile agents/answer_model.py
import json
import torch
import re
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "hf_models/YOUR_MODEL_FOLDER_NAME"

A_VOTES = 3   # Recommended: 3 for speed/accuracy balance


class AnswerModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        self.model.eval()

    # ===============================
    # EDITABLE PROMPT
    # ===============================
    def build_prompt(self, question_text, choices):
        return f"""
Solve the following multiple-choice question carefully.

Question:
{question_text}

Options:
A. {choices["A"]}
B. {choices["B"]}
C. {choices["C"]}
D. {choices["D"]}

Return ONLY valid JSON:
{{"answer":"A"}}
"""

    # ===============================
    # Extract answer letter safely
    # ===============================
    def extract_letter(self, text):

        # Try strict JSON match first
        match = re.search(r'"answer"\s*:\s*"([ABCD])"', text)
        if match:
            return match.group(1)

        # Fallback: detect standalone capital letter
        for letter in ["A", "B", "C", "D"]:
            if f'"{letter}"' in text or f" {letter}" in text:
                return letter

        return None

    # ===============================
    # Multi-Vote + Confidence
    # ===============================
    def solve(self, question_text, choices):

        prompt = self.build_prompt(question_text, choices)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3000
        ).to(self.model.device)

        vote_count = {"A": 0, "B": 0, "C": 0, "D": 0}

        for _ in range(A_VOTES):

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=100,
                    temperature=0.6,
                    top_p=0.9,
                    do_sample=True
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            letter = self.extract_letter(response)

            if letter in vote_count:
                vote_count[letter] += 1

        # Majority decision
        final_answer = max(vote_count, key=vote_count.get)
        confidence = vote_count[final_answer] / A_VOTES

        return {
            "answer": final_answer,
            "confidence": round(confidence, 2)
        }


Overwriting agents/answer_model.py


In [39]:


%%writefile agents/answer_model.py
import json
import torch
import re
import time
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "hf_models/YOUR_MODEL_FOLDER_NAME"

A_VOTES = 3
TIME_LIMIT = 8.5   # seconds (safe buffer under 9 sec)


class AnswerModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        self.model.eval()

    # ===============================
    # EDITABLE PROMPT
    # ===============================
    def build_prompt(self, question_text, choices):
        return f"""
Solve the following multiple-choice question carefully.

Question:
{question_text}

Options:
A. {choices["A"]}
B. {choices["B"]}
C. {choices["C"]}
D. {choices["D"]}

Return ONLY valid JSON:
{{"answer":"A"}}
"""

    # ===============================
    # Extract answer letter safely
    # ===============================
    def extract_letter(self, text):

        match = re.search(r'"answer"\s*:\s*"([ABCD])"', text)
        if match:
            return match.group(1)

        for letter in ["A", "B", "C", "D"]:
            if letter in text:
                return letter

        return None

    # ===============================
    # Multi-Vote + Confidence + Time Guard
    # ===============================
    def solve(self, question_text, choices):

        start_time = time.time()

        prompt = self.build_prompt(question_text, choices)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3000
        ).to(self.model.device)

        vote_count = {"A": 0, "B": 0, "C": 0, "D": 0}
        completed_votes = 0

        for _ in range(A_VOTES):

            # ‚è± Time Guard Check
            if time.time() - start_time > TIME_LIMIT:
                break

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=100,
                    temperature=0.6,
                    top_p=0.9,
                    do_sample=True
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            letter = self.extract_letter(response)

            if letter in vote_count:
                vote_count[letter] += 1
                completed_votes += 1

        # If no votes completed (extreme edge case)
        if completed_votes == 0:
            return {"answer": "A", "confidence": 0.0}

        final_answer = max(vote_count, key=vote_count.get)
        confidence = vote_count[final_answer] / completed_votes

        return {
            "answer": final_answer,
            "confidence": round(confidence, 2)
        }


Overwriting agents/answer_model.py


In [23]:
%%writefile agents/answer_model.py
import json
import torch
import re
import time
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "hf_models/YOUR_MODEL_FOLDER_NAME"

INITIAL_VOTES = 3
MAX_EXTRA_VOTES = 2
CONFIDENCE_THRESHOLD = 0.7
TIME_LIMIT = 8.5   # safe under 9 sec


class AnswerModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        )
        self.model.eval()

    # ===============================
    # EDITABLE PROMPT
    # ===============================
    def build_prompt(self, question_text, choices):
        return f"""
Solve the following multiple-choice question carefully.

Question:
{question_text}

Options:
A. {choices["A"]}
B. {choices["B"]}
C. {choices["C"]}
D. {choices["D"]}

Return ONLY valid JSON:
{{"answer":"A"}}
"""

    # ===============================
    # Extract answer safely
    # ===============================
    def extract_letter(self, text):

        match = re.search(r'"answer"\s*:\s*"([ABCD])"', text)
        if match:
            return match.group(1)

        for letter in ["A", "B", "C", "D"]:
            if letter in text:
                return letter

        return None

    # ===============================
    # Adaptive Voting Solver
    # ===============================
    def solve(self, question_text, choices):

        start_time = time.time()

        prompt = self.build_prompt(question_text, choices)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=3000
        ).to(self.model.device)

        vote_count = {"A": 0, "B": 0, "C": 0, "D": 0}
        total_votes = 0

        # ---------- Initial Votes ----------
        for _ in range(INITIAL_VOTES):

            if time.time() - start_time > TIME_LIMIT:
                break

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=100,
                    temperature=0.6,
                    top_p=0.9,
                    do_sample=True
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            letter = self.extract_letter(response)

            if letter in vote_count:
                vote_count[letter] += 1
                total_votes += 1

        if total_votes == 0:
            return {"answer": "A", "confidence": 0.0}

        # Calculate confidence
        best_answer = max(vote_count, key=vote_count.get)
        confidence = vote_count[best_answer] / total_votes

        # ---------- Adaptive Extra Votes ----------
        extra_votes = 0

        while (
            confidence < CONFIDENCE_THRESHOLD and
            extra_votes < MAX_EXTRA_VOTES and
            time.time() - start_time < TIME_LIMIT
        ):

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=100,
                    temperature=0.6,
                    top_p=0.9,
                    do_sample=True
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            letter = self.extract_letter(response)

            if letter in vote_count:
                vote_count[letter] += 1
                total_votes += 1

            best_answer = max(vote_count, key=vote_count.get)
            confidence = vote_count[best_answer] / total_votes

            extra_votes += 1

        return {
            "answer": best_answer,
            "confidence": round(confidence, 2)
        }


Overwriting agents/answer_model.py


In [52]:
%%writefile agents/answer_model.py
import time
import torch
import json
from collections import Counter
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "hf_models/Llama-3.1-8B-Instruct"

class AnswerModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            local_files_only=True
        )

    def _generate_once(self, prompt, temperature=0.7):
        inputs = self.tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=temperature,
            do_sample=True if temperature > 0 else False
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _extract_answer(self, text):
        for letter in ["A", "B", "C", "D"]:
            if f'"answer": "{letter}"' in text or f"Answer: {letter}" in text:
                return letter
        return None

    def solve(self, question_text, choices):
        start_time = time.time()
        TIME_LIMIT = 8.5  # keep safe under 9 sec limit
        
        prompt = f"""
You are a highly logical AI.
Solve step-by-step internally.
Return ONLY valid JSON in this format:
{{
    "answer": "A/B/C/D",
    "confidence": float between 0 and 1
}}

Question:
{question_text}

Choices:
A: {choices.get("A")}
B: {choices.get("B")}
C: {choices.get("C")}
D: {choices.get("D")}
"""

        votes = []

        # -------- Initial Multi-Vote (3 votes) --------
        for _ in range(3):
            if time.time() - start_time > TIME_LIMIT:
                break
            response = self._generate_once(prompt, temperature=0.7)
            ans = self._extract_answer(response)
            if ans:
                votes.append(ans)

        if not votes:
            return {"answer": "A", "confidence": 0.25}

        counter = Counter(votes)
        best_answer, count = counter.most_common(1)[0]
        confidence = count / len(votes)

        # -------- Adaptive Voting --------
        if confidence < 0.7 and time.time() - start_time < TIME_LIMIT:
            for _ in range(2):  # extra votes
                if time.time() - start_time > TIME_LIMIT:
                    break
                response = self._generate_once(prompt, temperature=0.7)
                ans = self._extract_answer(response)
                if ans:
                    votes.append(ans)

            counter = Counter(votes)
            best_answer, count = counter.most_common(1)[0]
            confidence = count / len(votes)

        # -------- Deterministic Fallback --------
        if confidence < 0.6 and time.time() - start_time < TIME_LIMIT:
            response = self._generate_once(prompt, temperature=0.0)
            ans = self._extract_answer(response)
            if ans:
                best_answer = ans
                confidence = max(confidence, 0.75)

        # -------- Final Safe Return --------
        return {
            "answer": best_answer,
            "confidence": round(float(confidence), 2)
        }


Overwriting agents/answer_model.py


In [None]:
%%writefile agents/question_model.py
import torch
import json
import time
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "hf_models/Llama-3.1-8B-Instruct"

class QuestionModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            local_files_only=True
        )

    def _generate(self, prompt, temperature=0.8):
        inputs = self.tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=400,
            temperature=temperature,
            do_sample=True
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def generate_question(self, domain="general knowledge"):
        TIME_LIMIT = 9
        start_time = time.time()

        base_prompt = f"""
You are an expert exam creator.

Create ONE very challenging but valid multiple choice question in the domain: {domain}

Rules:
- 4 options (A,B,C,D)
- Exactly ONE correct answer
- No ambiguity
- Avoid trick wording
- Make distractors very strong
- Question should require reasoning

Return ONLY valid JSON:

{{
  "question": "...",
  "choices": {{
      "A": "...",
      "B": "...",
      "C": "...",
      "D": "..."
  }},
  "answer": "A/B/C/D",
  "difficulty": float between 0 and 1
}}
"""

        response = self._generate(base_prompt)

        try:
            data = json.loads(response)
        except:
            # Retry once if invalid
            response = self._generate(base_prompt)
            try:
                data = json.loads(response)
            except:
                return {
                    "question": "Which planet is known as the Red Planet?",
                    "choices": {
                        "A": "Earth",
                        "B": "Mars",
                        "C": "Venus",
                        "D": "Jupiter"
                    },
                    "answer": "B",
                    "difficulty": 0.3
                }

        # Basic validation
        if "question" not in data or "choices" not in data or "answer" not in data:
            return {
                "question": "Which planet is known as the Red Planet?",
                "choices": {
                    "A": "Earth",
                    "B": "Mars",
                    "C": "Venus",
                    "D": "Jupiter"
                },
                "answer": "B",
                "difficulty": 0.3
            }

        if data["answer"] not in ["A","B","C","D"]:
            data["answer"] = "A"

        if time.time() - start_time > TIME_LIMIT:
            data["difficulty"] = min(data.get("difficulty", 0.5), 0.7)

        return data


In [30]:
%%writefile self_play.py
import time
import json
from agents.question_model import QuestionModel
from agents.answer_model import AnswerModel

class SelfPlayArena:
    def __init__(self, rounds=10):
        self.rounds = rounds
        self.q_model = QuestionModel()
        self.a_model = AnswerModel()
        self.stats = {
            "total": 0,
            "correct": 0,
            "wrong": 0,
            "avg_confidence": 0
        }

    def run(self):
        print("üî• Starting Self-Play Training...\n")

        total_conf = 0

        for i in range(self.rounds):
            print(f"Round {i+1}/{self.rounds}")

            question = self.q_model.generate_question("logical reasoning")
            answer = self.a_model.solve(question["question"], question["choices"])

            correct_letter = question["answer"]
            predicted_letter = answer["answer"]
            confidence = answer.get("confidence", 0)

            self.stats["total"] += 1
            total_conf += confidence

            if predicted_letter == correct_letter:
                self.stats["correct"] += 1
                print("‚úÖ Correct")
            else:
                self.stats["wrong"] += 1
                print("‚ùå Wrong")

            print("Correct:", correct_letter)
            print("Predicted:", predicted_letter)
            print("Confidence:", confidence)
            print("-"*40)

        self.stats["avg_confidence"] = total_conf / self.rounds

        return self.stats


if __name__ == "__main__":
    arena = SelfPlayArena(rounds=5)
    results = arena.run()

    print("\nüèÜ Self-Play Results")
    print(results)


Overwriting self_play.py


In [31]:
!python self_play.py


Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/transformers/utils/hub.py", line 478, in cached_files
    hf_hub_download(
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py", line 1007, in hf_hub_download
    return _hf_hub_download_to_cache_dir(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py", line 1114, in _hf_hub_download_to_cache_dir
    _raise_on_head_call_error(head_call_error, force_download, local_files_only)
  File "/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py", line 1646, in _raise_on_head_call_error
    raise LocalEntryNotFoundError(
huggingface_hub.errors.LocalEntryNotFoundError: Cannot find the requested files in the disk cache and outgo

In [32]:
import os

print(os.listdir("hf_models"))


['config.json', '.ipynb_checkpoints']


In [33]:
print(os.listdir("hf_models/model_1"))


FileNotFoundError: [Errno 2] No such file or directory: 'hf_models/model_1'

In [34]:
!find . -type f -name "*.safetensors"


In [35]:
!find . -type f -name "pytorch_model.bin"


In [36]:
!find . -type f -name "tokenizer.model"




In [37]:
cp -r /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/* /workspace/AAIPL/hf_models/

In [39]:
import os
print(os.listdir("hf_models"))


['config.json', '.ipynb_checkpoints']


In [40]:
import os
os.makedirs("hf_models/model_1", exist_ok=True)

In [41]:
!cp -r /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/* ./hf_models/model_1/

In [42]:
print(os.listdir("hf_models/model_1"))

['config.json']


In [44]:
mkdir -p hf_models/model_1

In [45]:
cp -aL /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/. ./hf_models/model_1/


cp: not writing through dangling symlink './hf_models/model_1/./config.json'


In [46]:
# 1. Delete the folder with the broken shortcuts
rm -rf ./hf_models/model_1

# 2. Create a fresh, empty folder
mkdir -p ./hf_models/model_1

# 3. Copy the ACTUAL files (using -L to turn shortcuts into real files)
# Note: I'm using the path from your previous error message
cp -RL /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/* ./hf_models/model_1/

SyntaxError: invalid decimal literal (2786369552.py, line 9)

In [47]:
# 1. Clean up any broken links first
!rm -rf ./hf_models/model_1
!mkdir -p ./hf_models/model_1

# 2. Use the '!' so Python knows this is a system command
!cp -RL /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/* ./hf_models/model_1/

In [49]:
import os

target_dir = "./hf_models/model_1"
if os.path.exists(target_dir):
    files = os.listdir(target_dir)
    print(f"‚úÖ Success! Found {len(files)} files in {target_dir}:")
    for f in files:
        size_gb = os.path.getsize(os.path.join(target_dir, f)) / (1024**3)
        print(f"--- {f} ({size_gb:.4f} GB)")
else:
    print("‚ùå Folder still does not exist.")

‚úÖ Success! Found 1 files in ./hf_models/model_1:
--- config.json (0.0000 GB)


In [50]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# 1. Point to your local folder
model_path = "./hf_models/model_1"

print("Step 1: Loading Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)

print("Step 2: Loading Model (this might take a minute)...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,   # Uses less VRAM/RAM
    device_map="auto",           # Automatically finds your GPU
    local_files_only=True        # Forces it to use your hf_models folder
)

print("‚úÖ Model Loaded Successfully!")

Step 1: Loading Tokenizer...


ZeroDivisionError: integer division or modulo by zero

In [51]:
import json
import requests

# This is the raw URL for the unsloth Llama-3 config (adjust if using a different model)
config_url = "https://huggingface.co/unsloth/llama-3-8b-bnb-4bit/raw/main/config.json"
response = requests.get(config_url)

if response.status_code == 200:
    with open("./hf_models/model_1/config.json", "w") as f:
        f.write(response.text)
    print("‚úÖ config.json has been refreshed and validated!")
else:
    print("‚ùå Failed to download a fresh config. Please check your internet.")


‚úÖ config.json has been refreshed and validated!


In [52]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
import torch

model_path = "./hf_models/model_1"

print("Loading configuration...")
# We load the config separately first to verify it's working
config = AutoConfig.from_pretrained(model_path, local_files_only=True)

print(f"Verified Config: {config.num_attention_heads} attention heads found.")

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)

print("Loading model weights...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    torch_dtype=torch.float16,
    device_map="auto",
    local_files_only=True
)

print("üöÄ Success! Model is ready.")

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message


Loading configuration...
Verified Config: 32 attention heads found.
Loading tokenizer...


TypeError: not a string

In [53]:
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
import torch

model_path = "./hf_models/model_1"

print("Step 1: Loading Tokenizer (Fast Mode)...")
# use_fast=True skips the need for the 'tokenizer.model' file in many cases
tokenizer = AutoTokenizer.from_pretrained(
    model_path, 
    local_files_only=True,
    use_fast=True 
)

print("Step 2: Loading Model weights...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    local_files_only=True
)

print("‚úÖ SUCCESS! Everything is loaded.")

Step 1: Loading Tokenizer (Fast Mode)...


TypeError: not a string

In [54]:
import os
from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedTokenizerFast

model_path = "./hf_models/model_1"

# 1. Load the Tokenizer using the Fast class directly
print("Step 1: Loading Tokenizer (Direct Fast Class)...")
tokenizer = PreTrainedTokenizerFast.from_pretrained(
    model_path, 
    local_files_only=True
)

# Manually set the chat template/padding if needed (common for Llama-3)
tokenizer.pad_token = tokenizer.eos_token

# 2. Load the Model
print("Step 2: Loading Model weights...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto", # Automatically chooses bfloat16 or float16
    device_map="auto",
    local_files_only=True
)

print("‚úÖ SUCCESS! The model and tokenizer are loaded and ready.")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LlamaTokenizer'. 
The class this function is called from is 'PreTrainedTokenizerFast'.


Step 1: Loading Tokenizer (Direct Fast Class)...


ValueError: Converting from SentencePiece and Tiktoken failed, if a converter for SentencePiece is available, provide a model path with a SentencePiece tokenizer.model file.Currently available slow->fast converters: ['AlbertTokenizer', 'BartTokenizer', 'BarthezTokenizer', 'BertTokenizer', 'BigBirdTokenizer', 'BlenderbotTokenizer', 'CamembertTokenizer', 'CLIPTokenizer', 'CodeGenTokenizer', 'ConvBertTokenizer', 'DebertaTokenizer', 'DebertaV2Tokenizer', 'DistilBertTokenizer', 'DPRReaderTokenizer', 'DPRQuestionEncoderTokenizer', 'DPRContextEncoderTokenizer', 'ElectraTokenizer', 'FNetTokenizer', 'FunnelTokenizer', 'GPT2Tokenizer', 'HerbertTokenizer', 'LayoutLMTokenizer', 'LayoutLMv2Tokenizer', 'LayoutLMv3Tokenizer', 'LayoutXLMTokenizer', 'LongformerTokenizer', 'LEDTokenizer', 'LxmertTokenizer', 'MarkupLMTokenizer', 'MBartTokenizer', 'MBart50Tokenizer', 'MPNetTokenizer', 'MobileBertTokenizer', 'MvpTokenizer', 'NllbTokenizer', 'OpenAIGPTTokenizer', 'PegasusTokenizer', 'Qwen2Tokenizer', 'RealmTokenizer', 'ReformerTokenizer', 'RemBertTokenizer', 'RetriBertTokenizer', 'RobertaTokenizer', 'RoFormerTokenizer', 'SeamlessM4TTokenizer', 'SqueezeBertTokenizer', 'T5Tokenizer', 'UdopTokenizer', 'WhisperTokenizer', 'XLMRobertaTokenizer', 'XLNetTokenizer', 'SplinterTokenizer', 'XGLMTokenizer', 'LlamaTokenizer', 'CodeLlamaTokenizer', 'GemmaTokenizer', 'Phi3Tokenizer']

In [55]:
import json
import os

model_path = "./hf_models/model_1"

# 1. Force tokenizer_config.json to use the Fast class
config_path = os.path.join(model_path, "tokenizer_config.json")
if os.path.exists(config_path):
    with open(config_path, "r") as f:
        data = json.load(f)
    
    data["tokenizer_class"] = "PreTrainedTokenizerFast"
    # Remove references to the slow tokenizer file
    data.pop("tokenizer_file", None) 
    
    with open(config_path, "w") as f:
        json.dump(data, f, indent=4)
    print("‚úÖ Fixed tokenizer_config.json")

# 2. Ensure we have a valid tokenizer.json
if not os.path.exists(os.path.join(model_path, "tokenizer.json")):
    print("‚ùå Error: tokenizer.json is missing! You must copy it from the cache.")

‚ùå Error: tokenizer.json is missing! You must copy it from the cache.


In [56]:
from transformers import AutoModelForCausalLM, PreTrainedTokenizerFast
import torch

model_path = "./hf_models/model_1"

print("Step 1: Loading Tokenizer via direct JSON map...")
tokenizer = PreTrainedTokenizerFast(
    tokenizer_file=f"{model_path}/tokenizer.json",
    bos_token="<|begin_of_text|>", # Standard Llama-3 tokens
    eos_token="<|end_of_text|>",
)

print("Step 2: Loading Model...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    local_files_only=True
)

print("‚úÖ SUCCESS! The model is finally loaded.")

Step 1: Loading Tokenizer via direct JSON map...


Exception: No such file or directory (os error 2)

In [57]:
import os
path = "./hf_models/model_1"

if os.path.exists(path):
    print(f"Contents of {path}: {os.listdir(path)}")
else:
    print(f"‚ùå The directory {path} does not even exist!")

Contents of ./hf_models/model_1: ['config.json']


In [58]:
# 1. Re-create the folder just in case
!mkdir -p ./hf_models/model_1

# 2. Copy the tokenizer and the weights directly from the cache
# Note: I am using the exact path from your previous screenshot
!cp -L /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/tokenizer.json ./hf_models/model_1/
!cp -L /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/config.json ./hf_models/model_1/
!cp -L /root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/*.safetensors ./hf_models/model_1/

cp: cannot stat '/root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/tokenizer.json': No such file or directory
cp: cannot stat '/root/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/*.safetensors': No such file or directory


In [59]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_path = "./hf_models/model_1"

print("Step 1: Loading Tokenizer...")
# We use use_fast=True because Llama-3 depends on the 'tokenizer.json' file
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True, use_fast=True)

print("Step 2: Loading Model...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    local_files_only=True
)

print("‚úÖ DONE! The model is ready for your questions.")

Step 1: Loading Tokenizer...


ZeroDivisionError: integer division or modulo by zero

In [60]:
import torch
from transformers import LlamaConfig, AutoTokenizer, AutoModelForCausalLM

model_path = "./hf_models/model_1"

print("Step 1: Creating a manual configuration to bypass file errors...")
# These are standard Llama-3-8B values. 
# If you are using a different size, let me know!
manual_config = LlamaConfig(
    hidden_size=4096,
    num_attention_heads=32,
    num_key_value_heads=8,
    model_type="llama"
)

print("Step 2: Loading Tokenizer...")
# We pass the config directly so it doesn't have to divide by zero while reading files
tokenizer = AutoTokenizer.from_pretrained(
    model_path, 
    local_files_only=True, 
    config=manual_config
)

print("Step 3: Loading Model weights...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=manual_config,
    torch_dtype=torch.float16,
    device_map="auto",
    local_files_only=True
)

print("‚úÖ SUCCESS! The ZeroDivisionError has been bypassed.")

Step 1: Creating a manual configuration to bypass file errors...
Step 2: Loading Tokenizer...


ZeroDivisionError: integer division or modulo by zero

In [8]:
import torch
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM

# Your new specific path
model_path = "hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71"

print("Step 1: Loading Mistral Configuration...")
# Mistral v0.3 has specific settings, we load from the folder to be safe
config = AutoConfig.from_pretrained(model_path, local_files_only=True)

print("Step 2: Loading Mistral Tokenizer...")
# We use trust_remote_code because Mistral v3 often requires it for its custom tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_path, 
    local_files_only=True,
    trust_remote_code=True
)

print("Step 3: Loading Mistral Model (this requires ~15GB VRAM or 4-bit)...")
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    torch_dtype=torch.float16,
    device_map="auto",
    local_files_only=True,
    trust_remote_code=True
)

print("‚úÖ Mistral-7B-v0.3 Loaded Successfully!")

Step 1: Loading Mistral Configuration...
Step 2: Loading Mistral Tokenizer...
Step 3: Loading Mistral Model (this requires ~15GB VRAM or 4-bit)...


ValueError: Could not find MistralForCausalLM neither in <module 'transformers.models.mistral' from '/usr/local/lib/python3.12/dist-packages/transformers/models/mistral/__init__.py'> nor in <module 'transformers' from '/usr/local/lib/python3.12/dist-packages/transformers/__init__.py'>!

In [None]:
def _generate(self, prompt):
    # Mistral v0.3 prompt format
    formatted_prompt = f"<s>[INST] {prompt} [/INST]"
    
    inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
    
    outputs = self.model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        pad_token_id=self.tokenizer.eos_token_id
    )
    
    decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Remove the instruction part to get just the answer
    return decoded.split("[/INST]")[-1].strip()

In [63]:
import os
files = os.listdir(model_path)
print(f"Files found: {len(files)}")
if "model.safetensors.index.json" in files or "model.safetensors" in files:
    print("‚úÖ Weights found.")
else:
    print("‚ùå Weights missing! Run the 'cp -L' command for this new path.")

Files found: 15
‚úÖ Weights found.


In [2]:
%%writefile agents/question_agent.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import time
import re
from collections import Counter

class QuestionAgent:
    def __init__(self, model_path):
        print(f"Initializing Mistral Agent from {model_path}...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            device_map="auto",
            local_files_only=True
        )
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def _clean_json(self, text):
        """Extracts JSON structure from model chatter."""
        try:
            match = re.search(r'\{.*\}', text, re.DOTALL)
            return match.group(0) if match else text
        except:
            return text

    def _get_single_vote(self, prompt):
        """Single inference pass."""
        formatted_prompt = f"<s>[INST] {prompt} [/INST]"
        inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
        
        start_time = time.time()
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.8, # Higher temp for diverse voting
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
        generation_time = time.time() - start_time
        
        raw_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        json_str = self._clean_json(raw_output.split("[/INST]")[-1])
        
        try:
            data = json.loads(json_str)
            return data, generation_time
        except:
            return None, generation_time

    def generate_with_voting(self, domain, num_votes=3):
        """Implements Multi-vote, Confidence Scoring, and Adaptive Voting."""
        prompt = f"Create a difficult multiple choice question about {domain} in JSON format with keys: 'question', 'choices', 'answer' (A, B, C, or D)."
        
        votes = []
        times = []
        
        for i in range(num_votes):
            res, duration = self._get_single_vote(prompt)
            if res:
                votes.append(res)
                times.append(duration)

        if not votes:
            return None

        # --- Adaptive Voting Logic ---
        answers = [v['answer'] for v in votes]
        vote_counts = Counter(answers)
        winning_answer = vote_counts.most_common(1)[0][0]
        
        # Confidence Score: Agreement ratio
        confidence = vote_counts[winning_answer] / len(votes)
        
        # Select a representative sample that matches the winning answer
        final_question = next(v for v in votes if v['answer'] == winning_answer)
        final_question['metadata'] = {
            "confidence_score": confidence,
            "avg_generation_time": sum(times) / len(times),
            "vote_distribution": dict(vote_counts)
        }
        
        return final_question

Overwriting agents/question_agent.py


In [14]:
from agents.question_agent import QuestionAgent
import json

# Initialize
# agent = QuestionAgent("/workspace/AAIPL_10.108.10.21/hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71")
# agent = QuestionAgent("/root/.cache/huggingface/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71")
agent = QuestionAgent("/root/.cache/huggingface/models--Qwen--Qwen3-4B/snapshots/1cfa9a7208912126459214e8b04321603b3df60c")

domains = ["Quantum Mechanics", "Cell Biology", "Macroeconomics", "Organic Chemistry"]
dataset_file = "synthetic_quiz_dataset.jsonl"

print(f"Starting dataset generation to {dataset_file}...")

with open(dataset_file, "a") as f:
    for domain in domains:
        print(f"Generating for {domain}...")
        # Running with 3 votes for consensus
        final_data = agent.generate_with_voting(domain, num_votes=3)
        
        if final_data:
            f.write(json.dumps(final_data) + "\n")
            print(f"‚úÖ Success. Confidence: {final_data['metadata']['confidence_score']}")
        else:
            print(f"‚ùå Failed to generate valid JSON for {domain}")

print("Dataset generation complete!")

Initializing Mistral Agent from /root/.cache/huggingface/models--Qwen--Qwen3-4B/snapshots/1cfa9a7208912126459214e8b04321603b3df60c...


ValueError: Could not find Qwen3ForCausalLM neither in <module 'transformers.models.qwen3' from '/usr/local/lib/python3.12/dist-packages/transformers/models/qwen3/__init__.py'> nor in <module 'transformers' from '/usr/local/lib/python3.12/dist-packages/transformers/__init__.py'>!

shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
The folder you are executing pip from can no longer be found.


In [40]:
!pip list | grep trans

hf_transfer                       0.1.9
s3transfer                        0.14.0
transformers                      4.56.2


In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

ValueError: Unable to compare versions for packaging>=20.0: need=20.0 found=None. This is unusual. Consider reinstalling packaging.

In [2]:
# !pip uninstall -y transformers requests
# !pip install requests==2.32.3 transformers==4.56.2

In [3]:
!pip list | grep trans

shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
The folder you are executing pip from can no longer be found.
