In [1]:
# 1. CLEAN INSTALLATION (Fixes the "requests" metadata error)
print("Updating environment for Transformers 4.56.2...")
!pip uninstall -y transformers requests tokenizers accelerate
!pip install requests==2.32.3 transformers==4.56.2 tokenizers==0.21.0 accelerate==1.1.0 bitsandbytes==0.44.1

import os
import torch
import json
import re
import time
from collections import Counter

# 2. CREATE DIRECTORY
os.makedirs("agents", exist_ok=True)

# 3. WRITE THE AGENT FILE (plain-Python equivalent of the %%writefile cell magic)
print("Writing agents/question_agent.py...")
with open("agents/question_agent.py", "w") as f:
    f.write('''
import torch
import json
import re
import time
from collections import Counter
from transformers import AutoModelForCausalLM, AutoTokenizer

class QuestionAgent:
    """Generates multiple-choice questions with a local Mistral-7B-Instruct model.

    A question is sampled several times; the sample whose answer is the most
    common among the successfully parsed samples is returned.
    """

    def __init__(self, model_path):
        """Load the tokenizer and a 4-bit quantized model from a local snapshot.

        Args:
            model_path: Directory holding the model snapshot. Nothing is
                downloaded (local_files_only=True).
        """
        # Local import: the module-level import line only brings in
        # AutoModelForCausalLM and AutoTokenizer.
        from transformers import BitsAndBytesConfig

        print(f"Initializing Mistral v0.3 from: {model_path}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        # Passing load_in_4bit straight to from_pretrained is deprecated; the
        # same setting is expressed through a BitsAndBytesConfig instead.
        quant_config = BitsAndBytesConfig(load_in_4bit=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            device_map="auto",
            local_files_only=True,
            quantization_config=quant_config,
        )
        # The tokenizer is given the EOS token as its padding token.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def _extract_json(self, text):
        """Return the first JSON object embedded in text, or None.

        Every opening brace is tried as a possible start of an object, so prose
        or stray braces before or after the object do not break parsing.

        Args:
            text: Raw model output. Non-string input yields None.

        Returns:
            The decoded dict, or None when no JSON object can be decoded.
        """
        if not isinstance(text, str):
            return None
        decoder = json.JSONDecoder()
        position = text.find("{")
        while position != -1:
            try:
                parsed, _ = decoder.raw_decode(text, position)
                return parsed
            except json.JSONDecodeError:
                # Not a valid object at this brace; try the next one.
                position = text.find("{", position + 1)
        return None

    def _get_vote(self, domain):
        """Sample one MCQ about domain from the model.

        Returns:
            A (question, latency) tuple: the parsed JSON object (None when
            nothing could be parsed) and the generation time in seconds.
        """
        prompt = f"<s>[INST] Create a difficult MCQ about {domain} in JSON format. Keys: 'question', 'choices' (A,B,C,D), 'answer'. [/INST]"
        # The prompt text already contains the <s> marker, so the tokenizer is
        # told not to prepend its own special tokens as well.
        inputs = self.tokenizer(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        start = time.perf_counter()
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.8,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        latency = time.perf_counter() - start

        # Decode only the newly generated tokens. Splitting the decoded text on
        # the instruction tag is unreliable because skip_special_tokens may
        # remove that tag from the decoded string.
        completion = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return self._extract_json(completion), latency

    @staticmethod
    def _answer_key(answer):
        """Normalize an answer into a hashable, case-insensitive ballot key."""
        return str(answer).strip().upper()

    def generate_with_voting(self, domain, num_votes=3):
        """Sample num_votes questions and return the one with the majority answer.

        Samples that could not be parsed, or that carry no 'answer' key, do not
        take part in the vote.

        Args:
            domain: Topic of the question.
            num_votes: Number of samples to draw.

        Returns:
            The first sample whose answer won the vote, extended with a
            'metadata' dict ('confidence' = share of counted samples agreeing
            with the winner, 'avg_latency' = mean generation time in seconds
            of the counted samples), or None when no sample was usable.
        """
        votes, times = [], []
        for _ in range(num_votes):
            data, elapsed = self._get_vote(domain)
            if isinstance(data, dict) and 'answer' in data:
                votes.append(data)
                times.append(elapsed)

        if not votes:
            return None

        # Ballot keys are normalized strings so that list/dict answers cannot
        # crash the Counter and case or padding differences do not split votes.
        keys = [self._answer_key(vote['answer']) for vote in votes]
        winner, winner_count = Counter(keys).most_common(1)[0]

        final_question = votes[keys.index(winner)]
        final_question['metadata'] = {
            "confidence": winner_count / len(votes),
            "avg_latency": sum(times) / len(times)
        }
        return final_question
''')

# 4. EXECUTE IMMEDIATELY
print("Loading Model... this may take a minute.")

# A plain `from ... import` returns the module cached in sys.modules, so a
# re-run of this cell would keep using the previously written class. Reload
# explicitly to pick up the file written above.
import importlib

importlib.invalidate_caches()
import agents.question_agent

QuestionAgent = importlib.reload(agents.question_agent).QuestionAgent

MODEL_PATH = "hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71"

try:
    agent = QuestionAgent(MODEL_PATH)

    # Test it
    test_domain = "Quantum Cryptography"
    print(f"Generating consensus for: {test_domain}")
    result = agent.generate_with_voting(test_domain, num_votes=3)

    if result:
        # "\n" (not "\\n"): this code is outside the triple-quoted file body,
        # so a doubled backslash would print a literal backslash-n.
        print("\n✅ SUCCESS!")
        print(json.dumps(result, indent=4))
    else:
        print("\n❌ No valid question could be parsed from the model output.")
except Exception as e:
    print(f"\n❌ Error: {e}")

Updating environment for Transformers 4.56.2...
Found existing installation: transformers 4.56.2
Uninstalling transformers-4.56.2:
  Successfully uninstalled transformers-4.56.2
Found existing installation: requests 2.32.3
Uninstalling requests-2.32.3:
  Successfully uninstalled requests-2.32.3
Found existing installation: tokenizers 0.22.1
Uninstalling tokenizers-0.22.1:
  Successfully uninstalled tokenizers-0.22.1
Found existing installation: accelerate 1.11.0
Uninstalling accelerate-1.11.0:
  Successfully uninstalled accelerate-1.11.0
[0mCollecting requests==2.32.3
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting transformers==4.56.2
  Using cached transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
Collecting tokenizers==0.21.0
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting accelerate==1.1.0
  Downloading accelerate-1.1.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes==0

ModuleNotFoundError: No module named 'transformers'

In [None]:
!pip install transformers 4.

In [2]:
# Force the installation of the specific ecosystem
!pip install --upgrade requests==2.32.3 transformers==4.56.2 tokenizers==0.21.0 accelerate==1.1.0 bitsandbytes==0.44.1

Collecting requests==2.32.3
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting transformers==4.56.2
  Using cached transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
Collecting tokenizers==0.21.0
  Using cached tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting accelerate==1.1.0
  Using cached accelerate-1.1.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes==0.44.1
  Using cached bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
INFO: pip is looking at multiple versions of transformers to determine which version is compatible with other requirements. This could take a while.
[31mERROR: Cannot install tokenizers==0.21.0 and transformers==4.56.2 because these package versions have conflicting dependencies.[0m[31m
[0m
The conflict is caused by:
    The user requested tokenizers==0.21.0
    transformers 4.56.2 depends on tokenizers<=0.23.0 and >=0.22.0

Additionally, some 

In [3]:
# Force a clean install with the exact dependency tree required
!pip uninstall -y transformers requests tokenizers accelerate bitsandbytes
!pip install requests==2.32.3 \
             transformers==4.56.2 \
             tokenizers==0.22.2 \
             accelerate==1.1.0 \
             bitsandbytes==0.44.1

[0mFound existing installation: bitsandbytes 0.48.2.dev0
Uninstalling bitsandbytes-0.48.2.dev0:
  Successfully uninstalled bitsandbytes-0.48.2.dev0
[0mCollecting requests==2.32.3
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting transformers==4.56.2
  Using cached transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
Collecting tokenizers==0.22.2
  Downloading tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting accelerate==1.1.0
  Using cached accelerate-1.1.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes==0.44.1
  Using cached bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Using cached transformers-4.56.2-py3-none-any.whl (11.6 MB)
Downloading tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m43.3 MB/

In [4]:
# 1. Clean out the conflicting versions
!pip uninstall -y transformers requests tokenizers accelerate bitsandbytes fsspec

# 2. Install the 'Golden Trio' that satisfies Mistral + TRL + Datasets
!pip install requests==2.32.3 \
             transformers==4.56.2 \
             tokenizers==0.22.2 \
             accelerate==1.4.0 \
             fsspec==2025.9.0 \
             bitsandbytes==0.44.1

Found existing installation: transformers 4.56.2
Uninstalling transformers-4.56.2:
  Successfully uninstalled transformers-4.56.2
Found existing installation: requests 2.32.3
Uninstalling requests-2.32.3:
  Successfully uninstalled requests-2.32.3
Found existing installation: tokenizers 0.22.2
Uninstalling tokenizers-0.22.2:
  Successfully uninstalled tokenizers-0.22.2
Found existing installation: accelerate 1.1.0
Uninstalling accelerate-1.1.0:
  Successfully uninstalled accelerate-1.1.0
Found existing installation: bitsandbytes 0.44.1
Uninstalling bitsandbytes-0.44.1:
  Successfully uninstalled bitsandbytes-0.44.1
Found existing installation: fsspec 2026.2.0
Uninstalling fsspec-2026.2.0:
  Successfully uninstalled fsspec-2026.2.0
[0mCollecting requests==2.32.3
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting transformers==4.56.2
  Using cached transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
Collecting tokenizers==0.22.2
  Using cached tokenizers-0.22

In [1]:
import os
import torch
import json
import re
import time
from collections import Counter

# 1. Ensure the agents directory exists
os.makedirs("agents", exist_ok=True)

# 2. WRITE/OVERWRITE agents/question_agent.py with the JSON-extraction fix
print("Updating agents/question_agent.py with robust JSON extraction...")
with open("agents/question_agent.py", "w") as f:
    f.write('''
import torch
import json
import re
import time
from collections import Counter
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

class QuestionAgent:
    """Generates multiple-choice questions with a local Mistral-7B-Instruct model.

    A question is sampled several times; the sample whose answer is the most
    common among the successfully parsed samples is returned.
    """

    def __init__(self, model_path):
        """Load the tokenizer and a 4-bit (NF4) quantized model from a local snapshot.

        Args:
            model_path: Directory holding the model snapshot. Nothing is
                downloaded (local_files_only=True).
        """
        print(f"Initializing Mistral v0.3 from: {model_path}")

        # 4-bit Quantization to fit in GPU memory
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4"
        )

        self.tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=bnb_config,
            device_map="auto",
            local_files_only=True
        )
        # The tokenizer is given the EOS token as its padding token.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def _extract_json(self, text):
        """Return the first JSON object embedded in text, or None.

        Every opening brace is tried as a possible start of an object, so prose
        or stray braces before or after the object do not break parsing. When
        braces were present but none started a valid object, the last decoding
        error is printed.

        Args:
            text: Raw model output. Non-string input yields None.

        Returns:
            The decoded dict, or None when no JSON object can be decoded.
        """
        if not isinstance(text, str):
            return None
        decoder = json.JSONDecoder()
        last_error = None
        position = text.find("{")
        while position != -1:
            try:
                parsed, _ = decoder.raw_decode(text, position)
                return parsed
            except json.JSONDecodeError as e:
                last_error = e
            position = text.find("{", position + 1)
        if last_error is not None:
            print(f"  [Extraction Error]: {last_error}")
        return None

    def _get_vote(self, domain):
        """Sample one MCQ about domain using the Mistral instruction format.

        Returns:
            A (question, duration) tuple: the parsed JSON object (None when
            nothing could be parsed) and the generation time in seconds.
        """
        prompt = f"<s>[INST] Create a difficult MCQ about {domain} in valid JSON. Keys: 'question', 'choices' (A,B,C,D), 'answer'. [/INST]"
        # The prompt text already contains the <s> marker, so the tokenizer is
        # told not to prepend its own special tokens as well.
        inputs = self.tokenizer(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        start = time.perf_counter()
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.8,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
        duration = time.perf_counter() - start

        # Decode only the newly generated tokens. Splitting the decoded text on
        # the instruction tag is unreliable because skip_special_tokens may
        # remove that tag from the decoded string.
        completion = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return self._extract_json(completion.strip()), duration

    @staticmethod
    def _answer_key(answer):
        """Normalize an answer into a hashable, case-insensitive ballot key."""
        return str(answer).strip().upper()

    def generate_consensus(self, domain, num_votes=3):
        """Sample num_votes questions and return the one with the majority answer.

        Samples that could not be parsed, or that carry no 'answer' key, do not
        take part in the vote.

        Args:
            domain: Topic of the question.
            num_votes: Number of samples to draw.

        Returns:
            The first sample whose answer won the vote, extended with a
            'metadata' dict ('confidence' and 'avg_latency', both rounded to
            two decimals), or None when no sample was usable.
        """
        votes = []
        latencies = []
        for i in range(num_votes):
            print(f"  - Attempting generation {i+1}...")
            data, elapsed = self._get_vote(domain)
            if isinstance(data, dict) and 'answer' in data:
                votes.append(data)
                latencies.append(elapsed)

        if not votes:
            return None

        # Ballot keys are normalized strings so that list/dict answers cannot
        # crash the Counter and case or padding differences do not split votes.
        keys = [self._answer_key(vote['answer']) for vote in votes]
        winner, winner_count = Counter(keys).most_common(1)[0]

        # Pick the first question object that provided the winning answer
        final_data = votes[keys.index(winner)]
        final_data['metadata'] = {
            "confidence": round(winner_count / len(votes), 2),
            "avg_latency": round(sum(latencies) / len(latencies), 2)
        }
        return final_data
''')

# 3. INITIALIZE AND TEST
# A plain `from ... import` returns the module cached in sys.modules, so a
# re-run of this cell after editing the file above would keep using the old
# class. Reload explicitly to pick up the file that was just written.
import importlib

importlib.invalidate_caches()
import agents.question_agent

QuestionAgent = importlib.reload(agents.question_agent).QuestionAgent

MODEL_PATH = "hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71"

try:
    print("\n--- Model Setup Starting ---")
    agent = QuestionAgent(MODEL_PATH)

    topic = "Quantum Computing"
    print(f"\nGenerating entry for: {topic}")
    result = agent.generate_consensus(topic, num_votes=3)

    if result:
        print("\n✅ SUCCESS: Question generated with high confidence.")
        print(json.dumps(result, indent=4))

        # Append one JSON document per line; the encoding is fixed so the
        # file does not depend on the platform default.
        with open("quiz_dataset.jsonl", "a", encoding="utf-8") as f:
            f.write(json.dumps(result) + "\n")
    else:
        print("\n❌ Failed to generate a valid consensus question.")

except Exception as e:
    print(f"\n❌ Execution Error: {e}")

Updating agents/question_agent.py with robust JSON extraction...

--- Model Setup Starting ---
Initializing Mistral v0.3 from: hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71

❌ Execution Error: Could not import module 'MistralForCausalLM'. Are this object's requirements defined correctly?


In [2]:
import os
import torch
import json
import re
import time
from collections import Counter

# 1. Overwrite agents/question_agent.py with the current QuestionAgent definition
with open("agents/question_agent.py", "w") as f:
    f.write('''
import torch
import json
import re
import time
from collections import Counter
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

class QuestionAgent:
    """Generates multiple-choice questions with a local Mistral-7B-Instruct model.

    A question is sampled several times; the sample whose answer is the most
    common among the successfully parsed samples is returned.
    """

    def __init__(self, model_path):
        """Load the tokenizer and a 4-bit (NF4, double-quantized) model.

        Args:
            model_path: Directory holding the model snapshot. Nothing is
                downloaded (local_files_only=True).
        """
        print(f"Initializing Mistral v0.3 from: {model_path}")

        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True
        )

        # trust_remote_code is deliberately not set: it only allows Python
        # files shipped inside the model folder to be executed. Mistral is a
        # built-in transformers architecture, so the flag cannot repair a
        # failing MistralForCausalLM import and only adds risk.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            local_files_only=True
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=bnb_config,
            device_map="auto",
            local_files_only=True
        )
        # The tokenizer is given the EOS token as its padding token.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def _extract_json(self, text):
        """Return the first JSON object embedded in text, or None.

        Every opening brace is tried as a possible start of an object, so prose
        or stray braces before or after the object do not break parsing.

        Args:
            text: Raw model output. Non-string input yields None.

        Returns:
            The decoded dict, or None when no JSON object can be decoded.
        """
        if not isinstance(text, str):
            return None
        decoder = json.JSONDecoder()
        position = text.find("{")
        while position != -1:
            try:
                parsed, _ = decoder.raw_decode(text, position)
                return parsed
            except json.JSONDecodeError:
                # Not a valid object at this brace; try the next one.
                position = text.find("{", position + 1)
        return None

    def _get_vote(self, domain):
        """Sample one MCQ about domain; return the parsed JSON object or None."""
        prompt = f"<s>[INST] Create a difficult MCQ about {domain} in valid JSON. Keys: 'question', 'choices' (A,B,C,D), 'answer'. [/INST]"
        # The prompt text already contains the <s> marker, so the tokenizer is
        # told not to prepend its own special tokens as well.
        inputs = self.tokenizer(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens. Splitting the decoded text on
        # the instruction tag is unreliable because skip_special_tokens may
        # remove that tag from the decoded string.
        completion = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return self._extract_json(completion.strip())

    @staticmethod
    def _answer_key(answer):
        """Normalize an answer into a hashable, case-insensitive ballot key."""
        return str(answer).strip().upper()

    def generate_consensus(self, domain, num_votes=3):
        """Sample num_votes questions and return the one with the majority answer.

        Samples that could not be parsed, or that carry no 'answer' key, do not
        take part in the vote.

        Args:
            domain: Topic of the question.
            num_votes: Number of samples to draw.

        Returns:
            The first sample whose answer won the vote, with a 'confidence'
            key added (share of counted samples agreeing with the winner,
            rounded to two decimals), or None when no sample was usable.
        """
        votes = []
        for i in range(num_votes):
            print(f"  - Vote {i+1}...")
            data = self._get_vote(domain)
            if isinstance(data, dict) and 'answer' in data:
                votes.append(data)

        if not votes:
            return None

        # Ballot keys are normalized strings so that list/dict answers cannot
        # crash the Counter and case or padding differences do not split votes.
        keys = [self._answer_key(vote['answer']) for vote in votes]
        winner, winner_count = Counter(keys).most_common(1)[0]
        final_data = votes[keys.index(winner)]
        final_data['confidence'] = round(winner_count / len(votes), 2)
        return final_data
''')

# 2. Re-import and Run
# `from agents.question_agent import QuestionAgent` alone does NOT re-read the
# file once the module has been imported in this kernel: Python returns the
# copy cached in sys.modules, so the previous class keeps running. Reload
# explicitly to pick up the file written above.
import importlib

importlib.invalidate_caches()
import agents.question_agent

QuestionAgent = importlib.reload(agents.question_agent).QuestionAgent

MODEL_PATH = "hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71"

try:
    agent = QuestionAgent(MODEL_PATH)
    result = agent.generate_consensus("Quantum Physics", num_votes=3)
    if result:
        print("\n✅ SUCCESS!")
        print(json.dumps(result, indent=4))
    else:
        print("\n❌ No valid question could be parsed from the model output.")
except Exception as e:
    print(f"\n❌ Still hitting an error: {e}")

Initializing Mistral v0.3 from: hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71

❌ Still hitting an error: Could not find MistralForCausalLM neither in <module 'transformers.models.mistral' from '/usr/local/lib/python3.12/dist-packages/transformers/models/mistral/__init__.py'> nor in <module 'transformers' from '/usr/local/lib/python3.12/dist-packages/transformers/__init__.py'>!


In [3]:
import os
import torch
import json
import re
import time
from collections import Counter

# 1. Overwrite agents/question_agent.py with the current QuestionAgent definition
print("Rewriting agent with Direct-Class mapping...")
with open("agents/question_agent.py", "w") as f:
    f.write('''
import torch
import json
import re
import time
from collections import Counter
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

class QuestionAgent:
    """Generates multiple-choice questions with a local Mistral-7B-Instruct model.

    A question is sampled several times; the sample whose answer is the most
    common among the successfully parsed samples is returned.
    """

    def __init__(self, model_path):
        """Load the tokenizer and a 4-bit (NF4) quantized model from a local snapshot.

        Args:
            model_path: Directory holding the model snapshot. Nothing is
                downloaded (local_files_only=True).
        """
        print(f"Initializing Mistral v0.3 from: {model_path}")

        # 4-bit config for Mistral-7B
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4"
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            local_files_only=True
        )

        # No trust_remote_code and no revision here: trust_remote_code only
        # allows Python files shipped inside the model folder to be executed
        # (Mistral is a built-in transformers architecture, so it cannot repair
        # a failing MistralForCausalLM import), and model_path already points
        # at one specific local snapshot directory.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=bnb_config,
            device_map="auto",
            local_files_only=True
        )
        # The tokenizer is given the EOS token as its padding token.
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def _extract_json(self, text):
        """Return the first JSON object embedded in text, or None.

        Every opening brace is tried as a possible start of an object, so prose
        or stray braces before or after the object do not break parsing.

        Args:
            text: Raw model output. Non-string input yields None.

        Returns:
            The decoded dict, or None when no JSON object can be decoded.
        """
        if not isinstance(text, str):
            return None
        decoder = json.JSONDecoder()
        position = text.find("{")
        while position != -1:
            try:
                parsed, _ = decoder.raw_decode(text, position)
                return parsed
            except json.JSONDecodeError:
                # Not a valid object at this brace; try the next one.
                position = text.find("{", position + 1)
        return None

    def _get_vote(self, domain):
        """Sample one MCQ about domain; return the parsed JSON object or None."""
        prompt = f"<s>[INST] Create a difficult MCQ about {domain} in JSON format. Keys: 'question', 'choices', 'answer'. [/INST]"
        # The prompt text already contains the <s> marker, so the tokenizer is
        # told not to prepend its own special tokens as well.
        inputs = self.tokenizer(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=512,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens. Splitting the decoded text on
        # the instruction tag is unreliable because skip_special_tokens may
        # remove that tag from the decoded string.
        completion = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return self._extract_json(completion.strip())

    @staticmethod
    def _answer_key(answer):
        """Normalize an answer into a hashable, case-insensitive ballot key."""
        return str(answer).strip().upper()

    def generate_consensus(self, domain, num_votes=3):
        """Sample num_votes questions and return the one with the majority answer.

        Samples that could not be parsed, or that carry no 'answer' key, do not
        take part in the vote.

        Args:
            domain: Topic of the question.
            num_votes: Number of samples to draw.

        Returns:
            The first sample whose answer won the vote, or None when no sample
            was usable.
        """
        votes = []
        for i in range(num_votes):
            print(f"  - Attempt {i+1}...")
            data = self._get_vote(domain)
            if isinstance(data, dict) and 'answer' in data:
                votes.append(data)

        if not votes:
            return None

        # Ballot keys are normalized strings so that list/dict answers cannot
        # crash the Counter and case or padding differences do not split votes.
        keys = [self._answer_key(vote['answer']) for vote in votes]
        winner = Counter(keys).most_common(1)[0][0]
        return votes[keys.index(winner)]
''')

# 2. Re-import and Execute
# `from agents.question_agent import QuestionAgent` alone does NOT re-read the
# file once the module has been imported in this kernel: Python returns the
# copy cached in sys.modules, so the previous class keeps running. Reload
# explicitly to pick up the file written above.
import importlib

importlib.invalidate_caches()
import agents.question_agent

QuestionAgent = importlib.reload(agents.question_agent).QuestionAgent

MODEL_PATH = "hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71"

try:
    # Release cached, unused CUDA memory before loading the model.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    agent = QuestionAgent(MODEL_PATH)
    print("\n✅ Initialization successful. Testing generation...")

    result = agent.generate_consensus("Cybersecurity", num_votes=2)
    if result:
        print("\n--- Final Question ---")
        print(json.dumps(result, indent=4))
    else:
        print("\n❌ No valid question could be parsed from the model output.")
except Exception as e:
    print(f"\n❌ Final Registry Error Check: {e}")
    # Mistral is a built-in transformers architecture, so the snapshot folder
    # does not need any modeling/configuration .py files.
    print("\nTip: a 'Could not import/find MistralForCausalLM' error usually hides an ImportError raised while transformers loads its Mistral module. Restart the kernel after changing packages, then run `import transformers.models.mistral.modeling_mistral` to see the underlying error.")

Rewriting agent with Direct-Class mapping...
Initializing Mistral v0.3 from: hf_models/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/c170c708c41dac9275d15a8fff4eca08d52bab71

❌ Final Registry Error Check: Could not find MistralForCausalLM neither in <module 'transformers.models.mistral' from '/usr/local/lib/python3.12/dist-packages/transformers/models/mistral/__init__.py'> nor in <module 'transformers' from '/usr/local/lib/python3.12/dist-packages/transformers/__init__.py'>!

Tip: If this still fails, your snapshot folder might be missing the 'configuration_mistral.py' or 'modeling_mistral.py' files required for remote code execution.
