# Mistral 7B Fine-Tuning with Professor Synapse Dataset

This notebook uses Unsloth and the Professor Synapse dataset to fine-tune the Mistral 7B v0.3 model with the following customizations:

- Special XML tokens for structured reasoning
- Logit bias for banned tokens
- Integrated system prompt for conversation framing
- Maximum sequence length set to 4096 tokens

Before running, supply your own values for any placeholders (for example, your Hugging Face access token in the model-loading cell).

In [None]:
# Install dependencies
!pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
!pip install datasets==2.18.0 trl==0.8.0 peft==0.10.0

In [None]:
import os

import torch
from unsloth import FastMistralModel

# Read the Hugging Face token from the environment instead of hardcoding it in the
# notebook, so the secret is never committed or shared with the file.
# When HF_TOKEN is unset, None is passed and no explicit token is used.
hf_token = os.environ.get("HF_TOKEN")

# Load the Mistral 7B v03 model and tokenizer with 4-bit quantization
# NOTE(review): the repo id below is kept as written; the published Unsloth checkpoint
# may be spelled "unsloth/mistral-7b-v0.3-bnb-4bit" — confirm this id resolves.
model, tokenizer = FastMistralModel.from_pretrained(
    model_name="unsloth/mistral-7b-v03-bnb-4bit",
    max_seq_length=4096,
    load_in_4bit=True,
    token=hf_token,
)

In [None]:
def setup_special_tokens(tokenizer, model):
    """
    Adds special XML tokens for structured reasoning and creates a logit bias for banned words.

    For every tag name an opening (``<tag>``) and a closing (``</tag>``) token is added to the
    tokenizer, after which the model's embedding matrix is resized to ``len(tokenizer)``.

    Args:
        tokenizer: Object providing ``add_tokens``, ``encode`` and ``__len__``. Mutated in place.
        model: Object providing ``resize_token_embeddings``. Mutated in place.

    Returns:
        dict: Maps every token id produced by encoding a banned word to a bias of -100.
    """
    tags = [
        "reasoning", "mem", "goal", "subgoal", "prog", "done",
        "now", "ctx", "kg", "tri", "subj", "pred", "obj",
        "logic", "prop", "sym", "nat", "crit", "doubt",
        "proof", "step", "chain", "reflect", "err", "note", "warn", "verb", "cont"
    ]

    # Create opening and closing tokens, keeping each pair adjacent
    special_tokens = [token for tag in tags for token in (f"<{tag}>", f"</{tag}>")]

    # Add tokens to the tokenizer and resize model embeddings.
    # add_tokens returns the number of newly added tokens as an int, so it must not be indexed.
    num_added = tokenizer.add_tokens(special_tokens, special_tokens=True)
    print(f"Added {num_added} special tokens.")
    model.resize_token_embeddings(len(tokenizer))

    # Define banned words and obtain their token IDs.
    # NOTE(review): every sub-token id of a banned word is collected, so a word that the
    # tokenizer splits into several pieces also penalises those pieces wherever they occur.
    banned_words = ["delve", "tapestry", "embark", "revolutionize", "Ah"]
    banned_tokens = []
    for word in banned_words:
        banned_tokens.extend(tokenizer.encode(word, add_special_tokens=False))

    # Create a logit bias dictionary: assign a strong negative bias (-100) to banned tokens
    return {token_id: -100 for token_id in banned_tokens}

# Setup special tokens and retrieve logit bias.
# The call mutates both `tokenizer` (new tokens) and `model` (resized embeddings);
# the returned dict maps banned token ids to a bias of -100.
logit_bias = setup_special_tokens(tokenizer, model)

In [None]:
# Define the full system prompt.
# This text is prepended to every training conversation by format_conversation.
# The tag names in the REASONING SCHEMA below match the tag list in setup_special_tokens.
# NOTE(review): the <step index="..."> examples carry attributes, so they are not the
# literal "<step>" string that was registered as a special token — confirm this is intended.
# NOTE(review): the wizard emoji on the first and last prompt lines use different
# skin-tone modifiers — confirm which one is intended.
system_prompt = """# MISSION
Act as **Professor Synapse 🧙🏿‍♂️**, a wise and knowledgeable companion. Let's collaborate to achieve our goals! You always use <reasoning> prior to output.

# <REASONING>
1. **Construct Working Memory**: Synthesize relevant conversation details.
2. **Develop a Knowledge Graph**: Identify key entities as semantic triplets.
3. **Perform Logical Reasoning**: Build a coherent chain of reasoning.

## REASONING SCHEMA
<reasoning>
  <mem>
    <goal>Primary Objective</goal>
    <subgoal>Current Subgoal</subgoal>
    <prog>
      <done>Completed steps</done>
      <now>Current steps</now>
    </prog>
    <ctx>Relevant Context</ctx>
  </mem>
  <kg>
    <tri>
      <subj>Subject</subj>
      <pred>predicate</pred>
      <obj>Object</obj>
    </tri>
  </kg>
  <logic>
    <prop>
      <sym>□(P → Q)</sym>
      <nat>Explanation of the assertion</nat>
    </prop>
    <proof>
      <sym>∴ ∀x(P(x) → Q(x))</sym>
      <nat>Supporting evidence</nat>
    </proof>
    <crit>
      <sym>◇¬(P → Q)</sym>
      <nat>Counter perspective</nat>
    </crit>
    <doubt>
      <sym>?(P ∨ ¬P)</sym>
      <nat>Explanation of uncertainty</nat>
    </doubt>
  </logic>
  <chain>
    <step index="1">Initial analysis</step>
    <step index="2" depends_on="1">Further details</step>
    <reflect>Self-reflection on reasoning</reflect>
    <cont>Continuation from previous steps</cont>
    <err>Error handling if any</err>
    <note>Additional notes</note>
    <warn>Warnings if any</warn>
    <verb>Verbosity instructions</verb>
  </chain>
</reasoning>

# CONTEXT
You are in a problem-solving conversation. Provide actionable and direct responses.

# TRAITS
Wise, Curious, Strategic, Patient, Light‑hearted, Cooperative.

# INSTRUCTIONS
1. Gather context and goals.
2. Collaborate to solve problems.
3. Ask for feedback and iterate.

You are now transmogrified into Professor Synapse 🧙🏾‍♂️✨. Begin with <reasoning>.
"""

In [None]:
from datasets import load_dataset

# Load the train split of the Professor Synapse dataset.
# NOTE(review): format_conversation below reads each row's "messages" list and expects
# "role"/"content" (and optionally "reasoning") keys — confirm against the dataset schema.
dataset = load_dataset("SynapticLabs/professor-synapse", split="train")

# Function to format conversation examples by incorporating the system prompt
def format_conversation(example, system_prompt):
    """Formats a conversation by prepending the system prompt and wrapping messages in the required tags.

    The system prompt opens the first ``[INST]`` block. Each user message closes the
    current block with `` [/INST]``; each assistant message (optionally preceded by a
    ``<reasoning>...</reasoning>`` block) is terminated with `` </s>``. A new ``[INST] ``
    is opened only when another user/assistant message actually follows, so the
    formatted text never ends with a dangling, unclosed ``[INST] ``.

    Args:
        example: Mapping with a "messages" list. Each message is a mapping with "role"
            and "content", plus an optional "reasoning" entry on assistant messages.
            Messages whose role is neither "user" nor "assistant" are skipped.
        system_prompt: Text placed at the start of the first instruction block.

    Returns:
        dict: ``{"text": <formatted conversation>}``.
    """
    # NOTE(review): the literal "<s>" is kept as in the original; if the tokenizer also
    # prepends its own BOS token, sequences will start with two of them — confirm.
    parts = ["<s>[INST] " + system_prompt + "\n\n"]
    # True after an assistant turn: the next turn must first open a fresh [INST] block.
    needs_inst_open = False
    for msg in example["messages"]:
        role = msg["role"]
        if role not in ("user", "assistant"):
            continue
        if needs_inst_open:
            parts.append("[INST] ")
            needs_inst_open = False
        if role == "user":
            parts.append(msg["content"] + " [/INST]")
        else:
            reasoning_block = msg.get("reasoning", "")
            if reasoning_block:
                parts.append("<reasoning>" + reasoning_block + "</reasoning>")
            parts.append(msg["content"] + " </s>")
            needs_inst_open = True
    return {"text": "".join(parts)}

# Render every conversation into a single "text" field (system prompt included)
# and drop the raw "messages" column once it has been consumed.
dataset = dataset.map(
    format_conversation,
    fn_kwargs={"system_prompt": system_prompt},
    remove_columns=["messages"],
)

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# SFTTrainer expects `args` to be a TrainingArguments instance; a plain dict has no
# attributes such as `.seed` or `.output_dir` and fails inside the Trainer constructor.
#
# The former "logit_bias", "chat_template" and "system_prompt" entries are not training
# arguments and are intentionally not passed:
#   - the system prompt and the [INST] framing are already baked into the "text" column
#     by format_conversation;
#   - `logit_bias` is a decoding-time control; apply it when generating with the
#     fine-tuned model, not during supervised fine-tuning.
#
# NOTE(review): the model is loaded in 4-bit and no LoRA/PEFT adapters are attached
# anywhere in this notebook. Fine-tuning a purely quantized model is expected to be
# rejected by the trainer — attach adapters before this cell. TODO confirm.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_ratio=0.1,
    learning_rate=2e-5,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=50,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=42,
    output_dir="outputs",
)

# Configure the SFTTrainer with custom training parameters.
# The tokenizer is passed explicitly so the trainer uses the instance that was extended
# with the special XML tokens instead of loading a fresh one for the base model.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=4096,
    packing=True,
    args=training_args,
)

In [None]:
# Train the model and save the fine-tuned checkpoint
trainer.train()

# Save the tokenizer next to the model: it was extended with the special XML tokens, and
# the resized embeddings in the checkpoint only line up with that extended vocabulary.
save_dir = "professor_synapse_finetuned"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print("Training complete and model saved.")