# Professor Synapse Training Pipeline
This notebook fine-tunes a language model with Unsloth, using KTO (Kahneman-Tversky Optimization) preference training from TRL, and exports the result to GGUF.

In [None]:
!pip install --upgrade pip
!pip install "unsloth[colab]@git+https://github.com/unslothai/unsloth.git" --quiet
!pip install datasets trl transformers --quiet

In [None]:
import torch
from unsloth import FastLanguageModel, is_bfloat16_supported, add_new_tokens
from unsloth.chat_templates import get_chat_template
from datasets import load_dataset
from trl import KTOTrainer, KTOConfig
from transformers import DataCollatorForLanguageModeling

# Longest token sequence the model is loaded for and the trainer is capped at.
max_seq_length = 4096
# Pick the compute dtype with the same bf16 probe the trainer arguments use
# (is_bfloat16_supported), so the model dtype and the fp16/bf16 training flags
# are always derived from one source and cannot disagree.
dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16
# Load the base weights 4-bit quantized.
load_in_4bit = True

In [None]:
# Load the 4-bit base checkpoint together with its tokenizer.
# No attention backend is forced here: the setup cell does not install
# flash-attn, and the fp16 fallback in `dtype` implies GPUs without bf16,
# so requesting "flash_attention_2" explicitly can fail at load time.
# NOTE(review): this is a Pixtral checkpoint — confirm FastLanguageModel is the
# intended loader for it.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Pixtral-12B-Base-2409",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

In [None]:
# System prompt given to the chat template. The persona emoji had been
# corrupted by a wrong text decoding and is restored to the intended
# wizard emoji sequence.
# NOTE(review): the code fence under "REASONING SCHEMA" is empty — the schema
# body appears to be missing from this notebook; restore it from the source prompt.
system_prompt = r"""# MISSION
Act as **Professor Synapse 🧙🏿‍♂️**, a wise and knowledgeable companion to me. Let's collaborate to achieve our goals! You always use <reasoning> prior to output.

# <REASONING>
1. **Construct Working Memory**: Synthesize relevant information from the conversation, including goals, progress, user preferences, and contextual factors.
2. **Develop a Knowledge Graph**: Identify key entities and concepts. Represent them as semantic triplets with subject, predicate, and object.
3. **Perform Logical Reasoning**: Utilize your **Working Memory** and **Knowledge Graph** to construct a coherent **Chain of Reasoning** that reflects your cognitive process.

## REASONING SCHEMA
Adhere strictly to the following format:
``````"""

# Attach the Zephyr chat template to the tokenizer. get_chat_template is a
# module-level function in unsloth.chat_templates, not a FastLanguageModel method.
# NOTE(review): confirm the "zephyr" template actually honours `system_message`;
# if it has no default system slot the prompt must be added to the data instead.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="zephyr",
    map_eos_token=True,
    system_message=system_prompt,
)

In [None]:
# Names of the markup tags used by the reasoning schema; each one gets an
# opening and a closing token in the vocabulary.
tags = [
    "reasoning", "mem", "goal", "subgoal",
    "prog", "done", "now", "ctx",
    "kg", "tri", "subj", "pred", "obj",
    "logic", "prop", "sym", "nat",
    "crit", "doubt", "proof", "step", "chain",
    "reflect", "err", "note", "warn", "verb", "cont",
]
# All opening tags first, then all closing tags (same order as `tags`).
additional_tokens = [
    pattern.format(tag) for pattern in ("<{}>", "</{}>") for tag in tags
]
# Register the tags as special tokens, then grow the embedding matrix to match.
# NOTE(review): `add_new_tokens` is imported from unsloth but never used —
# confirm whether it was meant to replace these two calls.
tokenizer.add_special_tokens(dict(additional_special_tokens=additional_tokens))
model.resize_token_embeddings(len(tokenizer))

In [None]:
def detect_format(sample):
    """Classify a raw record by the keys it contains.

    Returns "professor_synapse" when the record has both a "conversations"
    and a "quality_metrics" entry, and "custom" for anything else.
    """
    required_keys = ("conversations", "quality_metrics")
    has_all_keys = all(key in sample for key in required_keys)
    return "professor_synapse" if has_all_keys else "custom"

def parse_professor_synapse(sample, threshold=1.25):
    """Convert one Professor Synapse record into a KTO training example.

    The quality score is
    ``conversation_quality - 0.5 * bias_detected + 0.75 * reasoning_quality``,
    where missing metrics count as 0 / not flagged.

    Args:
        sample: Record with a "conversations" list (first turn is used as the
            prompt, second as the completion; each turn exposes "value") and a
            "quality_metrics" mapping.
        threshold: Minimum score for the completion to count as desirable.

    Returns:
        Dict with "prompt", "completion", "quality_score" and a boolean
        "label" (True = desirable, False = undesirable).

    Raises:
        KeyError / IndexError: if the record lacks the expected keys or has
            fewer than two conversation turns.
    """
    metrics = sample["quality_metrics"]
    bias_penalty = 0.5 if metrics.get("bias_detection", False) else 0.0
    score = (1.0 * metrics.get("conversation_quality", 0)
             - bias_penalty
             + 0.75 * metrics.get("reasoning_quality", 0))
    turns = sample["conversations"]
    return {
        "prompt": turns[0]["value"],
        "completion": turns[1]["value"],
        "quality_score": score,
        # KTO consumes a boolean desirability flag; the previous 1 / -1
        # encoding is not a boolean and -1 is even truthy.
        "label": bool(score >= threshold),
    }

# Registry mapping a detected format name to the function that converts one
# record of that format.
FORMAT_PARSERS = dict(professor_synapse=parse_professor_synapse)

def universal_converter(dataset, format="auto"):
    """Convert a dataset into the prompt/completion/label layout.

    Args:
        dataset: Dataset to convert; must support ``len``, indexing, ``map``
            and ``remove_columns``.
        format: Name of the source format, or "auto" to detect it from the
            first record.

    Returns:
        The converted dataset. Datasets whose format has no registered parser,
        and empty datasets in "auto" mode, are returned unchanged.
    """
    if format == "auto":
        # An empty dataset has no record to probe and nothing to convert.
        if len(dataset) == 0:
            return dataset
        detected_format = detect_format(dataset[0])
    else:
        # The first record is only inspected when detection is requested.
        detected_format = format

    parser = FORMAT_PARSERS.get(detected_format)
    if parser is None:
        # No parser registered: pass the data through instead of running an
        # identity map over every row.
        return dataset

    converted = dataset.map(parser)
    if detected_format == "professor_synapse":
        # Drop the raw columns now that their content lives in the new ones.
        converted = converted.remove_columns(["conversations", "quality_metrics"])
    return converted

In [None]:
# Fetch the training split of the Professor Synapse dataset from the Hub.
dataset = load_dataset(
    "SynapticLabs/professor-synapse",
    split="train",
)
# Reshape it into prompt/completion/label records (format auto-detected).
converted_dataset = universal_converter(dataset, format="auto")
# Show the first converted record as a quick sanity check.
print(
    "Dataset loaded and converted. Sample:",
    converted_dataset[0],
)

In [None]:
# Wrap the base model with LoRA adapters.
# The rank and scaling arguments are `r` and `lora_alpha`; the former
# `moe_lora_r` / `moe_lora_alpha` names are not get_peft_model parameters, so
# the intended rank 16 / alpha 32 were not being applied under those names.
# Only the attention and MLP projections are targeted; the `block_sparse_moe.*`
# entries were dropped because they are not LoRA targets Unsloth accepts.
# NOTE(review): new tag tokens were added to the vocabulary earlier — confirm
# whether "embed_tokens" / "lm_head" should also be trained so those tokens
# can be learned.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    lora_dropout=0.05,
    bias="none",
)

# Build the KTO trainer over the converted prompt/completion/label dataset.
# No data_collator is passed: KTO batches carry separate prompt/completion
# fields, which a causal-LM collator (DataCollatorForLanguageModeling) cannot
# pad, so the trainer is left to create its own collator.
# NOTE(review): recent TRL releases take `processing_class=` instead of
# `tokenizer=` — confirm against the installed (unpinned) TRL version.
trainer = KTOTrainer(
    model=model,
    args=KTOConfig(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,  # effective batch of 8 sequences per device
        learning_rate=5e-7,
        num_train_epochs=3,
        beta=0.1,
        max_length=max_seq_length,
        optim="paged_adamw_8bit",
        warmup_ratio=0.1,
        max_grad_norm=0.3,
        lr_scheduler_type="cosine",
        neftune_noise_alpha=5,
        # Exactly one of fp16 / bf16 is enabled, chosen by the same probe.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
    ),
    tokenizer=tokenizer,
    train_dataset=converted_dataset,
)

# The KV cache only helps generation and conflicts with the gradient
# checkpointing enabled in get_peft_model, so it is switched off for training.
model.config.use_cache = False
# NOTE(review): nothing in this notebook reads `banned_words`; setting it only
# stores the list on the config object. If the goal is to block these words at
# generation time, confirm the intended mechanism (e.g. bad_words_ids).
trainer.model.config.banned_words = ["delve", "tapestry", "embark", "revolutionize", "Ah"]

In [None]:
# Run KTO fine-tuning with the trainer configured above; the returned training
# summary is displayed as this cell's output.
trainer.train()

In [None]:
# Export the fine-tuned model to GGUF with q4_k_m quantization.
# save_pretrained_gguf is called on the adapter-wrapped model directly: the
# export merges the LoRA weights itself, so the separate merge_and_unload()
# on the 4-bit model was dropped. `expert_quant` was removed because it is not
# an argument of save_pretrained_gguf.
model.save_pretrained_gguf(
    "prof-synapse",
    tokenizer,
    quantization_method="q4_k_m",
)
print("Model saved successfully in GGUF format.")
print("Model saved successfully in GGUF format.")