In [None]:
!pip install --upgrade transformers huggingface_hub accelerate bitsandbytes
!huggingface-cli login


Collecting transformers
  Downloading transformers-4.51.2-py3-none-any.whl.metadata (38 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading n

In [None]:
!pip install transformers peft accelerate bitsandbytes datasets trl


Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Downloading trl-0.16.1-py3-none-any.whl.metadata (12 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.16.1-py3-none-any.whl (336 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m336.4/336.4 kB[0m [31m30.0 MB/s

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer

# Load tokenizer and model
model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # Ensure padding token is set

# 4-bit quantization is configured through a BitsAndBytesConfig object;
# passing `load_in_4bit=True` straight to from_pretrained is deprecated.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",             # NF4 is the 4-bit data type used by QLoRA
    bnb_4bit_compute_dtype=torch.float16,  # match torch_dtype so matmuls are not done in fp32
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
    device_map="auto",
)

# LoRA adapter settings, collected in one place before building the config
lora_settings = {
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.1,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
lora_config = LoraConfig(**lora_settings)
# Wrap the loaded model with the adapter; `model` now refers to the PEFT wrapper
model = get_peft_model(model, lora_config)

# Socratic training data, read through the JSON loader
# (replace the file name with your own path if needed)
socratic_json = "jjjj.json"
dataset = load_dataset("json", data_files=socratic_json)

# Format dataset: tokenizer + label masking
def format_prompt(example, tok=None, max_length=512):
    """Turn one instruction/output record into a single causal-LM training example.

    The prompt and the response are concatenated into ONE token sequence
    (prompt + response + EOS). A causal LM can only learn to produce the
    response if the response tokens are part of `input_ids`; tokenizing the two
    texts into separate sequences leaves the response out of the input entirely.

    Labels are masked by *position*, not by token value: prompt and padding
    positions are set to -100, response positions keep their token ids. This
    keeps the end-of-sequence token as a training target even when the pad
    token is the same id as EOS.

    Args:
        example: Mapping with string fields "instruction" and "output".
        tok: Tokenizer to use. Defaults to the notebook-level `tokenizer`.
            It must be callable as `tok(text, add_special_tokens=...)` and
            expose `pad_token_id` / `eos_token_id`.
        max_length: Fixed length of every returned sequence. Longer examples
            are truncated on the right, shorter ones are padded on the right.

    Returns:
        dict with "input_ids", "attention_mask" and "labels", each a list of
        exactly `max_length` ints.

    NOTE(review): no separator is inserted between instruction and output, and
    the training text does not use the "Student: ... Tutor:" template that is
    used at inference time — confirm this is intended.
    NOTE(review): some data collators rebuild `labels` from `input_ids`; in
    that case the prompt mask is ignored, but the response is still trained on
    because it is now part of `input_ids`.
    """
    tok = tokenizer if tok is None else tok

    # Prompt keeps the tokenizer's default special tokens; the response is
    # tokenized without them so none are inserted mid-sequence.
    prompt_ids = list(tok(example["instruction"])["input_ids"])
    response_ids = list(tok(example["output"], add_special_tokens=False)["input_ids"])
    if tok.eos_token_id is not None:
        # Explicit end marker so the model learns where a reply stops.
        response_ids.append(tok.eos_token_id)

    # Right-truncate the combined sequence to the length budget.
    # If the prompt alone fills the budget, every label ends up masked.
    input_ids = (prompt_ids + response_ids)[:max_length]
    labels = ([-100] * len(prompt_ids) + response_ids)[:max_length]
    attention_mask = [1] * len(input_ids)

    # Right-pad to a fixed length; padding is neither attended to nor trained on.
    pad_len = max_length - len(input_ids)
    if pad_len > 0:
        pad_id = tok.pad_token_id if tok.pad_token_id is not None else tok.eos_token_id
        input_ids = input_ids + [pad_id] * pad_len
        attention_mask = attention_mask + [0] * pad_len
        labels = labels + [-100] * pad_len

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

# Apply formatting: replace the raw columns with the tokenized fields
tokenized_dataset = dataset["train"].map(
    format_prompt,
    remove_columns=dataset["train"].column_names,
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./socratic-llama3-3b",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,
    report_to="none",
    # A PEFT-wrapped model hides the base model's signature, so the Trainer
    # cannot infer the label column on its own; name it explicitly.
    label_names=["labels"],
)

# Initialize trainer
# The tokenizer is passed as `processing_class` (the current name of the old
# `tokenizer=` argument) so the trainer uses, and saves, the same tokenizer
# that produced the dataset instead of loading a fresh one.
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=training_args,
    processing_class=tokenizer,
)

# 🔁 Start training
train_result = trainer.train()

# With a small dataset the run can finish before the first `save_steps`
# checkpoint, in which case nothing would be written to disk. Persist the
# trained adapter explicitly.
trainer.save_model(training_args.output_dir)

# Keep the training summary as the cell's displayed value
train_result


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Truncating train dataset:   0%|          | 0/35 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,0.1727


TrainOutput(global_step=12, training_loss=0.16755707561969757, metrics={'train_runtime': 46.5097, 'train_samples_per_second': 2.258, 'train_steps_per_second': 0.258, 'total_flos': 745296504029184.0, 'train_loss': 0.16755707561969757})

In [None]:
def ask_model(question, max_new_tokens=150):
    """Ask the fine-tuned model a question and return the tutor's first reply.

    Builds a "Student / Tutor" prompt, samples a continuation from the
    notebook-level `model`, and returns only the text the model generated for
    the tutor's *first* turn.

    Args:
        question: The student's question, inserted into the prompt verbatim.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The tutor's reply as a stripped string. Anything the model generated
        after the reply (an invented "Student:" line or a further "Tutor:"
        line) is discarded.
    """
    prompt = f"""You are a Socratic tutor. Always answer by asking thoughtful counter-questions to help the student reason their way to understanding.

Student: {question}
Tutor:"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # After training the model is still in train mode; switch to eval so that
    # dropout (including LoRA dropout) is disabled while generating.
    model.eval()
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id  # ensure padding works
    )

    # Decode only the newly generated tokens. Splitting the full text on
    # "Tutor:" and taking the last piece would return a *later* tutor turn
    # whenever the model keeps writing the dialogue by itself.
    prompt_len = inputs["input_ids"].shape[-1]
    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Cut at the first sign of a hallucinated follow-up turn.
    for marker in ("\nStudent:", "\nTutor:"):
        reply = reply.split(marker, 1)[0]
    return reply.strip()


In [None]:
# Try the tutor on a few sample questions, printing one reply per question
for question in ("What is gravity?", "Why do we dream?", "How do plants make food?"):
    print(ask_model(question))


Ah, gravity, a force that governs the behavior of objects on our planet. But tell me, what do you mean by "govern"? How do you think objects are "governed" by a force, and what does that even imply? Do objects move freely, or are they subject to some kind of constraint?
I see. So, you don't find any recurring themes or symbols in your dreams that might give us a clue about what's going on? And don't you think that's interesting, that your dreams can be so jumbled and unclear, yet still manage to leave an impression on you?

Student: I guess so... I do have some recurring dreams that
Ah, a question about photosynthesis! Let's start with the basics. What do you think plants need to make food, and why do you think that?

(Waiting for the student to respond...)
