# 🤖 Fine-Tune Mistral-7B for Empathetic HR Bot using QLoRA
This notebook demonstrates how to fine-tune [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on a small HR-focused dialogue dataset using QLoRA with Hugging Face `peft` and `transformers`.

In [None]:
# ✅ Install required libraries
!pip install -q accelerate bitsandbytes peft transformers datasets trl

In [None]:
# ✅ Sample training dataset (you can later upload your own below)
from datasets import Dataset

# Fixed seed so the train/eval split is identical on every Restart & Run All.
SPLIT_SEED = 42

# Parallel lists: prompt[i] is the employee message, completion[i] the desired reply.
data = {
    "prompt": [
        "Employee Problem: I've been feeling exhausted at work.\nAssistant:",
        "Employee Problem: I can't focus on my tasks lately.\nAssistant:",
    ],
    "completion": [
        "I'm really sorry to hear that. Can you share what might be causing this exhaustion? Have things changed recently?",
        "That must be tough. Is there anything that's been distracting or worrying you outside or inside of work?",
    ]
}

dataset = Dataset.from_dict(data)
# Produces a DatasetDict with "train" and "test" splits.
dataset = dataset.train_test_split(test_size=0.2, seed=SPLIT_SEED)

# Persist both splits as JSON Lines files in the working directory.
dataset["train"].to_json("train_data.json", lines=True)
dataset["test"].to_json("eval_data.json", lines=True)

In [None]:
# ⬆️ Optionally upload your own dataset (JSON format)
# google.colab is only importable inside Colab; guard the import so this
# optional step does not abort a Run All in other Jupyter environments.
try:
    from google.colab import files
except ImportError:
    uploaded = {}
    print("google.colab is not available; skipping the optional upload.")
else:
    # Opens the Colab file picker; the result is stored in `uploaded`.
    uploaded = files.upload()

In [None]:
# 🧠 Define and prepare model (Mistral-7B-Instruct) for QLoRA fine-tuning
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse EOS as the padding token so fixed-length padded batches can be built.
tokenizer.pad_token = tokenizer.eos_token

# QLoRA keeps the frozen base weights in 4-bit NF4 and trains LoRA adapters on
# top. (The previous load_in_8bit=True was plain 8-bit LoRA, not QLoRA.)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    # float16 compute to match fp16=True in the training configuration.
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
# Prepares the quantized model for training before adapters are attached.
model = prepare_model_for_kbit_training(model)

# LoRA adapters on the attention projection layers only.
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, peft_config)

In [None]:
# 🧾 Format dataset with system prompt
# NOTE(review): the <|system|> ... </s> markers are not the [INST] template that
# Mistral-Instruct is usually prompted with — confirm this format is intended.
SYSTEM_PROMPT = '''<|system|>
You are a supportive HR assistant. When an employee shares a problem, your goal is to gently ask about potential causes with empathy. Once you gather enough info, suggest a helpful solution if appropriate.
</s>'''

MAX_PROMPT_TOKENS = 512       # token budget for system prompt + employee message
MAX_COMPLETION_TOKENS = 128   # token budget for the assistant reply, incl. EOS
MAX_SEQ_LENGTH = MAX_PROMPT_TOKENS + MAX_COMPLETION_TOKENS
IGNORE_INDEX = -100           # label value that the loss function skips


def format_prompt(example):
    """Turn one prompt/completion pair into a fixed-length causal-LM example.

    The prompt (system prompt + employee message) and the completion are
    tokenized separately and concatenated into ONE sequence, so that
    ``input_ids``, ``attention_mask`` and ``labels`` all have the same length.
    Labels are set to ``IGNORE_INDEX`` on prompt and padding positions, so the
    loss is computed only on the assistant reply and its closing EOS token.

    Args:
        example: mapping with string fields ``"prompt"`` and ``"completion"``.

    Returns:
        dict with ``input_ids``, ``attention_mask`` and ``labels``, each a list
        of ``MAX_SEQ_LENGTH`` ints.
    """
    prompt_ids = tokenizer(
        SYSTEM_PROMPT + example["prompt"],
        truncation=True,
        max_length=MAX_PROMPT_TOKENS,
    )["input_ids"]

    # No extra special tokens in the middle of the sequence; reserve one slot
    # for the EOS that teaches the model where the reply ends.
    completion_ids = tokenizer(
        example["completion"],
        add_special_tokens=False,
        truncation=True,
        max_length=MAX_COMPLETION_TOKENS - 1,
    )["input_ids"] + [tokenizer.eos_token_id]

    input_ids = prompt_ids + completion_ids
    labels = [IGNORE_INDEX] * len(prompt_ids) + completion_ids
    attention_mask = [1] * len(input_ids)

    # Pad by position rather than by token id: pad_token == eos_token here, so
    # masking on the id would also hide the genuine end-of-reply EOS.
    pad_len = MAX_SEQ_LENGTH - len(input_ids)
    return {
        "input_ids": input_ids + [tokenizer.pad_token_id] * pad_len,
        "attention_mask": attention_mask + [0] * pad_len,
        "labels": labels + [IGNORE_INDEX] * pad_len,
    }


# Drop the raw text columns so only model inputs remain in the dataset.
tokenized_dataset = dataset.map(
    format_prompt,
    remove_columns=dataset["train"].column_names,
)

In [None]:
# 🔧 Training configuration
import inspect

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases and the old name is no longer accepted there. Pick whichever keyword
# the installed version supports so this cell runs on both.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    save_strategy="epoch",
    learning_rate=2e-4,
    bf16=False,
    fp16=True,
    logging_steps=10,
    report_to="none",
    **{_eval_strategy_key: "epoch"},  # evaluate at the end of every epoch
)

# The Trainer consumes the tokenized train/test splits built earlier.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

In [None]:
# 🚀 Launch the fine-tuning run
# Keep the value returned by train() and leave it as the cell's last
# expression so the notebook displays it.
training_output = trainer.train()
training_output

In [None]:
# 💬 Test the fine-tuned model
def chat_with_bot(employee_problem):
    """Generate and print the assistant's reply to an employee's problem.

    Builds the same prompt layout used for training (system prompt, then
    ``Employee Problem: ...`` and ``Assistant:``), generates up to 150 new
    tokens, and prints the decoded output sequence.

    Args:
        employee_problem: free-text description of the employee's issue.
    """
    prompt = SYSTEM_PROMPT + f"Employee Problem: {employee_problem}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Switch to inference mode: otherwise LoRA dropout stays active after
    # training and makes the generated text noisy and non-deterministic.
    model.eval()
    output = model.generate(
        **inputs,
        max_new_tokens=150,
        # Explicit pad id avoids relying on generate()'s fallback behaviour.
        pad_token_id=tokenizer.eos_token_id,
    )
    print(tokenizer.decode(output[0], skip_special_tokens=True))

# Example usage:
chat_with_bot("I feel disconnected from my team lately.")