In [None]:
!pip install transformers datasets accelerate peft bitsandbytes trl


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Hub id of the base checkpoint that is quantized here and fine-tuned below.
model_name = "mistralai/Mistral-7B-v0.1"

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)

# trust_remote_code is intentionally NOT passed: it opts in to executing Python
# shipped inside the Hub repository, and the later reload cells in this notebook
# already load this same checkpoint without it.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate decide where the weights are placed
)


In [None]:
from datasets import load_dataset


# NOTE: the name `format` shadows the Python builtin for the rest of the kernel
# session; it is kept so that any cell referring to it keeps working.
def format(example):
    """Add a "text" field holding the record's prompt and response as one string.

    Reads example["prompt"] and example["response"], stores the combined
    "### Question / ### Answer" string under example["text"], and returns the
    same record.
    """
    question = example["prompt"]
    answer = example["response"]
    example["text"] = f"### Question:\n{question}\n\n### Answer:\n{answer}"
    return example


# Read the JSON-lines FAQ file as the "train" split, then build the "text" column.
dataset = load_dataset(
    "json",
    data_files="processed_faq.jsonl",
    split="train",
)
dataset = dataset.map(format)


In [None]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# LoRA adapter settings: rank 8, scaling alpha 32, 5% dropout on the adapter
# path, no bias parameters trained, configured for causal language modelling.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Run PEFT's k-bit training preparation on the quantized model first, then wrap
# the result with the LoRA adapters described above.
# NOTE: this rebinds `model`, so the cell is not meant to be re-run on its own.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)


In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Upper bound on tokens per training example; longer question/answer pairs are
# truncated. Raise it if the FAQ answers are long and GPU memory allows.
MAX_SEQ_LENGTH = 512

# The collator pads every batch and raises if the tokenizer has no pad token.
# Prefer the unknown token over EOS: the causal-LM collator masks pad-token ids
# out of the labels, so reusing EOS as padding would also hide the end-of-answer
# token from the loss.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token or tokenizer.eos_token


def tokenize_batch(batch):
    """Tokenize a batch of formatted examples for causal-LM training.

    Args:
        batch: mapping with a "text" list, as produced by the formatting cell.

    Returns:
        The tokenizer output (input_ids / attention_mask) for each text, with
        the EOS token appended so the model can learn where an answer ends, and
        truncated to MAX_SEQ_LENGTH tokens.
    """
    eos = tokenizer.eos_token or ""
    texts = [text + eos for text in batch["text"]]
    return tokenizer(texts, truncation=True, max_length=MAX_SEQ_LENGTH)


# Trainer needs token ids, not raw strings: the original string columns
# (prompt / response / text) cannot be collated into tensors, so they are
# replaced by the tokenized fields here. `dataset` itself is left untouched.
tokenized_dataset = dataset.map(
    tokenize_batch,
    batched=True,
    remove_columns=dataset.column_names,
)

# Effective batch size is 1 x 4 accumulation steps; one checkpoint per epoch.
training_args = TrainingArguments(
    output_dir="mistral-finetuned-faq",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch"
)

# mlm=False -> plain next-token objective; labels are a copy of input_ids.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

# NOTE(review): recent transformers releases rename the `tokenizer` argument of
# Trainer to `processing_class` -- confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

trainer.train()


In [None]:
# Persist the adapter-wrapped model and then the tokenizer into the same
# directory so both can be reloaded from one path.
for artifact in (model, tokenizer):
    artifact.save_pretrained("mistral-lora-faq")


In [None]:
from peft import PeftModel

# Reload the quantized base checkpoint and attach the LoRA adapter saved to
# "mistral-lora-faq", to check that the artifacts on disk are usable.
base_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto")
ft_model = PeftModel.from_pretrained(base_model, "mistral-lora-faq")

# The prompt uses the same "### Question / ### Answer" template as the training text.
prompt = "### Question:\nHow to reset my password?\n\n### Answer:\n"
# Move the inputs to wherever the model actually lives instead of hard-coding
# "cuda": placement is decided by device_map="auto".
inputs = tokenizer(prompt, return_tensors="pt").to(ft_model.device)
outputs = ft_model.generate(**inputs, max_new_tokens=100)
# The decoded sequence contains the prompt followed by the generated answer.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
from datasets import load_dataset


# NOTE(review): this cell repeats the earlier data-loading cell step for step
# (same file, same template, same `format` name) -- consider removing one copy.
def format(example):
    """Store the prompt and response of one record as a single "text" string.

    The combined "### Question / ### Answer" string is written to
    example["text"] and the same record is returned.
    """
    question = example["prompt"]
    answer = example["response"]
    example["text"] = f"### Question:\n{question}\n\n### Answer:\n{answer}"
    return example


# Load the FAQ JSON-lines file as the "train" split and add the "text" column.
dataset = load_dataset(
    "json",
    data_files="processed_faq.jsonl",
    split="train",
)
dataset = dataset.map(format)


In [None]:
from peft import PeftModel

# Load base + LoRA only when no fine-tuned model is in memory yet. An earlier
# cell already builds `ft_model` in exactly this way, and loading the 7B base
# checkpoint a second time just to ask one more question costs GPU memory and
# time for no benefit.
if "ft_model" not in globals():
    base_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto")
    ft_model = PeftModel.from_pretrained(base_model, "mistral-lora-faq")

# Sample question, using the same template as the training text
prompt = "### Question:\nHow long is the return period?\n\n### Answer:\n"
# Follow the model's device instead of hard-coding "cuda"; placement is decided
# by device_map="auto".
inputs = tokenizer(prompt, return_tensors="pt").to(ft_model.device)
outputs = ft_model.generate(**inputs, max_new_tokens=100)
# The decoded sequence contains the prompt followed by the generated answer.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

# Hand-written example pair (not model output) used to demonstrate the metric.
pred = "You can return items within 30 days."
target = "Returns are allowed for 30 days."

# sentence_bleu defaults to uniform 1- to 4-gram weights. These two short
# sentences share no 3-grams or 4-grams, so without smoothing the score
# collapses to (almost) zero and NLTK emits a warning. Smoothing keeps the
# score meaningful for short texts.
bleu_score = sentence_bleu(
    [target.split()],  # list of tokenized references
    pred.split(),      # tokenized hypothesis
    smoothing_function=SmoothingFunction().method1,
)
print("BLEU Score:", bleu_score)
