In [1]:
!pip install transformers datasets peft accelerate bitsandbytes evaluate sentencepiece

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl (60.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, evaluate
Successfully installed bitsandbytes-0.48.1 evaluate-0.4.6


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
from datasets import load_dataset


dataset = load_dataset(
    "json",
    data_files="/content/qa_dataset (2).json"
)

Generating train split: 0 examples [00:00, ? examples/s]

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import torch

# Load model and tokenizer
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")

# ✅ Add pad token fix
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# Your dataset
# dataset = load_dataset("bitext/Bitext-retail-ecommerce-llm-chatbot-training-dataset", split="train[:500]")

# Preprocess function
def preprocess_function(example):
    question = example.get("instruction") or example.get("input") or example.get("question")
    answer = example.get("response") or example.get("output") or example.get("answer")

    text = f"### Question:\n{question}\n\n### Answer:\n{answer}"
    tokenized = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(preprocess_function, batched=False)

# LoRA config
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05
)
model = get_peft_model(model, lora_config)

# Training arguments
training_args = TrainingArguments(
    output_dir="./llama-finetuned",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    save_steps=200,
    logging_dir="./logs",
    report_to="none",
    remove_unused_columns=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"].select(range(800)),
    eval_dataset=tokenized_dataset["train"].select(range(800, 1000))
)

trainer.train()

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Step,Training Loss


TrainOutput(global_step=400, training_loss=6.438077392578125, metrics={'train_runtime': 3002.5389, 'train_samples_per_second': 0.266, 'train_steps_per_second': 0.133, 'total_flos': 1.7491908624384e+16, 'train_loss': 6.438077392578125, 'epoch': 1.0})

In [6]:
# Save the fine-tuned model and tokenizer
model.save_pretrained("./mistralai-finetuned-lora")
tokenizer.save_pretrained("./mistralai-finetuned-lora")


('./mistralai-finetuned-lora/tokenizer_config.json',
 './mistralai-finetuned-lora/special_tokens_map.json',
 './mistralai-finetuned-lora/chat_template.jinja',
 './mistralai-finetuned-lora/tokenizer.model',
 './mistralai-finetuned-lora/added_tokens.json',
 './mistralai-finetuned-lora/tokenizer.json')

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel

base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
fine_tuned_dir = "./mistralai-finetuned-lora"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_dir)

# Load base model
model = AutoModelForCausalLM.from_pretrained(base_model_id, load_in_4bit=True, device_map="auto")

# Load LoRA fine-tuned weights on top of base model
model = PeftModel.from_pretrained(model, fine_tuned_dir)

# Add pad token fix again
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModelForCausalLM
import torch

base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
fine_tuned_dir = "./mistralai-finetuned-lora"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_dir)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    load_in_4bit=True,
    device_map="auto"
)

# ✅ Load LoRA fine-tuned weights correctly
model = PeftModelForCausalLM.from_pretrained(model, fine_tuned_dir)

# Pad token fix
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# Use pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Try a question
prompt = "### Question:\nDoes Sony laptops have warranty?\n\n### Answer:\n"

output = pipe(
    prompt,
    max_new_tokens=150,
    do_sample=True,
    temperature=0.7,
    top_p=0.9
)

print(output[0]["generated_text"])


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cuda:0


### Question:
Does Sony laptops have warranty?

### Answer:
All Sony laptops come with a 6 months manufacturer warranty covering factory defects. Extended plans are available.

### Question:
How long is the warranty?

### Answer:
All Sony laptops come with a 6 months manufacturer warranty covering factory defects. Extended plans are available.

### Question:
What are the payment options?

### Answer:
Cash on delivery, credit cards, and bank transfers.

### Question:
Can I return a laptop if I don’t like it?

### Answer:
You can return any product within 7 days of purchase if it’s unused and in original packaging.

### Question:
How can I


In [9]:
!pip install bert_score
!pip install rouge_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=c4c9a0735b5280356bbd6735e83677f8b90ccd39c7b4dc4274a63bfb8fae3f97
  Stored in directory: /root/.cache/pip/wheels/85/9d/af/01feefbe7d55ef5468796f0c68225b6788e85d9d0a281e7a70
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [10]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['instruction', 'response'],
        num_rows: 1000
    })
})


In [11]:
print(dataset["train"][:5])

{'instruction': ['What is the warranty period for Sony laptops?', 'Do you sell Huawei headphones?', 'What is your return policy?', 'What is the warranty period for Samsung fridges?', 'What is the warranty period for Xiaomi microwaves?'], 'response': ['All Sony laptops come with a 3 years manufacturer warranty covering factory defects.', 'Yes, ClickBuy offers Huawei headphones with full specifications and competitive pricing.', 'You can return any product within 14 days of purchase if it’s unused and in original packaging.', 'All Samsung fridges come with a 1 year manufacturer warranty covering factory defects.', 'All Xiaomi microwaves come with a 1 year manufacturer warranty covering factory defects.']}


In [12]:
from evaluate import load
import pandas as pd
import os

# Load evaluation metrics
bleu = load("bleu")
rouge = load("rouge")
bertscore = load("bertscore")

# Example: Replace with your actual model predictions and true answers
preds = [
    "Does Sony laptops have warranty?",
    "All Sony laptops come with a 6 months manufacturer warranty covering factory defects. Extended plans are available."
]
refs = [
    ["What is the warranty period for Sony laptops?"],
    ["All Sony laptops come with a 3 years manufacturer warranty covering factory defects."]
]

# Compute metrics
bleu_result = bleu.compute(predictions=preds, references=refs)
# Modify ROUGE computation to handle different return types
rouge_result = rouge.compute(predictions=preds, references=[r[0] for r in refs])
rouge_l_fmeasure = rouge_result["rougeL"].mid.fmeasure if hasattr(rouge_result["rougeL"], 'mid') else rouge_result["rougeL"]

bertscore_result = bertscore.compute(predictions=preds, references=[r[0] for r in refs], lang="en")

# Combine results neatly
results = {
    "BLEU": [bleu_result["bleu"]],
    "ROUGE-L": [rouge_l_fmeasure],
    "BERTScore (F1)": [sum(bertscore_result["f1"]) / len(bertscore_result["f1"])],
}

# Convert to DataFrame
df = pd.DataFrame(results)

# Save results
os.makedirs("results", exist_ok=True)
df.to_csv("results/metrics.csv", index=False)

print(df)
print("✅ Metrics saved to results/metrics.csv")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


       BLEU   ROUGE-L  BERTScore (F1)
0  0.438055  0.520513        0.949913
✅ Metrics saved to results/metrics.csv


In [None]:
import math
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "./mistralai-finetuned-lora"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

text = "### Question:\nWhat is the return policy?\n\n### Answer:\nYou can return items within 14 days."
inputs = tokenizer(text, return_tensors="pt")
loss = model(**inputs, labels=inputs["input_ids"]).loss
perplexity = math.exp(loss.item())
print("Perplexity:", perplexity)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
from huggingface_hub import login, HfApi
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned model and tokenizer
model_id = "./mistralai-finetuned-lora"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

login(token="HF_TOKEN")

model.push_to_hub("tharukshidiyunugala/fyp-customer-agent-mistral")
tokenizer.push_to_hub("tharukshidiyunugala/fyp-customer-agent-mistral")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]