# TEST ACCURACY OF OUR FINE-TUNED MODEL

In [1]:
%%capture

!pip install unsloth # install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git # Also get the latest version Unsloth!

In [2]:
%%capture

!pip install evaluate

In [22]:
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
import torch
import evaluate  # Updated from `load_metric`
import pandas as pd
from tqdm import tqdm  # For progress tracking
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer

In [4]:
model_name = "aaditya-vaid/Llama-3.2-3B-Fine-Tuned-IMDB10K"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name,
    max_seq_length=512,  
    dtype=torch.float16,  
    attn_implementation="flash_attention_2"
)
model.eval()
FastLanguageModel.for_inference(model)  # Enable faster inference

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.3.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 6.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

Unsloth 2025.3.19 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072, padding_idx=128004)
        (layers): ModuleList(
          (0): LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear

In [5]:
prompt_temp = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
Classify the following product review as positive or negative.

### Question:
{}

### Response:
{}"""

In [6]:
def truncate_text(text, max_length=512, buffer_tokens=50):
    """Truncates the input text to fit within the model's max sequence length."""
    tokenized_text = tokenizer.encode(text, truncation=False)  # Get token IDs
    max_allowed_length = max_length - buffer_tokens  # Reserve space for response

    if len(tokenized_text) > max_allowed_length:
        truncated_text = tokenizer.decode(tokenized_text[:max_allowed_length], skip_special_tokens=True)
        return truncated_text
    return text

In [26]:
def classify_review(example):
    question = truncate_text(example["eng_reviews"])  # Ensure input is within limit
    input_text = prompt_temp.format(question, "")

    inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=50,
            use_cache=True,
        )

    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
    predicted_label = prediction.split("### Response:")[-1].strip().upper()  # Extract response

    return {"predicted_sentiment": predicted_label}

In [8]:
# Load dataset (assuming it's a CSV)
dataset = load_dataset("csv", data_files="/kaggle/input/amazon-reviews-for-sentiment-analysis/amazon_review_dataset.csv")['train']

dataset

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['eng_reviews', 'sentiments'],
    num_rows: 7763
})

In [24]:
dataset[0]

{'eng_reviews': "OnePlus has outdone itself this time, delivering a smartphone that truly lives up to its legacy as a flagship killer. After using it for a week, here are my ratings:  1. Design: 10/10 The slim, boxy design with a matte black finish feels premium and elegant. The in-hand feel is exceptional.  2. AI: 10/10 Packed with all the latest AI features, it’s both innovative and intuitive.  3. OS: 9/10 The return of OxygenOS is a brilliant move! While there are a few bloatware apps, OxygenOS 15 is lightning-fast and smooth.  4. Camera: 9/10 While not flagship-level, the camera performs impressively, capturing excellent detail in most scenarios.  5. Battery: 10/10 The silicon battery is a game-changer, offering 10-11 hours of screen-on time with moderate use—simply outstanding.  6. Processor: 10/10 The Snapdragon 8 Gen 3 delivers optimized, power-efficient performance that feels seamless.  7. Display: 10/10 The display is breathtaking—vivid, sharp, and a delight to use.  Overall, 

In [27]:
dataset = dataset.map(classify_review)

Map:   0%|          | 0/7763 [00:00<?, ? examples/s]

In [30]:
dataset[0]

{'eng_reviews': "OnePlus has outdone itself this time, delivering a smartphone that truly lives up to its legacy as a flagship killer. After using it for a week, here are my ratings:  1. Design: 10/10 The slim, boxy design with a matte black finish feels premium and elegant. The in-hand feel is exceptional.  2. AI: 10/10 Packed with all the latest AI features, it’s both innovative and intuitive.  3. OS: 9/10 The return of OxygenOS is a brilliant move! While there are a few bloatware apps, OxygenOS 15 is lightning-fast and smooth.  4. Camera: 9/10 While not flagship-level, the camera performs impressively, capturing excellent detail in most scenarios.  5. Battery: 10/10 The silicon battery is a game-changer, offering 10-11 hours of screen-on time with moderate use—simply outstanding.  6. Processor: 10/10 The Snapdragon 8 Gen 3 delivers optimized, power-efficient performance that feels seamless.  7. Display: 10/10 The display is breathtaking—vivid, sharp, and a delight to use.  Overall, 

In [31]:
label_map = {"POSITIVE": 1, "NEGATIVE": 0}

In [32]:
dataset = dataset.filter(lambda example: example["predicted_sentiment"] in label_map)

Filter:   0%|          | 0/7763 [00:00<?, ? examples/s]

In [33]:
true_labels = [label_map[label] for label in dataset["sentiments"]]
predicted_labels = [label_map[label] for label in dataset["predicted_sentiment"]]

# Compute accuracy
accuracy_metric = evaluate.load("accuracy")
accuracy = accuracy_metric.compute(predictions=predicted_labels, references=true_labels)
print("Accuracy:", accuracy["accuracy"])

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Accuracy: 0.8607497101635966
