In [1]:
from transformers import AutoTokenizer
from peft import PeftModel
from transformers import AutoModelForSequenceClassification
import torch

In [2]:
base_model_name = "xlm-roberta-base"
lora_model_path = "./English_DistilBERT_Model"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(lora_model_path)

In [4]:
base_model = AutoModelForSequenceClassification.from_pretrained(base_model_name, num_labels=2)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
model = PeftModel.from_pretrained(base_model, lora_model_path)
model.eval()
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(F"Using: {device}")
model.to(device)

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
Using: mps


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): XLMRobertaForSequenceClassification(
      (roberta): XLMRobertaModel(
        (embeddings): XLMRobertaEmbeddings(
          (word_embeddings): Embedding(250002, 768, padding_idx=1)
          (position_embeddings): Embedding(514, 768, padding_idx=1)
          (token_type_embeddings): Embedding(1, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): XLMRobertaEncoder(
          (layer): ModuleList(
            (0-11): 12 x XLMRobertaLayer(
              (attention): XLMRobertaAttention(
                (self): XLMRobertaSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
          

In [6]:
label_map = {
    0: "Negative",
    1: "Positive",
    2: "Neutral"
}

In [7]:
def predict_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_class_id = logits.argmax().item()
        return label_map[predicted_class_id]

In [8]:
sample_texts = [
    "I absolutely loved this product!",
    "I'm not happy with the service.",
    "It's okay, nothing special.",
    "Je suis très déçu du produit.",
    "बहुत ही खराब अनुभव था।",
    ":(",
    ":)",
    "Este produto é incrível!",
    "This product is amazing!",
    "Este produto é muito bom!",
    "Este produto é muito ruim!",
]

for text in sample_texts:
    prediction = predict_sentiment(text)
    print(f"> \"{text}\" → {prediction}")


> "I absolutely loved this product!" → Positive
> "I'm not happy with the service." → Negative
> "It's okay, nothing special." → Positive
> "Je suis très déçu du produit." → Negative
> "बहुत ही खराब अनुभव था।" → Negative
> ":(" → Negative
> ":)" → Positive
> "Este produto é incrível!" → Positive
> "This product is amazing!" → Positive
> "Este produto é muito bom!" → Positive
> "Este produto é muito ruim!" → Positive


In [9]:
diagnostic_texts = {
    # Category 1: Basic Polarity (Sanity Checks)
    "This is a fantastic development!": "Positive",
    "I am incredibly happy and satisfied.": "Positive",
    "This is a total disaster, a complete failure.": "Negative",
    "I deeply regret this purchase.": "Negative",
    "This is for testing purposes.": "Neutral",

    # Category 2: English Nuances (Negation, Conditionals, Sarcasm)
    "This is not good at all.": "Negative",
    "I'm not dissatisfied with the result.": "Neutral", # Double negative
    "I wouldn't call it the worst thing ever.": "Neutral",
    "It's not the best, but it's not the worst either.": "Neutral",
    "The product could have been better.": "Negative",
    "Unless the quality improves, I won't buy again.": "Negative",
    "Oh great, it arrived broken. Perfect.": "Negative", # Sarcasm
    "The phone's camera is amazing, but the battery life is terrible.": "Negative", # Mixed review, overall negative

    # Category 3: Multilingual Tests (Testing the imbalance)
    # Portuguese (Should be strong here if it's the dominant language)
    "Este serviço não é bom.": "Negative",
    "A qualidade poderia ser muito melhor.": "Negative",
    "Funciona exatamente como esperado.": "Neutral",
    "Estou nem um pouco impressionado.": "Negative",
    
    # French (Minority language)
    "Je ne le recommande pas du tout.": "Negative",
    "Ce n'est pas mal, en fait.": "Neutral", # "It's not bad, actually"

    # Hindi (Statistically invisible language)
    "यह उत्पाद मुझे पसंद आया।": "Positive",
    "सेवा अच्छी नहीं थी।": "Negative",
    "यह ठीक-ठाक है, कुछ खास नहीं।": "Neutral",

    # Spanish (To test generalization to a related language)
    "¡Esto es maravilloso!": "Positive",
    "No me gusta para nada.": "Negative",

    # Category 4: Emoticons, Slang, and Non-Standard Text
    "I am so sad :(": "Negative",
    "This makes me very happy :D": "Positive",
    "meh.": "Neutral",
    "The service was just awful 👎": "Negative",
    "Love it ❤️❤️❤️": "Positive",
    "I am so angry right now 😠": "Negative",
    "hmmm 🤔 not sure what to think": "Neutral",
    "this prodict is amzang": "Positive", # Typo test
}

for text in diagnostic_texts:
    prediction = predict_sentiment(text)
    print(f"> \"{text}\" → {prediction}")

> "This is a fantastic development!" → Positive
> "I am incredibly happy and satisfied." → Positive
> "This is a total disaster, a complete failure." → Negative
> "I deeply regret this purchase." → Negative
> "This is for testing purposes." → Positive
> "This is not good at all." → Positive
> "I'm not dissatisfied with the result." → Negative
> "I wouldn't call it the worst thing ever." → Negative
> "It's not the best, but it's not the worst either." → Positive
> "The product could have been better." → Positive
> "Unless the quality improves, I won't buy again." → Positive
> "Oh great, it arrived broken. Perfect." → Negative
> "The phone's camera is amazing, but the battery life is terrible." → Negative
> "Este serviço não é bom." → Negative
> "A qualidade poderia ser muito melhor." → Positive
> "Funciona exatamente como esperado." → Positive
> "Estou nem um pouco impressionado." → Negative
> "Je ne le recommande pas du tout." → Negative
> "Ce n'est pas mal, en fait." → Positive
> "यह 