In [None]:
# Install bitsandbytes, which backs the 4-bit quantized model load configured later in this notebook.
%pip install bitsandbytes

In [None]:
# Interactive Hugging Face Hub login.
# NOTE(review): presumably needed to download the access-restricted Llama checkpoint — confirm.
from huggingface_hub import login
login()

In [None]:
import torch
import pandas as pd
import bitsandbytes
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# First look at the raw tab-separated file; with pandas' default settings the first line is used as the header.
data = pd.read_csv('test_en_parallel.txt', sep='\t')
data

In [None]:
model_name = 'meta-llama/Llama-3.1-8B' # using Llama 3 because I was granted access to it

In [None]:
# 4-bit NF4 quantization settings; matrix multiplications are computed in float16.
bitsandbytes_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
)

In [None]:
# Load the tokenizer and the quantized causal LM, placing the whole model on the first GPU.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='cuda:0',
    quantization_config=bitsandbytes_config,
)

In [None]:
# Re-read the file without a header row, keep the first two columns, and
# drop row 0 (the header line) while capping the sample at rows 1..998.
data = pd.read_csv('test_en_parallel.txt', sep='\t', header=None)[[0, 1]]
data.columns = ['NEGATIVE', 'POSITIVE']
data = data.iloc[1:999]
sentences_ne = data['NEGATIVE'].values.tolist()
sentences_pos = data['POSITIVE'].values.tolist()

In [None]:
# Inspect the positive-sentiment sentences.
sentences_pos

In [None]:
# Pair every sentence with its gold label: all positives first, then all negatives.
sentences = [(s, "positive") for s in sentences_pos]
sentences.extend((s, "negative") for s in sentences_ne)

In [None]:
# Sanity check: first (sentence, label) pair.
sentences[0]

In [None]:
# Gold labels, built straight from the source lists (positives first, then
# negatives — the same order in which `sentences` is assembled). Deriving them
# this way keeps the cell re-runnable: unpacking `sentences` breaks once that
# name has been flattened to plain strings by a later cell.
labels = ["positive"] * len(sentences_pos) + ["negative"] * len(sentences_ne)
labels

In [None]:
# Plain sentence list (positives first, then negatives). Rebuilt from the source
# lists instead of unpacking `sentences` in place, so running this cell twice
# yields the same result rather than failing on already-flattened strings.
sentences = list(sentences_pos) + list(sentences_ne)

In [None]:
# Inspect the flattened list of sentences.
sentences

In [None]:
# Zero-shot classification prompt; the sentence to classify is appended after "Sentence: ".
prompt = "\n".join([
    "Classify the sentiment of the sentence.",
    "Respond with ONLY ONE WORD: positive or negative.",
    "",
    "Sentence: ",
])

In [None]:
# Reuse the end-of-sequence token as the padding token, on both the tokenizer
# and the model config, so the two agree on the pad id.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# The model was not returning one-word answers, so its raw output is normalized here.
def normalize_sentiment(decoded):
    """Map raw generated text to "positive" or "negative".

    The label that appears first in the (lower-cased, stripped) text wins, so
    an answer such as "positive\\nSentence: ... negative" is read as
    "positive" even though the continuation mentions the other label.

    Args:
        decoded: Text decoded from the generated tokens.

    Returns:
        "positive" or "negative". When neither label occurs in the text the
        function falls back to "negative".
    """
    text = decoded.lower().strip()

    pos_at = text.find("positive")
    neg_at = text.find("negative")

    # Neither label present: keep the historical default.
    if pos_at == -1 and neg_at == -1:
        return "negative"
    if neg_at == -1:
        return "positive"
    if pos_at == -1:
        return "negative"

    # Both present: the model's answer is whichever label comes first.
    return "positive" if pos_at < neg_at else "negative"


def build_label_trie(tokenizer, labels):
    """Build a token-id trie covering every label string.

    Each label is tokenized without special tokens and inserted as a path of
    nested dicts keyed by token id. The node reached after a label's last
    token gets the terminal entry ``None -> True``.

    Args:
        tokenizer: Callable returning an object with an ``input_ids`` attribute.
        labels: Iterable of label strings.

    Returns:
        The root dict of the trie.
    """
    root = {}
    for text in labels:
        encoded = tokenizer(text, add_special_tokens=False)
        cursor = root
        for token_id in encoded.input_ids:
            if token_id not in cursor:
                cursor[token_id] = {}
            cursor = cursor[token_id]
        # Mark the end of a complete label.
        cursor[None] = True
    return root


def make_prefix_allowed_tokens_fn(trie, eos_token_id, input_len):
    """Create a `prefix_allowed_tokens_fn` that confines generation to trie labels.

    Args:
        trie: Nested dict mapping token id -> child node. A node completes a
            label when it holds the terminal key ``None`` (the marker written
            by `build_label_trie`); the string marker ``"<END>"`` is accepted
            too for backward compatibility.
        eos_token_id: Token id offered once a label is complete.
        input_len: Number of leading prompt tokens in `input_ids` to skip.

    Returns:
        A callable ``(batch_id, input_ids) -> list[int]`` giving the token ids
        allowed at the next step. The list is never empty: when the generated
        prefix has left the trie, or no continuation exists, only
        `eos_token_id` is allowed so generation ends instead of failing on an
        empty constraint.
    """
    terminal_markers = (None, "<END>")

    def prefix_allowed_tokens_fn(batch_id, input_ids):
        # `input_ids` may be a single sequence (1-D) or the whole batch (2-D).
        if input_ids.dim() == 1:
            gen = input_ids[input_len:].tolist()
        else:
            gen = input_ids[batch_id, input_len:].tolist()

        # Walk the trie along the tokens generated so far.
        node = trie
        for tid in gen:
            if tid not in node:
                return [eos_token_id]
            node = node[tid]

        # Real token ids only; terminal markers are bookkeeping, not tokens.
        allowed = [k for k in node if k not in terminal_markers]
        if any(marker in node for marker in terminal_markers):
            allowed.append(eos_token_id)

        return allowed or [eos_token_id]

    return prefix_allowed_tokens_fn



# Label spellings the model may emit, with and without a leading space.
label_candidates = ["positive", "negative", " positive", " negative"]
label_trie = build_label_trie(tokenizer, label_candidates)
# Longest label in tokens; used as the generation budget.
label_token_counts = [
    len(tokenizer(candidate, add_special_tokens=False).input_ids)
    for candidate in label_candidates
]
label_max_len = max(label_token_counts)


def generate_label(model, tokenizer, prompt, text, answer_cue="\nSentiment:"):
    """Classify `text` as "positive" or "negative" with constrained greedy decoding.

    The model input is ``prompt + text + answer_cue``. The cue closes the
    "Sentence: ..." line and opens the answer slot, matching the
    "Sentence: ...\\nSentiment: <label>" layout of the few-shot examples;
    without it the label would be forced immediately after the sentence text.

    Relies on the module-level `label_trie` and `label_max_len`.

    Args:
        model: Causal LM exposing `generate` and `device`.
        tokenizer: Tokenizer matching `model`.
        prompt: Instruction (and optional few-shot examples) ending in "Sentence: ".
        text: Sentence to classify.
        answer_cue: Text appended after the sentence to elicit the label.

    Returns:
        "positive" or "negative", as normalized by `normalize_sentiment`.
    """
    # Send inputs to wherever the model lives rather than a hard-coded GPU.
    tokens = tokenizer(prompt + text + answer_cue, return_tensors="pt").to(model.device)
    input_len = tokens["input_ids"].shape[1]

    output = model.generate(
        **tokens,
        max_new_tokens=label_max_len,
        do_sample=False,
        prefix_allowed_tokens_fn=make_prefix_allowed_tokens_fn(
            label_trie,
            tokenizer.eos_token_id,
            input_len,
        ),
    )

    # Decode only the newly generated tokens, not the prompt.
    gen_tokens = output[0][input_len:]
    decoded = tokenizer.decode(gen_tokens, skip_special_tokens=True)
    return normalize_sentiment(decoded)


In [None]:
# Zero-shot predictions for every sentence.
y_pred2 = [
    generate_label(model, tokenizer, prompt, sentence)
    for sentence in sentences
]


In [None]:
# Report metrics for the zero-shot run, treating "positive" as the positive class.
print("Accuracy :", accuracy_score(labels, y_pred2))
for caption, scorer in (
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
):
    print(caption, scorer(labels, y_pred2, pos_label="positive"))

In [None]:
# Zero-shot predictions again, echoing the first five for manual inspection.
inputs = [prompt + sentence for sentence in sentences]
y_pred = []

for idx in range(len(inputs)):
    sentence = sentences[idx]
    label = generate_label(model, tokenizer, prompt, sentence)

    # Only show a handful of examples to keep the output readable.
    if idx < 5:
        print(f"Original Sentence: {sentence}\nModel Output: '{label}'\n")

    y_pred.append(label)


In [None]:
# Report metrics for the second zero-shot run, treating "positive" as the positive class.
print("Accuracy :", accuracy_score(labels, y_pred))
for caption, scorer in (
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
):
    print(caption, scorer(labels, y_pred, pos_label="positive"))

In [None]:
# Distinct labels predicted in the zero-shot run.
set(y_pred)

In [None]:
# 5-shot classification prompt: instruction, five labeled examples, then an
# open "Sentence: " slot for the sentence to classify.
_few_shot_5 = [
    ("I loved the movie and the acting was amazing.", "positive"),
    ("The service was slow and the staff was rude.", "negative"),
    ("This product works perfectly and exceeded my expectations.", "positive"),
    ("I am very disappointed with the quality.", "negative"),
    ("The experience was enjoyable and I would recommend it.", "positive"),
]
prompt5 = (
    "Classify the sentiment of the sentence.\n"
    "Respond with ONLY ONE WORD: positive or negative.\n\n"
    + "".join(
        f"Sentence: {example}\nSentiment: {label}\n\n"
        for example, label in _few_shot_5
    )
    + "Sentence: "
)


In [None]:
# Full 5-shot inputs (prompt + sentence). The prediction loop enumerates this
# list but passes `prompt5` and the raw sentence to `generate_label` itself.
inputs5 = [prompt5 + s for s in sentences]

In [None]:
# 5-shot predictions, echoing the first five for manual inspection.
y_pred5 = []

for idx in range(len(inputs5)):
    sentence = sentences[idx]
    label = generate_label(model, tokenizer, prompt5, sentence)

    if idx < 5:
        print(f"Original Sentence: {sentence}\nModel Output: '{label}'\n")

    y_pred5.append(label)


In [None]:
# Report metrics for the 5-shot run, treating "positive" as the positive class.
print("Accuracy :", accuracy_score(labels, y_pred5))
for caption, scorer in (
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
):
    print(caption, scorer(labels, y_pred5, pos_label="positive"))

In [None]:
# 10-shot classification prompt: instruction, ten labeled examples alternating
# positive/negative, then an open "Sentence: " slot for the sentence to classify.
prompt10 = (
    "Classify the sentiment of the sentence.\n"
    "Respond with ONLY ONE WORD: positive or negative.\n\n"

    "Sentence: The app is fast, intuitive, and very easy to navigate.\n"
    "Sentiment: positive\n\n"

    # The apostrophe below was mis-encoded ("didnâ€™t"); a plain ASCII
    # apostrophe is used so the example reaches the model as clean text.
    "Sentence: I waited over an hour and still didn't get what I ordered.\n"
    "Sentiment: negative\n\n"

    "Sentence: The sound quality is impressive for such a small device.\n"
    "Sentiment: positive\n\n"

    "Sentence: The movie had a great idea but the execution was awful.\n"
    "Sentiment: negative\n\n"

    "Sentence: Shipping was quick and everything arrived in perfect condition.\n"
    "Sentiment: positive\n\n"

    "Sentence: The interface feels outdated and confusing.\n"
    "Sentiment: negative\n\n"

    "Sentence: This hotel was clean, quiet, and very comfortable.\n"
    "Sentiment: positive\n\n"

    "Sentence: I kept encountering bugs that made the app unusable.\n"
    "Sentiment: negative\n\n"

    "Sentence: The food was flavorful and beautifully presented.\n"
    "Sentiment: positive\n\n"

    "Sentence: For the price, the performance is disappointing.\n"
    "Sentiment: negative\n\n"

    "Sentence: "
)


In [None]:
# Full 10-shot inputs (prompt + sentence). The prediction loop enumerates this
# list but passes `prompt10` and the raw sentence to `generate_label` itself.
inputs10 = [prompt10 + s for s in sentences]

In [None]:
# 10-shot predictions, echoing the first five for manual inspection.
y_pred10 = []

for idx in range(len(inputs10)):
    sentence = sentences[idx]
    label = generate_label(model, tokenizer, prompt10, sentence)

    if idx < 5:
        print(f"Original Sentence: {sentence}\nModel Output: '{label}'\n")

    y_pred10.append(label)


In [None]:
# Report metrics for the 10-shot run, treating "positive" as the positive class.
print("Accuracy :", accuracy_score(labels, y_pred10))
for caption, scorer in (
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
):
    print(caption, scorer(labels, y_pred10, pos_label="positive"))

In [None]:
# Distinct labels predicted in the 5-shot run.
set(y_pred5)

# TASK 2

In [None]:
# Install the metric packages used in Task 2: `evaluate` is imported below;
# bert_score and sacrebleu are presumably backends for its metrics — TODO confirm.
%pip install evaluate
%pip install bert_score
%pip install sacrebleu

In [None]:
# Reload the parallel data for Task 2: first two columns, first 999 rows,
# then drop element 0 of each list (the header line read as data).
data = pd.read_csv('test_en_parallel.txt', sep='\t', header=None)[[0, 1]]
data.columns = ['NEGATIVE', 'POSITIVE']
data = data.iloc[:999]
sentences_ne = data['NEGATIVE'].values.tolist()[1:]
sentences_pos = data['POSITIVE'].values.tolist()[1:]


In [None]:
# Imported here rather than in the top import cell because `evaluate` is only
# installed by the %pip cell earlier in this section.
from evaluate import load

# Metric objects reused by every Task 2 evaluation cell.
bleu = load("bleu")
bertscore = load("bertscore")


In [None]:
# NOTE(review): `instruction` is not referenced by any other cell visible in this notebook.
instruction = 'Convert the negative-sentiment sentence into a positive-sentiment sentence: '

In [None]:
# Zero-shot rewriting prompt; the negative sentence is appended after "Sentence: ".
base_prompt = "\n".join([
    "You are given a sentence with negative sentiment.",
    "Rewrite it so that it clearly expresses positive sentiment,",
    "while keeping the original meaning.",
    "",
    "Sentence: ",
])


In [None]:
# Zero-shot rewrites: one greedy generation per negative sentence.
generated_0 = []

for neg in sentences_ne:
    # Use a dedicated name here: `prompt` is the zero-shot classification
    # prompt from Task 1 and must not be overwritten by this loop.
    rewrite_prompt = base_prompt + neg + "\nPositive version:"

    # Send inputs to wherever the model lives rather than a hard-coded GPU.
    tokens = tokenizer(rewrite_prompt, return_tensors="pt").to(model.device)

    output = model.generate(
        **tokens,
        max_new_tokens=40,
        do_sample=False
    )

    # Decode only the continuation, not the prompt.
    input_len = tokens["input_ids"].shape[1]
    gen_tokens = output[0][input_len:]
    decoded = tokenizer.decode(gen_tokens, skip_special_tokens=True)

    # The answer is the first line after "Positive version:"; anything the
    # model keeps writing past that line is not part of the rewrite and
    # would distort the BLEU / BERTScore comparison.
    first_line = decoded.strip().split("\n", 1)[0].strip()
    generated_0.append(first_line)

In [None]:
# BLEU of the zero-shot rewrites against the reference positive sentences.
bleu.compute(predictions=generated_0, references = sentences_pos)

In [None]:
# BERTScore of the zero-shot rewrites, scored with the DeBERTa-xlarge-MNLI checkpoint.
bertscore.compute(predictions=generated_0, references=sentences_pos, model_type='microsoft/deberta-xlarge-mnli')

In [None]:
# 5-shot rewriting prompt: instruction, five negative/positive pairs, then an
# open "Sentence: " slot for the sentence to rewrite.
_rewrite_pairs_5 = [
    ("The service was terrible.", "The service was excellent."),
    ("I hated this movie.", "I really enjoyed this movie."),
    ("The product is awful.", "The product is great."),
    ("This was a bad experience.", "This was a great experience."),
    ("I am disappointed.", "I am very satisfied."),
]
base_prompt5 = (
    "Rewrite the following sentence so that it expresses a positive sentiment.\n\n"
    + "".join(
        f"Sentence: {negative}\nPositive version: {positive}\n\n"
        for negative, positive in _rewrite_pairs_5
    )
    + "Sentence: "
)

In [None]:
# 5-shot rewrites: one greedy generation per negative sentence.
generated_5 = []

for neg in sentences_ne:
    # Use a dedicated name here: `prompt` is the zero-shot classification
    # prompt from Task 1 and must not be overwritten by this loop.
    rewrite_prompt = base_prompt5 + neg + "\nPositive version:"

    # Send inputs to wherever the model lives rather than a hard-coded GPU.
    tokens = tokenizer(rewrite_prompt, return_tensors="pt").to(model.device)
    output = model.generate(**tokens, max_new_tokens=40, do_sample=False)

    # Decode only the continuation, not the prompt.
    input_len = tokens["input_ids"].shape[1]
    decoded = tokenizer.decode(output[0][input_len:], skip_special_tokens=True)

    # The answer is the first line after "Positive version:"; anything the
    # model keeps writing past that line (e.g. further "Sentence:" examples)
    # is not part of the rewrite and would distort BLEU / BERTScore.
    first_line = decoded.strip().split("\n", 1)[0].strip()
    generated_5.append(first_line)


In [None]:
# BLEU of the 5-shot rewrites against the reference positive sentences.
bleu.compute(predictions=generated_5, references = sentences_pos)

In [None]:
# BERTScore of the 5-shot rewrites, scored with the DeBERTa-xlarge-MNLI checkpoint.
bertscore.compute(predictions=generated_5, references=sentences_pos, model_type='microsoft/deberta-xlarge-mnli')

In [None]:
base_prompt = (
    "Rewrite the following sentence so that it expresses a positive sentiment.\n\n"
    "Sentence: The staff was rude and unhelpful.\n"
    "Positive version: The staff was friendly and very helpful.\n\n"
    "Sentence: I did not enjoy the meal at all.\n"
    "Positive version: I really enjoyed the meal.\n\n"
    "Sentence: The movie was disappointing and boring.\n"
    "Positive version: The movie was entertaining and enjoyable.\n\n"
    "Sentence: This product feels cheap and poorly made.\n"
    "Positive version: This product feels well made and reliable.\n\n"
    "Sentence: The service took too long and was frustrating.\n"
    "Positive version: The service was quick and pleasant.\n\n"
    "Sentence: I am unhappy with my purchase.\n"
    "Positive version: I am very happy with my purchase.\n\n"
    "Sentence: The quality is below expectations.\n"
    "Positive version: The quality exceeded my expectations.\n\n"
    "Sentence: I would not recommend this to anyone.\n"
    "Positive version: I would highly recommend this to others.\n\n"
    "Sentence: The experience left me dissatisfied.\n"
    "Positive version: The experience left me satisfied.\n\n"
    "Sentence: This was a waste of time and money.\n"
    "Positive version: This was worth both the time and money.\n\n"
    "Sentence: "
)


In [None]:
generated_10 = []

for neg in sentences_ne:
    prompt = base_prompt + neg + "\nPositive version:"

    tokens = tokenizer(prompt, return_tensors="pt").to("cuda:0")
    output = model.generate(**tokens, max_new_tokens=40, do_sample=False)

    input_len = tokens["input_ids"].shape[1]
    decoded = tokenizer.decode(output[0][input_len:], skip_special_tokens=True)

    generated_10.append(decoded.strip())


In [None]:
# BLEU of the 10-shot rewrites against the reference positive sentences.
bleu.compute(predictions=generated_10, references = sentences_pos)

In [None]:
# BERTScore of the 10-shot rewrites, scored with the DeBERTa-xlarge-MNLI checkpoint.
bertscore.compute(predictions=generated_10, references=sentences_pos, model_type='microsoft/deberta-xlarge-mnli')

In [None]:
# Inspect the full list of 5-shot classification predictions from Task 1.
y_pred5

In [None]:
# Distinct labels predicted in the 10-shot run.
set(y_pred10)

In [None]:
# The models perform better than those from the previous lab exercises.