In [None]:
!pip install sentence_transformers chromadb evaluate rouge_score

In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import chromadb
import torch
import json
import evaluate
import nltk
import numpy as np
nltk.download("punkt", quiet=True)

True

In [None]:
# Load the sentence-embedding model by its identifier. The same model embeds both
# the reference answers (when indexing) and the generated answers (when querying).
model = SentenceTransformer("LorMolf/CA_italian_sentence_transformer")

In [None]:
def load_answers(generative_path="/content/CdA-mininterno-quiz_dataset.csv"):
    """Load the reference question/answer dataset from a CSV file.

    Args:
        generative_path: Path of the CSV file to read. Defaults to the location
            this notebook originally hard-coded, so calling the function with
            no arguments behaves as before.

    Returns:
        A pandas DataFrame with the CSV contents, unfiltered.
    """
    # Optional filter, currently disabled:
    # df = df[df['Question'].str.len() >= 5]
    return pd.read_csv(generative_path)

In [None]:
# Reference dataset; its "Risposta" and "Id" columns are read when the answers
# are indexed below.
df = load_answers()

In [None]:
# Chroma client created with default settings (presumably ephemeral/in-memory —
# confirm against the installed chromadb version).
client = chromadb.Client()

# Collection that will hold one embedding per reference answer.
collection = client.create_collection(name="answer_embeddings_ca_sentence_transformer")

In [None]:
# Index every reference answer in the collection, keyed by its stringified "Id".
# Answers are encoded and added in batches rather than one row at a time, and the
# loop no longer binds a notebook-level variable named `id`, which would replace
# the builtin `id` for the rest of the session.
INDEX_BATCH_SIZE = 256

reference_answers = df["Risposta"].tolist()
reference_ids = [str(answer_id) for answer_id in df["Id"]]

for batch_start in range(0, len(reference_answers), INDEX_BATCH_SIZE):
    batch_answers = reference_answers[batch_start:batch_start + INDEX_BATCH_SIZE]
    collection.add(
        # encode() on a list yields one embedding row per answer in the batch.
        embeddings=model.encode(batch_answers).tolist(),
        documents=batch_answers,
        ids=reference_ids[batch_start:batch_start + INDEX_BATCH_SIZE],
    )

In [None]:
def r_at_k(collection, embeddings, ids, k):
    """Compute recall@k of query embeddings against an indexed collection.

    For each (embedding, expected id) pair, the collection is queried for its
    `k` nearest entries; the pair counts as a hit when the stringified expected
    id is among the returned ids.

    Args:
        collection: Object exposing a Chroma-style
            `query(query_embeddings=..., n_results=..., include=...)` method
            whose result has an "ids" entry holding one id list per query.
        embeddings: Sequence of query embeddings, aligned with `ids`.
        ids: Sequence of expected ids; each is compared as `str(id)`.
        k: Number of nearest entries to retrieve per query.

    Returns:
        Fraction of hits over `len(ids)`, or 0.0 when `ids` is empty (the
        original raised ZeroDivisionError in that case).
    """
    if len(ids) == 0:
        return 0.0

    hits = 0
    for query_embedding, expected_id in zip(embeddings, ids):
        results = collection.query(
            # One query per call, passed as a single-element batch.
            query_embeddings=[query_embedding],
            n_results=k,
            include=["documents"],
        )
        # results["ids"][0] is the id list for the only query in this call.
        if str(expected_id) in results["ids"][0]:
            hits += 1
    return hits / len(ids)



In [None]:
# Path of the predictions JSON file to evaluate. Every commented-out line below is
# an alternative predictions file; to evaluate a different run, comment out the
# active assignment at the bottom and uncomment exactly one of the alternatives.
# predictions_path = '/content/zephyr_self_supervised_on_question_answering_8_2_32.json'
# predictions_path = '/content/zephyr_self_supervised_on_question_answering_8_2_64.json'
# predictions_path = '/content/zephyr_finetuned_question_answering.json'
# predictions_path = '/content/zephyr_finetuned_mutliple_choice_on_multiple_choice_8_8_123.json'
# predictions_path = '/content/zephyr_base_question-answering_8_8_15.json'

# predictions_path = '/content/zephyr_self_supervised_on_question_answering_8_2_64.json'

# predictions_path = '/content/zephyr-7b-beta_10ep_self_supervised_on_question_answering_8_2_32.json'
# predictions_path = '/content/zephyr-7b-beta_10ep_self_supervised_on_question_answering_8_2_64.json'
# predictions_path = '/content/rag_zephyr-7b-beta_10ep_self_supervised_64.json'

# predictions_path = '/content/phi_finetuned_question_answering_32.json'
# predictions_path = '/content/phi_syntetic_finetuned_question_answering_32.json'
# predictions_path = '/content/phi2_base_question-answering_32.json'
# predictions_path = '/content/phi2_finetuned_multiple_choice_on_question_answering_0_5.json'
# predictions_path = '/content/phi2_finetuned_multiple_choice_on_question_answering_2_64.json'
# predictions_path = '/content/rag_phi_finetuned_question_answering_32.json'
# predictions_path = '/content/phi2_finetuned_synthetic_question-answering_15.json'
# predictions_path = '/content/rag_zephyr_finetuned_self_supervised.json'
# predictions_path = '/content/phi2_finetuned_question-answering_15.json'


# predictions_path = '/content/llamantino_base_question-answering_3_2_8_32.json'
# predictions_path = '/content/llamantino_finetuned_mutliple_choice_on_question_answering_32.json'
# predictions_path = '/content/llamantino_syntetic_finetuned_question_answering_32.json'
# predictions_path = '/content/llamantino_finetuned_question_answering_32.json'

# predictions_path = '/content/flan-t5-large_synthetic_question_answering_64.json'
# predictions_path = '/content/flan-t5-large_base_question-answering_2_8_20.json'
# predictions_path = '/content/flan-t5-large_finetuned_multiple_choice_on_question_answering_64.json'
predictions_path = '/content/flan-t5-large_finetuned_question-answering_2_8_20.json'

In [None]:
# Load the selected predictions file. The encoding is given explicitly so that
# non-ASCII text is decoded the same way regardless of the platform default.
# Later cells read the 'generated_text', 'id' and 'answer' keys of each record.
with open(predictions_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [None]:
# Split the prediction records into two parallel lists (generated text and
# record id), then bundle them in one dict for the retrieval evaluation.
generated_texts = [record['generated_text'] for record in data]
ids = [record['id'] for record in data]

result_dict = {
    'generated_text': generated_texts,
    'id': ids,
}

In [None]:
# Embed all generated answers with a single encode() call and convert the result
# to plain Python lists, the form later passed to the collection queries.
embeddings = model.encode(result_dict["generated_text"]).tolist()

In [None]:
# Recall@k of the generated answers against the indexed reference answers, for
# each cutoff k; keys have the form "r@<k>".
result = {
    f"r@{k}": r_at_k(collection, embeddings, result_dict["id"], k)
    for k in (1, 3, 5, 10, 20, 50)
}

In [None]:
# Display the recall@k scores computed in the previous cell.
result

In [None]:
# Pair each record's reference answer ("label") with its generated text ("pred").
labels_preds = [{"label" : item['answer'], "pred" : item['generated_text']} for item in data]

def evaluate_question_answering(pairs=None, output_path="/content/rouge.json"):
    """Score generated answers against reference answers with ROUGE.

    Args:
        pairs: Iterable of dicts with string values under "label" (reference)
            and "pred" (generated text). Defaults to the notebook-level
            `labels_preds`, which is what the original function always used.
        output_path: File the scores are appended to as pretty-printed JSON.
            Defaults to the path the original function hard-coded.

    Returns:
        Dict of ROUGE scores scaled to percentages and rounded to two decimals,
        plus an aggregate "R": the mean of rouge1, rouge2 and rougeL divided by
        (1 + the variance of those three scores expressed as fractions).
    """
    if pairs is None:
        pairs = labels_preds

    metric_rouge = evaluate.load("rouge")

    # Strip each text and put one sentence per line before scoring (presumably
    # the layout the rougeLsum variant expects — confirm against the metric docs).
    # NOTE(review): sent_tokenize is called with its default language while the
    # embedding model name suggests Italian text — confirm this is intended.
    processed_preds = ["\n".join(nltk.sent_tokenize(item["pred"].strip())) for item in pairs]
    processed_labels = ["\n".join(nltk.sent_tokenize(item["label"].strip())) for item in pairs]

    result = metric_rouge.compute(predictions=processed_preds, references=processed_labels, use_stemmer=True)
    result = {k: round(v * 100, 2) for k, v in result.items()}

    # The mean is taken over percentage scores, the variance over fractions.
    headline_scores = [result["rouge1"], result["rouge2"], result["rougeL"]]
    result["R"] = round(
        np.mean(headline_scores) / (1 + np.var([score / 100 for score in headline_scores])),
        2,
    )

    # NOTE(review): append mode means repeated runs write several JSON objects
    # back to back, so the file as a whole is not a single valid JSON document.
    with open(output_path, 'a') as file:
        json.dump(result, file, indent=4)

    return result

In [None]:
evaluate_question_answering()

{'rouge1': 8.17, 'rouge2': 1.26, 'rougeL': 7.15, 'rougeLsum': 7.35, 'R': 5.52}