<a href="https://colab.research.google.com/github/Reemaalt/Detection-of-Hallucination-in-Arabic/blob/main/RougeL_Labeling_of_Answers_XOR_Farasa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## QA pairs generated using Llama 8B on XQuAD

In [None]:
!pip install rouge-score



# Test ROUGE-L

In [None]:
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): presumably required to download the tokenizer loaded in a
# later cell — confirm whether that model is gated.
from huggingface_hub import login
login()

In [None]:
pip install farasapy




In [None]:
from transformers import AutoTokenizer
from rouge_score import rouge_scorer
from farasa.stemmer import FarasaStemmer

# Load tokenizer
# The Hugging Face tokenizer for the model named below; CustomTokenizer uses
# it for the final split of the stemmed text.
model_name = "core42/jais-13b"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Initialize Farasa Stemmer
# CustomTokenizer calls stemmer.stem() on every whitespace-separated word.
stemmer = FarasaStemmer()

# Define a custom tokenizer class that applies both tokenization and stemming
class CustomTokenizer:
    """Stem each word of a text, then tokenize the stemmed text.

    Instances expose a ``tokenize(text)`` method, which is what the ROUGE
    scorer calls when this object is passed as its ``tokenizer``.

    Args:
        tokenizer: Object with a ``tokenize(str)`` method that returns the
            final token list (here, a Hugging Face tokenizer).
        stemmer: Object with a ``stem(str)`` method that returns the stemmed
            form of a word (here, a Farasa stemmer).
        verbose: When true (the default, matching the original behaviour),
            print the stemmed words on every ``tokenize`` call. Pass
            ``verbose=False`` to silence the debug output when scoring many
            answers.
    """

    def __init__(self, tokenizer, stemmer, *, verbose=True):
        self.tokenizer = tokenizer
        self.stemmer = stemmer
        self.verbose = verbose

    def tokenize(self, text):
        """Return the tokens of ``text`` after stemming each word.

        The text is split on whitespace, every word is stemmed on its own,
        the stems are re-joined with single spaces, and the result is handed
        to the wrapped tokenizer.
        """
        stemmed_words = [self.stemmer.stem(word) for word in text.split()]
        if self.verbose:
            # Debug aid: shows what the stemmer produced for this text.
            print("Stemmed Words:", stemmed_words)
        return self.tokenizer.tokenize(" ".join(stemmed_words))

# Create an instance of our custom tokenizer
custom_tokenizer = CustomTokenizer(tokenizer, stemmer)

# Create the ROUGE scorer with our custom tokenizer
# Only ROUGE-L is requested; the scorer calls custom_tokenizer.tokenize() on
# both the reference and the prediction.
scorer = rouge_scorer.RougeScorer(['rougeL'], tokenizer=custom_tokenizer)

# Example strings
pred_str = 'السلام عليكم كيف حالك'
label_str = 'السلام عليكم صديقي كيف حالك'

# Compute ROUGE scores
# Argument order is (reference, prediction).
scores = scorer.score(label_str, pred_str)

# Print each ROUGE metric with its precision/recall/F-measure.
# (The "Stemmed Words" lines in the output come from CustomTokenizer.tokenize.)
for key, value in scores.items():
    print(f'{key}: {value}')


Stemmed Words: ['سلام', 'على', 'صديق', 'كيف', 'حال']
Stemmed Words: ['سلام', 'على', 'كيف', 'حال']
rougeL: Score(precision=1.0, recall=0.8, fmeasure=0.888888888888889)


# Use ROUGE-L on data

In [None]:


# Load data from JSON file
def load_data(file_path):
    """Read a UTF-8 encoded JSON file and return the parsed content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces for the file).
    """
    # Imported here because no visible cell imports json; this keeps the
    # function usable on a fresh kernel.
    import json

    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Save labeled data to a new JSON file
def save_labels(output_data, output_file_path):
    """Write ``output_data`` to ``output_file_path`` as pretty-printed JSON.

    The file is written as UTF-8 with a 4-space indent, and non-ASCII
    characters (e.g. Arabic text) are kept as-is rather than escaped.

    Args:
        output_data: JSON-serialisable object to store.
        output_file_path: Destination path; an existing file is overwritten.
    """
    # Imported here because no visible cell imports json; this keeps the
    # function usable on a fresh kernel.
    import json

    with open(output_file_path, 'w', encoding='utf-8') as file:
        json.dump(output_data, file, ensure_ascii=False, indent=4)

# Process hallucination detection efficiently
def check_hallucination(data, rouge_threshold=0.3, min_non_hallucinated=6, score_fn=None):
    """Label every answer, and every question, as hallucinated or not via ROUGE-L.

    For each cluster, the first non-empty answer is scored against the
    question's ``original_answer``. The cluster is "Non-Hallucinated" when the
    ROUGE-L F-measure is at least ``rouge_threshold``. Every answer in the
    cluster gets a dict ``{"rouge_l_f1": ..., "rouge_label": ...}`` appended
    to its entry. Answers that come before the first non-empty answer get
    ``"Inherited"`` as score and the default "Hallucinated" label.

    A question is "Non-Hallucinated" when at least ``min_non_hallucinated`` of
    its answers (across all clusters) carry that label.

    The input is not modified: each item is deep-copied before labels are
    appended, so calling the function again on the same ``data`` does not
    stack duplicate label dicts.

    Args:
        data: Mapping of question id to an item dict with the keys
            ``"original_answer"`` (str) and ``"clusters"`` (list of dicts,
            each with an ``"answers"`` list whose entries are lists with the
            answer text at index 0).
        rouge_threshold: Minimum ROUGE-L F-measure for "Non-Hallucinated".
        min_non_hallucinated: Minimum number of non-hallucinated answers for
            the question to be labelled "Non-Hallucinated".
        score_fn: Optional callable ``(reference, prediction)`` returning a
            mapping whose ``"rougeL"`` entry has an ``fmeasure`` attribute.
            Defaults to the module-level ``scorer.score``.

    Returns:
        A new dict mapping each question id to a copy of its item with the
        per-answer labels appended and a ``"computed_question_label"`` key.
    """
    import copy

    if score_fn is None:
        # Resolved at call time so the notebook-level scorer is used by default.
        score_fn = scorer.score

    updated_data = {}

    for question_id, original_item in data.items():
        # Work on a private copy so the caller's nested lists stay untouched.
        item = copy.deepcopy(original_item)
        original_answer = item["original_answer"].strip()  # Ground truth
        non_hallucinated_count = 0

        for cluster in item["clusters"]:
            cluster_label = "Hallucinated"  # Default until an answer is scored
            rouge_l_f1 = None  # None means "no answer scored yet in this cluster"

            for answer_entry in cluster["answers"]:
                answer = answer_entry[0].strip()

                if rouge_l_f1 is None and answer:
                    # Score only the first non-empty answer; the rest of the
                    # cluster inherits its score and label.
                    rouge_l_f1 = score_fn(original_answer, answer)["rougeL"].fmeasure
                    cluster_label = (
                        "Non-Hallucinated" if rouge_l_f1 >= rouge_threshold else "Hallucinated"
                    )

                answer_entry.append({
                    "rouge_l_f1": round(rouge_l_f1, 2) if rouge_l_f1 is not None else "Inherited",
                    "rouge_label": cluster_label
                })

                if cluster_label == "Non-Hallucinated":
                    non_hallucinated_count += 1

        # Final question-level labeling
        computed_question_label = (
            "Non-Hallucinated" if non_hallucinated_count >= min_non_hallucinated else "Hallucinated"
        )

        updated_data[question_id] = {
            **item,
            "computed_question_label": computed_question_label
        }

    return updated_data

# File paths
# The input is an absolute path under /content; the output path is relative
# to the current working directory.
input_file_path = "/content/human_sample_xor_tydiqa.json"
output_file_path = "labeled_data_XORsample_rougel_farasa.json"

# Load data
data = load_data(input_file_path)

# Process and add labels
# Uses the default thresholds of check_hallucination.
updated_data = check_hallucination(data)

# Save updated data
save_labels(updated_data, output_file_path)

print(f"Labeled data has been saved to {output_file_path}")

Stemmed Words: ['329']
Stemmed Words: ['329', 'مقعد']
Stemmed Words: ['329']
Stemmed Words: ['328', 'مقعد']
Stemmed Words: ['329']
Stemmed Words: ['328', 'مقعد']
Stemmed Words: ['329']
Stemmed Words: ['1925']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['طراز', 'كلاسيكي']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['طر']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['سمى']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['طراز', 'بار']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['طراز', 'معماري', 'روماني', 'سمى', 'طراز', 'كلاسيكي']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['سمى', 'طراز', 'معماري', 'روماني', '" طراز', 'باروكي "']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['هو', 'طراز', 'معمار', 'اعتمد', 'على', 'استخدام', 'مبنى', 'كلاسيكي', 'يوناني']
Stemmed Words: ['عمارة', 'قوطي']
Stemmed Words: ['إمبراطوري']
Stemmed Words: ['اغتيال']
Stemmed Words: ['توفى', 'فلاديمير', 'لين', 'في', '21', 'يناير', '1924', 'بعد', 'أن', 'تعرض', 'عدوى', 'تسممية', 'نات']
Stemm