In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/eng1-benchmark/SQuAD-v1.1.csv


# Load Model

In [3]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_model_and_tokenizer(model_name):
    """
    Load a model and tokenizer for text generation.

    Parameters:
        - model_name (str): The Hugging Face model name or path.

    Returns:
        - device (str): The device the model is loaded on.
        - tokenizer: Loaded tokenizer for the model.
        - model: Loaded model, configured for FP16 and device mapping.
    """
    # Set CUDA configurations to avoid fragmentation
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    # Define device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    print(f"Loading model '{model_name}' on device: {device}...")
    
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    # Load the model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # FP16 precision for lower memory usage
        low_cpu_mem_usage=True,     # Minimize CPU memory footprint
        device_map="auto"           # Automatically map layers to available devices
    )

    print("Model and tokenizer loaded successfully!")
    return device, tokenizer, model


In [4]:
import pandas as pd
import torch

# Function to generate answers
def generate_answer(context, question, tokenizer, model, device, language="english", max_length=100):
    """
    Generate an answer to the question using the given context.

    Args:
        context (str): The input context.
        question (str): The input question.
        tokenizer: Tokenizer for the model.
        model: Model to generate answers.
        device (str): Device to run the model on.
        language (str): "english" or "arabic" for prompt template.
        max_length (int): Maximum length of the generated text.

    Returns:
        str: The generated answer.
    """
    if language == "arabic":
        input_text = (
            f"النص: {context}\n"
            f"السؤال: {question}\n"
            f"يرجى تقديم الإجابة بناءً فقط على النص أعلاه, كما يرجي فقط الاجابة علي السؤال وعدم اعادة كتابة النص في الاجابة.\nالإجابة:"
        )
    else:
        input_text = (
            f"Context: {context}\n"
            f"Question: {question}\n"
            f"Please provide an answer based only on the context above.\nAnswer:"
        )


    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    inputs = inputs.to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs, 
            max_new_tokens=max_length, 
            pad_token_id=tokenizer.eos_token_id
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract the answer
    if "الإجابة:" in generated_text:
        return generated_text.split("الإجابة:")[-1].strip()
    elif "Answer:" in generated_text:
        return generated_text.split("Answer:")[-1].strip()
    return generated_text


# Function to process the dataset
def process_dataset(dataset_path, output_file, tokenizer, model, device, language, num_rows=50):
    """
    Process a dataset to generate answers and save to a file.

    Args:
        dataset_path (str): Path to the input dataset.
        output_file (str): Path to save the generated answers.
        tokenizer: Tokenizer for the model.
        model: Model to generate answers.
        device (str): Device to run the model on.
        language (str): "english" or "arabic".
        num_rows (int): Number of rows to process.
    """
    print(f"Processing dataset: {dataset_path}")
    df = pd.read_csv(dataset_path).head(num_rows)
    results = []

    for _, row in df.iterrows():
        context = row['context']
        question = row['question']
        answer = generate_answer(context, question, tokenizer, model, device, language)

        results.append({
            "Context": context,
            "Question": question,
            "Generated Answer": answer
        })
        print(f"Generated Answer: {answer}")

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_file, index=False, encoding="utf-8-sig")
    print(f"Results saved to {output_file}")

# HeshamHaroon

In [1]:
# Define model path
model_path = "HeshamHaroon/Arabic-llama3"  # Change this to any model path as needed

# Load model and tokenizer
device, tokenizer, model = load_model_and_tokenizer(model_path)

# google/gemma-2-2b v2 (latest)

In [5]:
# Define model path
model_path = "/kaggle/input/gemma-2/transformers/gemma-2-2b/2"  # Change this to any model path as needed

# Load model and tokenizer
device, tokenizer, model = load_model_and_tokenizer(model_path)

Loading model '/kaggle/input/gemma-2/transformers/gemma-2-2b/2' on device: cuda...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model and tokenizer loaded successfully!


# SQuAD

In [11]:
# Define dataset paths
dataset_path = "/kaggle/input/eng1-benchmark/SQuAD-v1.1.csv"  # Update this path
output_file = "generated_answers_eng.csv"

# Specify language ("arabic" or "english")
language1 = "english"  # Change to "english" if needed

# Process the dataset
process_dataset(dataset_path, output_file, tokenizer, model, device, language1, num_rows=128)

# Arabic Wiki Articles

In [15]:
# Define dataset paths
dataset_path = "/kaggle/input/wiki-ara-qa/generated_questions_answers.csv"  # Update this path
output_file = "generated_answers_Ara.csv"

# Specify language ("arabic" or "english")
language1 = "english"  # Change to "english" if needed

# Process the dataset
process_dataset(dataset_path, output_file, tokenizer, model, device, language1, num_rows=None)

Processing dataset: /kaggle/input/wiki-ara-qa/generated_questions_answers.csv
Generated Answer: Context: يقول الخوارزمي في كتابه مفاتيح العلوم:
 وتدل هذه التسمية على دراسة كل من الخيمياء (الكيمياء القديمة) والكيمياء العملية الحديثة من قبل العلماء المسلمين والعالم الإسلامي خلال القرون الوسطى. وكلمة خيمياء (بالإنجليزية: Alchemy)‏ نفسها مستمدة من الكلمة العربية «الكيمياء».
 بعد سقوط الإمبراطورية الرومانية، انتقل وتركز التطوير الكيميائي في الإمبراطورية العربية والحضارة الإسلامية. إن الكثير مما هو معروف عن الخيمياء الإسلامية مصدره في الحقيقة من الكتابات المنحدرة عبر السنين والمحفوظة كترجمات عربية.[1]
كثيرًا ما تداخلت دراسة الخيمياء والكيمياء في العهود الأولى من عمر العالم الإسلامي، ولكن ظهرت في وقت لاحق نزاعات بين الخيميائيين التقليديين والكيميائيين العمليين الذين رفضوا تصديق الخيمياء. يعتبر الكيميائيون والخيميائيون المسلمون هم أول من استخدم المنهج العلمي التجريبي (كما يمارس في الكيمياء الحديثة)، في حين وضع الخيميائيين المسلمين نظريات عن تحويل الفلزات، وحجر الفلاسفة، والتكوين (حياة اصطناعية

# ROUGE & BLUE SCORES

In [8]:
!pip install rouge_score
!pip install bert_score

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=4e8205812aed7816825029082db28fc26e3aa28caaf5f8a2955aceeb1e308c59
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [16]:
import pandas as pd
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Load original and generated data
original_data_path = "/kaggle/input/wiki-ara-qa/generated_questions_answers.csv"  # Original file path
results_data_path = "/kaggle/working/generated_answers_Ara.csv"  # Your results file

# Load the original and generated data
original_data = pd.read_csv(original_data_path)
generated_data = pd.read_csv(results_data_path)

# Extract relevant columns
expected_answers = original_data['answer'].tolist()
generated_answers = generated_data['Generated Answer'].tolist()

# Initialize ROUGE scorer
rouge_scorer_obj = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Calculate ROUGE scores
rouge_scores = []
for expected, generated in zip(expected_answers, generated_answers):
    scores = rouge_scorer_obj.score(str(expected), str(generated))
    rouge_scores.append(scores)

# Calculate average ROUGE scores
avg_rouge1 = sum(score['rouge1'].fmeasure for score in rouge_scores) / len(rouge_scores)
avg_rouge2 = sum(score['rouge2'].fmeasure for score in rouge_scores) / len(rouge_scores)
avg_rougeL = sum(score['rougeL'].fmeasure for score in rouge_scores) / len(rouge_scores)

print(f"Average ROUGE-1: {avg_rouge1:.2f}")
print(f"Average ROUGE-2: {avg_rouge2:.2f}")
print(f"Average ROUGE-L: {avg_rougeL:.2f}")


# BLEU Score Calculation
smooth_func = SmoothingFunction().method1  # Smoothing to handle short sentences

bleu_scores = []
for expected, generated in zip(expected_answers, generated_answers):
    reference = [str(expected).split()]  # BLEU expects a list of reference tokens
    hypothesis = str(generated).split()  # Hypothesis (generated answer)
    score = sentence_bleu(reference, hypothesis, smoothing_function=smooth_func)
    bleu_scores.append(score)

average_bleu = sum(bleu_scores) / len(bleu_scores)

print(f"Average BLEU Score: {average_bleu:.2f}")


Average ROUGE-1: 0.11
Average ROUGE-2: 0.04
Average ROUGE-L: 0.11
Average BLEU Score: 0.03
