In [4]:
pip install transformers accelerate peft datasets bitsandbytes



In [5]:
!pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=28f6f0f57acd7c3822a1bed91f9d3349a01c4d6e1d90ee1606bae4ac5a5d50a1
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [7]:
import pandas as pd
from transformers import (
    MarianMTModel, MarianTokenizer, Seq2SeqTrainer, Seq2SeqTrainingArguments
)
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import Dataset
import nltk
from rouge_score import rouge_scorer
import numpy as np

# BLEU Score Calculation Function
def CalculateAvgBLEUScore(Transcribed_Sentences, Validated_Sentences, tokenizer):
    BLEU_Scores = []
    for i in range(len(Transcribed_Sentences)):
        Valid_Sentence = tokenizer.tokenize(Validated_Sentences[i])
        Transcribed_Sentence = tokenizer.tokenize(Transcribed_Sentences[i])
        BLEU_Scores.append(nltk.translate.bleu_score.sentence_bleu([Valid_Sentence], Transcribed_Sentence))
    return np.mean(BLEU_Scores)


In [8]:
# ROUGE Score Calculation Function
def CalculateAvgROUGEScore(Transcribed_Sentences, Validated_Sentences):
    rs = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    ROUGE1_Precision_Scores = []
    ROUGE1_Recall_Scores = []
    ROUGE1_F1_Scores = []
    ROUGE2_Precision_Scores = []
    ROUGE2_Recall_Scores = []
    ROUGE2_F1_Scores = []
    ROUGEL_Precision_Scores = []
    ROUGEL_Recall_Scores = []
    ROUGEL_F1_Scores = []
    for i in range(len(Transcribed_Sentences)):
        rouge_score = rs.score(Validated_Sentences[i], Transcribed_Sentences[i])
        ROUGE1_Precision_Scores.append(rouge_score['rouge1'][0])
        ROUGE1_Recall_Scores.append(rouge_score['rouge1'][1])
        ROUGE1_F1_Scores.append(rouge_score['rouge1'][2])
        ROUGE2_Precision_Scores.append(rouge_score['rouge2'][0])
        ROUGE2_Recall_Scores.append(rouge_score['rouge2'][1])
        ROUGE2_F1_Scores.append(rouge_score['rouge2'][2])
        ROUGEL_Precision_Scores.append(rouge_score['rougeL'][0])
        ROUGEL_Recall_Scores.append(rouge_score['rougeL'][1])
        ROUGEL_F1_Scores.append(rouge_score['rougeL'][2])
    return {
        "rouge1": [np.mean(ROUGE1_Precision_Scores), np.mean(ROUGE1_Recall_Scores), np.mean(ROUGE1_F1_Scores)],
        "rouge2": [np.mean(ROUGE2_Precision_Scores), np.mean(ROUGE2_Recall_Scores), np.mean(ROUGE2_F1_Scores)],
        "rougeL": [np.mean(ROUGEL_Precision_Scores), np.mean(ROUGEL_Recall_Scores), np.mean(ROUGEL_F1_Scores)]
    }

In [9]:
# Dataset preparation
df = pd.read_csv("hf://datasets/salehalmansour/english-to-arabic-translate/en_ar_final.tsv", sep="\t")
df = df[['en', 'ar']]
df = df.head(50000)
dataset = Dataset.from_pandas(df)

In [10]:
# Load model and tokenizer
model_name = "Helsinki-NLP/opus-mt-en-ar"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

lora_config = LoraConfig(
    r=2,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none"
)

model = get_peft_model(model, lora_config)



In [11]:
# Tokenize the dataset
def tokenize_function(examples):
    inputs = tokenizer(examples['en'], truncation=True, padding="max_length", max_length=128)
    targets = tokenizer(examples['ar'], truncation=True, padding="max_length", max_length=128)
    inputs['labels'] = targets['input_ids']
    return inputs

tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [12]:
# Training configuration
training_args = Seq2SeqTrainingArguments(
    output_dir="./lora-opus-mt-en-ar",
    evaluation_strategy="steps",
    eval_steps=500,
    learning_rate=3e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=2,
    weight_decay=0.01,
    save_total_limit=1,
    save_steps=1000,
    num_train_epochs=1,
    predict_with_generate=True,
    logging_dir="./logs",
    logging_steps=100,
    report_to="none",
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
)

# Train the model
trainer.train()

# Save the model and tokenizer
model.save_pretrained("./lora-opus-mt-en-ar")
tokenizer.save_pretrained("./lora-opus-mt-en-ar")

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss
500,0.2634,No log
1000,0.2491,No log
1500,0.2375,No log


('./lora-opus-mt-en-ar/tokenizer_config.json',
 './lora-opus-mt-en-ar/special_tokens_map.json',
 './lora-opus-mt-en-ar/vocab.json',
 './lora-opus-mt-en-ar/source.spm',
 './lora-opus-mt-en-ar/target.spm',
 './lora-opus-mt-en-ar/added_tokens.json')

In [18]:
# Translation of sentences
import pickle
with open('/content/Transcribed_Sentences (1).pkl', 'rb') as f:
    data = pickle.load(f)

english_sentences = [item['text'] for item in data]

arabic_sentences = english_sentences.copy()
index = 0
for sentence in english_sentences:
    print(index)
    input_ids = tokenizer(sentence, return_tensors="pt").input_ids
    output_ids = model.generate(input_ids)
    translated_sentence = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    arabic_sentences[index] = translated_sentence
    index += 1

# Saving the translated data
df = pd.DataFrame({'English': english_sentences, 'Arabic': arabic_sentences})
df.to_csv('Translated_data.csv', index=False)

print(df.iloc[3])

FileNotFoundError: [Errno 2] No such file or directory: '/content/Transcribed_Sentences (1).pkl'

In [None]:
# BLEU & ROUGE Scoring
validated_arabic_sentences = df['Arabic'].tolist()  # The validated Arabic translations from the dataset
transcribed_arabic_sentences = arabic_sentences  # The machine-generated Arabic translations

# Calculate BLEU Score
bleu_score = CalculateAvgBLEUScore(transcribed_arabic_sentences, validated_arabic_sentences, tokenizer)
print(f"Average BLEU Score: {bleu_score}")

# Calculate ROUGE Score
rouge_scores = CalculateAvgROUGEScore(transcribed_arabic_sentences, validated_arabic_sentences)
print("               Precision           | Recall             | F1")
print(f"Rouge_1 Score: {rouge_scores['rouge1']}")
print(f"Rouge_2 Score: {rouge_scores['rouge2']}")
print(f"Rouge_L Score: {rouge_scores['rougeL']}")