<a href="https://colab.research.google.com/github/HatemMoushir/smart-ai-assistant/blob/main/arabic_bert_sentiment_finetune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🔥 Fine-Tuning Arabic BERT on ArSentD-LEV Dataset
نموذج لتدريب BERT العربي على تصنيف المشاعر باستخدام مكتبة Hugging Face.

In [None]:
# ✅ Install the libraries used by the cells below
# (`!` hands the line to the shell from inside the notebook; `-q` keeps pip quiet).
!pip install -q transformers datasets evaluate scikit-learn

In [None]:
# ✅ Load the data
from datasets import load_dataset

# The dataset ships a single 'train' split.
full_train = load_dataset('arsentd_lev')['train']

# The rest of the notebook reads the tweet from a 'text' column, and the
# Trainer looks for the target in a 'label' column. ArSenTD-LEV is expected to
# name them 'Tweet' and 'Sentiment' instead (NOTE(review): confirm against the
# dataset card), so normalise the names here. The guard makes this a no-op if
# the schema already matches.
for source_name, target_name in (('Tweet', 'text'), ('Sentiment', 'label')):
    columns = full_train.column_names
    if source_name in columns and target_name not in columns:
        full_train = full_train.rename_column(source_name, target_name)

# Hold out 20% for evaluation; the fixed seed keeps the split reproducible.
dataset = full_train.train_test_split(test_size=0.2, seed=42)
train_ds = dataset['train']
test_ds = dataset['test']

In [None]:
# ✅ Load the model and the tokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = 'asafaya/bert-base-arabic'
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Size the classification head from the data instead of hard-coding it: a head
# with fewer outputs than there are label ids fails as soon as the loss is
# computed. The target column is 'label' when it has been normalised, otherwise
# the dataset's own 'Sentiment' column.
# NOTE(review): assumes that column is a ClassLabel feature (exposes `.names`).
label_column = 'label' if 'label' in train_ds.features else 'Sentiment'
label_names = train_ds.features[label_column].names

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_names),
    # Store readable class names in the config so predictions from the saved
    # model are not just LABEL_0..LABEL_n.
    id2label=dict(enumerate(label_names)),
    label2id={name: index for index, name in enumerate(label_names)},
)

In [None]:
# ✅ Tokenize the data
def tokenize_function(example):
    """Tokenize the 'text' field of a batch with the notebook's tokenizer.

    Sequences are truncated and padded to a fixed maximum length, so every
    row comes out the same size (no `max_length` is passed; presumably the
    tokenizer's own maximum applies — confirm).
    """
    texts = example['text']
    return tokenizer(texts, truncation=True, padding='max_length')


# Apply the tokenizer batch-wise to both splits, train first and then test.
train_ds, test_ds = (
    split.map(tokenize_function, batched=True) for split in (train_ds, test_ds)
)

In [None]:
# ✅ إعدادات التدريب
from transformers import TrainingArguments, Trainer
import evaluate
import numpy as np

# Metric object shared by every evaluation pass.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    `eval_pred` unpacks into (logits, labels). The predicted class of each row
    is the argmax over the last axis of the logits. Returns the result of
    `accuracy.compute` unchanged.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return accuracy.compute(references=references, predictions=predicted)

import inspect

# transformers 4.41 renamed `evaluation_strategy` to `eval_strategy`, and newer
# releases reject the old keyword with a TypeError. The install cell does not
# pin a version, so pick whichever spelling the installed release accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",          # checkpoints and logs are written here
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    **{_eval_strategy_key: "epoch"},  # evaluate at the end of every epoch
)

# Bundle the model, the hyper-parameters, both tokenized splits and the
# accuracy callback into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=test_ds,
    train_dataset=train_ds,
)

In [None]:
# ✅ Start training
# Kept as the last expression of the cell so the notebook displays the value
# returned by `train()`.
trainer.train()

In [None]:
# ✅ Save the trained model
# The model and the tokenizer are written to the same directory.
save_dir = "./arabic-sentiment-bert"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)