In [1]:
!pip install -q transformers sentencepiece scikit-learn pandas numpy tqdm openpyxl


In [2]:
import torch
import transformers
import sentencepiece
import numpy as np
from torch.utils.data import Dataset, DataLoader

print("Python:", __import__("sys").version)
print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✓ Using device: {device}")

if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
Torch: 2.9.0+cu126
Transformers: 4.57.3
✓ Using device: cuda
GPU: Tesla T4


In [3]:
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup
)
from torch.optim import AdamW
from sklearn.metrics import classification_report, accuracy_score, f1_score
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings("ignore")


In [4]:
!pip install openpyxl




In [6]:
import pandas as pd

df = pd.read_excel("/content/Main.xlsx")
df_clean = df[['Arabic MT', 'Bias']].dropna().copy()

print("Dataset loaded")
print("Samples:", len(df_clean))
print(df_clean.head())


Dataset loaded
Samples: 10800
                                           Arabic MT                      Bias
0  خاض الحوثيون في اليمن الحرب بين إسرائيل وحماس ...  Biased against Palestine
1               إسرائيل - الصراع مع حماس | وجها لوجه                  Unbiased
2  أظهرت مقاطع فيديو كيف اقتحم مسلحون من غزة مهرج...                  Unbiased
3  وقفة احتجاجية في جامعة عليكرة الإسلامية دعما ل...                  Unbiased
4  الجيش الإسرائيلي ينشر تسجيلًا صوتيًا حول صاروخ...  Biased against Palestine


In [7]:
import re

def minimal_preprocess_for_bert(text):
    if not isinstance(text, str) or not text.strip():
        return ""

    text = text.encode("utf-8", errors="ignore").decode("utf-8")
    text = re.sub(r"http\S+|www\S+|https\S+", "", text)
    text = re.sub(r"\S+@\S+", "", text)
    text = re.sub(r"@\w+", "", text)
    text = re.sub(r"#", "", text)
    text = re.sub(
        r"[^\u0600-\u06FF\s\.\,\!\?\:\;\-\(\)]",
        "",
        text
    )
    text = re.sub(r"\s+", " ", text).strip()
    return text

df_clean["bert_text"] = df_clean["Arabic MT"].apply(minimal_preprocess_for_bert)
df_clean = df_clean[df_clean["bert_text"].str.len() > 0]

print("Final samples:", len(df_clean))
print(df_clean.iloc[0]["bert_text"][:100])


Final samples: 10800
خاض الحوثيون في اليمن الحرب بين إسرائيل وحماس التي تدور رحاها على بعد أكثر من ميل من مقر سلطتهم في ص


In [8]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
label_encoder.fit(df_clean["Bias"])

target_names = list(label_encoder.classes_)
num_labels = len(target_names)

print("Label mapping:")
for i, label in enumerate(target_names):
    print(f"{i}: {label}")


Label mapping:
0: Biased against Israel
1: Biased against Palestine
2: Biased against both Palestine and Israel
3: Biased against others
4: Not Applicable
5: Unbiased
6: Unclear


In [9]:
from sklearn.model_selection import train_test_split
from collections import Counter

X_all = df_clean["bert_text"].values
y_all = label_encoder.transform(df_clean["Bias"])

X_bert_train, X_bert_test, y_bert_train, y_bert_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    random_state=42,
    stratify=y_all
)

print("Train size:", len(X_bert_train))
print("Test size:", len(X_bert_test))
print("Train label distribution:", Counter(y_bert_train))


Train size: 8640
Test size: 2160
Train label distribution: Counter({np.int64(5): 5454, np.int64(1): 2320, np.int64(6): 346, np.int64(0): 225, np.int64(3): 162, np.int64(4): 96, np.int64(2): 37})


In [10]:
class BiasDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer(
            str(self.texts[idx]),
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(),
            "attention_mask": encoding["attention_mask"].squeeze(),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }


In [11]:
MODEL_NAME = "aubmindlab/bert-base-arabertv02"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels
).to(device)

print("Model loaded:", MODEL_NAME)


tokenizer_config.json:   0%|          | 0.00/381 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/384 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded: aubmindlab/bert-base-arabertv02


In [12]:
BATCH_SIZE = 32
NUM_EPOCHS = 4

train_dataset = BiasDataset(X_bert_train, y_bert_train, tokenizer)
test_dataset = BiasDataset(X_bert_test, y_bert_test, tokenizer)

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2
)

print("Train batches:", len(train_loader))
print("Test batches:", len(test_loader))


Train batches: 270
Test batches: 68


In [13]:
class_counts = np.bincount(y_bert_train)
class_weights = 1.0 / class_counts
class_weights = class_weights / class_weights.sum() * len(class_weights)

criterion = torch.nn.CrossEntropyLoss(
    weight=torch.tensor(class_weights).to(device)
)

optimizer = AdamW(model.parameters(), lr=2e-5)

total_steps = len(train_loader) * NUM_EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=total_steps // 10,
    num_training_steps=total_steps
)


In [14]:
def train_epoch(model, loader):
    model.train()
    total_loss, correct, total = 0, 0, 0

    for batch in tqdm(loader):
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()
        correct += (outputs.logits.argmax(1) == labels).sum().item()
        total += labels.size(0)

    return total_loss / len(loader), correct / total


In [15]:
for epoch in range(NUM_EPOCHS):
    loss, acc = train_epoch(model, train_loader)
    print(f"Epoch {epoch+1}: Loss={loss:.4f}, Acc={acc:.4f}")


  0%|          | 0/270 [00:00<?, ?it/s]

Epoch 1: Loss=1.1379, Acc=0.5836


  0%|          | 0/270 [00:00<?, ?it/s]

Epoch 2: Loss=0.9596, Acc=0.6556


  0%|          | 0/270 [00:00<?, ?it/s]

Epoch 3: Loss=0.9118, Acc=0.6767


  0%|          | 0/270 [00:00<?, ?it/s]

Epoch 4: Loss=0.8516, Acc=0.6993


In [16]:
# ============================================================================
# CELL 10: FINAL EVALUATION - ARABERT ONLY
# ============================================================================

print("\n" + "="*70)
print("FINAL EVALUATION - ARABERT")
print("="*70)

model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating"):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )

        preds = torch.argmax(outputs.logits, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Metrics
bert_accuracy = accuracy_score(all_labels, all_preds)
bert_f1_macro = f1_score(all_labels, all_preds, average="macro", zero_division=0)
bert_f1_weighted = f1_score(all_labels, all_preds, average="weighted", zero_division=0)

print("\nClassification Report:\n")
print(classification_report(
    all_labels,
    all_preds,
    target_names=target_names,
    zero_division=0
))

print("\nOverall Metrics:")
print(f"  Accuracy: {bert_accuracy:.4f}")
print(f"  F1-Macro: {bert_f1_macro:.4f}")
print(f"  F1-Weighted: {bert_f1_weighted:.4f}")



FINAL EVALUATION - ARABERT


Evaluating:   0%|          | 0/68 [00:00<?, ?it/s]


Classification Report:

                                          precision    recall  f1-score   support

                   Biased against Israel       0.00      0.00      0.00        56
                Biased against Palestine       0.53      0.38      0.44       580
Biased against both Palestine and Israel       0.00      0.00      0.00        10
                   Biased against others       0.00      0.00      0.00        41
                          Not Applicable       0.00      0.00      0.00        24
                                Unbiased       0.69      0.88      0.77      1363
                                 Unclear       0.14      0.01      0.02        86

                                accuracy                           0.66      2160
                               macro avg       0.19      0.18      0.18      2160
                            weighted avg       0.58      0.66      0.61      2160


Overall Metrics:
  Accuracy: 0.6565
  F1-Macro: 0.1763
  F1-Weighted: