<a href="https://colab.research.google.com/github/BootCamp-BMA/colabs/blob/main/dziriBertRandomSearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
import transformers
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_scheduler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import time
import os
import random
from sklearn.metrics import accuracy_score, roc_auc_score

# Colab-only setup: mount Google Drive at /content/drive. The dataset and
# results paths used later in this notebook live under that mount point, so
# this has to run before any cell that reads or writes them.
from google.colab import drive
drive.mount('/content/drive')

# Simple marker in the cell output that the import/mount cell finished.
print("Imported all required libraries")


Mounted at /content/drive
Imported all required libraries


In [None]:

def load_and_visualize_data(file_path):
    """Read the CSV at ``file_path`` and hand back the resulting DataFrame.

    Note: despite the name, nothing is plotted or displayed here; the frame
    is returned exactly as ``pd.read_csv`` produced it.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        pandas.DataFrame with the file contents.
    """
    return pd.read_csv(file_path)


In [None]:

def split_and_tokenize_data(df, tokenizer_name='alger-ia/dziribert', max_length=128, test_size=0.2, random_state=42):
    """Hold out a test split and encode both splits with a BERT tokenizer.

    Args:
        df: DataFrame providing a ``'news'`` text column and a ``'label'`` column.
        tokenizer_name: Identifier passed to ``BertTokenizer.from_pretrained``.
        max_length: Every sequence is padded/truncated to exactly this length.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split`` so the split is repeatable.

    Returns:
        ``(train_tokens, test_tokens, train_labels, test_labels)`` where the
        token objects are whatever the tokenizer returns with
        ``return_tensors='pt'`` and the labels are ``torch`` tensors.
    """
    texts = df['news'].tolist()
    labels = df['label'].tolist()
    train_texts, test_texts, train_labels, test_labels = train_test_split(
        texts,
        labels,
        test_size=test_size,
        random_state=random_state
    )

    # Both splits are encoded with identical settings.
    encode_options = dict(
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
    train_tokens = tokenizer(train_texts, **encode_options)
    test_tokens = tokenizer(test_texts, **encode_options)

    return train_tokens, test_tokens, torch.tensor(train_labels), torch.tensor(test_labels)


In [None]:

def create_dataloaders(train_tokens, test_tokens, train_labels, test_labels, batch_size=16):
    """Wrap tokenized splits and their labels into PyTorch DataLoaders.

    Each dataset item is ``(input_ids, attention_mask, label)``. The training
    loader reshuffles every epoch; the test loader keeps the original order.

    Args:
        train_tokens / test_tokens: Mappings exposing ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        train_labels / test_labels: Label tensors aligned with the tokens.
        batch_size: Batch size used for both loaders.

    Returns:
        ``(train_dataloader, test_dataloader)``.
    """
    def _as_dataset(tokens, labels):
        # Field order matters: the training/eval loops unpack batches positionally.
        return TensorDataset(tokens['input_ids'], tokens['attention_mask'], labels)

    train_dataloader = DataLoader(
        _as_dataset(train_tokens, train_labels), batch_size=batch_size, shuffle=True
    )
    test_dataloader = DataLoader(
        _as_dataset(test_tokens, test_labels), batch_size=batch_size, shuffle=False
    )
    return train_dataloader, test_dataloader


In [None]:

def setup_model_and_device(model_name='alger-ia/dziribert', num_labels=2, layer_control=None):
    """Load the sequence-classification model, place it on a device, and
    optionally freeze/unfreeze encoder layers.

    Args:
        model_name: Checkpoint identifier for ``from_pretrained``.
        num_labels: Number of output classes for the classification head.
        layer_control: Optional ``{layer_index: bool}`` mapping. When it is
            non-empty, every layer in ``model.bert.encoder.layer`` gets
            ``requires_grad`` set to its mapped value; indices missing from
            the mapping are frozen (``False``). Only encoder layers are
            touched here; all other parameters keep their loaded setting.
            ``None`` or an empty mapping leaves every parameter as loaded.

    Returns:
        ``(model, device)`` where ``device`` is CUDA when available, else CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
    model.to(device)

    # Nothing to freeze: return the model with its default trainability.
    if not layer_control:
        return model, device

    for layer_idx, encoder_layer in enumerate(model.bert.encoder.layer):
        for weight in encoder_layer.parameters():
            weight.requires_grad = layer_control.get(layer_idx, False)
    return model, device


In [None]:

def setup_training_components(model, train_dataloader, num_epochs=5, lr=2e-5, class_weights=(2.0, 1.0), device=None):
    """Build the optimizer, LR scheduler and weighted loss for one training run.

    Args:
        model: Model whose parameters are optimized.
        train_dataloader: Only its ``len()`` is used, to size the schedule
            (``len(train_dataloader) * num_epochs`` total steps).
        num_epochs: Number of epochs the schedule should span.
        lr: Initial learning rate for AdamW.
        class_weights: Per-class loss weights (any sequence or tensor of
            numbers). Integers are accepted and converted to float.
        device: Device for the class-weight tensor. When ``None`` the device
            of the model's first parameter is used, so the loss weights always
            sit next to the logits they are applied to.

    Returns:
        ``(optimizer, lr_scheduler, criterion)``: AdamW, a ``"linear"``
        scheduler with zero warmup steps, and a ``CrossEntropyLoss`` carrying
        the class weights.
    """
    if device is None:
        # Previously a missing device left the weights on CPU, which breaks the
        # loss as soon as the model lives on a GPU.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # NOTE(review): `AdamW` is the name imported from transformers at the top of
    # the file; the transformers docs flag it as deprecated in favor of
    # torch.optim.AdamW -- confirm before upgrading transformers.
    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)
    num_training_steps = len(train_dataloader) * num_epochs
    lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

    # Force a float tensor: CrossEntropyLoss needs floating-point weights, and
    # integer inputs such as [2, 1] would otherwise become an int64 tensor.
    weight_tensor = torch.as_tensor(class_weights, dtype=torch.float).to(device)
    criterion = torch.nn.CrossEntropyLoss(weight=weight_tensor)
    return optimizer, lr_scheduler, criterion


In [None]:
from tqdm import tqdm  # Import tqdm

def train_model(model, train_dataloader, optimizer, criterion, lr_scheduler, device, epochs=5):
    """Run the training loop, showing a tqdm bar per epoch.

    For every batch: move tensors to ``device``, clear gradients, run the
    forward pass, compute ``criterion`` on the logits, back-propagate, then
    step the optimizer followed by the LR scheduler (the scheduler advances
    once per batch). After each epoch the mean batch loss is printed.

    Args:
        model: Callable as ``model(input_ids, attention_mask=...)`` returning
            an object with a ``.logits`` attribute.
        train_dataloader: Sized iterable of ``(input_ids, attention_mask, labels)``.
        optimizer, criterion, lr_scheduler: Training components.
        device: Device the batch tensors are moved to.
        epochs: Number of passes over ``train_dataloader``.

    Returns:
        None. The model is updated in place and left in training mode.
    """
    model.train()
    for epoch_number in range(1, epochs + 1):
        running_loss = 0
        # One progress bar per epoch, kept on screen after it completes.
        batch_bar = tqdm(train_dataloader, desc=f"Epoch {epoch_number}/{epochs}", leave=True)
        for batch in batch_bar:
            input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)

            optimizer.zero_grad()
            logits = model(input_ids, attention_mask=attention_mask).logits
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            # Surface the latest batch loss next to the bar.
            batch_bar.set_postfix({'loss': batch_loss})

        mean_loss = running_loss / len(train_dataloader)
        print(f"Epoch {epoch_number}/{epochs} - Avg Loss: {mean_loss:.4f}")

In [None]:

def evaluate_model(model, test_dataloader, device):
    """Evaluate the model on test data and report accuracy and ROC AUC.

    Accuracy is computed from the argmax class predictions. ROC AUC is
    computed from softmax probabilities rather than from the hard predictions:
    feeding thresholded 0/1 labels as scores collapses the ROC curve to a
    single operating point and does not measure ranking quality.

    Args:
        model: Callable as ``model(input_ids, attention_mask=...)`` returning
            an object with a ``.logits`` attribute of shape
            ``(batch, num_classes)``.
        test_dataloader: Iterable of ``(input_ids, attention_mask, labels)``.
        device: Device the batch tensors are moved to.

    Returns:
        ``(accuracy, roc_auc)``. For two classes the AUC uses the probability
        of class 1; for more classes a one-vs-rest AUC over the full
        probability matrix is returned.

    Raises:
        Whatever ``roc_auc_score`` raises for degenerate inputs (e.g. only one
        class present in the labels).
    """
    model.eval()
    all_preds, all_labels, all_probs = [], [], []
    with torch.no_grad():
        for batch in test_dataloader:
            input_ids, attention_mask, labels = [b.to(device) for b in batch]
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            # Class probabilities are what the ROC computation needs as scores.
            probs = torch.softmax(logits.float(), dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
    accuracy = accuracy_score(all_labels, all_preds)
    if all_probs and len(all_probs[0]) == 2:
        # Binary case: score each sample by its probability of the positive class (label 1).
        positive_scores = [row[1] for row in all_probs]
        roc_auc = roc_auc_score(all_labels, positive_scores)
    else:
        # Multi-class case: one-vs-rest AUC over the full probability matrix.
        roc_auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')
    return accuracy, roc_auc


In [None]:

def main():
    """Main function to run experiments with DziriBERT within a 12-hour limit.

    Repeatedly samples a random, not-yet-tried configuration (batch size,
    epochs, max sequence length, learning rate, class weights and a set of 1-6
    encoder layers to freeze), trains and evaluates a fresh model, and appends
    the metrics to a results list that is periodically written to CSV on
    Google Drive (the file at ``results_path`` is overwritten on each save).

    The loop stops when the time budget is exceeded (checked before each new
    run, so the last run may overshoot) or after several consecutive failed
    runs, which indicates a systematic problem rather than an unlucky
    configuration.
    """
    import gc  # local import: only needed for the per-run cleanup below

    # Define parameters
    file_path = '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv'
    save_dir = '/content/drive/MyDrive/arabic_fake_news/FASSILA/'
    results_path = os.path.join(save_dir, "02_experiment_results_DziriBERT.csv")

    batch_sizes = [8, 16, 32]
    num_epochs_list = [3, 5, 7]
    max_lengths = [64, 128, 256]
    learning_rates = [1e-5, 2e-5, 5e-5]
    class_weights_list = [[1.0, 1.0], [2.0, 1.0], [1.0, 2.0]]

    # Load data once
    df = load_and_visualize_data(file_path)
    dataset_size = len(df)

    # System information
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cuda_available = torch.cuda.is_available()
    torch_version = torch.__version__
    transformers_version = transformers.__version__

    # Results storage
    results = []
    save_interval = 10  # Save results every 10 experiments
    max_runtime = 12 * 3600  # 12 hours in seconds
    max_consecutive_failures = 5  # Give up when runs keep failing back to back
    consecutive_failures = 0
    start_global_time = time.time()

    # Track unique experiment configurations
    tested_configs = set()

    # Number of encoder layers the search may freeze.
    # NOTE(review): assumes the checkpoint has 12 encoder layers -- confirm.
    num_layers = 12
    layer_indices = list(range(num_layers))

    iteration = 0
    while True:
        if time.time() - start_global_time > max_runtime:
            print("Time limit reached (12 hours). Stopping experiments.")
            break  # Stop if 12 hours exceeded

        # Randomly select hyperparameters
        batch_size = random.choice(batch_sizes)
        num_epochs = random.choice(num_epochs_list)
        max_length = random.choice(max_lengths)
        lr = random.choice(learning_rates)
        class_weights = random.choice(class_weights_list)

        # Randomly pick 1 to 6 encoder layers to FREEZE; the rest stay trainable
        num_frozen = random.randint(1, 6)
        frozen_layers = tuple(sorted(random.sample(layer_indices, num_frozen)))
        trainable_layers = tuple(i for i in layer_indices if i not in frozen_layers)

        # Create a unique key for this configuration
        config_key = (batch_size, num_epochs, max_length, lr, str(class_weights), frozen_layers)

        # Skip if this configuration was tested before
        if config_key in tested_configs:
            continue

        # Mark this configuration as tested
        tested_configs.add(config_key)

        # True = trainable, False = frozen
        layer_control = {i: (i not in frozen_layers) for i in range(num_layers)}

        start_time = time.time()

        # Pre-bind so the cleanup in `finally` works even if setup fails early.
        model = optimizer = lr_scheduler = criterion = None
        try:
            # Data processing
            train_tokens, test_tokens, train_labels, test_labels = split_and_tokenize_data(df, max_length=max_length)
            train_dataloader, test_dataloader = create_dataloaders(train_tokens, test_tokens, train_labels, test_labels, batch_size)

            # Model setup
            model, device = setup_model_and_device(layer_control=layer_control)

            # Training setup
            optimizer, lr_scheduler, criterion = setup_training_components(model, train_dataloader, num_epochs, lr, class_weights, device)

            # Training
            train_model(model, train_dataloader, optimizer, criterion, lr_scheduler, device, num_epochs)

            # Evaluation
            accuracy, roc_auc = evaluate_model(model, test_dataloader, device)

            elapsed_time = time.time() - start_time

            # Store results for this iteration. The layer columns report what
            # was actually trained vs. frozen (earlier versions wrote the
            # frozen set under the "trainable" columns).
            result = {
                'iteration': iteration,
                'dataset_path': file_path,
                'dataset_size': dataset_size,
                'batch_size': batch_size,
                'num_epochs': num_epochs,
                'max_length': max_length,
                'learning_rate': lr,
                'class_weights': str(class_weights),
                'trainable_layers': str(trainable_layers),
                'num_trainable_layers': len(trainable_layers),
                'frozen_layers': str(frozen_layers),
                'num_frozen_layers': num_frozen,
                'device': str(device),
                'cuda_available': cuda_available,
                'torch_version': torch_version,
                'transformers_version': transformers_version,
                'execution_time': elapsed_time,
                'accuracy': accuracy,
                'roc_auc': roc_auc
            }
            results.append(result)
            consecutive_failures = 0

            # Minimal printing
            print("Iteration:", iteration)
            print(result)
            print("--------------------------------------------------")

            # Save periodically
            if len(results) % save_interval == 0:
                pd.DataFrame(results).to_csv(results_path, index=False)
            iteration += 1
        except Exception as e:
            print(f"Error in iteration {iteration}: {str(e)}")
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                # A systematic error (bad path, missing column, ...) would
                # otherwise keep the loop spinning until the time limit.
                print(f"{consecutive_failures} consecutive failures. Stopping experiments.")
                break
        finally:
            # Drop references to this run's model/optimizer and release cached
            # GPU memory so the next configuration starts from a clean slate.
            model = optimizer = lr_scheduler = criterion = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    if not results:
        # Nothing to save or rank; also avoids clobbering an existing results
        # file with an empty one and a KeyError on the 'accuracy' sort below.
        print("\nNo experiment completed successfully; no results were saved.")
        return

    # Final Save
    results_df = pd.DataFrame(results)
    results_df.to_csv(results_path, index=False)
    print("\nAll experiments completed!")
    print(f"Final results saved to {results_path}")
    print("Top 5 results sorted by accuracy:")
    print(results_df.sort_values(by='accuracy', ascending=False).head(5))

# Run the pipeline: executing this cell starts the random search immediately
# and blocks until main() returns.
main()

Epoch 5/7: 100%|██████████| 482/482 [00:37<00:00, 12.91it/s, loss=0.0861]


Epoch 5/7 - Avg Loss: 0.0805


Epoch 6/7: 100%|██████████| 482/482 [00:37<00:00, 12.93it/s, loss=0.0275]


Epoch 6/7 - Avg Loss: 0.0518


Epoch 7/7: 100%|██████████| 482/482 [00:37<00:00, 12.92it/s, loss=0.0396]


Epoch 7/7 - Avg Loss: 0.0421
Iteration: 1
{'iteration': 1, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 7, 'max_length': 64, 'learning_rate': 2e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(1, 2, 7, 8, 9, 10)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 268.41944122314453, 'accuracy': 0.7619294605809128, 'roc_auc': 0.7552466299436102}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 964/964 [01:15<00:00, 12.82it/s, loss=0.774]


Epoch 1/5 - Avg Loss: 0.5841


Epoch 2/5: 100%|██████████| 964/964 [01:15<00:00, 12.84it/s, loss=0.363]


Epoch 2/5 - Avg Loss: 0.3374


Epoch 3/5: 100%|██████████| 964/964 [01:15<00:00, 12.82it/s, loss=1.43]


Epoch 3/5 - Avg Loss: 0.1407


Epoch 4/5: 100%|██████████| 964/964 [01:15<00:00, 12.82it/s, loss=0.0163]


Epoch 4/5 - Avg Loss: 0.0683


Epoch 5/5: 100%|██████████| 964/964 [01:15<00:00, 12.82it/s, loss=0.000901]


Epoch 5/5 - Avg Loss: 0.0305
Iteration: 2
{'iteration': 2, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 5, 'max_length': 128, 'learning_rate': 5e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(0, 1, 6, 7, 9, 11)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 386.8399770259857, 'accuracy': 0.7728215767634855, 'roc_auc': 0.768031365042768}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 241/241 [01:01<00:00,  3.92it/s, loss=0.69]


Epoch 1/7 - Avg Loss: 0.5391


Epoch 2/7: 100%|██████████| 241/241 [01:01<00:00,  3.93it/s, loss=0.188]


Epoch 2/7 - Avg Loss: 0.3481


Epoch 3/7: 100%|██████████| 241/241 [01:01<00:00,  3.93it/s, loss=0.25]


Epoch 3/7 - Avg Loss: 0.1709


Epoch 4/7: 100%|██████████| 241/241 [01:01<00:00,  3.91it/s, loss=0.198]


Epoch 4/7 - Avg Loss: 0.0912


Epoch 5/7: 100%|██████████| 241/241 [01:01<00:00,  3.92it/s, loss=0.0283]


Epoch 5/7 - Avg Loss: 0.0567


Epoch 6/7: 100%|██████████| 241/241 [01:01<00:00,  3.93it/s, loss=0.0118]


Epoch 6/7 - Avg Loss: 0.0311


Epoch 7/7: 100%|██████████| 241/241 [01:01<00:00,  3.93it/s, loss=0.006]


Epoch 7/7 - Avg Loss: 0.0187
Iteration: 3
{'iteration': 3, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 7, 'max_length': 128, 'learning_rate': 5e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(0, 1, 3, 9)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 440.65004682540894, 'accuracy': 0.7728215767634855, 'roc_auc': 0.7660933447163932}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 964/964 [01:27<00:00, 11.07it/s, loss=0.992]


Epoch 1/5 - Avg Loss: 0.5718


Epoch 2/5: 100%|██████████| 964/964 [01:26<00:00, 11.09it/s, loss=0.385]


Epoch 2/5 - Avg Loss: 0.3492


Epoch 3/5: 100%|██████████| 964/964 [01:27<00:00, 11.08it/s, loss=0.101]


Epoch 3/5 - Avg Loss: 0.1623


Epoch 4/5: 100%|██████████| 964/964 [01:26<00:00, 11.09it/s, loss=0.00247]


Epoch 4/5 - Avg Loss: 0.0695


Epoch 5/5: 100%|██████████| 964/964 [01:26<00:00, 11.09it/s, loss=0.0395]


Epoch 5/5 - Avg Loss: 0.0359
Iteration: 4
{'iteration': 4, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 5, 'max_length': 128, 'learning_rate': 5e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(1,)', 'num_trainable_layers': 1, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 445.65176820755005, 'accuracy': 0.7650414937759336, 'roc_auc': 0.7633445532602134}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 964/964 [01:19<00:00, 12.14it/s, loss=0.193]


Epoch 1/7 - Avg Loss: 0.5863


Epoch 2/7: 100%|██████████| 964/964 [01:19<00:00, 12.15it/s, loss=0.366]


Epoch 2/7 - Avg Loss: 0.3395


Epoch 3/7: 100%|██████████| 964/964 [01:19<00:00, 12.13it/s, loss=0.744]


Epoch 3/7 - Avg Loss: 0.1528


Epoch 4/7: 100%|██████████| 964/964 [01:19<00:00, 12.13it/s, loss=0.00456]


Epoch 4/7 - Avg Loss: 0.0703


Epoch 5/7: 100%|██████████| 964/964 [01:19<00:00, 12.13it/s, loss=0.0014]


Epoch 5/7 - Avg Loss: 0.0373


Epoch 6/7: 100%|██████████| 964/964 [01:19<00:00, 12.13it/s, loss=0.000134]


Epoch 6/7 - Avg Loss: 0.0207


Epoch 7/7: 100%|██████████| 964/964 [01:19<00:00, 12.14it/s, loss=9.92e-5]


Epoch 7/7 - Avg Loss: 0.0132
Iteration: 5
{'iteration': 5, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 7, 'max_length': 128, 'learning_rate': 5e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(6, 8, 9, 10)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 566.906229019165, 'accuracy': 0.7603734439834025, 'roc_auc': 0.7564195563869297}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 241/241 [01:05<00:00,  3.67it/s, loss=0.564]


Epoch 1/3 - Avg Loss: 0.5487


Epoch 2/3: 100%|██████████| 241/241 [01:05<00:00,  3.68it/s, loss=0.344]


Epoch 2/3 - Avg Loss: 0.3896


Epoch 3/3: 100%|██████████| 241/241 [01:05<00:00,  3.67it/s, loss=0.14]


Epoch 3/3 - Avg Loss: 0.2666
Iteration: 6
{'iteration': 6, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 3, 'max_length': 128, 'learning_rate': 2e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(1,)', 'num_trainable_layers': 1, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 207.4040629863739, 'accuracy': 0.7697095435684648, 'roc_auc': 0.7562727366652348}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 964/964 [02:29<00:00,  6.46it/s, loss=0.459]


Epoch 1/5 - Avg Loss: 0.5345


Epoch 2/5: 100%|██████████| 964/964 [02:29<00:00,  6.46it/s, loss=0.871]


Epoch 2/5 - Avg Loss: 0.3273


Epoch 3/5: 100%|██████████| 964/964 [02:29<00:00,  6.46it/s, loss=0.00775]


Epoch 3/5 - Avg Loss: 0.1482


Epoch 4/5: 100%|██████████| 964/964 [02:29<00:00,  6.46it/s, loss=0.00555]


Epoch 4/5 - Avg Loss: 0.0723


Epoch 5/5: 100%|██████████| 964/964 [02:29<00:00,  6.46it/s, loss=0.00066]


Epoch 5/5 - Avg Loss: 0.0355
Iteration: 7
{'iteration': 7, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 5, 'max_length': 256, 'learning_rate': 5e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(8,)', 'num_trainable_layers': 1, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 763.9160466194153, 'accuracy': 0.7754149377593361, 'roc_auc': 0.7694141893104367}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 964/964 [01:15<00:00, 12.80it/s, loss=1.53]


Epoch 1/7 - Avg Loss: 0.5700


Epoch 2/7: 100%|██████████| 964/964 [01:15<00:00, 12.84it/s, loss=0.271]


Epoch 2/7 - Avg Loss: 0.3309


Epoch 3/7: 100%|██████████| 964/964 [01:15<00:00, 12.83it/s, loss=0.00299]


Epoch 3/7 - Avg Loss: 0.1474


Epoch 4/7: 100%|██████████| 964/964 [01:15<00:00, 12.82it/s, loss=0.0014]


Epoch 4/7 - Avg Loss: 0.0749


Epoch 5/7: 100%|██████████| 964/964 [01:15<00:00, 12.84it/s, loss=0.0434]


Epoch 5/7 - Avg Loss: 0.0406


Epoch 6/7: 100%|██████████| 964/964 [01:15<00:00, 12.81it/s, loss=0.000893]


Epoch 6/7 - Avg Loss: 0.0232


Epoch 7/7: 100%|██████████| 964/964 [01:15<00:00, 12.84it/s, loss=0.000614]


Epoch 7/7 - Avg Loss: 0.0139
Iteration: 8
{'iteration': 8, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 7, 'max_length': 128, 'learning_rate': 5e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(1, 4, 5, 8, 10, 11)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 537.3066146373749, 'accuracy': 0.7650414937759336, 'roc_auc': 0.7607605261583804}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 964/964 [02:18<00:00,  6.96it/s, loss=0.637]


Epoch 1/5 - Avg Loss: 0.5875


Epoch 2/5: 100%|██████████| 964/964 [02:18<00:00,  6.96it/s, loss=0.027]


Epoch 2/5 - Avg Loss: 0.3501


Epoch 3/5: 100%|██████████| 964/964 [02:18<00:00,  6.97it/s, loss=0.0661]


Epoch 3/5 - Avg Loss: 0.1586


Epoch 4/5: 100%|██████████| 964/964 [02:18<00:00,  6.96it/s, loss=0.148]


Epoch 4/5 - Avg Loss: 0.0716


Epoch 5/5: 100%|██████████| 964/964 [02:18<00:00,  6.97it/s, loss=0.0014]


Epoch 5/5 - Avg Loss: 0.0375
Iteration: 9
{'iteration': 9, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 5, 'max_length': 256, 'learning_rate': 5e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(1, 2, 4, 11)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 710.170351266861, 'accuracy': 0.7671161825726142, 'roc_auc': 0.7624266581112458}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 241/241 [01:01<00:00,  3.92it/s, loss=0.567]


Epoch 1/5 - Avg Loss: 0.6559


Epoch 2/5: 100%|██████████| 241/241 [01:01<00:00,  3.93it/s, loss=0.45]


Epoch 2/5 - Avg Loss: 0.5502


Epoch 3/5: 100%|██████████| 241/241 [01:01<00:00,  3.90it/s, loss=0.381]


Epoch 3/5 - Avg Loss: 0.4556


Epoch 4/5: 100%|██████████| 241/241 [01:01<00:00,  3.92it/s, loss=0.389]


Epoch 4/5 - Avg Loss: 0.3864


Epoch 5/5: 100%|██████████| 241/241 [01:01<00:00,  3.92it/s, loss=0.316]


Epoch 5/5 - Avg Loss: 0.3369
Iteration: 10
{'iteration': 10, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 5, 'max_length': 128, 'learning_rate': 1e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(3, 4, 6, 7)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 317.7694454193115, 'accuracy': 0.7359958506224067, 'roc_auc': 0.7304362720841332}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 964/964 [02:12<00:00,  7.25it/s, loss=0.184]


Epoch 1/5 - Avg Loss: 0.5561


Epoch 2/5: 100%|██████████| 964/964 [02:12<00:00,  7.25it/s, loss=0.196]


Epoch 2/5 - Avg Loss: 0.3852


Epoch 3/5: 100%|██████████| 964/964 [02:13<00:00,  7.25it/s, loss=0.0989]


Epoch 3/5 - Avg Loss: 0.2168


Epoch 4/5: 100%|██████████| 964/964 [02:12<00:00,  7.25it/s, loss=0.317]


Epoch 4/5 - Avg Loss: 0.1162


Epoch 5/5: 100%|██████████| 964/964 [02:12<00:00,  7.25it/s, loss=0.0495]


Epoch 5/5 - Avg Loss: 0.0726
Iteration: 11
{'iteration': 11, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 5, 'max_length': 256, 'learning_rate': 2e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(0, 3, 4, 6, 7, 11)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 682.1273100376129, 'accuracy': 0.7567427385892116, 'roc_auc': 0.747312927204607}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 964/964 [02:25<00:00,  6.62it/s, loss=0.777]


Epoch 1/5 - Avg Loss: 0.6200


Epoch 2/5: 100%|██████████| 964/964 [02:25<00:00,  6.62it/s, loss=0.133]


Epoch 2/5 - Avg Loss: 0.4386


Epoch 3/5: 100%|██████████| 964/964 [02:25<00:00,  6.62it/s, loss=0.382]


Epoch 3/5 - Avg Loss: 0.2717


Epoch 4/5: 100%|██████████| 964/964 [02:25<00:00,  6.61it/s, loss=0.0285]


Epoch 4/5 - Avg Loss: 0.1619


Epoch 5/5: 100%|██████████| 964/964 [02:25<00:00,  6.62it/s, loss=0.0991]


Epoch 5/5 - Avg Loss: 0.1093
Iteration: 12
{'iteration': 12, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 5, 'max_length': 256, 'learning_rate': 1e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(9, 10)', 'num_trainable_layers': 2, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 746.0682466030121, 'accuracy': 0.7738589211618258, 'roc_auc': 0.7663880717132774}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 482/482 [00:39<00:00, 12.20it/s, loss=0.488]


Epoch 1/5 - Avg Loss: 0.5737


Epoch 2/5: 100%|██████████| 482/482 [00:39<00:00, 12.16it/s, loss=0.375]


Epoch 2/5 - Avg Loss: 0.4562


Epoch 3/5: 100%|██████████| 482/482 [00:39<00:00, 12.19it/s, loss=0.425]


Epoch 3/5 - Avg Loss: 0.3477


Epoch 4/5: 100%|██████████| 482/482 [00:39<00:00, 12.17it/s, loss=0.161]


Epoch 4/5 - Avg Loss: 0.2655


Epoch 5/5: 100%|██████████| 482/482 [00:39<00:00, 12.22it/s, loss=0.226]


Epoch 5/5 - Avg Loss: 0.2146
Iteration: 13
{'iteration': 13, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 5, 'max_length': 64, 'learning_rate': 1e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(3, 6, 8, 11)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 204.7849771976471, 'accuracy': 0.7474066390041494, 'roc_auc': 0.7358316249680531}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 964/964 [02:15<00:00,  7.12it/s, loss=0.563]


Epoch 1/7 - Avg Loss: 0.5466


Epoch 2/7: 100%|██████████| 964/964 [02:15<00:00,  7.12it/s, loss=0.215]


Epoch 2/7 - Avg Loss: 0.3546


Epoch 3/7: 100%|██████████| 964/964 [02:15<00:00,  7.12it/s, loss=0.144]


Epoch 3/7 - Avg Loss: 0.1854


Epoch 4/7: 100%|██████████| 964/964 [02:15<00:00,  7.12it/s, loss=0.431]


Epoch 4/7 - Avg Loss: 0.0997


Epoch 5/7: 100%|██████████| 964/964 [02:15<00:00,  7.13it/s, loss=0.0193]


Epoch 5/7 - Avg Loss: 0.0575


Epoch 6/7: 100%|██████████| 964/964 [02:15<00:00,  7.13it/s, loss=0.0114]


Epoch 6/7 - Avg Loss: 0.0400


Epoch 7/7: 100%|██████████| 964/964 [02:15<00:00,  7.12it/s, loss=0.000454]


Epoch 7/7 - Avg Loss: 0.0239
Iteration: 14
{'iteration': 14, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 7, 'max_length': 256, 'learning_rate': 2e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(2, 3, 6, 7, 11)', 'num_trainable_layers': 5, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 964.9430890083313, 'accuracy': 0.7671161825726142, 'roc_auc': 0.7612423123562391}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 964/964 [01:22<00:00, 11.75it/s, loss=0.823]


Epoch 1/3 - Avg Loss: 0.6273


Epoch 2/3: 100%|██████████| 964/964 [01:21<00:00, 11.77it/s, loss=0.0695]


Epoch 2/3 - Avg Loss: 0.4641


Epoch 3/3: 100%|██████████| 964/964 [01:21<00:00, 11.78it/s, loss=0.191]


Epoch 3/3 - Avg Loss: 0.3357
Iteration: 15
{'iteration': 15, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 3, 'max_length': 128, 'learning_rate': 1e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(3, 5, 8)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 257.1941924095154, 'accuracy': 0.7437759336099585, 'roc_auc': 0.7420138228049092}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 964/964 [02:15<00:00,  7.11it/s, loss=0.692]


Epoch 1/3 - Avg Loss: 0.6031


Epoch 2/3: 100%|██████████| 964/964 [02:15<00:00,  7.10it/s, loss=0.184]


Epoch 2/3 - Avg Loss: 0.3978


Epoch 3/3: 100%|██████████| 964/964 [02:15<00:00,  7.10it/s, loss=0.0851]


Epoch 3/3 - Avg Loss: 0.2324
Iteration: 16
{'iteration': 16, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 3, 'max_length': 256, 'learning_rate': 2e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(2, 3, 4, 6, 8)', 'num_trainable_layers': 5, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 424.8686864376068, 'accuracy': 0.7660788381742739, 'roc_auc': 0.7610552531552646}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 241/241 [00:32<00:00,  7.39it/s, loss=0.447]


Epoch 1/7 - Avg Loss: 0.5991


Epoch 2/7: 100%|██████████| 241/241 [00:32<00:00,  7.37it/s, loss=0.659]


Epoch 2/7 - Avg Loss: 0.3793


Epoch 3/7: 100%|██████████| 241/241 [00:32<00:00,  7.33it/s, loss=0.21]


Epoch 3/7 - Avg Loss: 0.1750


Epoch 4/7: 100%|██████████| 241/241 [00:33<00:00,  7.30it/s, loss=0.0885]


Epoch 4/7 - Avg Loss: 0.0855


Epoch 5/7: 100%|██████████| 241/241 [00:32<00:00,  7.30it/s, loss=0.014]


Epoch 5/7 - Avg Loss: 0.0491


Epoch 6/7: 100%|██████████| 241/241 [00:33<00:00,  7.29it/s, loss=0.00701]


Epoch 6/7 - Avg Loss: 0.0272


Epoch 7/7: 100%|██████████| 241/241 [00:33<00:00,  7.29it/s, loss=0.00207]


Epoch 7/7 - Avg Loss: 0.0193
Iteration: 17
{'iteration': 17, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 7, 'max_length': 64, 'learning_rate': 5e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(1, 2, 4, 6, 11)', 'num_trainable_layers': 5, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 237.36687994003296, 'accuracy': 0.7712655601659751, 'roc_auc': 0.7634978983028727}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 482/482 [00:40<00:00, 11.91it/s, loss=0.32]


Epoch 1/7 - Avg Loss: 0.5220


Epoch 2/7: 100%|██████████| 482/482 [00:40<00:00, 11.89it/s, loss=0.171]


Epoch 2/7 - Avg Loss: 0.3109


Epoch 3/7: 100%|██████████| 482/482 [00:40<00:00, 11.92it/s, loss=0.239]


Epoch 3/7 - Avg Loss: 0.1413


Epoch 4/7: 100%|██████████| 482/482 [00:40<00:00, 11.90it/s, loss=0.793]


Epoch 4/7 - Avg Loss: 0.0716


Epoch 5/7: 100%|██████████| 482/482 [00:40<00:00, 11.93it/s, loss=0.00564]


Epoch 5/7 - Avg Loss: 0.0447


Epoch 6/7: 100%|██████████| 482/482 [00:40<00:00, 11.92it/s, loss=0.0286]


Epoch 6/7 - Avg Loss: 0.0250


Epoch 7/7: 100%|██████████| 482/482 [00:40<00:00, 11.91it/s, loss=0.00196]


Epoch 7/7 - Avg Loss: 0.0149
Iteration: 18
{'iteration': 18, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 7, 'max_length': 64, 'learning_rate': 5e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(1, 6, 9)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 290.61433935165405, 'accuracy': 0.7795643153526971, 'roc_auc': 0.7721004464407093}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 482/482 [00:38<00:00, 12.52it/s, loss=0.509]


Epoch 1/5 - Avg Loss: 0.6187


Epoch 2/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.384]


Epoch 2/5 - Avg Loss: 0.4431


Epoch 3/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.326]


Epoch 3/5 - Avg Loss: 0.2774


Epoch 4/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.184]


Epoch 4/5 - Avg Loss: 0.1594


Epoch 5/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.0163]


Epoch 5/5 - Avg Loss: 0.1027
Iteration: 19
{'iteration': 19, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 5, 'max_length': 64, 'learning_rate': 2e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(0, 3, 4, 6, 11)', 'num_trainable_layers': 5, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 199.7041256427765, 'accuracy': 0.7671161825726142, 'roc_auc': 0.7630726648867041}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 482/482 [01:04<00:00,  7.51it/s, loss=0.737]


Epoch 1/3 - Avg Loss: 0.6048


Epoch 2/3: 100%|██████████| 482/482 [01:04<00:00,  7.50it/s, loss=0.507]


Epoch 2/3 - Avg Loss: 0.4607


Epoch 3/3: 100%|██████████| 482/482 [01:04<00:00,  7.42it/s, loss=0.441]


Epoch 3/3 - Avg Loss: 0.3405
Iteration: 20
{'iteration': 20, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 3, 'max_length': 128, 'learning_rate': 2e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(0, 2, 3, 5, 9, 11)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 204.11483716964722, 'accuracy': 0.7349585062240664, 'roc_auc': 0.7396163111273035}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 482/482 [00:40<00:00, 11.87it/s, loss=0.628]


Epoch 1/7 - Avg Loss: 0.6406


Epoch 2/7: 100%|██████████| 482/482 [00:40<00:00, 11.85it/s, loss=0.73]


Epoch 2/7 - Avg Loss: 0.4910


Epoch 3/7: 100%|██████████| 482/482 [00:40<00:00, 11.91it/s, loss=0.392]


Epoch 3/7 - Avg Loss: 0.3427


Epoch 4/7: 100%|██████████| 482/482 [00:40<00:00, 11.88it/s, loss=0.151]


Epoch 4/7 - Avg Loss: 0.2272


Epoch 5/7: 100%|██████████| 482/482 [00:40<00:00, 11.89it/s, loss=0.104]


Epoch 5/7 - Avg Loss: 0.1565


Epoch 6/7: 100%|██████████| 482/482 [00:40<00:00, 11.88it/s, loss=0.193]


Epoch 6/7 - Avg Loss: 0.1123


Epoch 7/7: 100%|██████████| 482/482 [00:40<00:00, 11.90it/s, loss=0.0893]


Epoch 7/7 - Avg Loss: 0.0873
Iteration: 21
{'iteration': 21, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 7, 'max_length': 64, 'learning_rate': 1e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(5, 10, 11)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 291.20489025115967, 'accuracy': 0.7588174273858921, 'roc_auc': 0.7535011065856803}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 964/964 [01:27<00:00, 11.05it/s, loss=0.151]


Epoch 1/5 - Avg Loss: 0.5792


Epoch 2/5: 100%|██████████| 964/964 [01:27<00:00, 11.05it/s, loss=0.0909]


Epoch 2/5 - Avg Loss: 0.3410


Epoch 3/5: 100%|██████████| 964/964 [01:27<00:00, 11.03it/s, loss=0.183]


Epoch 3/5 - Avg Loss: 0.1537


Epoch 4/5: 100%|██████████| 964/964 [01:27<00:00, 11.04it/s, loss=0.0348]


Epoch 4/5 - Avg Loss: 0.0742


Epoch 5/5: 100%|██████████| 964/964 [01:27<00:00, 11.04it/s, loss=0.000893]


Epoch 5/5 - Avg Loss: 0.0380
Iteration: 22
{'iteration': 22, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 5, 'max_length': 128, 'learning_rate': 5e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(5,)', 'num_trainable_layers': 1, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 447.29387378692627, 'accuracy': 0.7754149377593361, 'roc_auc': 0.7722135520040893}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 241/241 [01:00<00:00,  3.95it/s, loss=0.592]


Epoch 1/3 - Avg Loss: 0.5753


Epoch 2/3: 100%|██████████| 241/241 [01:01<00:00,  3.92it/s, loss=0.44]


Epoch 2/3 - Avg Loss: 0.4372


Epoch 3/3: 100%|██████████| 241/241 [01:01<00:00,  3.91it/s, loss=0.449]


Epoch 3/3 - Avg Loss: 0.3298
Iteration: 23
{'iteration': 23, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 3, 'max_length': 128, 'learning_rate': 2e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(7, 8, 9, 11)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 194.38501405715942, 'accuracy': 0.7297717842323651, 'roc_auc': 0.7119794017368228}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 964/964 [00:50<00:00, 19.03it/s, loss=0.422]


Epoch 1/3 - Avg Loss: 0.5803


Epoch 2/3: 100%|██████████| 964/964 [00:50<00:00, 19.04it/s, loss=0.356]


Epoch 2/3 - Avg Loss: 0.3710


Epoch 3/3: 100%|██████████| 964/964 [00:50<00:00, 18.98it/s, loss=0.0478]


Epoch 3/3 - Avg Loss: 0.1948
Iteration: 24
{'iteration': 24, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 3, 'max_length': 64, 'learning_rate': 2e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(2, 4, 6, 10)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 158.99142718315125, 'accuracy': 0.7624481327800829, 'roc_auc': 0.7621770645843643}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 964/964 [02:12<00:00,  7.27it/s, loss=0.827]


Epoch 1/7 - Avg Loss: 0.5956


Epoch 2/7: 100%|██████████| 964/964 [02:12<00:00,  7.26it/s, loss=0.255]


Epoch 2/7 - Avg Loss: 0.3873


Epoch 3/7: 100%|██████████| 964/964 [02:12<00:00,  7.26it/s, loss=0.0416]


Epoch 3/7 - Avg Loss: 0.2056


Epoch 4/7: 100%|██████████| 964/964 [02:12<00:00,  7.26it/s, loss=0.0144]


Epoch 4/7 - Avg Loss: 0.1039


Epoch 5/7: 100%|██████████| 964/964 [02:12<00:00,  7.26it/s, loss=0.000773]


Epoch 5/7 - Avg Loss: 0.0604


Epoch 6/7: 100%|██████████| 964/964 [02:12<00:00,  7.26it/s, loss=0.266]


Epoch 6/7 - Avg Loss: 0.0380


Epoch 7/7: 100%|██████████| 964/964 [02:12<00:00,  7.26it/s, loss=0.0021]


Epoch 7/7 - Avg Loss: 0.0268
Iteration: 25
{'iteration': 25, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 7, 'max_length': 256, 'learning_rate': 2e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(0, 3, 7, 8, 9, 11)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 947.0400605201721, 'accuracy': 0.7624481327800829, 'roc_auc': 0.7597007052784408}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 482/482 [02:01<00:00,  3.98it/s, loss=0.561]


Epoch 1/7 - Avg Loss: 0.6077


Epoch 2/7: 100%|██████████| 482/482 [02:01<00:00,  3.95it/s, loss=0.374]


Epoch 2/7 - Avg Loss: 0.4302


Epoch 3/7: 100%|██████████| 482/482 [02:01<00:00,  3.96it/s, loss=0.113]


Epoch 3/7 - Avg Loss: 0.2537


Epoch 4/7: 100%|██████████| 482/482 [02:01<00:00,  3.96it/s, loss=0.151]


Epoch 4/7 - Avg Loss: 0.1413


Epoch 5/7: 100%|██████████| 482/482 [02:01<00:00,  3.96it/s, loss=0.0223]


Epoch 5/7 - Avg Loss: 0.0875


Epoch 6/7: 100%|██████████| 482/482 [02:01<00:00,  3.95it/s, loss=0.087]


Epoch 6/7 - Avg Loss: 0.0584


Epoch 7/7: 100%|██████████| 482/482 [02:01<00:00,  3.96it/s, loss=0.00184]


Epoch 7/7 - Avg Loss: 0.0421
Iteration: 26
{'iteration': 26, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 7, 'max_length': 256, 'learning_rate': 2e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(3, 4, 5, 8, 10, 11)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 870.0362865924835, 'accuracy': 0.7567427385892116, 'roc_auc': 0.7546343373264671}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 482/482 [01:09<00:00,  6.97it/s, loss=0.42]


Epoch 1/7 - Avg Loss: 0.6093


Epoch 2/7: 100%|██████████| 482/482 [01:09<00:00,  6.95it/s, loss=0.385]


Epoch 2/7 - Avg Loss: 0.4698


Epoch 3/7: 100%|██████████| 482/482 [01:09<00:00,  6.93it/s, loss=0.508]


Epoch 3/7 - Avg Loss: 0.3427


Epoch 4/7: 100%|██████████| 482/482 [01:09<00:00,  6.94it/s, loss=0.158]


Epoch 4/7 - Avg Loss: 0.2337


Epoch 5/7: 100%|██████████| 482/482 [01:09<00:00,  6.94it/s, loss=0.0619]


Epoch 5/7 - Avg Loss: 0.1662


Epoch 6/7: 100%|██████████| 482/482 [01:09<00:00,  6.95it/s, loss=0.206]


Epoch 6/7 - Avg Loss: 0.1254


Epoch 7/7: 100%|██████████| 482/482 [01:09<00:00,  6.94it/s, loss=0.329]


Epoch 7/7 - Avg Loss: 0.0995
Iteration: 27
{'iteration': 27, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 7, 'max_length': 128, 'learning_rate': 1e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(1, 4, 5)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 496.5348846912384, 'accuracy': 0.7546680497925311, 'roc_auc': 0.7522145308022338}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 964/964 [00:52<00:00, 18.38it/s, loss=0.302]


Epoch 1/3 - Avg Loss: 0.5279


Epoch 2/3: 100%|██████████| 964/964 [00:52<00:00, 18.41it/s, loss=0.49]


Epoch 2/3 - Avg Loss: 0.2955


Epoch 3/3: 100%|██████████| 964/964 [00:52<00:00, 18.42it/s, loss=0.00917]


Epoch 3/3 - Avg Loss: 0.1076
Iteration: 28
{'iteration': 28, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 3, 'max_length': 64, 'learning_rate': 5e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(5, 10, 11)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 163.79389786720276, 'accuracy': 0.7785269709543569, 'roc_auc': 0.7707290414847281}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 241/241 [02:12<00:00,  1.82it/s, loss=0.423]


Epoch 1/7 - Avg Loss: 0.5796


Epoch 2/7: 100%|██████████| 241/241 [02:12<00:00,  1.82it/s, loss=0.478]


Epoch 2/7 - Avg Loss: 0.3516


Epoch 3/7: 100%|██████████| 241/241 [02:12<00:00,  1.82it/s, loss=0.277]


Epoch 3/7 - Avg Loss: 0.1658


Epoch 4/7: 100%|██████████| 241/241 [02:12<00:00,  1.82it/s, loss=0.0463]


Epoch 4/7 - Avg Loss: 0.0816


Epoch 5/7: 100%|██████████| 241/241 [02:12<00:00,  1.82it/s, loss=0.0194]


Epoch 5/7 - Avg Loss: 0.0467


Epoch 6/7: 100%|██████████| 241/241 [02:12<00:00,  1.82it/s, loss=0.00391]


Epoch 6/7 - Avg Loss: 0.0279


Epoch 7/7: 100%|██████████| 241/241 [02:12<00:00,  1.82it/s, loss=0.051]


Epoch 7/7 - Avg Loss: 0.0172
Iteration: 29
{'iteration': 29, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 7, 'max_length': 256, 'learning_rate': 5e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(3, 6)', 'num_trainable_layers': 2, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 944.2103445529938, 'accuracy': 0.774896265560166, 'roc_auc': 0.7693744936079043}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 964/964 [02:25<00:00,  6.60it/s, loss=0.511]


Epoch 1/3 - Avg Loss: 0.5553


Epoch 2/3: 100%|██████████| 964/964 [02:25<00:00,  6.60it/s, loss=0.807]


Epoch 2/3 - Avg Loss: 0.4075


Epoch 3/3: 100%|██████████| 964/964 [02:25<00:00,  6.61it/s, loss=0.329]


Epoch 3/3 - Avg Loss: 0.2940
Iteration: 30
{'iteration': 30, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 3, 'max_length': 256, 'learning_rate': 1e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(3, 7)', 'num_trainable_layers': 2, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 455.6333649158478, 'accuracy': 0.7551867219917012, 'roc_auc': 0.7409491079342465}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 241/241 [01:05<00:00,  3.67it/s, loss=0.685]


Epoch 1/5 - Avg Loss: 0.6272


Epoch 2/5: 100%|██████████| 241/241 [01:05<00:00,  3.67it/s, loss=0.696]


Epoch 2/5 - Avg Loss: 0.5050


Epoch 3/5: 100%|██████████| 241/241 [01:05<00:00,  3.67it/s, loss=0.416]


Epoch 3/5 - Avg Loss: 0.3967


Epoch 4/5: 100%|██████████| 241/241 [01:05<00:00,  3.67it/s, loss=0.309]


Epoch 4/5 - Avg Loss: 0.3100


Epoch 5/5: 100%|██████████| 241/241 [01:05<00:00,  3.66it/s, loss=0.154]


Epoch 5/5 - Avg Loss: 0.2603
Iteration: 31
{'iteration': 31, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 5, 'max_length': 128, 'learning_rate': 1e-05, 'class_weights': '[1.0, 2.0]', 'trainable_layers': '(7,)', 'num_trainable_layers': 1, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 338.69591999053955, 'accuracy': 0.7406639004149378, 'roc_auc': 0.7452210180588258}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 241/241 [00:31<00:00,  7.61it/s, loss=0.646]


Epoch 1/3 - Avg Loss: 0.6443


Epoch 2/3: 100%|██████████| 241/241 [00:32<00:00,  7.53it/s, loss=0.404]


Epoch 2/3 - Avg Loss: 0.4980


Epoch 3/3: 100%|██████████| 241/241 [00:31<00:00,  7.58it/s, loss=0.256]


Epoch 3/3 - Avg Loss: 0.3852
Iteration: 32
{'iteration': 32, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 3, 'max_length': 64, 'learning_rate': 2e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(1, 5, 6, 7, 8, 10)', 'num_trainable_layers': 6, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 102.50586271286011, 'accuracy': 0.729253112033195, 'roc_auc': 0.7253981805230045}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.692]


Epoch 1/3 - Avg Loss: 0.5508


Epoch 2/3: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.375]


Epoch 2/3 - Avg Loss: 0.3988


Epoch 3/3: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.225]


Epoch 3/3 - Avg Loss: 0.2686
Iteration: 33
{'iteration': 33, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 3, 'max_length': 64, 'learning_rate': 2e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(1, 3, 4, 10, 11)', 'num_trainable_layers': 5, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 123.27388525009155, 'accuracy': 0.7598547717842323, 'roc_auc': 0.7462590878688845}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 482/482 [01:06<00:00,  7.24it/s, loss=0.512]


Epoch 1/3 - Avg Loss: 0.5815


Epoch 2/3: 100%|██████████| 482/482 [01:06<00:00,  7.23it/s, loss=0.627]


Epoch 2/3 - Avg Loss: 0.4753


Epoch 3/3: 100%|██████████| 482/482 [01:06<00:00,  7.21it/s, loss=0.288]


Epoch 3/3 - Avg Loss: 0.4082
Iteration: 34
{'iteration': 34, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 3, 'max_length': 128, 'learning_rate': 1e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(1, 3, 4, 6, 9)', 'num_trainable_layers': 5, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 210.56858825683594, 'accuracy': 0.7074688796680498, 'roc_auc': 0.6840015443259615}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 964/964 [02:22<00:00,  6.78it/s, loss=0.52]


Epoch 1/3 - Avg Loss: 0.5658


Epoch 2/3: 100%|██████████| 964/964 [02:22<00:00,  6.78it/s, loss=0.105]


Epoch 2/3 - Avg Loss: 0.4328


Epoch 3/3: 100%|██████████| 964/964 [02:22<00:00,  6.78it/s, loss=0.355]


Epoch 3/3 - Avg Loss: 0.3396
Iteration: 35
{'iteration': 35, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 8, 'num_epochs': 3, 'max_length': 256, 'learning_rate': 1e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(0, 2, 9)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 444.59826922416687, 'accuracy': 0.7432572614107884, 'roc_auc': 0.7279773136341143}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/3: 100%|██████████| 241/241 [00:34<00:00,  7.05it/s, loss=0.586]


Epoch 1/3 - Avg Loss: 0.6557


Epoch 2/3: 100%|██████████| 241/241 [00:34<00:00,  7.02it/s, loss=0.586]


Epoch 2/3 - Avg Loss: 0.5474


Epoch 3/3: 100%|██████████| 241/241 [00:34<00:00,  7.06it/s, loss=0.562]


Epoch 3/3 - Avg Loss: 0.4749
Iteration: 36
{'iteration': 36, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 3, 'max_length': 64, 'learning_rate': 1e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(2, 5, 10)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 109.3762526512146, 'accuracy': 0.7261410788381742, 'roc_auc': 0.7195612809205052}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 482/482 [01:09<00:00,  6.96it/s, loss=0.712]


Epoch 1/5 - Avg Loss: 0.5891


Epoch 2/5: 100%|██████████| 482/482 [01:09<00:00,  6.95it/s, loss=0.065]


Epoch 2/5 - Avg Loss: 0.3559


Epoch 3/5: 100%|██████████| 482/482 [01:09<00:00,  6.94it/s, loss=0.378]


Epoch 3/5 - Avg Loss: 0.1647


Epoch 4/5: 100%|██████████| 482/482 [01:09<00:00,  6.94it/s, loss=0.0446]


Epoch 4/5 - Avg Loss: 0.0786


Epoch 5/5: 100%|██████████| 482/482 [01:09<00:00,  6.95it/s, loss=0.0133]


Epoch 5/5 - Avg Loss: 0.0390
Iteration: 37
{'iteration': 37, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 5, 'max_length': 128, 'learning_rate': 5e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(0, 1, 4)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 357.22591829299927, 'accuracy': 0.7712655601659751, 'roc_auc': 0.7674816067515321}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 482/482 [02:15<00:00,  3.56it/s, loss=0.38]


Epoch 1/5 - Avg Loss: 0.6010


Epoch 2/5: 100%|██████████| 482/482 [02:15<00:00,  3.55it/s, loss=0.134]


Epoch 2/5 - Avg Loss: 0.3911


Epoch 3/5: 100%|██████████| 482/482 [02:15<00:00,  3.55it/s, loss=0.389]


Epoch 3/5 - Avg Loss: 0.2016


Epoch 4/5: 100%|██████████| 482/482 [02:15<00:00,  3.55it/s, loss=0.11]


Epoch 4/5 - Avg Loss: 0.1003


Epoch 5/5: 100%|██████████| 482/482 [02:15<00:00,  3.55it/s, loss=0.0131]


Epoch 5/5 - Avg Loss: 0.0582
Iteration: 38
{'iteration': 38, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 5, 'max_length': 256, 'learning_rate': 2e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(2,)', 'num_trainable_layers': 1, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 696.1137125492096, 'accuracy': 0.770746887966805, 'roc_auc': 0.7645348805594375}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 241/241 [01:04<00:00,  3.75it/s, loss=0.583]


Epoch 1/5 - Avg Loss: 0.6245


Epoch 2/5: 100%|██████████| 241/241 [01:04<00:00,  3.76it/s, loss=0.583]


Epoch 2/5 - Avg Loss: 0.4503


Epoch 3/5: 100%|██████████| 241/241 [01:04<00:00,  3.75it/s, loss=0.156]


Epoch 3/5 - Avg Loss: 0.2815


Epoch 4/5: 100%|██████████| 241/241 [01:04<00:00,  3.75it/s, loss=0.11]


Epoch 4/5 - Avg Loss: 0.1707


Epoch 5/5: 100%|██████████| 241/241 [01:04<00:00,  3.75it/s, loss=0.157]


Epoch 5/5 - Avg Loss: 0.1162
Iteration: 39
{'iteration': 39, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 5, 'max_length': 128, 'learning_rate': 2e-05, 'class_weights': '[1.0, 1.0]', 'trainable_layers': '(2, 4)', 'num_trainable_layers': 2, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 331.45816826820374, 'accuracy': 0.7660788381742739, 'roc_auc': 0.7606245819716256}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 241/241 [00:33<00:00,  7.17it/s, loss=0.357]


Epoch 1/5 - Avg Loss: 0.5342


Epoch 2/5: 100%|██████████| 241/241 [00:33<00:00,  7.15it/s, loss=0.127]


Epoch 2/5 - Avg Loss: 0.3267


Epoch 3/5: 100%|██████████| 241/241 [00:33<00:00,  7.15it/s, loss=0.0325]


Epoch 3/5 - Avg Loss: 0.1439


Epoch 4/5: 100%|██████████| 241/241 [00:33<00:00,  7.15it/s, loss=0.0261]


Epoch 4/5 - Avg Loss: 0.0678


Epoch 5/5: 100%|██████████| 241/241 [00:33<00:00,  7.15it/s, loss=0.0323]


Epoch 5/5 - Avg Loss: 0.0370
Iteration: 40
{'iteration': 40, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 5, 'max_length': 64, 'learning_rate': 5e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(1, 4, 7, 11)', 'num_trainable_layers': 4, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 175.31312894821167, 'accuracy': 0.7645228215767634, 'roc_auc': 0.7545837660889945}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.276]


Epoch 1/5 - Avg Loss: 0.5315


Epoch 2/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.195]


Epoch 2/5 - Avg Loss: 0.3196


Epoch 3/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.0221]


Epoch 3/5 - Avg Loss: 0.1392


Epoch 4/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.0578]


Epoch 4/5 - Avg Loss: 0.0672


Epoch 5/5: 100%|██████████| 482/482 [00:38<00:00, 12.51it/s, loss=0.0011]


Epoch 5/5 - Avg Loss: 0.0330
Iteration: 41
{'iteration': 41, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 16, 'num_epochs': 5, 'max_length': 64, 'learning_rate': 5e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(0, 2, 6, 7, 10)', 'num_trainable_layers': 5, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 199.93034720420837, 'accuracy': 0.7702282157676349, 'roc_auc': 0.76266483232644}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7: 100%|██████████| 241/241 [02:09<00:00,  1.86it/s, loss=0.5]


Epoch 1/7 - Avg Loss: 0.5615


Epoch 2/7: 100%|██████████| 241/241 [02:09<00:00,  1.85it/s, loss=0.366]


Epoch 2/7 - Avg Loss: 0.4186


Epoch 3/7: 100%|██████████| 241/241 [02:09<00:00,  1.85it/s, loss=0.164]


Epoch 3/7 - Avg Loss: 0.2719


Epoch 4/7: 100%|██████████| 241/241 [02:10<00:00,  1.85it/s, loss=0.27]


Epoch 4/7 - Avg Loss: 0.1547


Epoch 5/7: 100%|██████████| 241/241 [02:09<00:00,  1.85it/s, loss=0.0483]


Epoch 5/7 - Avg Loss: 0.0992


Epoch 6/7: 100%|██████████| 241/241 [02:09<00:00,  1.85it/s, loss=0.0871]


Epoch 6/7 - Avg Loss: 0.0662


Epoch 7/7: 100%|██████████| 241/241 [02:09<00:00,  1.85it/s, loss=0.0113]


Epoch 7/7 - Avg Loss: 0.0445
Iteration: 42
{'iteration': 42, 'dataset_path': '/content/drive/MyDrive/arabic_fake_news/FASSILA/cleaned_data.csv', 'dataset_size': 9636, 'batch_size': 32, 'num_epochs': 7, 'max_length': 256, 'learning_rate': 2e-05, 'class_weights': '[2.0, 1.0]', 'trainable_layers': '(7, 9, 11)', 'num_trainable_layers': 3, 'device': 'cuda', 'cuda_available': True, 'torch_version': '2.5.1+cu124', 'transformers_version': '4.48.3', 'execution_time': 927.8011448383331, 'accuracy': 0.766597510373444, 'roc_auc': 0.7570035726132279}
--------------------------------------------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/7:  84%|████████▍ | 203/241 [01:53<00:21,  1.78it/s, loss=0.463]