In [1]:
import os

# CUDA-related environment variables, set in one place before any GPU library
# is imported by the cells below.
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging aid (it is
# documented as forcing synchronous kernel launches) -- confirm it is meant to
# stay enabled for full training runs.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "CUDA_LAUNCH_BLOCKING": "1",
        "TORCH_USE_CUDA_DSA": "1",
    }
)

In [2]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    RichProgressBar,
    ModelCheckpoint,
    EarlyStopping,
)

from models.models import LitXLMRobertaModel
from models.data import ArabicAbstractsDataModule, AraSumDataModule, DataModuleFromDataModules

In [3]:
# Seed every RNG Lightning knows about (Python, NumPy, PyTorch) before any
# stochastic object is created, so that "Restart Kernel -> Run All" gives
# repeatable runs. `workers=True` also derives dataloader-worker seeds from it.
SEED = 42
pl.seed_everything(SEED, workers=True)

# The two source corpora that are merged into one data module in the next cell.
arasum_data_module = AraSumDataModule()
# NOTE(review): `balance_ai_with_human=True` presumably equalises the number of
# AI-generated and human-written samples -- confirm in models.data.
abstracts_data_module = ArabicAbstractsDataModule(balance_ai_with_human=True)

In [4]:
# Wrap both corpus-specific data modules in a single data module that is
# handed to the Trainer for fitting and testing.
data_module = DataModuleFromDataModules(
    [
        arasum_data_module,
        abstracts_data_module,
    ]
)

In [5]:
# Materialise the datasets, then display the (train, val, test) sizes as the
# cell's output.
data_module.setup()
tuple(
    len(split)
    for split in (
        data_module.train_dataset,
        data_module.val_dataset,
        data_module.test_dataset,
    )
)

(8394, 1799, 1799)

In [6]:
# Instantiate the Lightning module with its default constructor arguments.
# The load-time message printed below this cell shows that the underlying
# xlm-roberta-base classifier head is newly initialised, i.e. it still has to
# be trained.
model = LitXLMRobertaModel()

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Early stopping on the validation loss.
# `patience` is counted in validation checks, not in epochs: the Trainer in
# this notebook is configured with val_check_interval=0.25, so patience=3 ends
# training after three consecutive checks without a new best val_loss.
early_stopping_callback = EarlyStopping(
    monitor="val_loss",  # logged metric to watch
    mode="min",  # a lower value is an improvement
    patience=3,  # validation checks (not epochs) tolerated without improvement
    min_delta=0.0,  # minimum change that still counts as an improvement
    verbose=True,  # log every improvement and the final stop decision
)

In [8]:
# Keep only the single checkpoint with the lowest validation loss.
# NOTE(review): if a file with this name already exists from an earlier run,
# Lightning may write a versioned name (e.g. "best-checkpoint-v1.ckpt")
# instead of overwriting -- read `checkpoint_callback.best_model_path` after
# training to get the actual location.
checkpoint_callback = ModelCheckpoint(
    dirpath="outputs/Arabic/AraSumAbstractsDetector/checkpoints",  # output directory
    filename="best-checkpoint",  # base file name (".ckpt" is appended)
    monitor="val_loss",  # metric that ranks checkpoints
    mode="min",  # lower val_loss is better
    save_top_k=1,  # retain the best checkpoint only
)

In [9]:
# Build the Trainer: a single device on whichever accelerator is detected, at
# most 100 epochs, validation run every quarter of a training epoch, and the
# early-stopping and checkpoint callbacks attached.
trainer = pl.Trainer(
    accelerator="auto",
    devices=1,
    max_epochs=100,
    check_val_every_n_epoch=1,
    val_check_interval=0.25,
    callbacks=[
        early_stopping_callback,
        checkpoint_callback,
    ],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/majed_alshaibani/Projects/ai-content-detection-dataset/venv/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


In [10]:
# Run the training loop; train/validation loaders come from the combined data
# module, and the attached callbacks decide when to stop and what to save.
trainer.fit(model=model, datamodule=data_module)

You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name            | Type                                | Params | Mode 
---------------------------------------------------------------------------------
0  | val_accuracy    | BinaryAccuracy                      | 0      | train
1  | test_accuracy   | BinaryAccuracy                      | 0      | train
2  | train_accuracy  | BinaryAccuracy                      | 0      | train
3  | xlm_roberta     | XLMRobertaForSequenceClassification | 278 M  | eval 
4  | fc              | Linear                              | 3      | train
5  | activation      | Sigmoid                           

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/majed_alshaibani/Projects/ai-content-detection-dataset/venv/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=254` in the `DataLoader` to improve performance.
/home/majed_alshaibani/Projects/ai-content-detection-dataset/venv/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=254` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.308


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.116 >= min_delta = 0.0. New best score: 0.192


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.047 >= min_delta = 0.0. New best score: 0.145


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.046 >= min_delta = 0.0. New best score: 0.099


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.066


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.042


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.042. Signaling Trainer to stop.


In [11]:
# Reload the best weights found during training.
# Ask the ModelCheckpoint callback where it actually saved the best checkpoint
# instead of hard-coding the file name: when "best-checkpoint.ckpt" is already
# present from a previous run, Lightning saves under a versioned name
# ("best-checkpoint-v1.ckpt", ...), and the hard-coded path would silently load
# the stale file. The original path is kept as a fallback for the case where
# no training has happened in this kernel (best_model_path is then empty).
best_checkpoint_path = (
    checkpoint_callback.best_model_path
    or "outputs/Arabic/AraSumAbstractsDetector/checkpoints/best-checkpoint.ckpt"
)
model = LitXLMRobertaModel.load_from_checkpoint(best_checkpoint_path)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# Evaluate the reloaded model on the held-out test split; the returned list of
# metric dicts is displayed as the cell's output.
trainer.test(model=model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/majed_alshaibani/Projects/ai-content-detection-dataset/venv/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=254` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.0449126698076725,
  'test_acc': 0.9927737712860107,
  'test_precision': 0.9962600469589233,
  'test_recall': 0.9889553785324097,
  'test_f1': 0.9922348856925964}]