In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"

In [2]:
import logging

loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]
for logger in loggers:
    if "transformers" in logger.name.lower():
        logger.setLevel(logging.ERROR)

In [3]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    RichProgressBar,
    ModelCheckpoint,
    EarlyStopping,
)

from models.models import LitXLMRobertaModel
from models.data import AraSumDataModule

In [4]:
data_module = AraSumDataModule(
    generated_by=[
        "allam",
    ]
)

In [5]:
data_module.setup()
len(data_module.train_dataset), len(data_module.val_dataset), len(data_module.test_dataset)

(4200, 900, 900)

In [6]:
model = LitXLMRobertaModel()

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# # Add EarlyStopping and ModelCheckpoint callbacks
early_stopping_callback = EarlyStopping(
    monitor="val_loss",  # Monitor validation loss
    min_delta=0.0,  # Minimum change to qualify as improvement
    patience=3,  # Stop training after this epochs without improvement
    verbose=True,  # Print information at each validation step
    mode="min",  # Mode to minimize the monitored metric
)

In [10]:
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  # Monitor validation loss
    dirpath="trained_detectors/Arabic/AraSum/AllamAIDetector/checkpoints",  # Directory to save checkpoints
    filename="best-checkpoint",  # Filename for the best checkpoint
    save_top_k=1,  # Save only the best checkpoint
    mode="min",  # Mode to minimize the monitored metric
)

In [11]:
# # Initialize a trainer with callbacks
trainer = pl.Trainer(
    devices=1,
    max_epochs=100,
    accelerator="auto",
    val_check_interval=0.25,
    check_val_every_n_epoch=1,
    callbacks=[RichProgressBar(), early_stopping_callback, checkpoint_callback],
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [12]:
# # Train the model
trainer.fit(model, datamodule=data_module)

You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Metric val_loss improved. New best score: 0.392


Metric val_loss improved by 0.087 >= min_delta = 0.0. New best score: 0.305


Metric val_loss improved by 0.098 >= min_delta = 0.0. New best score: 0.207


Metric val_loss improved by 0.065 >= min_delta = 0.0. New best score: 0.142


Metric val_loss improved by 0.072 >= min_delta = 0.0. New best score: 0.070


Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 0.054


Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.046


Monitored metric val_loss did not improve in the last 3 records. Best score: 0.046. Signaling Trainer to stop.


In [15]:
model = model.__class__.load_from_checkpoint(
    "trained_detectors/Arabic/AraSum/AllamAIDetector/checkpoints/best-checkpoint.ckpt"
)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
trainer.test(
    model,
    datamodule=data_module,
    # ckpt_path=checkpoint_callback.best_model_path,
    # ckpt_path="trained_detectors/Arabic/AraSum/AllamAIDetector/checkpoints/best-checkpoint.ckpt",
)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

[{'test_loss': 0.054507941007614136,
  'test_acc': 0.9900000095367432,
  'test_precision': 0.991575300693512,
  'test_recall': 0.9890356063842773,
  'test_f1': 0.9899824857711792}]