# About

Examples of how to use the `LuminarSequenceDetector`, which is based on the `LuminarSequenceClassifier`.

In [41]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell is executed, so edits to the project's source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [42]:
import gc
import os

import torch
from IPython.display import display, HTML

from luminar.detector import LuminarSequenceDetector
from luminar.utils.cuda import get_best_device
from luminar.sequence_classifier import LuminarSequence
from luminar.utils import LuminarSequenceTrainingConfig, ConvolutionalLayerSpec
from luminar.utils.visualization import visualize_detection

# Release memory before the detector is loaded.
# Garbage is collected first so that objects kept alive only by reference
# cycles are destroyed before the CUDA caching allocator is asked to give its
# unused blocks back; flushing the cache before collecting would leave that
# memory held.
gc.collect()
if torch.cuda.is_available():
    # Run the flush in the context of the currently selected device.
    with torch.cuda.device(torch.cuda.current_device()):
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

In [43]:
class Config:
    """Notebook-wide settings.

    Attributes:
        MODEL_PATH: Location of the trained model that is handed to
            ``LuminarSequenceDetector`` as ``model_path``. It is read from the
            ``LUMINAR_MODEL_PATH`` environment variable when that is set, and
            otherwise falls back to the original checkpoint path so existing
            runs behave exactly as before.
    """

    # The default is an absolute path on the authors' storage; set
    # LUMINAR_MODEL_PATH to run the notebook against a checkpoint elsewhere.
    MODEL_PATH = os.environ.get(
        "LUMINAR_MODEL_PATH",
        "/storage/projects/boenisch/PrismAI/models/luminar_sequence/PrismAI_v2-encoded-gpt2/e1s2k2du",
    )

In [44]:
# Build the detector from the configured checkpoint on the device chosen by
# get_best_device(). Features are computed with "gpt2" here.
# NOTE(review): "tiiuae/falcon-7b" was noted by the author at this spot,
# presumably as an alternative feature agent; confirm before switching.
detector = LuminarSequenceDetector(
    model_path=Config.MODEL_PATH,
    feature_agent="gpt2",
    device=get_best_device(),
)

Loading LuminarSequenceDetector from /storage/projects/boenisch/PrismAI/models/luminar_sequence/PrismAI_v2-encoded-gpt2/e1s2k2du to device cuda:3
LuminarSequenceTrainingConfig(feature_len=512, num_intermediate_likelihoods=13, apply_delta_augmentation=False, apply_product_augmentation=True, conv_layer_shapes=[[64, 5, 1], [128, 5, 1], [64, 3, 1]], projection_dim=64, lstm_hidden_dim=128, lstm_layers=1, stack_spans=4, hf_dataset='TheItCrOw/PrismAI_v2-encoded-gpt2', dataset_root_path='/storage/projects/stoeckel/prismai/encoded/fulltext/', models_root_path='/storage/projects/boenisch/PrismAI/models/luminar_sequence/', domain=None, agent='gpt_4o_mini_gemma2_9b', feature_agent='gpt2', max_epochs=100, batch_size=128, early_stopping_patience=8, rescale_features=False, kfold=3, learning_rate=0.004, seed=42)
Loaded.


[nltk_data] Downloading package punkt to
[nltk_data]     /home/staff_homes/kboenisc/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [45]:
# Sample text that is passed to the detector below. The newline right after
# the opening quotes and the one before the closing quotes are part of the
# string.
document = """
What a moment to remember! Zlatan Ibrahimovic's overhead volley against England was nothing short of spectacular. Did you see that? The way he connected with the ball was pure magic! It’s not every day we witness such brilliance on the pitch. What are your thoughts on this jaw-dropping goal?
"""

In [46]:
# Report the size of the input; len() on a str counts characters.
print("Document length:", len(document))

# Run the detector on the raw text. The return value is kept in `result`
# because the visualization cell further down consumes it, and is printed
# here unmodified for inspection.
result = detector.detect(document)
print(result)

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Document length: 294
tensor([[[6.3622e-06, 7.7874e-08, 5.4592e-33, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
          0.0000e+00, 0.0000e+00, 1.0516e-02],
         [3.2084e-05, 2.6067e-08, 1.8492e-08, 7.5986e-08, 1.3105e-07,
          1.1779e-09, 1.5003e-12, 1.5431e-15, 1.7344e-17, 1.8784e-19,
          8.3984e-30, 0.0000e+00, 1.5076e-03],
         [7.7152e-06, 9.8323e-05, 4.0813e-05, 1.8469e-05, 1.5947e-05,
          7.0180e-06, 8.3697e-05, 1.3514e-05, 1.4912e-09, 1.1850e-10,
          8.3702e-13, 1.8794e-21, 3.3894e-02],
         [4.8900e-05, 2.5890e-13, 2.0744e-09, 1.3011e-08, 3.6538e-08,
          7.7174e-10, 2.2685e-09, 1.6650e-11, 7.7822e-12, 1.1519e-09,
          2.3775e-13, 1.7161e-40, 7.4561e-02],
         [7.2630e-06, 4.6461e-10, 7.2275e-08, 1.2999e-09, 4.2903e-10,
          1.4010e-11, 2.1255e-11, 8.6406e-12, 4.5402e-12, 1.2517e-10,
          1.8555e-16, 1.6911e-35, 1.4446e-01],
         [1.6122e-05, 1.6164e-08, 2.3352e-08

In [47]:
# Turn the document and its detection result into an HTML string ...
html_output = visualize_detection(document, result)
# ... and make it the cell's last expression so the notebook renders it.
HTML(html_output)