# Notebook for running product-entity (token classification) predictions on text data

In [1]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

In [39]:
# Load the tokenizer and model
model_name = model_checkpoint = "distilbert-base-uncased-for-product-extraction/checkpoint-7050"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)



In [65]:
# Tag names; a tag's position in this list is the class index it stands for.
label_list = [
    'O',
    'B-PRODUCT',
    'I-PRODUCT',
]

# Sample product description used as the input text for inference.
# Adjacent string literals are concatenated into one single-line string.
example = (
    "Orland single desk is finished in solid oak with oak veneers and "
    "features striking heavy round brass knobs. "
    "Whilst this Orland single desk matches other pieces in the Orland range, "
    "it also suits a wide range of decors. "
    "This beautifully crafted piece of furniture is delivered fully assembled "
    "to your home, for your convenience."
)

In [66]:
# Tokenize the example into PyTorch tensors, truncating at 512 tokens.
inputs = tokenizer(example, truncation=True, max_length=512, return_tensors="pt")

# Forward pass with gradient tracking switched off (inference only).
with torch.no_grad():
    outputs = model(**inputs)

# For every token position, take the index of the highest-scoring class
# (the class scores sit on the last axis of the logits).
logits = outputs.logits
predictions = logits.argmax(dim=-1)

# Turn the input ids of the first sequence in the batch back into token strings.
first_sequence_ids = inputs["input_ids"][0]
tokens = tokenizer.convert_ids_to_tokens(first_sequence_ids)

# Translate each predicted class index of that sequence into its label name.
predicted_labels = [label_list[class_idx] for class_idx in predictions[0].tolist()]

In [67]:
# Show one "token -> label" row per position; the token is padded to a
# minimum width of 10 characters so the arrows line up for short tokens.
for token, label in zip(tokens, predicted_labels):
    row = f"{token:10} -> {label}"
    print(row)

[CLS]      -> O
or         -> O
##land     -> O
single     -> O
desk       -> O
is         -> O
finished   -> O
in         -> O
solid      -> O
oak        -> O
with       -> O
oak        -> O
ve         -> O
##neer     -> O
##s        -> O
and        -> O
features   -> O
striking   -> O
heavy      -> O
round      -> O
brass      -> O
knob       -> O
##s        -> O
.          -> O
whilst     -> O
this       -> O
or         -> B-PRODUCT
##land     -> I-PRODUCT
single     -> I-PRODUCT
desk       -> I-PRODUCT
matches    -> I-PRODUCT
other      -> I-PRODUCT
pieces     -> I-PRODUCT
in         -> I-PRODUCT
the        -> I-PRODUCT
or         -> I-PRODUCT
##land     -> I-PRODUCT
range      -> I-PRODUCT
,          -> O
it         -> O
also       -> O
suits      -> O
a          -> O
wide       -> O
range      -> O
of         -> O
decor      -> O
##s        -> O
.          -> O
this       -> O
beautifully -> O
crafted    -> O
piece      -> O
of         -> O
furniture  -> O
is         -> O
deliver