# Notebook for running predictions on text data

In [1]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

In [6]:
# Checkpoint identifier handed to `from_pretrained` below.
# NOTE(review): looks like a relative path to a local training-output directory — confirm.
model_checkpoint = "distilbert-base-uncased-for-product-extraction/checkpoint-642"
model_name = model_checkpoint  # second name bound to the same string

# Restore the token-classification model and its tokenizer from that checkpoint
model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [7]:
# Label names, looked up by predicted class id (position in the list = class id).
# NOTE(review): order presumably mirrors the label ids used during fine-tuning — confirm.
label_list = ["O", "B-PRODUCT", "I-PRODUCT"]


# Sample input for the model: one multi-line string containing the text of a
# product listing (a quilt). It is passed unchanged to the tokenizer in the
# inference cell.
# NOTE(review): fused words such as "QuiltSave to Favorites" suggest the text was
# extracted from a web page without separators — presumably intentional; confirm.
example = """BeddingBeddingQuilts & Coverlets
Cozysoft Organic Cotton Jersey Heathered Ivory King QuiltSave to FavoritesCozysoft Organic Cotton Jersey Heathered Ivory King Quilt
$349.95
31 ReviewsSKU: 410550

Step1.
Color
Heathered Ivory
3 options

Heathered Ivory
Heathered Ivory
Grey
Grey
Midnight Navy
Midnight Navy

Step2.
Size
King
2 options

Full/Queen
King
Availability of Cozysoft Organic Cotton Jersey Heathered Ivory King Quilt
Ship It
In stock and ready to ship 
ZIP Code: 60540ZIP code. Update.
Change zip
Ships free
FREE & FAST STORE PICKUP
Select Store
Cozysoft Organic Cotton Jersey Heathered Ivory King Quilt
QuantityDecrease
1
Increase
$349.95
Add to Cart
Add To RegistrySave to FavoritesCozysoft Organic Cotton Jersey Heathered Ivory King Quilt
On DisplayOn Display in My Store?
Crate and Barrel credit cards
Learn how to earn $35 in Reward Dollars.*


Details
Traditional kantha quilting gets a modern casual-yet-chic update in heathered ivory jersey. Softer than your favorite sleeping tee, the organic cotton fabric is stitched by hand into narrow channels for a dotted effect and a supersoft ribbed feel. The perpendicularly sewn border finishes our quilt with subtle linear contrast. For an ultra-inviting bed, add the matching pillow shams and coordinating sheet set and duvet cover.

Cozysoft Organic Cotton Jersey Heathered Ivory King Quilt 96"W x108"D
Hand-embroidered
100% organic cotton
100% organic cotton fill
All-natural AZO-free dyes
No added phthalates
Binding tailoring
Machine wash cold, tumble dry low; use only non-chlorine bleach as needed
Do not dry clean or iron
Made in India"""

In [8]:
# Tokenize the sample text into a single-sequence batch of PyTorch tensors.
# NOTE: with truncation=True and max_length=512, anything beyond 512 tokens is
# dropped silently, so the tail of a long page gets no predictions.
inputs = tokenizer(example, return_tensors="pt", max_length=512, truncation=True)

# Keep the inputs on the same device as the model so the forward pass also works
# when the model has been moved off the CPU.
inputs = inputs.to(model.device)

# Get model predictions (inference only, so no gradient tracking)
with torch.no_grad():
    outputs = model(**inputs)

# Get the predicted token class indices: arg-max over the last axis of the logits
# (same axis as dim=2 for the 3-D logits tensor used here)
predictions = torch.argmax(outputs.logits, dim=-1)

# Convert the token IDs of the first (only) sequence back to token strings
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())

# Map predictions to labels. `.tolist()` yields plain Python ints and, unlike
# `.numpy()`, does not raise when the tensor lives on a non-CPU device.
predicted_labels = [label_list[pred] for pred in predictions[0].tolist()]

In [9]:
# Show one "token -> label" row per token; the token column is padded to 10 characters
formatted_rows = (
    f"{token:10} -> {label}"
    for token, label in zip(tokens, predicted_labels)
)
for row in formatted_rows:
    print(row)

[CLS]      -> O
bed        -> O
##ding     -> O
##bed      -> O
##ding     -> O
##quil     -> O
##ts       -> O
&          -> O
cover      -> O
##lets     -> O
cozy       -> O
##so       -> O
##ft       -> O
organic    -> O
cotton     -> O
jersey     -> O
heather    -> I-PRODUCT
##ed       -> I-PRODUCT
ivory      -> O
king       -> O
quilt      -> O
##sa       -> O
##ve       -> O
to         -> O
favorites  -> O
##co       -> O
##zy       -> O
##so       -> O
##ft       -> O
organic    -> O
cotton     -> O
jersey     -> O
heather    -> I-PRODUCT
##ed       -> I-PRODUCT
ivory      -> I-PRODUCT
king       -> I-PRODUCT
quilt      -> I-PRODUCT
$          -> O
34         -> O
##9        -> O
.          -> O
95         -> O
31         -> O
reviews    -> O
##sk       -> O
##u        -> O
:          -> O
410        -> O
##55       -> O
##0        -> O
step       -> O
##1        -> O
.          -> O
color      -> O
heather    -> O
##ed       -> O
ivory      -> O
3          -> O
options    -> O
