## Step 1: Import Libraries

In [6]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

## Step 2: Load Pretrained Tokenizer and Model

In [3]:
# Load pretrained tokenizer and model (fine-tuned for sentiment).
# The checkpoint name lives in one constant so the tokenizer and the model
# are always loaded from the same checkpoint.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)
model = DistilBertForSequenceClassification.from_pretrained(MODEL_NAME)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Last expression of the cell: moves the model and displays its architecture.
model.to(device)




vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

## Step 3: Define Inference Function

In [7]:
def predict_sentiment(text, model, tokenizer, max_length=64, device=None):
    """
    Predict the sentiment of a single input text with a sequence-classification model.

    The model is switched to eval mode, the text is tokenized (truncated to
    ``max_length`` tokens), and the class with the highest softmax probability
    is reported together with that probability.

    Args:
        text (str): input sentence. One example per call; the scalar
            ``.item()`` conversion below fails if several examples are passed.
        model: Huggingface model whose output exposes ``.logits``
        tokenizer: Huggingface tokenizer
        max_length (int): token limit used for truncation
        device (str | torch.device | None): device the tokenized inputs are
            moved to. When None (default), the device of the model's first
            parameter is used so inputs and weights cannot end up on
            different devices; falls back to 'cpu' if the model exposes no
            parameters.

    Returns:
        dict: {"label": "Negative" | "Positive", "confidence": float rounded to 4 decimals}
    """
    if device is None:
        # Follow the model instead of assuming CPU: a model that was moved to
        # the GPU would otherwise receive CPU tensors and fail in forward().
        try:
            device = next(model.parameters()).device
        except (AttributeError, StopIteration):
            device = "cpu"

    model.eval()
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=1)
        # .item() only succeeds for a single example, which keeps the
        # one-text-per-call contract explicit.
        pred_class = torch.argmax(probs, dim=1).item()
        # Probability of the predicted class for that single example.
        confidence = probs[0, pred_class].item()

    # Assumes the checkpoint orders its classes as 0 = negative, 1 = positive
    # -- TODO confirm against model.config.id2label when swapping checkpoints.
    label_map = {0: "Negative", 1: "Positive"}
    return {
        "label": label_map[pred_class],
        "confidence": round(confidence, 4)
    }


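## Step 4: Try Inference

Note: this checkpoint is a binary classifier (Negative / Positive only), so neutral or mixed sentences are still forced into one of the two classes, sometimes with high confidence — see the third example below.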

In [8]:
# Three probes: clearly positive, clearly negative, and a neutral/mixed one.
texts = [
    "I absolutely loved this movie. It was fantastic!",
    "The food was terrible and the service was slow.",
    "It's just okay, not good but not bad either.",
]

# Classify each sample in turn and print the text next to its prediction.
for text in texts:
    result = predict_sentiment(
        text,
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
    print(f"Text: {text}\nPrediction: {result}\n")


Text: I absolutely loved this movie. It was fantastic!
Prediction: {'label': 'Positive', 'confidence': 0.9999}

Text: The food was terrible and the service was slow.
Prediction: {'label': 'Negative', 'confidence': 0.9998}

Text: It's just okay, not good but not bad either.
Prediction: {'label': 'Positive', 'confidence': 0.9954}

