In [None]:
# Notebook metadata: author, version and a short project summary.
# The dict is a bare expression (not bound to any name), and this cell
# carries no execution count.
{
  "developer": "Swapnendu Banik",
  "version": "1.0.0",
  "projectDescription": """This project fine-tunes a DistilBERT model on the Amazon Customer Review dataset for sentiment analysis, 
  classifying reviews into categories like very positive, positive, neutral, negative, and very negative. 
  The goal is to create an efficient sentiment analysis tool for Amazon product reviews."""
}

In [1]:
## Inference script for the fine-tuned DistilBERT sentiment model

import os
import pickle
from functools import lru_cache

import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [2]:
# Locations of the fine-tuned model directory and the pickled label encoder.
# Built with os.path.join so they resolve on every OS: the previous literals
# hard-coded Windows "\" separators and relied on "\m", "\d" and "\l" not being
# recognised escape sequences, which newer Python versions warn about.
model_path = os.path.join("artifacts", "model", "distilbert_amazon_review_model")
encoder_path = os.path.join("artifacts", "label_encoder.pkl")


In [3]:
## Wrap artifact loading in a reusable function

def load_model_and_binarizer(model_path, encoder_path):
  """Load the three artifacts needed for inference from disk.

  Args:
    model_path: Location handed to ``from_pretrained`` for both the
      tokenizer and the sequence-classification model.
    encoder_path: Path of the pickled label encoder.

  Returns:
    A ``(model, tokenizer, label)`` tuple. The model comes first in the
    tuple even though the tokenizer is loaded first.
  """
  loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
  loaded_model = AutoModelForSequenceClassification.from_pretrained(model_path)

  # NOTE: unpickling can execute arbitrary code, so only point this at an
  # encoder file that you produced yourself.
  with open(encoder_path, "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

  return loaded_model, loaded_tokenizer, label_encoder

In [4]:
# Maps the numeric rating recovered from the label encoder to readable text.
_SENTIMENT_BY_RATING = {
    1: "Very Negative",
    2: "Negative",
    3: "Neutral",
    4: "Positive",
    5: "Very Positive"
}


@lru_cache(maxsize=1)
def _load_artifacts_cached(model_dir, encoder_file):
  """Load model, tokenizer and label encoder once per (model_dir, encoder_file).

  Only the most recent pair is kept so that at most one model stays in memory.
  Re-run this cell to force a reload after the files on disk have changed.
  """
  return load_model_and_binarizer(model_dir, encoder_file)


def run_inference(text):
  """Classify a review and return its sentiment as a human-readable string.

  Args:
    text: The review text to classify.

  Returns:
    One of "Very Negative", "Negative", "Neutral", "Positive" or
    "Very Positive", or "Unknown Sentiment" when the decoded rating is not
    in the 1-5 map.
  """
  # Reuse already-loaded artifacts instead of re-reading them on every call.
  model, tokenizer, label = _load_artifacts_cached(model_path, encoder_path)

  # padding=True pads only up to the longest sequence supplied, so a single
  # short review is no longer blown up to 512 tokens; truncation still caps
  # long reviews at 512 tokens.
  encoding = tokenizer(text,
                       truncation=True,
                       padding=True,
                       max_length=512,
                       return_tensors="pt")

  # Inference only: skip gradient tracking.
  with torch.no_grad():
    output = model(**encoding)

  # Select the single best class. The previous approach thresholded each
  # sigmoid independently at 0.3, which could mark several classes or none at
  # all -- not a valid single-label encoding for inverse_transform. Sigmoid is
  # monotonic, so the argmax of the raw logits equals the argmax of the
  # probabilities.
  logits = output.logits[0]
  best_class = int(torch.argmax(logits))

  # One-hot row with shape (1, n_classes): the encoder was fitted on a 2-D
  # array. NOTE(review): `label` is presumably a scikit-learn LabelBinarizer
  # fitted on ratings 1-5 -- confirm against the training notebook.
  preds = np.zeros((1, logits.shape[-1]))
  preds[0, best_class] = 1

  pred_val = label.inverse_transform(preds)

  # Convert the numeric rating into its sentiment label.
  return _SENTIMENT_BY_RATING.get(pred_val[0], "Unknown Sentiment")


In [10]:
## Run inference on hand-written sample reviews.
## This sample uses ambiguous wording that hedges between sentiments.

run_inference("Not Sure how i feel about it, not  bad but not neutral either")

'Neutral'

In [11]:
# Sample: very short, enthusiastic review.
run_inference("Amazing Product")

'Very Positive'

In [12]:
# Sample: lukewarm opening followed by a recommendation.
run_inference("Ok product, would recommend to friends!")

'Positive'

In [13]:
# Sample: mild, unenthusiastic review.
run_inference("Fine but not impressed")

'Neutral'

In [14]:
# Sample: single-word, strongly negative review.
run_inference("Pathetic")

'Very Negative'