# BERT Product Rating Predictor


## 1. Import Statements

---



In [None]:
%%capture
!pip install transformers

In [None]:
import torch
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, BertModel, BertForSequenceClassification

In [None]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cpu'

## 2. Load the Data

---


Here, we get the data from the GitHub repository for this project.

In [None]:
# Download the reviews CSV from the project's GitHub repository and keep only
# the review text and its star rating.
github_url = 'https://raw.githubusercontent.com/csbanon/bert-product-rating-predictor/master/data/reviews/latest/reviews_comments_stars.csv'
columns_to_keep = ['comment', 'stars']
df = pd.read_csv(github_url)[columns_to_keep]
df

Unnamed: 0,comment,stars
0,I could sit here and write all about the specs...,5
1,A very reasonably priced laptop for basic comp...,4
2,"This is the best laptop deal you can get, full...",5
3,A few months after the purchase....It is still...,5
4,BUYER BE AWARE: This computer has Microsoft 10...,1
...,...,...
195760,I have not tried this camera without the SD ca...,5
195761,"Hello, I bought this item months ago and I tho...",1
195762,This is an incredible camera for the money!! ...,5
195763,Great cameras. Purchased some for my mother af...,5


In [None]:
# Hold out 20% of the reviews for testing; the fixed seed keeps the split reproducible.
train_dataset, test_dataset = train_test_split(df, test_size=0.2, random_state=1)

# Re-number BOTH splits from 0. ReviewsDataset fetches rows using the 0..len-1
# integers a DataLoader hands it, so a split that still carries the shuffled
# original row labels would return the wrong rows or fail the lookup.
train_dataset = train_dataset.reset_index(drop=True)
test_dataset = test_dataset.reset_index(drop=True)

## 3. Define the BERT Model

---



The following code defines the BERT model to be used for star rating predictions.

In [None]:
%%capture
# One output class per distinct star rating found in the data.
num_star_classes = len(df['stars'].unique())

# Pretrained BERT encoder with a sequence-classification head; attention maps
# and hidden states are not returned by the forward pass.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=num_star_classes,
    output_attentions=False,
    output_hidden_states=False,
)
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## 4. Load the Trained Model

---

To load the trained model, download the trained weights file from https://bit.ly/2VENkSB. Once downloaded, set `file_path` below to the location of that file (the cells that follow read it from Google Drive).

In [None]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the trained weights file inside the mounted Google Drive.
# Change this if the file was saved somewhere else.
file_path = '/content/drive/MyDrive/trained-model.bin'

In [None]:
# Load the trained weights. map_location remaps tensors that were saved on a
# GPU onto the current device, so the checkpoint also loads on CPU-only machines
# (without it torch.load raises "Attempting to deserialize object on a CUDA device").
state_dict = torch.load(file_path, map_location=device)

# Drop the position_ids entry when the checkpoint contains one; the default of
# None keeps this from raising KeyError when the key is absent.
# NOTE(review): presumably the installed transformers version no longer expects
# this key in the state dict -- confirm against the version in use.
state_dict.pop("bert.embeddings.position_ids", None)

model.load_state_dict(state_dict)
# Switch to evaluation mode for inference.
model.eval()

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [None]:
# Load the trained model.
# model.load_state_dict(torch.load(file_path))
# model.eval()

## 5. Define the Reviews Dataset

---



Here we define the reviews dataset, which converts each review into the tokenized tensors the model needs as input.

In [None]:
class ReviewsDataset(Dataset):
    """Dataset that turns rows of a reviews frame into tokenized tensors.

    Each item is a dict with:
      'input_ids': token ids, padded/truncated to ``max_length``.
      'attn_mask': the attention mask returned by the tokenizer.
      'label':     the star rating shifted to be 0-indexed (stars - 1).

    :df: DataFrame with a 'comment' column (review text) and a 'stars' column
         (anything ``int()`` accepts).
    :max_length: length every review is padded or truncated to.
    :tokenizer: optional tokenizer to reuse; when omitted, the
                'bert-base-uncased' tokenizer is loaded.
    """

    def __init__(self, df, max_length=512, tokenizer=None):
        self.df = df
        # Loading the tokenizer is comparatively expensive, so callers that
        # build many datasets can pass one in instead of reloading it each time.
        if tokenizer is None:
            tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Look rows up by POSITION, not by index label: samplers produce
        # 0..len-1, which only matches the labels of a freshly reset index.
        review = self.df['comment'].iloc[idx]
        # Labels are 0-indexed.
        label = int(self.df['stars'].iloc[idx]) - 1

        encoded = self.tokenizer(
            review,                      # Review to encode.
            add_special_tokens=True,
            max_length=self.max_length,  # Truncate all segments to max_length.
            padding='max_length',        # Pad all reviews with the [PAD] token to the max_length.
            return_attention_mask=True,  # Construct attention masks.
            truncation=True,
        )

        return {
            'input_ids': torch.tensor(encoded['input_ids']),
            'attn_mask': torch.tensor(encoded['attention_mask']),
            'label': torch.tensor(label),
        }

## 6. Predict the Star Rating

---

The following code takes a string comment and returns a predicted star rating.

In [None]:
def get_single_prediction(comment, model):
    """
    Predict a star rating from a review comment.

    The model is expected to already be on ``device`` and in evaluation mode.

    :comment: the string containing the review comment.
    :model: the model to be used for the prediction.
    :return: the predicted star rating (1-based), as a NumPy integer.
    """
    # Wrap the comment in a one-row frame. ReviewsDataset requires a 'stars'
    # column, so '0' is supplied as a placeholder; the label is not used here.
    df = pd.DataFrame({'comment': [comment], 'stars': ['0']})
    dataset = ReviewsDataset(df)

    # A single sample needs neither shuffling nor a worker subprocess
    # (spawning one only adds start-up cost).
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
    data = next(iter(data_loader))

    # Get the tokenization values.
    input_ids = data['input_ids'].to(device)
    mask = data['attn_mask'].to(device)

    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=mask)

    # outputs[0] holds one score per class; the best class index is 0-based,
    # so add 1 to turn it back into a star rating.
    best_class = torch.argmax(outputs[0], dim=1)
    star_predictions = (best_class + 1).cpu().numpy()

    return star_predictions[0]

You can change the review text below to make a custom prediction.

In [None]:
# Sample review to score; edit this text to try your own.
review = "This is a great product!"

# Run the review through the model to get its predicted star rating.
prediction = get_single_prediction(review, model)

print(prediction)

In [None]:
!pip install gradio

In [None]:
import gradio as gr

# Input widgets for the Gradio interface: a multi-line text box for the review
# and a radio selector for the rating the user would give it.
review_input = gr.Textbox(lines=5, label="Enter your review")
rating_input = gr.Radio(choices=[1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0], label="Select the rating")

def predict(review, rating):
    """
    Compare the user's rating with the model's predicted rating.

    :review: the review text entered by the user.
    :rating: the rating the user selected, or None when nothing was selected.
    :return: a message saying whether the user's rating is higher than, lower
             than, or the same as the predicted rating.
    """
    # With no selection there is nothing to compare against, and comparing
    # None with a number would raise a TypeError.
    if rating is None:
        return "Please select a rating before submitting."

    predicted_rating = get_single_prediction(review, model)

    # Matching ratings get their own sentence: "the same than" is not valid
    # English, and offering to switch ratings makes no sense when they agree.
    if rating == predicted_rating:
        return f"Your input rating ({rating}) is the same as our predicted rating ({predicted_rating})."

    comparison = "higher" if rating > predicted_rating else "lower"
    return f"Your input rating ({rating}) is {comparison} than our predicted rating ({predicted_rating}). Would you like to give the predicted rating instead?"

# Build the web UI around predict() (review + rating in, text message out) and launch it.
gr.Interface(fn=predict, inputs=[review_input, rating_input], outputs="text", title="Rating Classifier").launch()


In [None]:
%%writefile app.py

import streamlit as st

#st.write('Hello, *World!* :sunglasses:')

# NOTE(review): this cell is written to app.py and started with
# `streamlit run app.py`, so it runs as its own script. Nothing in this file
# defines or imports `get_single_prediction` or `model`; they must be made
# available inside app.py before the "Classify" button can work.

# Define the Streamlit interface
st.title("Rating Classifier")

# Text input for the review
text = st.text_input("Enter your review:")

# Combo box for selecting the rating
number = st.selectbox("Select the rating:", [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0])

# Button to trigger the classification
if st.button("Classify"):
    # Predict the star rating for the entered review.
    predicted_rating = get_single_prediction(text, model)

    # Report how the user's rating actually compares with the prediction
    # instead of always claiming it is higher.
    if number == predicted_rating:
        st.write(f"Your input rating ({number}) is the same as our predicted rating ({predicted_rating}).")
    else:
        comparison = "higher" if number > predicted_rating else "lower"
        st.write(f"Your input rating ({number}) is {comparison} than our predicted rating ({predicted_rating}). Would you like to give the predicted rating instead?")


In [None]:
# Print the response from ipv4.icanhazip.com (this machine's public IPv4 address).
# NOTE(review): presumably needed to get past the localtunnel landing page -- confirm.
! wget -q -O - ipv4.icanhazip.com

# Start the Streamlit app in the background and open a localtunnel to port 8501.
! streamlit run app.py & npx localtunnel --port 8501