# 1. Install and import dependencies

In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
!pip install transformers requests beautifulsoup4

In [1]:
from transformers import AutoTokenizer , AutoModelForSequenceClassification
import torch 
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


# 2. Instantiate model

In [2]:
# Load the tokenizer and the sequence-classification model from the same
# pretrained checkpoint so that their vocabularies match.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# 3. Encode and calculate sentiment

In [3]:
# Encode one sample review into a PyTorch tensor of token ids.
sample_review = 'I Loved the product, it is great'
tokens = tokenizer.encode(sample_review, return_tensors='pt')

In [4]:
# Run the encoded review through the model. This is inference only, so
# gradient tracking is switched off to avoid building an autograd graph.
with torch.no_grad():
    result = model(tokens)

In [5]:
# Index of the highest-scoring class among the model's logits.
result.logits.argmax()

tensor(4)

In [6]:
# Class indices start at 0 while the ratings run from 1 to 5, so shift by one.
result.logits.argmax().item() + 1

5

# 4. Collect Reviews

In [7]:
# Fetch the Yelp business page. The timeout keeps the cell from hanging
# indefinitely, and raise_for_status() surfaces a blocked or failed request
# instead of silently parsing an error page into an empty review list.
r = requests.get(
    'https://www.yelp.com/biz/fog-harbor-fish-house-san-francisco-2?osq=Restaurants',
    timeout=30,
)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')  # parse the returned HTML
regex = re.compile('.*comment.*')  # matches any class name containing "comment"
results = soup.find_all('p', {'class': regex})  # <p> tags whose class matches
# NOTE(review): the scored output suggests business-owner replies are captured
# here alongside customer reviews - confirm whether they should be filtered out.
reviews = [tag.text for tag in results]  # plain text of each matched paragraph

# 5. Load Reviews into dataframe and score

In [8]:
# Build the reviews DataFrame directly from the list of strings. The NumPy
# round-trip is unnecessary, and with an empty scrape it would produce a
# float64 column instead of a text (object) one.
df = pd.DataFrame({'Review': reviews})

In [9]:
# Peek at the first scraped review.
df['Review'].iat[0]

'Good view. Food is phenomenal. Scallops are the best. Our waiter Luis did a great job, too.'

In [10]:
def sentiment_score(review):
    """Predict a rating on the 1-5 scale for a single review text.

    Args:
        review: The review text to score.

    Returns:
        int: The predicted rating (index of the highest logit, plus one).
    """
    # Truncate at the token level: the model is limited to 512 tokens per
    # input, and slicing the text by characters does not guarantee that.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Inference only - no need to track gradients.
    with torch.no_grad():
        result = model(tokens)
    # Class indices are 0-based while ratings start at 1.
    return int(torch.argmax(result.logits)) + 1

In [11]:
# Sanity check: score the first scraped review.
sentiment_score(df['Review'].iat[0])

5

In [12]:
# Score every review. The model is limited to 512 tokens per input, so each
# review is cut to its first 512 characters before being scored.
df['sentiment'] = df['Review'].map(lambda text: sentiment_score(text[:512]))

In [13]:
# Final result: every scraped review next to its predicted rating in the
# 'sentiment' column.
df

Unnamed: 0,Review,sentiment
0,Good view. Food is phenomenal. Scallops are th...,5
1,RayThank you for the review and for dining wit...,5
2,"From one coast to the other, by far the best f...",5
3,LeeThank you for the review and for dining wit...,5
4,The Sesame Ahi Tuna from the spring menu was A...,5
5,LaurenThank you for the review and for choosin...,5
6,I was taken here for dinner for my birthday an...,4
7,KimThank you for joining us on your birthday. ...,5
8,Our family stopped by for a light dinner here ...,5
9,Overpriced and overhyped - way better options ...,2
