In [1]:
import pandas as pd, numpy as np, torch, requests
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import re

In [2]:
# Single source of truth for the checkpoint id, so the tokenizer and the
# classifier are always loaded from the same pretrained model.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

# Tokenizer and sequence-classification head loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [3]:
# Encode a sample sentence into token ids; return_tensors='pt' requests a PyTorch tensor.
token = tokenizer.encode("I is an okayish washing machine.", return_tensors='pt')

In [4]:
token  # Note: the result is a 2-D tensor (one row for the single encoded sentence), so decoding needs the inner row.

tensor([[  101,   151, 10127, 10144, 44810, 13474, 13457, 58080, 10285, 14338,
           119,   102]])

In [5]:
# Decode the token ids back to text as a sanity check.

tokenizer.decode(token[0])  # token is 2-D, so token[0] selects the flat row of ids for the sentence.

'[CLS] i is an okayish washing machine. [SEP]'

In [6]:
token  # the encoded ids that are passed to the model in the next cell

tensor([[  101,   151, 10127, 10144, 44810, 13474, 13457, 58080, 10285, 14338,
           119,   102]])

In [7]:
# Run the encoded sentence through the classifier.
result = model(token)

In [8]:
result  # full model output object; only the logits field is used below

SequenceClassifierOutput(loss=None, logits=tensor([[-1.6463,  0.4418,  2.2658,  0.6963, -1.5541]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [9]:
result.logits  # logits are the important part: one raw score per rating class, covering ratings 1 to 5

tensor([[-1.6463,  0.4418,  2.2658,  0.6963, -1.5541]],
       grad_fn=<AddmmBackward0>)

In [10]:
int(torch.argmax(result.logits))+1  # argmax gives the 0-based position of the largest logit; int() makes it a plain Python int and +1 shifts it to the 1-5 rating

3

Collecting reviews

In [11]:
# Download the business page. The timeout keeps the cell from hanging forever
# on a stalled connection, and raise_for_status() surfaces HTTP errors
# (e.g. a blocked request) instead of silently producing an empty review list.
r = requests.get("https://www.yelp.com/biz/xebec-san-francisco-2", timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.text, 'html.parser')

# Select every <p> element whose class attribute contains "comment".
regex = re.compile(".*comment.*")
results = soup.find_all('p', {'class': regex})

# Keep only the text content of each matched element.
reviews = [tag.text for tag in results]

In [12]:
r.text[:500]  # preview only the start of the page; the full HTML is far too long to display usefully

'<!DOCTYPE html><html lang="en-US" prefix="og: http://ogp.me/ns#" style="margin: 0;padding: 0; border: 0; font-size: 100%; font: inherit; vertical-align: baseline;"><head><script>document.documentElement.className=document.documentElement.className.replace(/\x08no-js\x08/,"js");</script><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><meta http-equiv="Content-Language" content="en-US" /><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link rel="mask-icon" sizes="any" href="https://s3-media0.fl.yelpcdn.com/assets/srv0/yelp_large_assets/b2bb2fb0ec9c/assets/img/logos/yelp_burst.svg" content="#FF1A1A"><link rel="shortcut icon" href="https://s3-media0.fl.yelpcdn.com/assets/srv0/yelp_large_assets/dcfe403147fc/assets/img/logos/favicon.ico"><script> window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;window.ygaPageStartTime=new Date().getTime();</script><script>\n            window.yelp = window.yelp || {};\

In [13]:
results[0].text  # text of the first matched <p> element

"Excellent addition tot he Hayes Valley neighborhood. Situated on the busy Gough Street, there is lots of nearby street parking. Look for the blue awning. This restaurant is just steps away from SF Wine and Cheese and Rich Table. Enter to a welcoming full bar with non-alcoholic options as well as unique and fresh cocktails. They have a happy hour that extends to the entire restaurant--not just the bar.Excellent service and fresh healthy food options. I tried the Fattoush salad with skirt steak and the focaccia bread. Both were amazing. My new favorites. I'll be back. Check this place out!"

In [14]:
reviews[0]  # first entry of the extracted list; it was built from results[0].text

"Excellent addition tot he Hayes Valley neighborhood. Situated on the busy Gough Street, there is lots of nearby street parking. Look for the blue awning. This restaurant is just steps away from SF Wine and Cheese and Rich Table. Enter to a welcoming full bar with non-alcoholic options as well as unique and fresh cocktails. They have a happy hour that extends to the entire restaurant--not just the bar.Excellent service and fresh healthy food options. I tried the Fattoush salad with skirt steak and the focaccia bread. Both were amazing. My new favorites. I'll be back. Check this place out!"

In [15]:
# One row per scraped review. The column is built straight from the Python
# list; a detour through a numpy array is not needed.
df = pd.DataFrame({'review': reviews})

In [16]:
df.head()  # preview the first rows of the review table

Unnamed: 0,review
0,Excellent addition tot he Hayes Valley neighbo...
1,Thank you for joining us and this lovely write...
2,A nice little neighborhood restaurant! Full ba...
3,"The restaurant is beautiful, the ambiance is p..."
4,Great customer service! Food was also very del...


In [17]:
df['review'].iloc[0]  # full text of the first review (the table preview abbreviates long strings)

"Excellent addition tot he Hayes Valley neighborhood. Situated on the busy Gough Street, there is lots of nearby street parking. Look for the blue awning. This restaurant is just steps away from SF Wine and Cheese and Rich Table. Enter to a welcoming full bar with non-alcoholic options as well as unique and fresh cocktails. They have a happy hour that extends to the entire restaurant--not just the bar.Excellent service and fresh healthy food options. I tried the Fattoush salad with skirt steak and the focaccia bread. Both were amazing. My new favorites. I'll be back. Check this place out!"

<h1>Creating a function to pass all reviews through the model</h1>

In [22]:
def sentiment_score(review):
    """Return the predicted rating of ``review`` as an int from 1 to 5.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Parameters
    ----------
    review : str
        Text to score.

    Returns
    -------
    int
        1-based position of the largest logit (1 = lowest rating class,
        5 = highest).
    """
    # Truncate at the token level so long inputs cannot exceed the model's
    # input length; a character-based cut by the caller does not bound the
    # number of tokens.
    # NOTE(review): 512 is assumed to be the model's maximum sequence
    # length; confirm against model.config.max_position_embeddings.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        result = model(tokens)
    # argmax is 0-based, ratings are 1-based.
    return int(torch.argmax(result.logits)) + 1

In [29]:
sentiment_score(df['review'].iloc[1])  # spot-check the scorer on the second review

5

In [30]:
# Score every review, passing only its first 512 characters to the scorer.
# NOTE(review): the slice limits characters, not tokens; presumably it is a
# guard for the model's input length. Confirm.
df['sentiment'] = df['review'].map(lambda text: sentiment_score(text[:512]))

In [31]:
df  # every scraped review alongside its predicted rating

Unnamed: 0,review,sentiment
0,Excellent addition tot he Hayes Valley neighbo...,5
1,Thank you for joining us and this lovely write...,5
2,A nice little neighborhood restaurant! Full ba...,4
3,"The restaurant is beautiful, the ambiance is p...",5
4,Great customer service! Food was also very del...,5
5,"Great food, great service, lovely ambience. Th...",5
6,"Had dinner on Saturday night, during their fir...",5
7,"one of the best restaurants in the area, highl...",5
8,"Excellent new restaurant, inventive dishes and...",5
