## Import

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification 
import torch 
import requests 
from bs4 import BeautifulSoup 
import re 

## Instantiate Model

In [2]:
# Hub checkpoint shared by the tokenizer and the classifier, named once so the two cannot drift apart.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

# Tokenizer that belongs to the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Sequence-classification model; from_tf=True asks transformers to load the TensorFlow weights.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    from_tf=True,
)

All TF 2.0 model weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceClassification were initialized from the TF 2.0 model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForSequenceClassification for predictions without further training.


In [3]:
# Encode a sample sentence; return_tensors='pt' makes the tokenizer hand back a PyTorch tensor.
tokens = tokenizer.encode('I hated this, absolutely the worst', return_tensors='pt')

# Show the first row of the encoded tensor.
tokens[0]

tensor([  101,   151, 39487, 10163, 10372,   117, 35925, 10563, 10103, 43060,
          102])

In [4]:
# Run the encoded sample sentence through the classifier.
result = model(tokens)

# Display the raw model output, which carries the logits.
result

SequenceClassifierOutput(loss=None, logits=tensor([[ 4.8750,  1.7880, -0.8356, -3.0027, -2.0727]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [5]:
# torch.argmax gives the 0-based position of the largest logit; +1 turns it into a 1-based score.
int(torch.argmax(result.logits))+1

1

## Collect Reviews

In [6]:
# Download the business page. Requests waits forever unless given a timeout, so bound the wait,
# and raise on an HTTP error status instead of silently parsing an error page into zero reviews.
r = requests.get('https://www.yelp.com/biz/mejico-sydney-2', timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.text, 'html.parser')

# Pattern for any class attribute value that contains "comment".
regex = re.compile('.*comment.*')

# All <p> elements whose class matches the pattern.
results = soup.find_all('p', {'class': regex})

# Keep only the text content of each matched paragraph.
reviews = [result.text for result in results]

In [7]:
reviews[0]

'Great atmosphere, attentive service, solid margs, and a Tasty menu. The Brisket Tacos were substantial and delicious. The corn ribs??? \xa0Fawgetaboutit! \xa0Unreal. \xa0Wanted to order another plate.'

## Load into DataFrame

In [8]:
import pandas as pd 
import numpy as np

In [9]:
# One-column frame named 'review', built from the scraped review strings.
df = pd.DataFrame({'review': np.array(reviews)})

In [10]:
df.tail()

Unnamed: 0,review
5,Have been here twice and have absolutely loved...
6,I was pleasantly surprised at what a great job...
7,If you're looking for a quiet little romantic ...
8,The service at this place was top notch - the ...
9,Really nice (upmarket) Mexican restaurant. Goo...


In [11]:
df['review'].iloc[0]

'Great atmosphere, attentive service, solid margs, and a Tasty menu. The Brisket Tacos were substantial and delicious. The corn ribs??? \xa0Fawgetaboutit! \xa0Unreal. \xa0Wanted to order another plate.'

In [12]:
def sentiment_score(review, max_length=512):
    """Return the 1-based sentiment score the model predicts for ``review``.

    Args:
        review: Text to classify.
        max_length: Upper bound on the number of tokens passed to the model.
            Longer inputs are truncated by the tokenizer. The default of 512
            mirrors the cut-off this notebook already uses.
            NOTE(review): confirm it matches the checkpoint's maximum input length.

    Returns:
        int: Index of the largest logit plus one.
    """
    # Truncate at the token level: slicing the string by characters does not
    # bound how many tokens the tokenizer produces.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [13]:
sentiment_score(df['review'].iloc[4])

4

In [14]:
# Score each review on its first 512 characters (the slice counts characters, not tokens).
df['sentiment'] = df['review'].map(lambda text: sentiment_score(text[:512]))

In [15]:
df

Unnamed: 0,review,sentiment
0,"Great atmosphere, attentive service, solid mar...",3
1,Don't come here expecting legit Mexican food b...,3
2,Out of all the restaurants that I tried in Syd...,5
3,The food is fresh and tasty. The scallop cevi...,4
4,We came here on a Thursday night @ 5pm and by ...,4
5,Have been here twice and have absolutely loved...,5
6,I was pleasantly surprised at what a great job...,5
7,If you're looking for a quiet little romantic ...,2
8,The service at this place was top notch - the ...,5
9,Really nice (upmarket) Mexican restaurant. Goo...,4
