In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 5.4 MB/s 
[?25hCollecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 26.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 2.8 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 24.4 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 27.0 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml


# 1. Installing and Importing the Dependencies


In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

# 2. Instantiate the Model



In [8]:
# Hugging Face Hub repository id of the sentiment checkpoint used throughout.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"

# Load the tokenizer that belongs to the checkpoint ...
tokenizer = AutoTokenizer.from_pretrained(model_name)
# ... and the sequence-classification model (PyTorch weights) from the same repo.
model = AutoModelForSequenceClassification.from_pretrained(model_name)

Downloading:   0%|          | 0.00/638M [00:00<?, ?B/s]

# 3. Encode and Calculate Sentiment


In [24]:
# Encode a sample sentence into token ids; return_tensors='pt' asks for a PyTorch tensor.
tokens = tokenizer.encode('this is the worst', return_tensors='pt')

In [20]:
# Show the token ids of the first encoded sequence.
tokens[0]

tensor([  101, 10372, 10127, 39854,   102])

In [25]:
# Inference only: no_grad() stops PyTorch from recording an autograd graph
# (the `grad_fn=...` otherwise attached to the logits), which is never used here.
with torch.no_grad():
    result = model(tokens)

In [22]:
# Display the full model output object; the class scores are under `logits`.
result

SequenceClassifierOutput([('logits',
                           tensor([[-1.9988, -2.3604, -0.8269,  1.1844,  3.2305]],
                                  grad_fn=<AddmmBackward0>))])

In [18]:
# Raw (unnormalised) scores, one per sentiment class.
result.logits

tensor([[ 4.6627,  1.9006, -0.5749, -2.7509, -2.3941]],
       grad_fn=<AddmmBackward0>)

In [26]:
# Position of the largest logit is 0-based; shift by one to get a score starting at 1.
result.logits.argmax().item() + 1

1

# 4. Collecting Reviews

In [105]:
# Fetch the business page. The timeout keeps the cell from hanging indefinitely
# on a stalled connection, and raise_for_status() surfaces HTTP errors (e.g. a
# blocked or missing page) instead of silently parsing an error page into an
# empty review list.
r = requests.get('https://www.yelp.com/biz/santeria-san-francisco', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
# Select every <p> tag whose class matches the pattern (i.e. contains "comment");
# the text of those paragraphs is what the notebook treats as the reviews.
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class': regex})
reviews = [paragraph.text for paragraph in results]

In [107]:
# Inspect the scraped review texts.
reviews

['Gino and Alvaro are everything!!! The best service we have ever had!! If you want a wonderful experience with great food look no further!!!',
 'My friend, who has been going there once in a while, recommended it.Now I can see why she did!Wonderful staff, the manager Álvaro, And the waiter and the waitress, they were so lovely and engaging and helpful.Wonderful fresh margaritas. I requested one less sweet and they made it exactly like I was hoping...The inside decor and ambience is very nice also.I highly recommend it.Parking is free on the streets and not difficult to find.This was a Friday, happy hour , four to six p.m.',
 "Santeria has never failed me and it is one of my favorite places to go for a quick bite and good drinks. The atmosphere is also really cozy!There's sometimes a line out the door during peak times, coming on off times has helped. They also have a big table in the back that I think can be reserved for bigger parties!",
 'TL;DR - Dropped in for happy hour and was ve

# 5. Load Reviews into a DataFrame and Score


In [53]:
import numpy as np 
import pandas as pd

In [100]:
# Build the frame straight from the list of review strings. The NumPy round-trip
# was unnecessary, and for an empty scrape it produced a float64 column instead
# of an (empty) object column.
df = pd.DataFrame({'reviews': reviews})

In [101]:
# (rows, columns) of the review DataFrame.
df.shape

(10, 1)

In [102]:
# Preview the first five rows.
df.head()

Unnamed: 0,reviews
0,Maybe this 9/23/2022 was a bad day for us ther...
1,A friend and I had an absolutely delightful ou...
2,"The drinks, food, and staff are top notch. A f..."
3,Went tonight for a drink and quesadilla with t...
4,Been waiting for outdoor eating to open back u...


In [67]:
# Full text of the first review (the head() preview elides long strings).
df['reviews'].iloc[0]

'Gino and Alvaro are everything!!! The best service we have ever had!! If you want a wonderful experience with great food look no further!!!'

In [68]:
def sentiment_score(review, max_length=512):
  """Return the model's sentiment rating (1-5) for a piece of text.

  Args:
    review: Text to score.
    max_length: Maximum number of tokens (special tokens included) handed to
      the model. Longer inputs are truncated by the tokenizer instead of
      overflowing the model. The default of 512 mirrors the cap this notebook
      already applies to reviews and is assumed to be the checkpoint's input
      limit.

  Returns:
    int: Index of the largest logit plus one, so class index 0 maps to 1.
  """
  tokens = tokenizer.encode(
      review, return_tensors='pt', truncation=True, max_length=max_length)
  # Inference only: skip building an autograd graph.
  with torch.no_grad():
    result = model(tokens)
  return int(torch.argmax(result.logits)) + 1

In [71]:
# Sanity-check the scorer on the first review.
sentiment_score(df['reviews'].iloc[0])

5

In [73]:
# Score every review. Each text is first cut to its first 512 characters
# (a character slice, not a token count) before being passed to the scorer.
df['sentiment'] = df['reviews'].map(lambda text: sentiment_score(text[:512]))

In [81]:
# Number of reviews per predicted sentiment score.
df.value_counts('sentiment')

sentiment
5    7
4    2
1    1
dtype: int64