In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

In [2]:
#checking which gpu (if any) torch can see. This only reports the device name - the model and the token tensors in this notebook stay on the cpu unless they are moved with .to('cuda')
torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'no CUDA device available'

'NVIDIA GeForce RTX 2060'

In [3]:
#a pretrained bert model finetuned for product reviews; the tokenizer and the classifier are both loaded from the same checkpoint name
checkpoint = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [4]:
#sanity check of the model: encode a short negative phrase into a pytorch tensor of token ids
sample_text = 'not good'
tokens = tokenizer.encode(sample_text, return_tensors='pt')

In [5]:
#the ids the tokenizer produced for 'not good' (the leading 101 and trailing 102 are presumably bert's [CLS]/[SEP] markers - confirm against the tokenizer vocab)
tokens

tensor([[  101, 10497, 12050,   102]])

In [6]:
#forward pass of the classifier on the encoded test phrase
result = model(tokens)

In [7]:
#the output bundles several fields; the one we need is logits. These are raw (unnormalised) scores, one per class, not probabilities - the highest score marks the predicted class, and a softmax would be needed to turn them into probabilities
result

SequenceClassifierOutput(loss=None, logits=tensor([[ 3.2290,  2.7093,  0.7850, -2.3145, -3.6469]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [8]:
#just the class scores: one row for our single input, one column per class
result.logits

tensor([[ 3.2290,  2.7093,  0.7850, -2.3145, -3.6469]],
       grad_fn=<AddmmBackward0>)

In [9]:
#argmax gives the 0-based position of the highest score, so adding 1 turns it into a rating that starts at 1. The string "not good" comes out as 1, so the model seems to be working well so far.
result.logits.argmax().item() + 1

1

In [10]:
#scrape the reviews shown on one yelp business page. The review text is taken from every <p> tag whose class attribute contains "comment"
#NOTE(review): this relies on yelp's current markup - if the class names change the pattern below will need updating
url = 'https://www.yelp.co.uk/biz/shoryu-ramen-london?osq=Korean+BBQ'
r = requests.get(url, timeout=10)  #without a timeout a stalled connection would hang the notebook
r.raise_for_status()  #stop on an http error instead of parsing an error page
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
reviews = [paragraph.text for paragraph in results]
#fail loudly here rather than carrying an empty list through the rest of the notebook
assert reviews, 'no review paragraphs matched - has the page markup changed?'

In [11]:
#eyeball the scraped text to confirm the selector picked up actual reviews
reviews

["The ramen is so delicious And can't wait to be back And the service was awesomeThank you",
 'Great ramen. Great service. Loved it and will be back again. Not a long wait to get in on a busy Saturday night. We loved everything we ordered.',
 'A welcome respite amidst the bustle of Piccadilly Square. The rich broth, perfectly cooked noodles, and jammy eggs felt like a warm hug.',
 'Excellent service, food, and location! The ramen was fresh and hot, the wine selection was nice, and the restroom was clean! This place is a perfect match for cold weather. The pork bun is amazing! You have to try it!',
 "Solid tonkotsu ramen. Doesn't taste heavy but when you sip the soup, \xa0you know it's a well made broth. I ordered to go and it came well packaged. The noodles and toppings were separated properly to nothing got soggy. The texture of the noodle and egg were right and the cha Siu was good.",
 'Great place and the ramen is amazing. The portion is also great and very filling. We loved everyth

In [12]:
import numpy as np
import pandas as pd

In [13]:
#one review per row. pandas accepts the list of strings directly, so there is no need to copy it into a numpy array first (that copy also produced a float column when the list was empty)
df = pd.DataFrame({'review': reviews})

In [14]:
#read the first review in full
df['review'].iloc[0]

"The ramen is so delicious And can't wait to be back And the service was awesomeThank you"

In [15]:
#preview the frame (long reviews are shortened with ... in this view)
df

Unnamed: 0,review
0,The ramen is so delicious And can't wait to be...
1,Great ramen. Great service. Loved it and will ...
2,A welcome respite amidst the bustle of Piccadi...
3,"Excellent service, food, and location! The ram..."
4,Solid tonkotsu ramen. Doesn't taste heavy but ...
5,Great place and the ramen is amazing. The port...
6,We dropped in here one afternoon and were luck...
7,"Honestly, the most rubbish ramen i have ever t..."
8,Delicious Tonkatsu ramen and yummy Japanese fr...
9,I would say I am pretty predictable when it co...


In [16]:
def sentiment_score(review, max_length=512):
    """Return the model's predicted rating for one piece of review text.

    The text is tokenized with the notebook-level ``tokenizer``, run through
    the notebook-level ``model``, and the position of the highest logit is
    converted to a rating that starts at 1.

    Parameters
    ----------
    review : str
        The review text to score.
    max_length : int, optional
        Maximum number of tokens passed to the model; longer inputs are
        truncated by the tokenizer. Defaults to 512.
        NOTE(review): 512 is assumed to be this bert model's input limit -
        confirm against model.config.max_position_embeddings.

    Returns
    -------
    int
        1-based index of the highest-scoring class.
    """
    #truncate at the token level so over-long reviews are shortened instead of exceeding max_length
    tokens = tokenizer.encode(review, return_tensors='pt', truncation=True, max_length=max_length)
    #inference only: no need to record operations for backpropagation
    with torch.no_grad():
        result = model(tokens)
    #argmax is 0-based, ratings start at 1
    return int(torch.argmax(result.logits)) + 1

In [17]:
#spot check the function on a single review (the second one) before scoring the whole column
sentiment_score(df['review'].iloc[1])

5

In [18]:
#score every review and store the rating next to its text. Only the first 512 characters of each review are scored - note that this is a character count, not the model's token count
df['sentiment'] = [sentiment_score(text[:512]) for text in df['review']]

In [19]:
#the frame again, now with the predicted rating beside each review
df

Unnamed: 0,review,sentiment
0,The ramen is so delicious And can't wait to be...,5
1,Great ramen. Great service. Loved it and will ...,5
2,A welcome respite amidst the bustle of Piccadi...,5
3,"Excellent service, food, and location! The ram...",5
4,Solid tonkotsu ramen. Doesn't taste heavy but ...,4
5,Great place and the ramen is amazing. The port...,5
6,We dropped in here one afternoon and were luck...,4
7,"Honestly, the most rubbish ramen i have ever t...",1
8,Delicious Tonkatsu ramen and yummy Japanese fr...,5
9,I would say I am pretty predictable when it co...,4


In [20]:
#full text of the fourth review, to judge whether its predicted rating is plausible
df['review'].iloc[3]

'Excellent service, food, and location! The ramen was fresh and hot, the wine selection was nice, and the restroom was clean! This place is a perfect match for cold weather. The pork bun is amazing! You have to try it!'

In [21]:
#predicted rating for the fourth review
df['sentiment'].iloc[3]

5