# Install & Import Dependencies

In [1]:
!pip install torch torchvision torchaudio



In [2]:
!pip install transformers requests beautifulsoup4 pandas numpy

Collecting transformers
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 7.5 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 4.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 56.7 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 51.5 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 68.4 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  A

In [29]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np

# AutoTokenizer converts a string into a sequence of numbers that we can then pass to the NLP model
# AutoModelForSequenceClassification provides the transformers architecture into which we load our NLP model
# argmax from torch gives us the index of the highest-scoring result
# requests grabs the data from the Yelp web page
# BeautifulSoup extracts data from the scraped web page
# re builds the regex used to extract the review comments

# Instantiate Model

In [5]:
# One checkpoint name shared by the tokenizer and the classification model
checkpoint = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

Downloading:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/638M [00:00<?, ?B/s]

# Encode and Calculate Sentiment

In [12]:
# return_tensors='pt' asks the tokenizer for PyTorch tensors
sample_review = 'I hated that, absolutely waste'
tokens = tokenizer.encode(sample_review, return_tensors='pt')

In [13]:
# Show the tensor of token ids produced by the tokenizer
tokens

tensor([[  101,   151, 39487, 10163, 10203,   117, 35925, 10563, 43346,   102]])

In [14]:
# Map the ids back to text as a sanity check on the encoding
tokenizer.decode(tokens[0])

'[CLS] i hated that, absolutely waste [SEP]'

In [15]:
# Run the model on the encoded sentence; the output is inspected in the next cells
result = model(tokens)

In [17]:
result
# The logits tensor holds one raw score per sentiment class for the comment

SequenceClassifierOutput([('logits',
                           tensor([[ 4.1835,  2.3260, -0.2570, -2.6977, -2.7281]],
                                  grad_fn=<AddmmBackward0>))])

In [19]:
# Position of the largest logit, shifted by one because argmax is zero-based
result.logits.argmax().item() + 1

1

# Collect Reviews

In [21]:
# Download the Yelp business page. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() surfaces HTTP errors here instead
# of silently producing an empty review list further down.
r = requests.get('https://www.yelp.com/biz/the-farmers-kitchen-farmington?osq=Restaurants', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text,'html.parser')
regex = re.compile('.*comment.*')  # matches any class name containing "comment"
results = soup.find_all('p',{'class':regex})  # every <p> tag whose class matches
reviews = [paragraph.text for paragraph in results]

In [22]:
# Display the response object to check the HTTP status of the request
r

<Response [200]>

In [23]:
# Raw HTML body of the response
# NOTE(review): this displays the entire page; consider slicing before sharing
r.text



In [24]:
# Parsed BeautifulSoup document built from r.text
# NOTE(review): this also displays the whole page
soup

<!DOCTYPE html>
<html lang="en-US" prefix="og: http://ogp.me/ns#" style="margin: 0;padding: 0; border: 0; font-size: 100%; font: inherit; vertical-align: baseline;"><head><script>document.documentElement.className=document.documentElement.className.replace(/no-js/,"js");</script><meta content="text/html; charset=utf-8" http-equiv="Content-Type"/><meta content="en-US" http-equiv="Content-Language"/><meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport"/><link content="#FF1A1A" href="https://s3-media0.fl.yelpcdn.com/assets/srv0/yelp_large_assets/b2bb2fb0ec9c/assets/img/logos/yelp_burst.svg" rel="mask-icon" sizes="any"/><link href="https://s3-media0.fl.yelpcdn.com/assets/srv0/yelp_large_assets/b05852393ae5/assets/img/logos/favicon.ico" rel="shortcut icon"/><script> window.ga=window.ga||function(){(ga.q=ga.q||[]).push(arguments)};ga.l=+new Date;window.ygaPageStartTime=new Date().getTime();</script><script async="" src="https://www.google-analytics.com/analy

In [26]:
# The <p> tags whose class matched the "comment" regex
results

[<p class="comment__09f24__gu0rG css-1sufhje"><span class=" raw__09f24__T4Ezm" lang="en">These pancakes are delicious! <br/><br/>I had blueberry and chocolate chip pancakes with a fruit cup and it was delicious!<br/><br/>I will want to eat there everyday.<br/><br/>From L.W. 7 years old</span></p>,
 <p class="comment__09f24__gu0rG css-1sufhje"><span class=" raw__09f24__T4Ezm" lang="en">Ordered recently from them the first time. Got burned fries and some sandwich that was full of shredded iceberg and some overly salted meat that lacked any flavor. I had brought a friend with me and the lady taking my order straight up walked off on us after my order was placed while he waa trying to talk with her to place his order. No eye contact, super rude. It was busy but other diners were seated and watching the lady walk off. Then the lady returned and began taking people behind us in line! Definitely poor quality service, bad food. Not worth it.</span></p>,
 <p class="comment__09f24__gu0rG css-1su

In [27]:
# Plain-text content of each matched tag
reviews

['These pancakes are delicious! I had blueberry and chocolate chip pancakes with a fruit cup and it was delicious!I will want to eat there everyday.From L.W. 7 years old',
 'Ordered recently from them the first time. Got burned fries and some sandwich that was full of shredded iceberg and some overly salted meat that lacked any flavor. I had brought a friend with me and the lady taking my order straight up walked off on us after my order was placed while he waa trying to talk with her to place his order. No eye contact, super rude. It was busy but other diners were seated and watching the lady walk off. Then the lady returned and began taking people behind us in line! Definitely poor quality service, bad food. Not worth it.',
 'Delicious ! Had the homemade hash and eggs. Large portions, perfectly cooked. Comes with side of home fries and either pancakes or French toast.Garden omelette was also delicious!',
 "Used to be one of our favorites for years....they however seem to have not rec

In [28]:
# Text of the matched tag at index 2, as a spot check
results[2].text

'Delicious ! Had the homemade hash and eggs. Large portions, perfectly cooked. Comes with side of home fries and either pancakes or French toast.Garden omelette was also delicious!'

# Load Reviews into DataFrame and Score

In [30]:
# Build a one-column frame straight from the list of review strings; no
# intermediate NumPy array is needed, and an empty list still yields a
# text-compatible (non-float) column.
df = pd.DataFrame({"review": reviews})

In [31]:
# Preview the first rows of the review DataFrame
df.head()

Unnamed: 0,review
0,These pancakes are delicious! I had blueberry ...
1,Ordered recently from them the first time. Got...
2,Delicious ! Had the homemade hash and eggs. La...
3,Used to be one of our favorites for years....t...
4,I dont think I've ever hurt myself on breakfas...


In [32]:
def sentiment_score(review, max_length=512):
  """Return the predicted sentiment class of ``review`` as an integer starting at 1.

  The text is encoded with the notebook-level ``tokenizer``, passed through the
  notebook-level ``model``, and the position of the largest logit (plus one) is
  returned.

  Args:
    review: Text to score.
    max_length: Maximum number of tokens handed to the model; longer inputs
      are truncated by the tokenizer.

  Returns:
    int: 1-based index of the highest-scoring class.
  """
  # Truncate on tokens (not characters) so long reviews cannot exceed the
  # model's input limit, whatever the caller passes in.
  tokens = tokenizer.encode(review, return_tensors='pt', truncation=True, max_length=max_length)
  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    result = model(tokens)
  return int(torch.argmax(result.logits))+1

In [35]:
# Try the scorer on a single review (row position 3) before scoring the whole column
sentiment_score(df['review'].iloc[3])

3

In [36]:
# The NLP pipeline accepts a limited number of tokens (here: 512).
# NOTE: the slice below keeps the first 512 *characters* of each review, which
# is only a rough stand-in for a token limit.
review_prefixes = df['review'].map(lambda text: text[:512])
df['sentiment'] = review_prefixes.map(sentiment_score)

In [38]:
# Preview the DataFrame again, now with the sentiment column added
df.head()

Unnamed: 0,review,sentiment
0,These pancakes are delicious! I had blueberry ...,5
1,Ordered recently from them the first time. Got...,1
2,Delicious ! Had the homemade hash and eggs. La...,5
3,Used to be one of our favorites for years....t...,3
4,I dont think I've ever hurt myself on breakfas...,4
