# Sentiment Analysis

In [15]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121


In [16]:
!pip install transformers requests beautifulsoup4 pandas numpy



In [17]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd

from IPython.display import display, HTML

### Instantiate Model

In [18]:
# The tokeniser and the classifier should be loaded from the same checkpoint,
# so the identifier is defined once and shared by both calls.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

# Converts raw text into the token ids the model consumes.
tokeniser = AutoTokenizer.from_pretrained(MODEL_NAME)

# Sequence-classification head whose logits are read by `sentiment_score`.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

### Encode and Calculate Sentiment

In [19]:
def sentiment_score(text, max_length=512):
    """Return the 1-based index of the highest-scoring sentiment class for ``text``.

    Parameters
    ----------
    text : str
        The text to classify.
    max_length : int, optional
        Maximum number of tokens passed to the model. Longer inputs are
        truncated; without this, inputs longer than the model's positional
        limit make the forward pass fail. The default of 512 assumes a
        BERT-base checkpoint -- adjust if a different model is loaded.

    Returns
    -------
    int
        ``argmax(logits) + 1``. NOTE(review): for the nlptown checkpoint this
        is presumably a 1-5 star rating per its model card -- confirm.
    """
    tokens = tokeniser.encode(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        result = model(tokens)
    # argmax is 0-based; shift by one so the lowest class is reported as 1.
    return int(torch.argmax(result.logits)) + 1

In [20]:
# Sanity check: score one short sentence with the loaded model.
sentiment_score("Could have been better.")

3

### Collect Reviews from Yelp

In [21]:
def headerAndReviews(html, timeout=10):
    """Fetch a page and extract its heading text and review texts.

    Parameters
    ----------
    html : str
        URL of the page to fetch (despite the name, this is a URL that is
        passed to ``requests.get``, not HTML markup).
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up, so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    tuple[str, list[str]]
        The text of the first ``<div>`` whose class contains "heading"
        (an empty string when no such element exists), and the text of every
        ``<p>`` whose class contains "comment".

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, so an error page is not
        silently parsed as if it were the review page.
    """
    r = requests.get(html, timeout=timeout)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    # The previous pattern '.*heading*' made the final "g" optional ("g*"),
    # so it also matched classes that merely contained "headin".
    regex_heading = re.compile(r'.*heading.*')
    regex = re.compile(r'.*comment.*')
    title = soup.find_all('div', {'class': regex_heading})
    results = soup.find_all('p', {'class': regex})
    # Fall back to an empty heading instead of raising IndexError when the
    # page has no matching element.
    heading = title[0].text if title else ''
    reviews = [result.text for result in results]
    return heading, reviews

### Show Reviews and Overall Rating

In [39]:
def show_company_review(html):
    """Scrape a review page, score each review and render the result as HTML.

    Parameters
    ----------
    html : str
        URL of the page to scrape; it is forwarded unchanged to
        ``headerAndReviews``.

    Returns
    -------
    None
        The table is rendered through ``display(HTML(...))``.

    Notes
    -----
    The heading and review texts come from a remote page, so they are
    HTML-escaped before being interpolated into the rendered markup.
    """
    # The parameter name shadows the stdlib ``html`` module inside this
    # function, so the escaping helper is imported under its own name.
    from html import escape

    heading, reviews = headerAndReviews(html)
    df = pd.DataFrame(reviews, columns=['Review'])

    # Score every review; the average is undefined when nothing was scraped.
    df['Sentiment Score'] = df['Review'].apply(sentiment_score)
    if df.empty:
        avg_text = 'N/A'
    else:
        avg_text = f"{df['Sentiment Score'].mean():.1f}"

    # Collect fragments in a list and join once instead of growing a string.
    parts = [
        # Heading, centered with a larger font
        f'<div style="font-size: 20px; font-weight: bold; text-align: center; margin-bottom: 10px;">{escape(str(heading))}</div>',
        # Average sentiment score, centered
        f'<div style="font-weight: bold; text-align: center; margin-bottom: 20px;">Average Sentiment Score: {avg_text}</div>',
        # Column headers
        '<div style="display: flex; align-items: center; font-weight: bold; text-align: center; padding: 10px;">',
        '<div style="flex: 1;">Number</div>',
        '<div style="flex: 4;">Review</div>',
        '<div style="flex: 2;">Sentiment Score</div>',
        '</div>',
    ]

    # One row per review, numbered from 1, with extra padding and no borders.
    rows = zip(df['Review'], df['Sentiment Score'])
    for number, (review, score) in enumerate(rows, start=1):
        parts.append('<div style="display: flex; align-items: center; text-align: center; padding: 15px;">')
        parts.append(f'<div style="flex: 1;">{number}</div>')
        parts.append(f'<div style="flex: 4; max-width: 400px; overflow: hidden; text-overflow: ellipsis;">{escape(str(review))}</div>')
        parts.append(f'<div style="flex: 2;">{score:.1f}</div>')
        parts.append('</div>')

    # Display the accumulated HTML content
    display(HTML(''.join(parts)))

In [40]:
# NOTE: despite its name, `html_content` holds the URL of the page to scrape,
# not HTML markup; `show_company_review` fetches and parses it.
html_content = 'https://www.yelp.com/biz/honest-burgers-meard-st-soho-london?osq=Burgers'
show_company_review(html_content)