# Using a BERT Model for Sentiment Analysis of TripAdvisor Reviews

### 1. Load relevant libraries


In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch 
import re

import pandas as pd 
import numpy as np 

### 2. Instantiate Model

In [48]:
# Hugging Face Hub checkpoint shared by the tokenizer and the classifier.
# Keeping the id in one constant guarantees the two are always loaded from
# the same checkpoint.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

### 3. Encode and Calculate Sentiment

In [43]:
# Encode a sample phrase into a PyTorch tensor of token ids.
tokens = tokenizer.encode('not as bad', return_tensors='pt')

In [44]:
# Forward pass on the encoded phrase. Gradient tracking is left on here,
# which is why the logits displayed below carry a grad_fn.
result = model(tokens)

In [45]:
# Inspect the full output object returned by the model.
result

SequenceClassifierOutput(loss=None, logits=tensor([[-1.0146,  0.7619,  2.3528,  0.3902, -1.8820]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [46]:
# Look at just the logits tensor: one raw score per output class.
result.logits

tensor([[-1.0146,  0.7619,  2.3528,  0.3902, -1.8820]],
       grad_fn=<AddmmBackward0>)

In [47]:
# The highest logit marks the predicted class; argmax is zero-based, so add 1
# to turn the class index into a 1-based score.
result.logits.argmax().item() + 1

3

### 4. Load TripAdvisor Reviews 

In [49]:
# Load the TripAdvisor reviews. The bare filename is resolved relative to the
# notebook's current working directory.
df = pd.read_csv('Reviews TA.csv')

In [51]:
# Summarise columns, dtypes and non-null counts to check the data loaded cleanly.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13465 entries, 0 to 13464
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    13465 non-null  object
 1   Title   13464 non-null  object
 2   Review  13465 non-null  object
 3   Rating  13465 non-null  int64 
dtypes: int64(1), object(3)
memory usage: 420.9+ KB


In [53]:
# Keep only the review text; the date, title and rating columns are not
# used for scoring.
reviews = df.drop(columns=['Date', 'Title', 'Rating'])

In [55]:
# Preview the first few rows of the trimmed frame.
reviews.head()

Unnamed: 0,Review
0,Very good location. Reasonable price. The room...
1,We stayed in this hotel just before Christmas ...
2,Pleasant staff and security in place. Stayed h...
3,"Pricing was okay. Very noisy, small room. The ..."
4,"Rooms are filthy, elevators are dangerous and ..."


In [58]:
def sentiment_score(review, max_length=512):
    """Classify a review as 'Negative', 'Neutral' or 'Positive'.

    The review is tokenized, run through the module-level ``model`` and the
    highest-scoring class index is converted to a 1-based score, which is
    then bucketed into a label.

    Parameters
    ----------
    review : str
        Text of a single review.
    max_length : int, default 512
        Maximum number of tokens (special tokens included) fed to the model.
        Longer reviews are truncated. The default matches the 512-position
        limit of BERT-base models; without truncation, longer inputs make the
        forward pass fail.

    Returns
    -------
    str
        'Negative' for a score of 1 or 2, 'Neutral' for 3, 'Positive'
        otherwise.
    """
    # Truncate at the tokenizer level so the cut-off is measured in tokens,
    # which is the unit the model's position limit is expressed in.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )

    # Inference only: skip building the autograd graph for every review.
    with torch.no_grad():
        logits = model(tokens).logits

    # argmax is zero-based; +1 gives the 1-based score.
    stars = int(torch.argmax(logits)) + 1

    if stars <= 2:
        return 'Negative'
    if stars == 3:
        return 'Neutral'
    return 'Positive'

In [None]:
# Score every review. This runs one model forward pass per row, so expect the
# cell to take a while on the full dataset.
reviews['Sentiment'] = reviews['Review'].map(sentiment_score)

In [67]:
# Spot-check the helper on a single hand-written sentence.
sentiment_score(' Hi you are not that lousy')

'Neutral'