In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

In [2]:
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

In [3]:
tokens = tokenizer.encode('It was good but couldve been better. Great', return_tensors='pt')

In [4]:
result = model(tokens)

In [5]:
result.logits

tensor([[-2.7768, -1.2353,  1.4419,  1.9804,  0.4584]],
       grad_fn=<AddmmBackward0>)

In [6]:
int(torch.argmax(result.logits))+1

4

In [7]:
r = requests.get('https://www.yelp.com/biz/um-ma-san-francisco-3?osq=Restaurants')
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
reviews = [result.text for result in results]

In [8]:
reviews

["One of the best dining experiences I've had in a very long time. My stomach is in bliss :)",
 "Love coming here! Service is always so friendly. My favorite dish here is the rockin roe - it's amazing!",
 'We came to Um Ma for my friends birthday and we left feeling quite disappointed. I heard this place used to be on the Michelin guide? So I was a really excited but the food was so poor quality. We ordered the chicken wings, seafood pancake, pork belly, Korean ribs and tteokbokki. The chicken wings were ok - very standard. The seafood pancake was a bit different, lots of batter not a lot of seafood. The pork belly was lack luster and seasoning seemed like a normal bbq sauce. The Korean ribs were hard to chew and not tender at all. Tteokbokki had a nice texture and an interesting citrus tang to it. Overall, everything was just kind of  - it felt very non-authentic. Especially for the price, I would stick to finding better Korean food somewhere else. I would say the only perk was their 

In [9]:
df = pd.DataFrame({'Review': reviews})
print(df)

                                              Review
0  One of the best dining experiences I've had in...
1  Love coming here! Service is always so friendl...
2  We came to Um Ma for my friends birthday and w...
3  If you're looking for good Korean food, find s...
4  Large covered outdoor patio area with heat lam...
5  Got the kimchi fried rice with bulgogi and bib...
6  Very cute modern Korean spot in the Sunset. Th...
7  Family went to Um Ma over the weekend for lunc...
8  Adding .5 stars more since they had us in the ...
9  Dec 2022High notes and low notes. We came by h...


In [10]:
df.head(20)

Unnamed: 0,Review
0,One of the best dining experiences I've had in...
1,Love coming here! Service is always so friendl...
2,We came to Um Ma for my friends birthday and w...
3,"If you're looking for good Korean food, find s..."
4,Large covered outdoor patio area with heat lam...
5,Got the kimchi fried rice with bulgogi and bib...
6,Very cute modern Korean spot in the Sunset. Th...
7,Family went to Um Ma over the weekend for lunc...
8,Adding .5 stars more since they had us in the ...
9,Dec 2022High notes and low notes. We came by h...


In [19]:
df['review']=df['review'].astype('str')

In [20]:
df['result']=df['review'].apply(lambda x:sentiment_analysis(x))

In [50]:
df['sentiment']=df['result'].apply(lambda x:(x[0]['label']))

In [51]:
df.columns

Index(['review', 'result', 'sentiment', 'score'], dtype='object')

In [52]:
df.head(20)

Unnamed: 0,review,result,sentiment,score
0,One of the best dining experiences I've had in...,"[{'label': 'POSITIVE', 'score': 0.998926699161...",POSITIVE,0.998927
1,Love coming here! Service is always so friendl...,"[{'label': 'POSITIVE', 'score': 0.998940050601...",POSITIVE,0.99894
2,We came to Um Ma for my friends birthday and w...,"[{'label': 'NEGATIVE', 'score': 0.999512314796...",NEGATIVE,0.999512
3,"If you're looking for good Korean food, find s...","[{'label': 'NEGATIVE', 'score': 0.999515295028...",NEGATIVE,0.999515
4,Large covered outdoor patio area with heat lam...,"[{'label': 'POSITIVE', 'score': 0.998869717121...",POSITIVE,0.99887
5,Got the kimchi fried rice with bulgogi and bib...,"[{'label': 'POSITIVE', 'score': 0.998916745185...",POSITIVE,0.998917
6,Very cute modern Korean spot in the Sunset. Th...,"[{'label': 'POSITIVE', 'score': 0.998929798603...",POSITIVE,0.99893
7,Family went to Um Ma over the weekend for lunc...,"[{'label': 'POSITIVE', 'score': 0.998914480209...",POSITIVE,0.998914
8,Adding .5 stars more since they had us in the ...,"[{'label': 'POSITIVE', 'score': 0.998905658721...",POSITIVE,0.998906
9,Dec 2022High notes and low notes. We came by h...,"[{'label': 'POSITIVE', 'score': 0.998561441898...",POSITIVE,0.998561


In [53]:
from transformers import pipeline
sentiment_analysis = pipeline("sentiment-analysis",model="siebert/sentiment-roberta-large-english")

In [13]:
import numpy as np
import pandas as pd

In [14]:
df = pd.DataFrame(np.array(reviews), columns=['review'])

In [15]:
df['review'].iloc[2]

'We came to Um Ma for my friends birthday and we left feeling quite disappointed. I heard this place used to be on the Michelin guide? So I was a really excited but the food was so poor quality. We ordered the chicken wings, seafood pancake, pork belly, Korean ribs and tteokbokki. The chicken wings were ok - very standard. The seafood pancake was a bit different, lots of batter not a lot of seafood. The pork belly was lack luster and seasoning seemed like a normal bbq sauce. The Korean ribs were hard to chew and not tender at all. Tteokbokki had a nice texture and an interesting citrus tang to it. Overall, everything was just kind of  - it felt very non-authentic. Especially for the price, I would stick to finding better Korean food somewhere else. I would say the only perk was their herbal tea. It has free refills!'

In [63]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Data loading and preprocessing
X = df['review']
y = df['sentiment']

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

max_words = 10000
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(X)
X_sequences = tokenizer.texts_to_sequences(X)

max_len = 100  # Adjust as needed
X_padded = pad_sequences(X_sequences, maxlen=max_len)

X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

# Build the RNN model with LSTM
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(LSTM(64))  # Use LSTM instead of SimpleRNN
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=1, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')


Test loss: 0.6781981587409973, Test accuracy: 1.0


In [58]:
# Function to predict sentiment for a sample input
def predict_sentiment(model, tokenizer, text, max_len):
    text_sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(text_sequence, maxlen=max_len)
    predicted_prob = model.predict(padded_sequence)
    predicted_label = (predicted_prob > 0.5).astype(int)
    predicted_label = label_encoder.inverse_transform(predicted_label.flatten())
    return predicted_label[0]

# Example usage with a sample input
sample_input = "This restaurant was amazing!"  # Adjust the input as needed
predicted_sentiment = predict_sentiment(model, tokenizer, sample_input, max_len)

print(f"Predicted sentiment for the sample input: {predicted_sentiment}")

Predicted sentiment for the sample input: POSITIVE


In [61]:
# Assuming you have already trained the model (the code you provided)

# Function to preprocess input text and predict sentiment
def predict_sentiment(model, tokenizer, text, max_len):
    # Preprocess the input text
    text_sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(text_sequence, maxlen=max_len)

    # Predict sentiment
    predicted_prob = model.predict(padded_sequence)
    predicted_label = (predicted_prob > 0.5).astype(int)
    predicted_label = label_encoder.inverse_transform(predicted_label.flatten())

    return predicted_label[0]

# Example usage with a sample input
sample_input = "The food was bad and ambience was not good"  # Adjust the input as needed
predicted_sentiment = predict_sentiment(model, tokenizer, sample_input, max_len)

print(f"Predicted sentiment for the sample input: {predicted_sentiment}")


Predicted sentiment for the sample input: NEGATIVE
