In [1]:
import joblib
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize 

# Fetch the NLTK resources requested by this notebook: the stop-word corpus
# and the 'punkt' tokenizer data.
for nltk_resource in ('stopwords', 'punkt'):
    nltk.download(nltk_resource)

# Lazily-built resources shared by every text_preprocessing call, so the
# stop-word corpus is read and the stemmer constructed only once.
_PREPROCESSING_RESOURCES = {}


def _get_preprocessing_resources():
    """Return the cached (stop_words, stemmer) pair, building it on first use."""
    if not _PREPROCESSING_RESOURCES:
        _PREPROCESSING_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESSING_RESOURCES['stemmer'] = PorterStemmer(PorterStemmer.ORIGINAL_ALGORITHM)
    return _PREPROCESSING_RESOURCES['stop_words'], _PREPROCESSING_RESOURCES['stemmer']


def text_preprocessing(text):
    """Normalise a piece of text into a space-separated string of stems.

    The text is tokenized with ``word_tokenize``; tokens found in the English
    stop-word list and tokens that are not purely alphanumeric (e.g.
    punctuation) are dropped; each remaining token is stemmed with the
    original Porter algorithm.

    Parameters
    ----------
    text : str
        Raw text to preprocess.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces ('' when nothing survives
        the filters).
    """
    stop_words, stemmer = _get_preprocessing_resources()

    # NOTE(review): the stop-word test compares tokens as-is (case-sensitive),
    # so a capitalised token such as 'The' is presumably not filtered if the
    # corpus entries are lowercase. Kept unchanged on purpose: altering it
    # could diverge from the preprocessing the saved model was trained with.
    return ' '.join(
        stemmer.stem(token)
        for token in word_tokenize(text)
        if token not in stop_words and token.isalnum()
    )

# Load the trained Naive Bayes model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load a 'naive_bayes_model.pkl' that comes from a trusted source.
pipe_NB_loaded = joblib.load('naive_bayes_model.pkl')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Igor\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Igor\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
import pandas as pd

# Load the reviews to score. Missing review text becomes an empty string so
# every row can still go through the preprocessing step.
df = pd.read_csv('C:\\data_to_analyze.csv')
df['TOTAL_REVIEW'] = df['TOTAL_REVIEW'].fillna('')


def _sentiment_label(prediction):
    """Translate a raw model prediction into its display label.

    1 -> 'Positive', 0 -> 'Neutral', any other value -> 'Negative'.
    NOTE(review): this assumes the saved model encodes its classes that way;
    confirm against the training notebook.
    """
    if prediction == 1:
        return 'Positive'
    if prediction == 0:
        return 'Neutral'
    return 'Negative'


# Preprocess every review first, then classify them in one batched predict
# call instead of invoking the pipeline once per row.
cleaned_reviews = [text_preprocessing(str(review)) for review in df['TOTAL_REVIEW']]
# With no rows there is nothing to predict, so the model is not called at all.
predictions = pipe_NB_loaded.predict(cleaned_reviews) if cleaned_reviews else []

sentiments = [_sentiment_label(prediction) for prediction in predictions]
df['sentiment'] = sentiments

df.head()


Unnamed: 0,HOTEL_NAME,HOTEL_ADDRESS,AVEREGE_SCORE,LAT,LNG,REVIEW_DATE,REVIEWER_SCORE,TOTAL_REVIEW,POSITIVE_REVIEW,NEGATIVE_REVIEW,TAGS,sentiment
0,A La Villa Madame,44 Rue Madame 6th arr 75006 Paris France,8.8,48.848861,2.331526,5/29/2017,10.0,The bed was extra comfy the street is really ...,The bed was extra comfy the street is really ...,,"[' Leisure trip ', ' Solo traveler ', ' Classi...",Positive
1,A La Villa Madame,44 Rue Madame 6th arr 75006 Paris France,8.8,48.848861,2.331526,8/15/2015,10.0,no dogs allowed policy Adorable little hotel ...,Adorable little hotel beautifully decorated c...,no dogs allowed policy,"[' Leisure trip ', ' Solo traveler ', ' Deluxe...",Positive
2,A La Villa Madame,44 Rue Madame 6th arr 75006 Paris France,8.8,48.848861,2.331526,8/26/2015,9.6,Everything was above expectation Will definit...,Everything was above expectation Will definit...,,"[' Leisure trip ', ' Couple ', ' Executive Dou...",Positive
3,A La Villa Madame,44 Rue Madame 6th arr 75006 Paris France,8.8,48.848861,2.331526,9/30/2015,9.6,Next to an elementary school noisy from 8 30 ...,Pleasant service great breakfast walking dist...,Next to an elementary school noisy from 8 30 ...,"[' Leisure trip ', ' Couple ', ' Deluxe Double...",Positive
4,A La Villa Madame,44 Rue Madame 6th arr 75006 Paris France,8.8,48.848861,2.331526,10/13/2015,7.9,Location et breakfast There is a Italian rest...,Location et breakfast There is a Italian rest...,,"[' Leisure trip ', ' Couple ', ' Classic Room ...",Positive


In [3]:
# Write the frame, including the 'sentiment' column, to a CSV at a path
# relative to the working directory; index=False leaves the row index out.
df.to_csv('data_analyzed_nb.csv', index=False)