# **Libraries**

In [39]:
from google.colab import drive
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import string
nltk.download('punkt')
nltk.download('stopwords')


nltk.download('vader_lexicon')
drive.mount('/content/drive')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Connection to Data**

In [22]:
data_df = pd.read_excel('/content/drive/MyDrive/Courses/Specialization/Analytical Modelling/Dataset.xlsx', sheet_name='Sheet1')


# **Initialize model and creation of sentiment analysis function**


In [23]:
sia = SentimentIntensityAnalyzer()

def classify_sentiment(review):
    scores = sia.polarity_scores(review)
    compound_score = scores['compound']
    if compound_score >= 0.05:
        return 'Positive'
    elif compound_score <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'

# **Apply new function and create column with classifications**

In [27]:
data_df['Review'] = data_df['Review'].astype(str)
data_df['Sentiment_Analysis'] = data_df['Review'].apply(classify_sentiment)

# **Clean text in order to analyze most present words**

In [40]:
stop_words = set(stopwords.words('english'))

In [30]:
def limpiar_texto(texto):
    texto = texto.lower()
    texto = ''.join([char for char in texto if char not in string.punctuation])
    tokens = word_tokenize(texto)
    tokens = [word for word in tokens if word not in stop_words]
    return tokens


# **Separate the data set into 2 (One for Positive reviews, other for Negative reviews)**

In [32]:
positive_reviews_df = data_df[data_df['Sentiment_Analysis'] == 'Positive']
negative_reviews_df = data_df[data_df['Sentiment_Analysis'] == 'Negative']

# **Join all reviews for each data set and recover the TOP 30 most present words**

In [33]:
positive_reviews_text = ' '.join(positive_reviews_df['Review'])
negative_reviews_text = ' '.join(negative_reviews_df['Review'])

In [41]:
positive_tokens = limpiar_texto(positive_reviews_text)
negative_tokens = limpiar_texto(negative_reviews_text)

In [42]:
positive_word_counts = Counter(positive_tokens)
negative_word_counts = Counter(negative_tokens)

In [47]:
most_common_positive = positive_word_counts.most_common(30)
most_common_negative = negative_word_counts.most_common(30)

In [48]:
print("Most Common Positive Words:")
for word, count in most_common_positive:
    print(f"{word}: {count}")

print("\nMost Common Negative Words:")
for word, count in most_common_negative:
    print(f"{word}: {count}")

Most Common Positive Words:
food: 123
good: 123
service: 101
’: 60
great: 59
taco: 55
place: 45
bell: 45
nice: 35
excellent: 34
fast: 33
time: 31
like: 30
clean: 30
order: 29
staff: 28
restaurant: 26
well: 25
us: 25
best: 24
love: 24
always: 24
drive: 24
delicious: 22
amazing: 22
thru: 21
better: 20
friendly: 20
tacos: 20
would: 19

Most Common Negative Words:
order: 118
service: 78
’: 71
taco: 70
food: 70
bad: 58
place: 56
bell: 56
dont: 56
drive: 55
one: 51
worst: 49
never: 46
people: 43
rude: 40
time: 40
terrible: 39
thru: 38
ever: 34
even: 34
orders: 34
like: 32
get: 32
customer: 31
horrible: 30
always: 29
employees: 29
go: 28
location: 27
manager: 26


# **Create new dataframes with frequency for each word in order to analyze in Tableau, after we export the data**

In [49]:
positive_df = pd.DataFrame(most_common_positive, columns=['Word', 'Frequency'])
negative_df = pd.DataFrame(most_common_negative, columns=['Word', 'Frequency'])

In [51]:
file_path = '/content/drive/My Drive/positive_reviews_words.xlsx'

# Exportar el DataFrame a un archivo Excel
positive_df.to_excel(file_path, index=False)

In [50]:
file_path = '/content/drive/My Drive/negative_reviews_words.xlsx'

# Exportar el DataFrame a un archivo Excel
negative_df.to_excel(file_path, index=False)

In [52]:
file_path = '/content/drive/My Drive/dataset.xlsx'

# Exportar el DataFrame a un archivo Excel
data_df.to_excel(file_path, index=False)