In [None]:
import requests
import csv
from dotenv import load_dotenv
import os
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from transformers import pipeline
import re
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
viana_axis_hotel = 'AXIS VIANA BUSINESS & SPA HOTEL'
ponte_lima_axis_hotel = 'Axis Ponte de Lima Golf Resort Hotel'
ofir_axis_hotel = 'Axis Ofir Beach Resort Hotel'
braga_axis_hotel = 'Basic Braga by Axis'
vermar_axis_hotel = 'Hotel Axis Vermar Conference & Beach Hotel'
porto_axis_hotel = 'Axis Porto Business & SPA Hotel'
porto_club_axis_hotel = 'Axis Porto Club Hotel'


def get_hotel_reviews(hotel_name, api_key):
    import googlemaps
    """Fetches all reviews for a hotel using the Google Places API, handling pagination."""
    gmaps = googlemaps.Client(key=api_key)

    place_id = gmaps.find_place(input=hotel_name, input_type='textquery', fields=['place_id'])['candidates'][0]['place_id']
    all_reviews = []
    next_page_token = None

    while True:
        place_details = gmaps.place(place_id, fields=['name', 'reviews'])
        reviews = place_details['result']['reviews']
        all_reviews.extend(reviews)

        if 'next_page_token' in place_details['result']:
            next_page_token = place_details['result']['next_page_token']
        else:
            break

    return all_reviews


def save_reviews_to_csv(reviews, file_name):
    """Saves hotel reviews to a CSV file."""
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Review']) 
        for review in reviews:
            writer.writerow([review['text']]) 

In [None]:
load_dotenv()
API_KEY = os.getenv("MAPS_API_KEY")

viana_reviews = get_hotel_reviews(viana_axis_hotel, API_KEY)
save_reviews_to_csv(viana_reviews, 'viana_reviews.csv')

ponte_lima_reviews = get_hotel_reviews(ponte_lima_axis_hotel, API_KEY)
save_reviews_to_csv(ponte_lima_reviews, 'ponte_lima_reviews.csv')

ofir_reviews = get_hotel_reviews(ofir_axis_hotel, API_KEY)
save_reviews_to_csv(ofir_reviews, 'ofir_reviews.csv')

braga_reviews = get_hotel_reviews(braga_axis_hotel, API_KEY)
save_reviews_to_csv(braga_reviews, 'braga_reviews.csv')

vermar_reviews = get_hotel_reviews(vermar_axis_hotel, API_KEY)
save_reviews_to_csv(vermar_reviews, 'vermar_reviews.csv')

porto_business_reviews = get_hotel_reviews(porto_axis_hotel, API_KEY)
save_reviews_to_csv(porto_business_reviews, 'porto_business_reviews.csv')

porto_club_reviews = get_hotel_reviews(porto_club_axis_hotel, API_KEY)
save_reviews_to_csv(porto_club_reviews, 'porto_club_reviews.csv')

In [30]:
file_paths = ['viana_reviews.csv', 'ponte_lima_reviews.csv', 'ofir_reviews.csv', 
              'braga_reviews.csv', 'vermar_reviews.csv', 'porto_business_reviews.csv', 'porto_club_reviews.csv']

combined_df = pd.read_csv(file_paths[0])

for file_path in file_paths[1:]:
    temp_df = pd.read_csv(file_path, header=0) 
    combined_df = pd.concat([combined_df, temp_df], ignore_index=True)

output_path = 'combined.csv'
with open(output_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(combined_df.columns)
    writer.writerows(combined_df.values)

for file_path in file_paths:
    if os.path.exists(file_path):
        os.remove(file_path)

In [31]:
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess_text(text):
    if not isinstance(text, str):
        return ''
    text = text.lower() 
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE) 
    text = re.sub(r'\W', ' ', text)  
    text = re.sub(r'\d+', '', text)  
    words = word_tokenize(text) 
    words = [word for word in words if word not in stop_words] 
    words = [stemmer.stem(word) for word in words]  
    return ' '.join(words)

In [32]:
file_path = 'combined.csv'
df = pd.read_csv(file_path)

if 'Review' in df.columns:
    # Adding a new column with the original reviews
    df['Original_Review'] = df['Review']
    # Apply the preprocessing function in the data
    df['Review'] = df['Review'].apply(preprocess_text)

    # Saving the result in a new csv file
    processed_file_path = 'Processed_Project.csv'
    with open(processed_file_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(df.columns) # Writing the headers
        writer.writerows(df.values) # Writing the DataFrame lines

os.remove(file_path) # Removing the original file

In [33]:
file_path = 'Processed_Project.csv' 
df = pd.read_csv(file_path)

review_column = 'Review'

sentiment_pipeline = pipeline('sentiment-analysis')

def analyze_sentiment(text):
    if isinstance(text, str) and pd.notnull(text): # Check if the input is a string and a not-null value
        return sentiment_pipeline(text)[0]['label'] # Calling the pipeline from transformers and returns the sentiment analysis
    else:
        return "No analysis" # If the input is not a string or a null (NaN/None) value, does nothing

df['Sentiment'] = df[review_column].apply(analyze_sentiment)
sentiment_map = {'POSITIVE': 1, 'NEGATIVE': 0}
df['Binary_Sentiment'] = df['Sentiment'].map(sentiment_map)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Device set to use cuda:0


In [35]:
# Saving the results in a new CSV file
output_path = 'Sentiment_analysis.csv'

with open(output_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(df.columns)
    writer.writerows(df.values)

# Deleting the original file
if os.path.exists(file_path):
    os.remove(file_path)