In [35]:
import requests
import csv
from dotenv import load_dotenv
import os
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import re
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [36]:
viana_axis_hotel = 'AXIS VIANA BUSINESS & SPA HOTEL'
ponte_lima_axis_hotel = 'Axis Ponte de Lima Golf Resort Hotel'
ofir_axis_hotel = 'Axis Ofir Beach Resort Hotel'
braga_axis_hotel = 'Basic Braga by Axis'
vermar_axis_hotel = 'Hotel Axis Vermar Conference & Beach Hotel'
porto_axis_hotel = 'Axis Porto Business & SPA Hotel'
porto_club_axis_hotel = 'Axis Porto Club Hotel'


def get_hotel_reviews(hotel_name, api_key):
    import googlemaps
    """Fetches all reviews for a hotel using the Google Places API, handling pagination."""
    gmaps = googlemaps.Client(key=api_key)

    place_id = gmaps.find_place(
        input=hotel_name,
        input_type='textquery',
        fields=['place_id']
    )['candidates'][0]['place_id']

    all_reviews = []
    next_page_token = None

    while True:
        place_details = gmaps.place(
            place_id,
            fields=['name', 'reviews']
        )

        reviews = place_details['result']['reviews']

        for review in reviews:
            all_reviews.append({
                'review_text': review.get('text', ''),
                'rating': review.get('rating', None)
            })

        if 'next_page_token' in place_details['result']:
            next_page_token = place_details['result']['next_page_token']
        else:
            break

    return all_reviews


def save_reviews_to_csv(reviews, file_name):
    """Saves hotel reviews to a CSV file."""
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Review', 'Classification'])
        for review in reviews:
            writer.writerow([review.get('review_text', ''), review.get('rating', '')])

In [37]:
load_dotenv()
API_KEY = os.getenv("MAPS_API_KEY")

viana_reviews = get_hotel_reviews(viana_axis_hotel, API_KEY)
save_reviews_to_csv(viana_reviews, 'viana_reviews.csv')

ponte_lima_reviews = get_hotel_reviews(ponte_lima_axis_hotel, API_KEY)
save_reviews_to_csv(ponte_lima_reviews, 'ponte_lima_reviews.csv')

ofir_reviews = get_hotel_reviews(ofir_axis_hotel, API_KEY)
save_reviews_to_csv(ofir_reviews, 'ofir_reviews.csv')

braga_reviews = get_hotel_reviews(braga_axis_hotel, API_KEY)
save_reviews_to_csv(braga_reviews, 'braga_reviews.csv')

vermar_reviews = get_hotel_reviews(vermar_axis_hotel, API_KEY)
save_reviews_to_csv(vermar_reviews, 'vermar_reviews.csv')

porto_business_reviews = get_hotel_reviews(porto_axis_hotel, API_KEY)
save_reviews_to_csv(porto_business_reviews, 'porto_business_reviews.csv')

porto_club_reviews = get_hotel_reviews(porto_club_axis_hotel, API_KEY)
save_reviews_to_csv(porto_club_reviews, 'porto_club_reviews.csv')

In [38]:
viana_reviews

[{'review_text': 'I have a wonderful stay in this hotel. I got room upgrade, the room is spacious, and has everything I need, such as fridge and kettle. The breakfast and dinner buffets are tasty. The staff in this hotel can speak English and even Chinese very well.\nFree access to the gym and swimming pool, the gym and swimming pool are big and well furnished.\nAbout 20 minutes walk to the train station, and all the tourist attractions.',
  'rating': 5},
 {'review_text': 'Poor quality hotel\n- Their towel policy is just ridiculous. If you are a guest in the hotel, you are told you are not allowed a gym towel, yet they ask you to use one when you are in there? Very rude reception team about it. Then, when you check out, you have to bring the wet towel back down to reception.\n- Room is very hot, uncomfortable bed and lots of wasted space.\n- They then overcharged me nearly 400 euro and refusing to give it back, so I’ve had to claim it back through the bank.',
  'rating': 1},
 {'review_

In [39]:
file_paths = ['viana_reviews.csv', 'ponte_lima_reviews.csv', 'ofir_reviews.csv', 
              'braga_reviews.csv', 'vermar_reviews.csv', 'porto_business_reviews.csv', 'porto_club_reviews.csv']

combined_df = pd.read_csv(file_paths[0])

for file_path in file_paths[1:]:
    temp_df = pd.read_csv(file_path, header=0) 
    combined_df = pd.concat([combined_df, temp_df], ignore_index=True)

output_path = 'combined.csv'
with open(output_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(combined_df.columns)
    writer.writerows(combined_df.values)

for file_path in file_paths:
    if os.path.exists(file_path):
        os.remove(file_path)

In [40]:
def removing_stop_words(text: str):
    # Ensure required NLTK resources are downloaded
    nltk.download('punkt')
    nltk.download('stopwords')
    
    # Initialize stop sword set and stemmer
    stop_words = set(stopwords.words('english'))

    # Validate input type
    if not isinstance(text, str):
        return ''

    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE) # Remove URLs 
    text = re.sub(r'\W', ' ', text) # Remove non-alphanumeric characters (punctuation, special symbols, etc)
    text = re.sub(r'\d+', '', text) # Remove numeric values
    tokenize = word_tokenize(text) 
    words = [word for word in tokenize if word not in stop_words] # Remove stop words

    # Join the processed words back into a single string with spaces
    return ' '.join(words)

In [41]:
file_path = 'combined.csv'
df = pd.read_csv(file_path)

if 'Review' in df.columns:
    # Adding a new column with the original reviews
    df['Original_Review'] = df['Review']
    # Apply the preprocessing function in the data
    df['Review'] = df['Review'].apply(removing_stop_words)

    # Saving the result in a new csv file
    processed_file_path = 'Processed_Project.csv'
    with open(processed_file_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(df.columns) # Writing the headers
        writer.writerows(df.values) # Writing the DataFrame lines

os.remove(file_path) # Removing the original file

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\diogo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\User

In [42]:
from transformers.utils import logging

logging.set_verbosity_error()

file_path = 'Processed_Project.csv' 
df = pd.read_csv(file_path)

review_column = 'Review'

def analyze_sentiment(text: str):
    """
    Performs sentiment analysis on a given text using a pre-trained model from Hugging Face Transformers.

    Args:
        text (str): The input text to be analyzed

    Returns:
        str: The sentiment label predicted by the model ('POSITIVE' or 'NEGATIVE').env
            Returns 'No analysis' if the input is invalid or null.
    """
    from transformers import pipeline

    # Initialize the sentiment-analysis pipeline
    # The pipeline uses a pre-trained model to classify sentiment (e.g., POSITIVE or NEGATIVE)
    sentiment_pipeline = pipeline(task='sentiment-analysis', device="cuda")

    # Check if the input is a valid, no-null string
    if isinstance(text, str) and pd.notnull(text): 
        # If the input is valid, call the sentiment-analysis pipeline
        # The pipeline returns a list of dictionares; [0] accesses the first result, and ['label'] gets the sentiment label
        return sentiment_pipeline(text)[0]['label'] 
    else:
         # If the input is invalid (not a string or null), return "No analysis"
        return "No analysis"

df['Sentiment'] = df[review_column].apply(analyze_sentiment)
sentiment_map = {'POSITIVE': 1, 'NEGATIVE': 0}
df['Binary_Sentiment'] = df['Sentiment'].map(sentiment_map)

In [43]:
# Saving the results in a new CSV file
output_path = 'Sentiment_analysis.csv'

with open(output_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(df.columns)
    writer.writerows(df.values)

# Deleting the original file
if os.path.exists(file_path):
    os.remove(file_path)