In [123]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openai
import re
import time
import os

# Read the OpenAI key from the environment so it is never stored in the notebook.
# The value is None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

Dataset Description:
- Hotel_Address: Address of hotel.
- Review_Date: Date when reviewer posted the corresponding review.
- Average_Score: Average Score of the hotel, calculated based on the latest comment in the last year.
- Hotel_Name: Name of Hotel
- Reviewer_Nationality: Nationality of Reviewer
- Negative_Review: Negative Review the reviewer gave to the hotel. If the reviewer does not give the negative review, then it should be: 'No Negative'
- Review_Total_Negative_Word_Counts: Total number of words in the negative review.
- Positive_Review: Positive Review the reviewer gave to the hotel. If the reviewer does not give the positive review, then it should be: 'No Positive'
- Review_Total_Positive_Word_Counts: Total number of words in the positive review.
- Reviewer_Score: Score the reviewer has given to the hotel, based on his/her experience
- Total_Number_of_Reviews_Reviewer_Has_Given: Number of reviews the reviewer has given in the past.
- Total_Number_of_Reviews: Total number of valid reviews the hotel has.
- Tags: Tags reviewer gave the hotel.
- days_since_review: Duration between the review date and scrape date.
- Additional_Number_of_Scoring: There are also some guests who just made a scoring on the service rather than a review. This number indicates how many valid scores without a review there are.
- lat: Latitude of the hotel
- lng: Longitude of the hotel

https://www.kaggle.com/datasets/jiashenliu/515k-hotel-reviews-data-in-europe

In [124]:
# Load the raw review dataset from the local data folder.
reviews = pd.read_csv(filepath_or_buffer='data/reviews.csv')

# Preview the first five rows.
reviews.head(n=5)

Unnamed: 0,Hotel_Address,Additional_Number_of_Scoring,Review_Date,Average_Score,Hotel_Name,Reviewer_Nationality,Negative_Review,Review_Total_Negative_Word_Counts,Total_Number_of_Reviews,Positive_Review,Review_Total_Positive_Word_Counts,Total_Number_of_Reviews_Reviewer_Has_Given,Reviewer_Score,Tags,days_since_review,lat,lng
0,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,8/3/2017,7.7,Hotel Arena,Russia,I am so angry that i made this post available...,397,1403,Only the park outside of the hotel was beauti...,11,7,2.9,"[' Leisure trip ', ' Couple ', ' Duplex Double...",0 days,52.360576,4.915968
1,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,8/3/2017,7.7,Hotel Arena,Ireland,No Negative,0,1403,No real complaints the hotel was great great ...,105,7,7.5,"[' Leisure trip ', ' Couple ', ' Duplex Double...",0 days,52.360576,4.915968
2,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/31/2017,7.7,Hotel Arena,Australia,Rooms are nice but for elderly a bit difficul...,42,1403,Location was good and staff were ok It is cut...,21,9,7.1,"[' Leisure trip ', ' Family with young childre...",3 days,52.360576,4.915968
3,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/31/2017,7.7,Hotel Arena,United Kingdom,My room was dirty and I was afraid to walk ba...,210,1403,Great location in nice surroundings the bar a...,26,1,3.8,"[' Leisure trip ', ' Solo traveler ', ' Duplex...",3 days,52.360576,4.915968
4,s Gravesandestraat 55 Oost 1092 AA Amsterdam ...,194,7/24/2017,7.7,Hotel Arena,New Zealand,You When I booked with your company on line y...,140,1403,Amazing location and building Romantic setting,8,3,6.7,"[' Leisure trip ', ' Couple ', ' Suite ', ' St...",10 days,52.360576,4.915968


In [125]:
# value_counts() sorts by frequency in descending order, so the first entry
# is the hotel with the most reviews.
reviews['Hotel_Name'].value_counts().iloc[:1]

Britannia International Hotel Canary Wharf    4789
Name: Hotel_Name, dtype: int64

In [126]:
# Restrict the dataset to the most-reviewed hotel, drop the columns that are
# not used by the touchpoint analysis, lowercase the column names, renumber
# the rows from 0 and add two empty columns that will later hold the
# touchpoints detected in each positive / negative review.
# NOTE: this cell rebinds `reviews`, so it cannot be re-run without first
# re-running the cell that loads the CSV.
reviews = (
    reviews
    .loc[reviews['Hotel_Name'] == 'Britannia International Hotel Canary Wharf']
    .drop(
        columns=[
            'Hotel_Name',
            'Hotel_Address',
            'Additional_Number_of_Scoring',
            'Total_Number_of_Reviews',
            'days_since_review',
            'lat',
            'lng',
            'Review_Total_Positive_Word_Counts',
            'Review_Total_Negative_Word_Counts',
        ]
    )
    .rename(columns=str.lower)
    .reset_index(drop=True)
    .assign(positive_touchpoints=None, negative_touchpoints=None)
)

reviews.head()

Unnamed: 0,review_date,average_score,reviewer_nationality,negative_review,positive_review,total_number_of_reviews_reviewer_has_given,reviewer_score,tags,positive_touchpoints,negative_touchpoints
0,8/3/2017,7.1,United Kingdom,The car park was small and unpleasant People ...,The location was excellent for getting to the O2,3,7.9,"[' Leisure trip ', ' Group ', ' Standard Doubl...",,
1,8/3/2017,7.1,United Kingdom,We weren t told that the only spa facility op...,The house keeping lady made my boyfriends day...,3,8.3,"[' Leisure trip ', ' Couple ', ' Standard Doub...",,
2,8/2/2017,7.1,United Kingdom,I asked how far the O2 was and got told a 7 m...,No Positive,1,6.3,"[' Leisure trip ', ' Solo traveler ', ' Standa...",,
3,8/2/2017,7.1,United Kingdom,Hot stuffy room air con not working properly ...,The bed was OK,2,5.4,"[' Couple ', ' Standard Twin Room ', ' Stayed ...",,
4,8/2/2017,7.1,United Kingdom,Although the price seems like it is cheap you...,The Receptionists and Cleaners are very polit...,4,6.3,"[' Leisure trip ', ' Family with young childre...",,


We are performing a digital transformation of our hotel. In order to do so we are evaluating reviews from clients. We have the following customer touch points:
-1 Hotel search and selection
-2 Room booking
-3 Booking confirmation
-4 Reception and registration
-5 Room assignment
-6 Room key handing
-7 Room service and personnel
-8 Facilities and room conditions and usage
-9 Customer service
-10 Food and drinks
-11 Payment
-12 Comments and reviewing
-13 Room key return and leave

Answer which of these touchpoints are mentioned in the following review. Answer with only numbers, or say "None" if none apply. Do not include any other text in the answer:

The Receptionists and Cleaners are very polite friendly and hard working staff We enjoyed our stay and would come again Rooms were clean and had enough amenities or one could request anytime without a problem

In [127]:
# Display the first ten positive reviews in full (no column-width truncation).
with pd.option_context('display.max_colwidth', None):
    display(reviews['positive_review'].iloc[:10])

0                                                                                                                                                                                                                                               The location was excellent for getting to the O2
1                                                                                                                                                                                                                           The house keeping lady made my boyfriends day with how funny she was
2                                                                                                                                                                                                                                                                                    No Positive
3                                                                                                                                    

In [128]:
# Display the first ten negative reviews in full (no column-width truncation).
with pd.option_context('display.max_colwidth', None):
    display(reviews['negative_review'].iloc[:10])

0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        The car park was small and unpleasant People with Mercedes and BMWs took over 2 spaces We were lucky to get a space after driving around about 10 times 
1                                                                                                                                                                                     

In [None]:
# Per-touchpoint mention counters (position k-1 holds the count for touchpoint k).
# Both counters are created here; the negative counter is filled by a later cell.
positive_touchpoint_scores = np.zeros(13, dtype=int)
negative_touchpoint_scores = np.zeros(13, dtype=int)

# Retry policy for the API call: a bounded number of attempts so that a
# non-transient failure (e.g. a missing/invalid API key) cannot loop forever.
max_attempts = 5
retry_delay_seconds = 60

# Ask the chat model which touchpoints each positive review mentions, store the
# answer per review and accumulate the per-touchpoint totals.
for i in range(0, len(reviews)):

    review_text = reviews['positive_review'][i]
    # 'No Positive' is the dataset's placeholder for "no positive review given".
    if review_text == 'No Positive':
        continue

    for attempt in range(1, max_attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a helpful abedient assistant"},
                    {"role": "user", "content": f"We are performing a digital transformation of our hotel in England. In order to do so we are evaluating reviews from clients. We have the following customer touch points:\n-1 Hotel search and selection\n-2 Room booking\n-3 Booking confirmation\n-4 Reception and registration\n-5 Room assignment\n-6 Room key handing\n-7 Room service and personnel\n-8 Facilities and room coditions and usage\n-9 Customer service\n-10 Food and drinks\n-11 Payment\n-12 Comments and reviewing\n-13 Room key return and leave\n\nAnswer which of these touchpoints are mentioned in the following review. Answer with only numbers, or say \"None\" if none apply. Do not include any other text in the answer:\n\n{review_text}"}
                ],
                temperature=0.2,
                max_tokens=50,
                top_p=1,
                frequency_penalty=0.0,
                presence_penalty=0.0
            )
            break
        except Exception as e:
            if attempt == max_attempts:
                # Results gathered so far stay in `reviews` and the counters.
                print(f"Review {i} failed {max_attempts} times, giving up: {e}")
                raise
            print(f"Review {i} failed, retrying in 60 seconds: {e}")
            time.sleep(retry_delay_seconds)

    try:
        reply_text = response["choices"][0]["message"]["content"]
        touchpoints_idxs = [int(s) for s in re.findall(r'\d+', reply_text)]
    except Exception as e:
        # Malformed response: report it and move on to the next review.
        print(e)
        continue

    # Keep only valid touchpoint numbers (1..13) and count each one at most
    # once per review. Without this check a returned 0 would silently
    # increment the last counter (index -1) and a number above 13 would raise
    # an IndexError in the middle of the run.
    num_touchpoints = len(positive_touchpoint_scores)
    touchpoints_idxs = list(dict.fromkeys(
        idx for idx in touchpoints_idxs if 1 <= idx <= num_touchpoints
    ))

    # Use .at for a single-cell write: chained indexing
    # (reviews[col][i] = ...) may write to a temporary copy.
    reviews.at[i, 'positive_touchpoints'] = touchpoints_idxs if touchpoints_idxs else None

    for touchpoint_idx in touchpoints_idxs:
        positive_touchpoint_scores[touchpoint_idx - 1] += 1

In [131]:
# Preview the first five rows to check the filled-in positive_touchpoints column.
reviews.head(n=5)


Unnamed: 0,review_date,average_score,reviewer_nationality,negative_review,positive_review,total_number_of_reviews_reviewer_has_given,reviewer_score,tags,positive_touchpoints,negative_touchpoints
0,8/3/2017,7.1,United Kingdom,The car park was small and unpleasant People ...,The location was excellent for getting to the O2,3,7.9,"[' Leisure trip ', ' Group ', ' Standard Doubl...",[1],
1,8/3/2017,7.1,United Kingdom,We weren t told that the only spa facility op...,The house keeping lady made my boyfriends day...,3,8.3,"[' Leisure trip ', ' Couple ', ' Standard Doub...","[7, 8]",
2,8/2/2017,7.1,United Kingdom,I asked how far the O2 was and got told a 7 m...,No Positive,1,6.3,"[' Leisure trip ', ' Solo traveler ', ' Standa...",,
3,8/2/2017,7.1,United Kingdom,Hot stuffy room air con not working properly ...,The bed was OK,2,5.4,"[' Couple ', ' Standard Twin Room ', ' Stayed ...",[8],
4,8/2/2017,7.1,United Kingdom,Although the price seems like it is cheap you...,The Receptionists and Cleaners are very polit...,4,6.3,"[' Leisure trip ', ' Family with young childre...","[4, 7, 8]",


In [133]:
# Checkpoint the positive-touchpoint results to disk (row index is not written).
reviews.to_csv(path_or_buf='data/reviews_with_touchpoints_positive.csv', index=False)

In [None]:

# Start from a fresh counter so that re-running this cell does not add to the
# totals of a previous run (position k-1 holds the count for touchpoint k).
negative_touchpoint_scores = np.zeros(13, dtype=int)

# Retry policy for the API call: a bounded number of attempts so that a
# non-transient failure (e.g. a missing/invalid API key) cannot loop forever.
max_attempts = 5
retry_delay_seconds = 60

# Ask the chat model which touchpoints each negative review mentions, store the
# answer per review and accumulate the per-touchpoint totals.
for i in range(0, len(reviews)):
    review_text = reviews['negative_review'][i]
    # 'No Negative' is the dataset's placeholder for "no negative review given".
    if review_text == 'No Negative':
        continue

    for attempt in range(1, max_attempts + 1):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a helpful abedient assistant"},
                    {"role": "user", "content": f"We are performing a digital transformation of our hotel in England. In order to do so we are evaluating reviews from clients. We have the following customer touch points:\n-1 Hotel search and selection\n-2 Room booking\n-3 Booking confirmation\n-4 Reception and registration\n-5 Room assignment\n-6 Room key handing\n-7 Room service and personnel\n-8 Facilities and room coditions and usage\n-9 Customer service\n-10 Food and drinks\n-11 Payment\n-12 Comments and reviewing\n-13 Room key return and leave\n\nAnswer which of these touchpoints are mentioned in the following review. Answer with only numbers, or say \"None\" if none apply. Do not include any other text in the answer:\n\n{review_text}"}
                ],
                temperature=0.2,
                max_tokens=50,
                top_p=1,
                frequency_penalty=0.0,
                presence_penalty=0.0
            )
            break
        except Exception as e:
            if attempt == max_attempts:
                # Results gathered so far stay in `reviews` and the counter.
                print(f"Review {i} failed {max_attempts} times, giving up: {e}")
                raise
            print(f"Review {i} failed, retrying in 60 seconds: {e}")
            time.sleep(retry_delay_seconds)

    try:
        reply_text = response["choices"][0]["message"]["content"]
        touchpoints_idxs = [int(s) for s in re.findall(r'\d+', reply_text)]
    except Exception as e:
        # Malformed response: report it and move on to the next review.
        print(e)
        continue

    # Keep only valid touchpoint numbers (1..13) and count each one at most
    # once per review. Without this check a returned 0 would silently
    # increment the last counter (index -1) and a number above 13 would raise
    # an IndexError in the middle of the run.
    num_touchpoints = len(negative_touchpoint_scores)
    touchpoints_idxs = list(dict.fromkeys(
        idx for idx in touchpoints_idxs if 1 <= idx <= num_touchpoints
    ))

    # Use .at for a single-cell write: chained indexing
    # (reviews[col][i] = ...) may write to a temporary copy.
    reviews.at[i, 'negative_touchpoints'] = touchpoints_idxs if touchpoints_idxs else None

    for touchpoint_idx in touchpoints_idxs:
        negative_touchpoint_scores[touchpoint_idx - 1] += 1

print(negative_touchpoint_scores)

In [139]:
# Preview the first five rows to check both touchpoint columns.
reviews.head(n=5)


Unnamed: 0,review_date,average_score,reviewer_nationality,negative_review,positive_review,total_number_of_reviews_reviewer_has_given,reviewer_score,tags,positive_touchpoints,negative_touchpoints
0,8/3/2017,7.1,United Kingdom,The car park was small and unpleasant People ...,The location was excellent for getting to the O2,3,7.9,"[' Leisure trip ', ' Group ', ' Standard Doubl...",[1],[8]
1,8/3/2017,7.1,United Kingdom,We weren t told that the only spa facility op...,The house keeping lady made my boyfriends day...,3,8.3,"[' Leisure trip ', ' Couple ', ' Standard Doub...","[7, 8]",[8]
2,8/2/2017,7.1,United Kingdom,I asked how far the O2 was and got told a 7 m...,No Positive,1,6.3,"[' Leisure trip ', ' Solo traveler ', ' Standa...",,
3,8/2/2017,7.1,United Kingdom,Hot stuffy room air con not working properly ...,The bed was OK,2,5.4,"[' Couple ', ' Standard Twin Room ', ' Stayed ...",[8],[8]
4,8/2/2017,7.1,United Kingdom,Although the price seems like it is cheap you...,The Receptionists and Cleaners are very polit...,4,6.3,"[' Leisure trip ', ' Family with young childre...","[4, 7, 8]","[8, 10, 11]"


In [140]:
# Persist the reviews with both touchpoint columns (row index is not written).
reviews.to_csv(path_or_buf='data/reviews_with_touchpoints.csv', index=False)

In [152]:
# Net sentiment per touchpoint: positive mentions minus negative mentions.
touchpoint_diff = positive_touchpoint_scores - negative_touchpoint_scores

# Min-max normalize the differences to the range [0, 1]: the touchpoint with
# the lowest net score maps to 0 and the one with the highest maps to 1.
diff_min = touchpoint_diff.min()
diff_range = touchpoint_diff.max() - diff_min
if diff_range == 0:
    # All touchpoints have the same net score; dividing by the zero range
    # would produce NaNs, so report a flat 0 for every touchpoint instead.
    normalized_touchpoint_diff = np.zeros(touchpoint_diff.shape, dtype=float)
else:
    normalized_touchpoint_diff = (touchpoint_diff - diff_min) / diff_range

np.round(normalized_touchpoint_diff, 2)

array([1.  , 0.83, 0.85, 0.68, 0.79, 0.87, 0.81, 0.  , 0.62, 0.62, 0.68,
       0.7 , 0.81])

| Index | Touchpoint | Score | Assessment |
| ------|------------|-------|------------|
| 1 | Hotel search and selection | 100% | 😄  |
| 2 | Room booking | 83% |  😄 |
| 3 | Booking confirmation | 85% | 😄 |
| 4 | Reception and registration | 68% | 😒  |
| 5 | Room assignment | 79% | 😒 |
| 6 | Room key handing | 87% | 😄 |
| 7 | Room service and personnel | 81% | 😄 |
| 8 | Facilities and room conditions and usage | 0% | 😞 |
| 9 | Customer service | 62% | 😒 |
| 10 | Food and drinks | 62% | 😒 |
| 11 | Payment | 68% | 😒 |
| 12 | Comments and reviewing | 70% | 😒 |
| 13 | Room key return and leave | 81% | 😄 |