# Recommendation System 1.0 (Current status)

## Downloading & Cleaning Data

### Downloading Original Dataset from Kaggle

In [1]:
!pip install opendatasets



In [2]:
# Eric's Kaggle API key
# "username":"ericthedataguy",
# "key":"875c0de0faea2fefa081c78eb470a347"
import opendatasets as od
import pandas as pd

od.download(
    "https://www.kaggle.com/datasets/jiashenliu/515k-hotel-reviews-data-in-europe")

df = pd.read_csv("515k-hotel-reviews-data-in-europe/Hotel_Reviews.csv")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:Your Kaggle Key:Downloading 515k-hotel-reviews-data-in-europe.zip to ./515k-hotel-reviews-data-in-europe


100%|██████████| 45.1M/45.1M [00:01<00:00, 47.2MB/s]





### Fill in geographical data using Google API

In [3]:
from geopy.geocoders import GoogleV3
import pandas as pd
missing_geo_hotel = list(df[df['lat'].isna()]['Hotel_Name'].value_counts().reset_index()['Hotel_Name'])
missing_geo_address = list(df[df['lat'].isna()]['Hotel_Address'].value_counts().reset_index()['Hotel_Address'])

# Create a geocoder object
geolocator = GoogleV3(api_key='AIzaSyCo0MJ4SypoxliSIn-yyNG4F_eCFncRXoU')

# Define a function to get the latitude and longitude of an address
def get_coordinates(address):
    location = geolocator.geocode(address)
    if location:
        return location.latitude, location.longitude
    else:
        return None

# Get the coordinates for each address in missing_geo_address
coordinates = [get_coordinates(address) for address in missing_geo_address]

# Create a new dataframe with hotel name, longitude, and latitude
new_df = pd.DataFrame({'Hotel_Name': missing_geo_hotel, 'Longitude': [coord[1] if coord else None for coord in coordinates], 'Latitude': [coord[0] if coord else None for coord in coordinates]})

In [4]:
# Ensure both key columns are of the same data type, here assuming 'Hotel_Name' column exists in `df`
df['Hotel_Name'] = df['Hotel_Name'].astype(str)  # Adjust if 'Hotel_Name' is the actual column you want to merge on in `df`
new_df['Hotel_Name'] = new_df['Hotel_Name'].astype(str)

# Merge using columns instead of trying to merge an index with a column
df_filled = df.merge(new_df, left_on='Hotel_Name', right_on='Hotel_Name', how='left')
df_filled['Latitude'] = df_filled['Latitude'].fillna(df_filled['lat'])
df_filled['Longitude'] = df_filled['Longitude'].fillna(df_filled['lng'])
df_filled = df_filled.drop(columns=['lat', 'lng'])

In [5]:
df_filled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 515738 entries, 0 to 515737
Data columns (total 17 columns):
 #   Column                                      Non-Null Count   Dtype  
---  ------                                      --------------   -----  
 0   Hotel_Address                               515738 non-null  object 
 1   Additional_Number_of_Scoring                515738 non-null  int64  
 2   Review_Date                                 515738 non-null  object 
 3   Average_Score                               515738 non-null  float64
 4   Hotel_Name                                  515738 non-null  object 
 5   Reviewer_Nationality                        515738 non-null  object 
 6   Negative_Review                             515738 non-null  object 
 7   Review_Total_Negative_Word_Counts           515738 non-null  int64  
 8   Total_Number_of_Reviews                     515738 non-null  int64  
 9   Positive_Review                             515738 non-null  object 
 

## Preprocessing Data for Recommendation System

### Subsetting random 10k rows (1729)

In [7]:
import random
hotelname = df_filled['Hotel_Name'].unique()
random.seed(42)  # Set the random seed for reproducibility
random_hotel_names = random.sample(list(hotelname), 30)  # Randomly pick 30 hotel names

subset = df_filled[df_filled['Hotel_Name'].isin(random_hotel_names)]  # Filter rows based on the selected hotel names
subset.to_csv('subset.csv', index=False)  # Save the subset to a CSV file

### Extract all tags from Positive_Review for each review

In [None]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Function to extract keywords/tags from a positive review
def extract_keywords(review):
    tokens = nltk.word_tokenize(review)
    tagged_tokens = nltk.pos_tag(tokens)
    keywords = []
    noun = None
    for word, pos in tagged_tokens:
        if pos.startswith('NN'):  # Look for nouns
            noun = word
        elif pos.startswith('JJ') and noun:  # Look for adjectives describing nouns
            keywords.append(f"{word} {noun}")
            noun = None
    return keywords

subset['Keywords'] = subset['Positive_Review'].apply(extract_keywords)

### Pool tags for each hotel & add hotel name (groupby hotel name)

In [None]:
# Function to extract the noun from a keyword combo of adjective + noun
def extract_noun(keyword):
    tokens = nltk.word_tokenize(keyword)
    tagged_tokens = nltk.pos_tag(tokens)
    nouns = [word for word, pos in tagged_tokens if pos.startswith('NN')]
    return nouns[0] if nouns else None


subset['text_tags'] = subset['Keywords'].apply(lambda keywords: list(set([extract_noun(keyword) for keyword in keywords])))
subset['text_tags'] = subset['text_tags'].apply(lambda tags: [tag.capitalize() if tag is not None else None for tag in tags])

subset['transactions'] = subset.apply(lambda row: [row['Hotel_Name']] + row['text_tags'], axis=1)

### Filter & keep the most popular 10 tags for each hotel

In [None]:
# Find the top 10 tags for each hotel
from collections import Counter

table1 = subset.groupby('Hotel_Name')['transactions'].sum().reset_index()

hotel_lists = {}

for hotel_name, transactions in table1.groupby('Hotel_Name')['transactions']:
    transaction_list = transactions.tolist()
    flattened_list = [item for sublist in transaction_list for item in sublist]
    counter = Counter(flattened_list)
    top_50_elements = [element for element, count in counter.most_common(10)]
    hotel_lists[hotel_name] = top_50_elements

In [None]:
top_tags = pd.DataFrame({
    'Hotel_Name': list(hotel_lists.keys()),
    'Top_Tags': list(hotel_lists.values())
})

merged_table = subset.merge(top_tags, on='Hotel_Name')

# Check each item in the list in transactions and add to a new list if it's in the list in Top_Tags
merged_table['new_transactions'] = merged_table.apply(lambda row: [item for item in row['transactions'] if item in row['Top_Tags']], axis=1)
merged_table.head()

## Recommendation System

### Conduct Market Basket Analysis 

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder


# apply transactionencoder
te = TransactionEncoder()
merged_table['new_transactions'] = merged_table['new_transactions'].apply(lambda x: [item for item in x if item is not None])
te_ary = te.fit(merged_table['new_transactions']).transform(merged_table['new_transactions'])
df_te = pd.DataFrame(te_ary, columns=te.columns_)

# create a unique list of hotel name
antecedents_hotel = merged_table['Hotel_Name'].unique()

# apply Apriori algorithm
frequent_itemsets = apriori(df_te, min_support=0.0000001, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)
rules['antecedents'] = rules['antecedents'].apply(lambda x: set(x))
rules['consequents'] = rules['consequents'].apply(lambda x: set(x))

# filter rules
single_antecedent_consequent_rules = rules[(rules['antecedents'].map(len) == 1) &
                                           (rules['consequents'].map(len) == 1) &
                                           (rules['antecedents'].apply(lambda x: list(x)[0]).isin(antecedents_hotel))]

rules_table = single_antecedent_consequent_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

### Function that returns most popular tags for given hotel name

In [None]:
# Function that finds the most popular tags of chosen hotel

def popular_tags(hotel_name):
    print(rules_table[rules_table['antecedents'] == {hotel_name}]['consequents'])

popular_tags('Hotel Arena')

# Recommendation System 2.0 (Improvement)

### Plan for improvement

**Better text-to-tags results**

1. Try n-grams

2. Try TF-IDF

3. Try Sentiment Analysis

**Better user experiences**

1. Word cloud with dropdown

2. Input tags -> output hotel name

3. MAAAAAAAPPPPPPs

**Other issues:**

1. Subset using random seed

2. Adjust rules

3. Choose top tags first, then add hotel name

### Better text-tags results

#### Current version

In [None]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Function to extract keywords/tags from a positive review
def extract_keywords(review):
    tokens = nltk.word_tokenize(review)
    tagged_tokens = nltk.pos_tag(tokens)
    keywords = []
    noun = None
    for word, pos in tagged_tokens:
        if pos.startswith('NN'):  # Look for nouns
            noun = word
        elif pos.startswith('JJ') and noun:  # Look for adjectives describing nouns
            keywords.append(f"{word} {noun}")
            noun = None
    return keywords

subset['Keywords'] = subset['Positive_Review'].apply(extract_keywords)

In [60]:
hotel_review = subset[['Hotel_Name', 'Positive_Review']]
hotel_review.head()

Unnamed: 0,Hotel_Name,Positive_Review
22321,Les Plumes Hotel,1 The room was well decorated modern small as per Parisian standards however cozy 2 The shower was amazing and had great LED lights to enjoy your long nice shower 3 The room offered a free Samsung phone to make free local and international calls to certain destinations along with free data 4 Staff were great and polite and extremely helpful They were always ready to assist and guide us before we stepped out 5 Great location 3 mins walking distance from Cadet Metro Pink Line which runs through the city various restaurants and cafes Located in 9th arrondissement safe neighbourhood
22322,Les Plumes Hotel,Lovely shower comfy bed
22323,Les Plumes Hotel,Great neighbourhood I did not know so glad to discover it esp rue des Martyrs which has many lovely shops bakeries eateries and grocers and seemed quite family friendly There are three metro lines nearby one gets quickly to any part of the city and the Gare du Nord is a 13 min walk away The hotel is super well equipped the wifi works very well and there is a nice honesty tea coffee bar and everything seems to be thought through for the enjoyment of guests Air conditioning very good not loud at all and you can control it in the room
22324,Les Plumes Hotel,The bed was super comfy the room was pretty decently sized the shower was excellent and it was quiet So it was a little warm in our room it was 86 outside and I asked the front desk about it They turned it down for us and left it that way They re so accomodating and friendly I would book them again in a heartbeat
22325,Les Plumes Hotel,This time we had a good size room clean and modern with a big shower Liked that we could drink in the bar area the wine and champagne bought from the outside shops


In [62]:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')

hotel_review["text_processed"] = hotel_review["Positive_Review"].str.lower()
hotel_review["text_processed"] = hotel_review["text_processed"].replace('<br />', ' ', regex=True)
hotel_review["text_processed"] = hotel_review["text_processed"].replace('[^\w\d\s]', ' ', regex=True)

# Tokenize the reviews
hotel_review['Tokenize'] = hotel_review['text_processed'].apply(word_tokenize)

# Drop stopwords English
stop_words = set(stopwords.words('english'))
hotel_review['stopwords_drop'] = hotel_review['Tokenize'].apply(lambda x: [item for item in x if item not in stop_words])
hotel_review.head()

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/hakukazuho/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/hakukazuho/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hotel_review["text_processed"] = hotel_review["Positive_Review"].str.lower()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hotel_review["text_processed"] = hotel_review["text_processed"].replace('<br />', ' ', regex=True)
A value 

Unnamed: 0,Hotel_Name,Positive_Review,text_processed,Tokenize,stopwords_drop
22321,Les Plumes Hotel,1 The room was well decorated modern small as per Parisian standards however cozy 2 The shower was amazing and had great LED lights to enjoy your long nice shower 3 The room offered a free Samsung phone to make free local and international calls to certain destinations along with free data 4 Staff were great and polite and extremely helpful They were always ready to assist and guide us before we stepped out 5 Great location 3 mins walking distance from Cadet Metro Pink Line which runs through the city various restaurants and cafes Located in 9th arrondissement safe neighbourhood,1 the room was well decorated modern small as per parisian standards however cozy 2 the shower was amazing and had great led lights to enjoy your long nice shower 3 the room offered a free samsung phone to make free local and international calls to certain destinations along with free data 4 staff were great and polite and extremely helpful they were always ready to assist and guide us before we stepped out 5 great location 3 mins walking distance from cadet metro pink line which runs through the city various restaurants and cafes located in 9th arrondissement safe neighbourhood,"[1, the, room, was, well, decorated, modern, small, as, per, parisian, standards, however, cozy, 2, the, shower, was, amazing, and, had, great, led, lights, to, enjoy, your, long, nice, shower, 3, the, room, offered, a, free, samsung, phone, to, make, free, local, and, international, calls, to, certain, destinations, along, with, free, data, 4, staff, were, great, and, polite, and, extremely, helpful, they, were, always, ready, to, assist, and, guide, us, before, we, stepped, out, 5, great, location, 3, mins, walking, distance, from, cadet, metro, pink, line, which, runs, through, the, city, various, restaurants, and, cafes, located, in, 9th, arrondissement, safe, ...]","[1, room, well, decorated, modern, small, per, parisian, standards, however, cozy, 2, shower, amazing, great, led, lights, enjoy, long, nice, shower, 3, room, offered, free, samsung, phone, make, free, local, international, calls, certain, destinations, along, free, data, 4, staff, great, polite, extremely, helpful, always, ready, assist, guide, us, stepped, 5, great, location, 3, mins, walking, distance, cadet, metro, pink, line, runs, city, various, restaurants, cafes, located, 9th, arrondissement, safe, neighbourhood]"
22322,Les Plumes Hotel,Lovely shower comfy bed,lovely shower comfy bed,"[lovely, shower, comfy, bed]","[lovely, shower, comfy, bed]"
22323,Les Plumes Hotel,Great neighbourhood I did not know so glad to discover it esp rue des Martyrs which has many lovely shops bakeries eateries and grocers and seemed quite family friendly There are three metro lines nearby one gets quickly to any part of the city and the Gare du Nord is a 13 min walk away The hotel is super well equipped the wifi works very well and there is a nice honesty tea coffee bar and everything seems to be thought through for the enjoyment of guests Air conditioning very good not loud at all and you can control it in the room,great neighbourhood i did not know so glad to discover it esp rue des martyrs which has many lovely shops bakeries eateries and grocers and seemed quite family friendly there are three metro lines nearby one gets quickly to any part of the city and the gare du nord is a 13 min walk away the hotel is super well equipped the wifi works very well and there is a nice honesty tea coffee bar and everything seems to be thought through for the enjoyment of guests air conditioning very good not loud at all and you can control it in the room,"[great, neighbourhood, i, did, not, know, so, glad, to, discover, it, esp, rue, des, martyrs, which, has, many, lovely, shops, bakeries, eateries, and, grocers, and, seemed, quite, family, friendly, there, are, three, metro, lines, nearby, one, gets, quickly, to, any, part, of, the, city, and, the, gare, du, nord, is, a, 13, min, walk, away, the, hotel, is, super, well, equipped, the, wifi, works, very, well, and, there, is, a, nice, honesty, tea, coffee, bar, and, everything, seems, to, be, thought, through, for, the, enjoyment, of, guests, air, conditioning, very, good, not, loud, at, all, and, you, can, control, it, ...]","[great, neighbourhood, know, glad, discover, esp, rue, des, martyrs, many, lovely, shops, bakeries, eateries, grocers, seemed, quite, family, friendly, three, metro, lines, nearby, one, gets, quickly, part, city, gare, du, nord, 13, min, walk, away, hotel, super, well, equipped, wifi, works, well, nice, honesty, tea, coffee, bar, everything, seems, thought, enjoyment, guests, air, conditioning, good, loud, control, room]"
22324,Les Plumes Hotel,The bed was super comfy the room was pretty decently sized the shower was excellent and it was quiet So it was a little warm in our room it was 86 outside and I asked the front desk about it They turned it down for us and left it that way They re so accomodating and friendly I would book them again in a heartbeat,the bed was super comfy the room was pretty decently sized the shower was excellent and it was quiet so it was a little warm in our room it was 86 outside and i asked the front desk about it they turned it down for us and left it that way they re so accomodating and friendly i would book them again in a heartbeat,"[the, bed, was, super, comfy, the, room, was, pretty, decently, sized, the, shower, was, excellent, and, it, was, quiet, so, it, was, a, little, warm, in, our, room, it, was, 86, outside, and, i, asked, the, front, desk, about, it, they, turned, it, down, for, us, and, left, it, that, way, they, re, so, accomodating, and, friendly, i, would, book, them, again, in, a, heartbeat]","[bed, super, comfy, room, pretty, decently, sized, shower, excellent, quiet, little, warm, room, 86, outside, asked, front, desk, turned, us, left, way, accomodating, friendly, would, book, heartbeat]"
22325,Les Plumes Hotel,This time we had a good size room clean and modern with a big shower Liked that we could drink in the bar area the wine and champagne bought from the outside shops,this time we had a good size room clean and modern with a big shower liked that we could drink in the bar area the wine and champagne bought from the outside shops,"[this, time, we, had, a, good, size, room, clean, and, modern, with, a, big, shower, liked, that, we, could, drink, in, the, bar, area, the, wine, and, champagne, bought, from, the, outside, shops]","[time, good, size, room, clean, modern, big, shower, liked, could, drink, bar, area, wine, champagne, bought, outside, shops]"


In [29]:
pd.set_option('display.max_colwidth', None)

#### Original

In [56]:
import nltk
from nltk import pos_tag
nltk.download('averaged_perceptron_tagger')

original = hotel_review.copy()
original['Tags'] = original['stopwords_drop'].apply(lambda x: [word for word, pos in pos_tag(x) if pos.startswith('NN')])
original['Tags'] = original['Tags'].apply(lambda x: list(set(x)))

col_list = ['Positive_Review', 'Tags']
original[col_list].head()

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/hakukazuho/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Unnamed: 0,Positive_Review,Tags
22321,1 The room was well decorated modern small as per Parisian standards however cozy 2 The shower was amazing and had great LED lights to enjoy your long nice shower 3 The room offered a free Samsung phone to make free local and international calls to certain destinations along with free data 4 Staff were great and polite and extremely helpful They were always ready to assist and guide us before we stepped out 5 Great location 3 mins walking distance from Cadet Metro Pink Line which runs through the city various restaurants and cafes Located in 9th arrondissement safe neighbourhood,"[staff, mins, standards, distance, cadet, assist, metro, room, polite, data, line, samsung, city, phone, location, restaurants, lights, runs, shower, neighbourhood, calls, destinations]"
22322,Lovely shower comfy bed,"[comfy, bed]"
22323,Great neighbourhood I did not know so glad to discover it esp rue des Martyrs which has many lovely shops bakeries eateries and grocers and seemed quite family friendly There are three metro lines nearby one gets quickly to any part of the city and the Gare du Nord is a 13 min walk away The hotel is super well equipped the wifi works very well and there is a nice honesty tea coffee bar and everything seems to be thought through for the enjoyment of guests Air conditioning very good not loud at all and you can control it in the room,"[lines, discover, wifi, nord, coffee, tea, control, rue, metro, room, part, loud, grocers, hotel, bar, gare, bakeries, enjoyment, everything, min, city, du, air, honesty, guests, neighbourhood, family, des]"
22324,The bed was super comfy the room was pretty decently sized the shower was excellent and it was quiet So it was a little warm in our room it was 86 outside and I asked the front desk about it They turned it down for us and left it that way They re so accomodating and friendly I would book them again in a heartbeat,"[shower, room, desk, book, bed, heartbeat, way, comfy]"
22325,This time we had a good size room clean and modern with a big shower Liked that we could drink in the bar area the wine and champagne bought from the outside shops,"[time, area, wine, shower, shops, size, room, champagne]"


#### Try n-grams

In [64]:
# 3-grams on stopwords_drop
import nltk
from nltk.util import ngrams
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')


grams = hotel_review.copy()
grams['3-grams'] = grams['stopwords_drop'].apply(lambda x: list(ngrams(x, 3)))

grams['3-grams_with_noun'] = grams['3-grams'].apply(lambda x: [gram for gram in x if any(pos.startswith('NN') for _, pos in pos_tag(gram))])
grams['3-grams_with_noun&adj'] = grams['3-grams_with_noun'].apply(lambda x: [gram for gram in x if any(pos.startswith('JJ') for _, pos in pos_tag(gram))])

def flatten_tuples(tuples_list):
    # Use a list comprehension to iterate through each tuple in the list and each word in the tuple
    return [word for a_tuple in tuples_list for word in a_tuple]

# Apply the function to each row in the column
grams['flattened_words'] = grams['3-grams_with_noun&adj'].apply(flatten_tuples)

nltk.download('averaged_perceptron_tagger')

# Function to extract nouns from a list of words
def extract_nouns(words):
    tagged_words = pos_tag(words)
    return [word for word, pos in tagged_words if pos.startswith('NN')]

# Apply the function to each row in the column
grams['Nouns'] = grams['flattened_words'].apply(extract_nouns)
grams['Unique_nouns'] = grams['Nouns'].apply(lambda x: list(set(x)))

# Lemmatize the tags
lemmatizer = WordNetLemmatizer()
grams['Tags'] = grams['Unique_nouns'].apply(lambda x: [lemmatizer.lemmatize(tag) for tag in x])

grams[col_list].head()

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hakukazuho/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/hakukazuho/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/hakukazuho/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Unnamed: 0,Positive_Review,Tags
22321,1 The room was well decorated modern small as per Parisian standards however cozy 2 The shower was amazing and had great LED lights to enjoy your long nice shower 3 The room offered a free Samsung phone to make free local and international calls to certain destinations along with free data 4 Staff were great and polite and extremely helpful They were always ready to assist and guide us before we stepped out 5 Great location 3 mins walking distance from Cadet Metro Pink Line which runs through the city various restaurants and cafes Located in 9th arrondissement safe neighbourhood,"[light, run, staff, restaurant, shower, samsung, call, guide, room, destination, city, neighbourhood, data, phone, standard, location]"
22322,Lovely shower comfy bed,"[comfy, bed]"
22323,Great neighbourhood I did not know so glad to discover it esp rue des Martyrs which has many lovely shops bakeries eateries and grocers and seemed quite family friendly There are three metro lines nearby one gets quickly to any part of the city and the Gare du Nord is a 13 min walk away The hotel is super well equipped the wifi works very well and there is a nice honesty tea coffee bar and everything seems to be thought through for the enjoyment of guests Air conditioning very good not loud at all and you can control it in the room,"[conditioning, discover, bakery, esp, neighbourhood, family, room, de, tea, control, air, loud, honesty]"
22324,The bed was super comfy the room was pretty decently sized the shower was excellent and it was quiet So it was a little warm in our room it was 86 outside and I asked the front desk about it They turned it down for us and left it that way They re so accomodating and friendly I would book them again in a heartbeat,"[shower, room, desk, way, comfy]"
22325,This time we had a good size room clean and modern with a big shower Liked that we could drink in the bar area the wine and champagne bought from the outside shops,"[time, shower, shop, size, room, champagne]"


In [84]:
from collections import Counter

# Flatten the list of tags
all_tags = [tag for tags in grams['Tags'] for tag in tags]

# Count the frequency of each tag
tag_counts = Counter(all_tags)

# Get the most frequent 50 tags
most_frequent_tags = tag_counts.most_common(50)

# Extract the tags from the most frequent tags list
most_frequent_tags_list = [tag for tag, count in most_frequent_tags]
# Elements to drop
elements_to_drop = ['london', 'everything', 'excellent', 'day', 'perfect', 'victoria', 'money', 
                    'lot', 'nothing', 'stay', 'place', 'time', 'walk', 'night', 'value', 'access', 
                    'choice', 'experience', 'attraction', 'comfy']

# Drop elements from the list
shortened_list = [tag for tag in most_frequent_tags_list if tag not in elements_to_drop]


# Create a new column with only the most frequent tags
grams['Top_Tags'] = grams['Tags'].apply(lambda x: [tag for tag in x if tag in shortened_list])
grams['Top_Tags'] = grams.apply(lambda row: [row['Hotel_Name']] + row['Top_Tags'], axis=1)
grams['Top_Tags'] = grams['Top_Tags'].apply(lambda tags: [tag.capitalize() for tag in tags])

grams[['Hotel_Name', 'Top_Tags']].head()

Unnamed: 0,Hotel_Name,Top_Tags
22321,Les Plumes Hotel,"[Les plumes hotel, Staff, Restaurant, Shower, Room, City, Location]"
22322,Les Plumes Hotel,"[Les plumes hotel, Bed]"
22323,Les Plumes Hotel,"[Les plumes hotel, Room]"
22324,Les Plumes Hotel,"[Les plumes hotel, Shower, Room]"
22325,Les Plumes Hotel,"[Les plumes hotel, Shower, Size, Room]"
