In [3]:
import pandas as pd
df = pd.read_csv('/content/subset.csv')

In [None]:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')

df["text_processed"] = df["Positive_Review"].str.lower()
df["text_processed"] = df["text_processed"].replace('<br />', ' ', regex=True)
df["text_processed"] = df["text_processed"].replace('[^\w\d\s]', ' ', regex=True)

# Tokenize the reviews
df['Tokenize'] = df['text_processed'].apply(word_tokenize)

# Drop stopwords English
stop_words = set(stopwords.words('english'))
df['stopwords_drop'] = df['Tokenize'].apply(lambda x: [item for item in x if item not in stop_words])
df.head()

In [22]:
# 3-grams on stopwords_drop
import nltk
from nltk.util import ngrams
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')


grams = df.copy()
grams['3-grams'] = grams['stopwords_drop'].apply(lambda x: list(ngrams(x, 3)))

grams['3-grams_with_noun'] = grams['3-grams'].apply(lambda x: [gram for gram in x if any(pos.startswith('NN') for _, pos in pos_tag(gram))])
grams['3-grams_with_noun&adj'] = grams['3-grams_with_noun'].apply(lambda x: [gram for gram in x if any(pos.startswith('JJ') for _, pos in pos_tag(gram))])

def flatten_tuples(tuples_list):
    # Use a list comprehension to iterate through each tuple in the list and each word in the tuple
    return [word for a_tuple in tuples_list for word in a_tuple]

# Apply the function to each row in the column
grams['flattened_words'] = grams['3-grams_with_noun&adj'].apply(flatten_tuples)

nltk.download('averaged_perceptron_tagger')

# Function to extract nouns from a list of words
def extract_nouns(words):
    tagged_words = pos_tag(words)
    return [word for word, pos in tagged_words if pos.startswith('NN')]

# Apply the function to each row in the column
grams['Nouns'] = grams['flattened_words'].apply(extract_nouns)
grams['Unique_nouns'] = grams['Nouns'].apply(lambda x: list(set(x)))

# Lemmatize the tags
lemmatizer = WordNetLemmatizer()
grams['Tags'] = grams['Unique_nouns'].apply(lambda x: [lemmatizer.lemmatize(tag) for tag in x])

# col_list = ['Positive_Review', 'Tags']
# grams[col_list].head()

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [25]:
from collections import Counter

# Flatten the list of tags
all_tags = [tag for tags in grams['Tags'] for tag in tags]

# Count the frequency of each tag
tag_counts = Counter(all_tags)

# Get the most frequent 50 tags
most_frequent_tags = tag_counts.most_common(50)

# Extract the tags from the most frequent tags list
most_frequent_tags_list = [tag for tag, count in most_frequent_tags]
# Elements to drop
elements_to_drop = ['london', 'everything', 'excellent', 'day', 'perfect', 'victoria', 'money',
                    'lot', 'nothing', 'stay', 'place', 'time', 'walk', 'night', 'value', 'access',
                    'choice', 'experience', 'attraction', 'comfy']

# Drop elements from the list
shortened_list = [tag for tag in most_frequent_tags_list if tag not in elements_to_drop]


# Create a new column with only the most frequent tags
grams['Top_Tags'] = grams['Tags'].apply(lambda x: [tag for tag in x if tag in shortened_list])
grams['hotel_transactions'] = grams.apply(lambda row: [row['Hotel_Name']] + row['Top_Tags'], axis=1)
grams['hotel_transactions'] = grams['Top_Tags'].apply(lambda tags: [tag.capitalize() for tag in tags])

In [26]:
len(shortened_list)

30

In [27]:
df = grams.copy()

In [31]:
top_nouns = shortened_list
top_nouns

['staff',
 'location',
 'room',
 'hotel',
 'breakfast',
 'bed',
 'station',
 'service',
 'bar',
 'restaurant',
 'clean',
 'bathroom',
 'area',
 'helpful',
 'view',
 'food',
 'city',
 'facility',
 'metro',
 'reception',
 'coffee',
 'drink',
 'shower',
 'price',
 'size',
 'street',
 'train',
 'distance',
 'quality',
 'wifi']

In [29]:
grams

Unnamed: 0,Hotel_Address,Additional_Number_of_Scoring,Review_Date,Average_Score,Hotel_Name,Reviewer_Nationality,Negative_Review,Review_Total_Negative_Word_Counts,Total_Number_of_Reviews,Positive_Review,...,Tokenize,stopwords_drop,3-grams,3-grams_with_noun,3-grams_with_noun&adj,flattened_words,Nouns,Unique_nouns,Top_Tags,hotel_transactions
0,10 rue Lamartine 9th arr 75009 Paris France,42,8/2/2017,8.8,Les Plumes Hotel,Bahrain,Absolutely nothing,4,514,1 The room was well decorated modern small as...,...,"[1, the, room, was, well, decorated, modern, s...","[1, room, well, decorated, modern, small, per,...","[(1, room, well), (room, well, decorated), (we...","[(1, room, well), (room, well, decorated), (sm...","[(small, per, parisian), (per, parisian, stand...","[small, per, parisian, per, parisian, standard...","[standards, standards, lights, shower, room, r...","[guide, runs, lights, data, shower, city, neig...","[shower, city, restaurant, location, room, staff]","[Shower, City, Restaurant, Location, Room, Staff]"
1,10 rue Lamartine 9th arr 75009 Paris France,42,7/20/2017,8.8,Les Plumes Hotel,United Kingdom,Bit small it was noisy outside which I know i...,37,514,Lovely shower comfy bed,...,"[lovely, shower, comfy, bed]","[lovely, shower, comfy, bed]","[(lovely, shower, comfy), (shower, comfy, bed)]","[(lovely, shower, comfy), (shower, comfy, bed)]","[(lovely, shower, comfy), (shower, comfy, bed)]","[lovely, shower, comfy, shower, comfy, bed]","[comfy, comfy, bed]","[bed, comfy]",[bed],[Bed]
2,10 rue Lamartine 9th arr 75009 Paris France,42,7/2/2017,8.8,Les Plumes Hotel,United Kingdom,The room is not only very small but mine on t...,109,514,Great neighbourhood I did not know so glad to...,...,"[great, neighbourhood, i, did, not, know, so, ...","[great, neighbourhood, know, glad, discover, e...","[(great, neighbourhood, know), (neighbourhood,...","[(great, neighbourhood, know), (neighbourhood,...","[(great, neighbourhood, know), (glad, discover...","[great, neighbourhood, know, glad, discover, e...","[neighbourhood, discover, esp, des, bakeries, ...","[discover, control, conditioning, loud, bakeri...",[room],[Room]
3,10 rue Lamartine 9th arr 75009 Paris France,42,6/1/2017,8.8,Les Plumes Hotel,United States of America,No Negative,0,514,The bed was super comfy the room was pretty d...,...,"[the, bed, was, super, comfy, the, room, was, ...","[bed, super, comfy, room, pretty, decently, si...","[(bed, super, comfy), (super, comfy, room), (c...","[(bed, super, comfy), (super, comfy, room), (c...","[(super, comfy, room), (shower, excellent, qui...","[super, comfy, room, shower, excellent, quiet,...","[comfy, room, shower, room, room, desk, way, way]","[desk, way, shower, comfy, room]","[shower, room]","[Shower, Room]"
4,10 rue Lamartine 9th arr 75009 Paris France,42,5/25/2017,8.8,Les Plumes Hotel,United Kingdom,Breakfast a bit bland The scrambled eggs look...,26,514,This time we had a good size room clean and m...,...,"[this, time, we, had, a, good, size, room, cle...","[time, good, size, room, clean, modern, big, s...","[(time, good, size), (good, size, room), (size...","[(time, good, size), (good, size, room), (size...","[(time, good, size), (good, size, room), (room...","[time, good, size, good, size, room, room, cle...","[time, size, size, room, room, shower, shower,...","[size, shower, room, champagne, shops, time]","[size, shower, room]","[Size, Shower, Room]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9531,W hringer Stra e 33 35 09 Alsergrund 1090 Vien...,214,8/16/2015,7.8,Hotel Atlanta,Italy,soundy floor poor Wi Fi,7,2724,Good value for money good position for moving...,...,"[good, value, for, money, good, position, for,...","[good, value, money, good, position, moving, c...","[(good, value, money), (value, money, good), (...","[(good, value, money), (value, money, good), (...","[(good, value, money), (value, money, good), (...","[good, value, money, value, money, good, money...","[value, money, value, money, money, position, ...","[payment, center, value, money, position, park...",[],[]
9532,W hringer Stra e 33 35 09 Alsergrund 1090 Vien...,214,8/13/2015,7.8,Hotel Atlanta,Canada,No air conditioner Bad experience in summer,9,2724,No Positive,...,"[no, positive]",[positive],[],[],[],[],[],[],[],[]
9533,W hringer Stra e 33 35 09 Alsergrund 1090 Vien...,214,8/12/2015,7.8,Hotel Atlanta,Pakistan,nothing,2,2724,nothing,...,[nothing],[nothing],[],[],[],[],[],[],[],[]
9534,W hringer Stra e 33 35 09 Alsergrund 1090 Vien...,214,8/11/2015,7.8,Hotel Atlanta,Romania,It was very very very hot At 5Th floor with n...,21,2724,No Positive,...,"[no, positive]",[positive],[],[],[],[],[],[],[],[]


In [15]:
df['filtered_nouns'] = df['nouns'].apply(lambda nouns_list: [noun for noun in nouns_list if noun in top_nouns])

KeyError: 'nouns'

In [None]:
df['transactions'] = df.apply(lambda row: [row['Hotel_Name']] + row['filtered_nouns'], axis=1)

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [None]:
hotel_transactions = df['transactions']


  and should_run_async(code)


In [None]:
hotel_transactions

  and should_run_async(code)


0       [Les Plumes Hotel, room, room, Staff, Great, l...
1                                      [Les Plumes Hotel]
2                  [Les Plumes Hotel, Great, hotel, room]
3                          [Les Plumes Hotel, room, room]
4                                [Les Plumes Hotel, room]
                              ...                        
9531                                      [Hotel Atlanta]
9532                            [Hotel Atlanta, Positive]
9533                                      [Hotel Atlanta]
9534                            [Hotel Atlanta, Positive]
9535                           [Hotel Atlanta, breakfast]
Name: transactions, Length: 9536, dtype: object

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
te_ary = te.fit_transform(hotel_transactions)
df_te = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(df_te, min_support=0.00001, use_colnames=True)

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.00001)

rules['antecedents'] = rules['antecedents'].apply(lambda x: set(x).intersection(set(top_nouns)))
rules = rules[rules['antecedents'].apply(len) > 0]
rules

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,{Great},(Ako Suites Hotel),0.073301,0.013737,0.001363,0.018598,1.353821,0.000356,1.004953,0.282022
2,{Location},(Ako Suites Hotel),0.113045,0.013737,0.001468,0.012987,0.945375,-0.000085,0.999240,-0.061161
5,{Positive},(Ako Suites Hotel),0.072462,0.013737,0.001154,0.015919,1.158803,0.000158,1.002217,0.147747
7,{Staff},(Ako Suites Hotel),0.072043,0.013737,0.001363,0.018923,1.377468,0.000374,1.005285,0.295305
9,{breakfast},(Ako Suites Hotel),0.082110,0.013737,0.000105,0.001277,0.092968,-0.001023,0.987524,-0.914009
...,...,...,...,...,...,...,...,...,...,...
56804,{Staff},"(staff, Great, Location, rooms, breakfast, hot...",0.072043,0.000105,0.000105,0.001456,13.880640,0.000097,1.001353,1.000000
56805,{rooms},"(staff, Great, Location, Staff, breakfast, hot...",0.066590,0.000105,0.000105,0.001575,15.017323,0.000098,1.001472,1.000000
56806,{breakfast},"(staff, Great, Location, Staff, rooms, hotel, ...",0.082110,0.000105,0.000105,0.001277,12.178799,0.000096,1.001174,1.000000
56807,{hotel},"(staff, Great, Location, Staff, rooms, breakfa...",0.188339,0.000105,0.000105,0.000557,5.309577,0.000085,1.000452,1.000000


In [None]:
# filter
filtered_rules = rules[rules['antecedents'].apply(lambda x: any(noun in x for noun in top_nouns))]
consequents_hotel = df['Hotel_Name'].unique()
single_antecedent_consequent_rules = filtered_rules[(rules['antecedents'].map(len) == 1) &
                                           (rules['consequents'].map(len) == 1) &
                                           (rules['consequents'].apply(lambda x: list(x)[0]).isin(consequents_hotel))]

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])


In [None]:
single_antecedent_consequent_rules

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,{Great},(Ako Suites Hotel),0.073301,0.013737,0.001363,0.018598,1.353821,0.000356,1.004953,0.282022
2,{Location},(Ako Suites Hotel),0.113045,0.013737,0.001468,0.012987,0.945375,-0.000085,0.999240,-0.061161
5,{Positive},(Ako Suites Hotel),0.072462,0.013737,0.001154,0.015919,1.158803,0.000158,1.002217,0.147747
7,{Staff},(Ako Suites Hotel),0.072043,0.013737,0.001363,0.018923,1.377468,0.000374,1.005285,0.295305
9,{breakfast},(Ako Suites Hotel),0.082110,0.013737,0.000105,0.001277,0.092968,-0.001023,0.987524,-0.914009
...,...,...,...,...,...,...,...,...,...,...
631,{hotel},(W London Leicester Square),0.188339,0.027999,0.005348,0.028396,1.014189,0.000075,1.000409,0.017237
633,{location},(W London Leicester Square),0.269820,0.027999,0.007865,0.029149,1.041062,0.000310,1.001184,0.054017
635,{room},(W London Leicester Square),0.200713,0.027999,0.005558,0.027691,0.988983,-0.000062,0.999683,-0.013745
637,{rooms},(W London Leicester Square),0.066590,0.027999,0.002202,0.033071,1.181138,0.000338,1.005245,0.164299


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

dropdown = widgets.Dropdown(options=top_nouns, description='Noun :')


button = widgets.Button(description='Recommend Hotels')

output = widgets.Output()

def recommend_hotels(b):
    with output:
        clear_output(wait=True)
        selected_nouns = {dropdown.value}
        matching_rules = single_antecedent_consequent_rules[single_antecedent_consequent_rules['antecedents'].apply(lambda antecedents: antecedents.issubset(selected_nouns))]
        matching_rules = matching_rules.sort_values(by='confidence', ascending=False)
        if not matching_rules.empty:
            for hotel in matching_rules['consequents'].iloc[0]:
                print(f"Recommended hotel: {hotel}")
        else:
            print("No recommendations available for the selected nouns.")

button.on_click(recommend_hotels)
display(dropdown, button, output)


  and should_run_async(code)


Dropdown(description='Noun :', options=('staff', 'location', 'room', 'hotel', 'Location', 'breakfast', 'Great'…

Button(description='Recommend Hotels', style=ButtonStyle())

Output()