In [4]:
import pandas as pd

data = pd.read_csv("csv/top10_destinations_review_posts.csv")
tourist_destinations = data['tourist_destination']

# Train

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
import scipy

tfv = TfidfVectorizer(stop_words = 'english')
tfv_text = tfv.fit_transform(data['lemmatized_words_review_post'].values.astype('U'))

# For Machine Based
from sklearn.model_selection import train_test_split
vectors_train, vectors_test, dest_train, dest_test = train_test_split(tfv_text, tourist_destinations, test_size = 0.1, random_state = 50)

In [6]:
x = tfv_text.toarray()

# For the template for input
df_tfidf_model = pd.DataFrame(x, columns = tfv.get_feature_names())
df_tfidf_model['tourist_destination'] = tourist_destinations
df_tfidf_model



Unnamed: 0,aaaaand,aakyatan,abad,abanao,abandon,abandoned,abby,abd,ability,abit,...,zone,zoned,zoning,zoo,zoom,zt,ztrawberrry,zumba,zzz,tourist_destination
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Burnham Park
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Burnham Park
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Burnham Park
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Burnham Park
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Burnham Park
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9636,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Lion’s Head
9637,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Lion’s Head
9638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Lion’s Head
9639,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Lion’s Head


# Support Vector Machine

In [7]:
# Will only take around a minute since pkl
from sklearn import svm
svm = svm.SVC(kernel='rbf', probability=True)
svm.fit(vectors_train, dest_train)


SVC(probability=True)

In [8]:
dest_pred = svm.predict(vectors_test)
actual_predict = pd.DataFrame()
actual_predict["actual"] = dest_test
actual_predict["predict"] = dest_pred
actual_predict

Unnamed: 0,actual,predict
5052,Baguio Botanical Garden,Baguio Botanical Garden
3884,Baguio Night Market,Baguio Night Market
3687,Baguio Night Market,Baguio City Market
9026,Lion’s Head,Wright Park
4046,Wright Park,Wright Park
...,...,...
6047,Our Lady of the Atonement Cathedral (Baguio Ca...,Our Lady of the Atonement Cathedral (Baguio Ca...
5811,Baguio Botanical Garden,Baguio Botanical Garden
9162,Lion’s Head,Lion’s Head
7777,Strawberry Farm - Home of Giant Strawberry Cake,Mines View Observation Deck


In [9]:
from sklearn.metrics import classification_report
print(classification_report(dest_test, dest_pred))

# calculate accuracy of class predictions
from sklearn import metrics
print(f'Accuracy -> ', metrics.accuracy_score(dest_test, dest_pred))

                                                                                       precision    recall  f1-score   support

                                                              Baguio Botanical Garden       0.61      0.68      0.64       101
                                                                   Baguio City Market       0.75      0.82      0.78        97
                                                                  Baguio Night Market       0.79      0.85      0.82       112
                                                                        BenCab Museum       0.88      0.72      0.79        98
                                                                         Burnham Park       0.69      0.62      0.65       100
                                                                          Lion’s Head       0.55      0.62      0.58        94
                                                          Mines View Observation Deck       0.62      0.63    

# Proof of Concept

In [11]:
import tkinter as tk 
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from tkinter import ttk
from tkinter import messagebox

root= tk.Tk()

canvas1 = tk.Canvas(root, width = 500, height = 300)
canvas1.pack()

# Year and input box
label2 = tk.Label(root, text='What do you want to do?:')
canvas1.create_window(120, 120, window=label2)

entry2 = tk.Entry (root) # create 2nd entry box
canvas1.create_window(270, 120, window=entry2)

def on_closing():
    if messagebox.askokcancel("Quit", "Do you want to quit?"):
        root.destroy()


def values(): 
    global year 

    import numpy as np

    # Make a dictionary from the columns of the dataframe and make the values 0
    dict_tfidf_model_template = dict.fromkeys(df_tfidf_model.loc[:, df_tfidf_model.columns != 'tourist_destination'].columns, [0])

    # sample input 
    text = str(entry2.get()).lower()
    split = text.split()

    for word in split:
        if word in dict_tfidf_model_template:
            val = dict_tfidf_model_template[word][0]
            val += 1
            val_list = [val]
            dict_tfidf_model_template[word] = val_list
        else:
            pass


    df_tfidf_model_template = pd.DataFrame(dict_tfidf_model_template)

    import scipy
    uns_sparsed_data = scipy.sparse.csr_matrix(df_tfidf_model_template)
    uns_dest_pred = svm.predict_proba(uns_sparsed_data)

    top_n_category_predictions = np.sort(uns_dest_pred)


    dict_comparison = {'Tourist_Destination' : svm.classes_.tolist(), 'Weights' : uns_dest_pred[0].tolist()}

    comparison = pd.DataFrame(dict_comparison)

    top10_destinations_reviews_df = pd.read_csv(r'../csv/cleaned/cleaned_variations_review_posts_features.csv')

    sentiments = top10_destinations_reviews_df['sent_analysis'].unique()
    tourist_destinations = top10_destinations_reviews_df['tourist_destination'].unique()
    tourist_destinations[6] = 'Atonement Cathedral'

    sentiments = {}

    for tourist_destination in tourist_destinations:
        contain_values = top10_destinations_reviews_df[top10_destinations_reviews_df['tourist_destination'].str.contains(tourist_destination)]
        #display(pd.DataFrame(contain_values.sent_analysis.value_counts()))
        sentiments[str(tourist_destination)] = []
        for idx, name in enumerate(contain_values.sent_analysis.value_counts().tolist()):
            sentiments[str(tourist_destination)].append(contain_values.sent_analysis.value_counts()[idx])
    #sentiments

    positive = []
    neutral = []
    negative = []

    for idx, value in sentiments.items():
        positive.append(value[0])
        neutral.append(value[1])
        negative.append(value[2])


    sentiment_analysis_df = pd.DataFrame(zip(tourist_destinations, positive, neutral, negative), columns = ['Tourist Destination', 'Positive', 'Neutral', 'Negative'])

    positive_sentiment_percentage = sentiment_analysis_df.loc[:, "Positive"] / (sentiment_analysis_df.loc[:, "Positive"] + sentiment_analysis_df.loc[:, "Neutral"] + sentiment_analysis_df.loc[:, "Negative"])
    sentiment_analysis_df['Positive Sentiment Percentage'] = positive_sentiment_percentage
    sentiment_analysis_df['Positive_%'] = sentiment_analysis_df['Positive Sentiment Percentage'].map(lambda x: "{:.0%}".format(x))

    hy = comparison.sort_values(by=['Weights'], ascending=False)[0:2].to_numpy()

    first_choice = sentiment_analysis_df[sentiment_analysis_df['Tourist Destination'] == hy[0][0] ]
    second_choice = sentiment_analysis_df[sentiment_analysis_df['Tourist Destination'] == hy[1][0] ]
    recommend = pd.concat([first_choice, second_choice])
    recommend.sort_values(by='Positive Sentiment Percentage', ascending=False)

    recommend_list = list(recommend['Tourist Destination'])
    sentiment_list = list(recommend['Positive Sentiment Percentage'])

    #predicted_list = predicted['Tourist_Destination']
   
    Prediction_result1 = (f'Recommendation 1: {recommend_list[0]}, Overall Sent: {"{:.0%}".format(sentiment_list[0])}')
    Prediction_result2 = (f'Recommendation 2: {recommend_list[1]}, Overall Sent: {"{:.0%}".format(sentiment_list[1])}')

    label_Prediction1 = tk.Label(root, text= Prediction_result1, bg='white')
    canvas1.create_window(260, 215, window=label_Prediction1)
    canvas1.pack(fill="none", expand=True)
    label_Prediction2 = tk.Label(root, text= Prediction_result2, bg='white')
    canvas1.create_window(260, 265, window=label_Prediction2)
    canvas1.pack(fill="none", expand=True)


button1 = tk.Button (root, text='Recommend',command=values, bg='green', fg='white') # button to call the 'values' command above 
canvas1.create_window(270, 150, window=button1)
 
root.protocol("WM_DELETE_WINDOW", on_closing)
root.mainloop()
