# Recommendation System Item Based Collaborative Filtering

In [1]:
# ! pip install surprise
! pip install nlp-id

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nlp-id
  Downloading nlp_id-0.1.12.0.tar.gz (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 16.2 MB/s 
[?25hCollecting scikit-learn==0.22
  Downloading scikit_learn-0.22-cp37-cp37m-manylinux1_x86_64.whl (7.0 MB)
[K     |████████████████████████████████| 7.0 MB 41.9 MB/s 
[?25hCollecting nltk==3.4.5
  Downloading nltk-3.4.5.zip (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 38.2 MB/s 
[?25hCollecting wget==3.2
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: nlp-id, nltk, wget
  Building wheel for nlp-id (setup.py) ... [?25l[?25hdone
  Created wheel for nlp-id: filename=nlp_id-0.1.12.0-py3-none-any.whl size=8074104 sha256=45217ab225454361489d062237c21dfbc99d6ce712d7c79eba37bdc7639716d0
  Stored in directory: /root/.cache/pip/wheels/b2/50/48/da59531125bd94f48dfe66140f41d8fd8a4f04062050375013
  Building wheel for nltk (setup.p

Library Import

In [2]:
import pandas as pd
import numpy as np
import sklearn
import pickle
import string
import math

from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from nlp_id.tokenizer import Tokenizer
from nlp_id.lemmatizer import Lemmatizer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from nltk.corpus import stopwords
from nltk.corpus import stopwords
from sklearn.neighbors import DistanceMetric
from scipy.sparse import hstack, vstack

import nltk
# import surprise

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

## Data Loading

Tourism Description dataset.

In [3]:
dataset_path = "tourism_summarized.csv"
data_df = pd.read_csv(dataset_path, delimiter = ';', decimal = ',')

data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...


In [4]:
data_df.describe()

Unnamed: 0,Index,Place_Id,Price,Lat,Long
count,437.0,437.0,437.0,437.0,437.0
mean,219.491991,219.0,24652.173913,-7.095438,109.160142
std,127.738024,126.295289,66446.374709,0.727241,1.962848
min,0.0,1.0,0.0,-8.197894,103.931398
25%,109.0,110.0,0.0,-7.74959,107.578369
50%,219.0,219.0,5000.0,-7.020524,110.237468
75%,330.0,328.0,20000.0,-6.829411,110.431869
max,441.0,437.0,900000.0,1.07888,112.821662


Attribute Variables

In [5]:
summarized_description_field = "Summarized_Description"
place_name_field = "Place_Name"
category_field = "Category"
preprocessed_description_field = "Preprocessed_Description"
preprocessed_summarized_description_field = "Preprocessed_Summarized_Description"
city_field = "City"
latitude_field = "Lat"
longitude_field = "Long"
budget_field = "Price"

## Data Preprocessing

Remove place_name from summarized_text_description

In [6]:
stopwords_list = data_df[place_name_field].str.split(' ').to_numpy()
place_name_stopwords = set(np.hstack(stopwords_list))
place_name_stopwords = [word.lower() for word in place_name_stopwords]

def remove_stopwords(txt):
    txt_nostop = ' '.join([word for word in txt.split(" ") if str(word).lower() not in place_name_stopwords])
    return txt_nostop

data_df[preprocessed_summarized_description_field] = data_df[summarized_description_field].apply(lambda x: remove_stopwords(x))
data_df.head()

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,(Monas) mulai dibangun pada 17 Agustus 1961 di...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,dapat dijadikan tujuan kamu ketika berada di D...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,"Bisa dibilang kalau adalah Indonesia, mulai da..."
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Adventures mengajak para pengunjung untuk mera...


Concatenate Place Name and Summarized_Description

In [7]:
concat_place_name_and_description = []

for index, row in data_df.iterrows():
  place_name_row = row[place_name_field]
  place_description_row = row[preprocessed_summarized_description_field]

  concat_row = place_name_row + " " + place_description_row
  concat_place_name_and_description.append(concat_row)
  
data_df[preprocessed_summarized_description_field] = concat_place_name_and_description
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah (TMII) Bisa dibilan...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Remove Punctuations

In [8]:
removed_punctuation_sentences = []

for index, row in data_df.iterrows():
  place_description_row = row[preprocessed_summarized_description_field]

  concat_row = place_description_row.translate(str.maketrans('', '', string.punctuation))
  removed_punctuation_sentences.append(concat_row)
  
data_df[preprocessed_summarized_description_field] = removed_punctuation_sentences
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Lemmatize Text.

In [9]:
indo_lemmatizer = Lemmatizer()

index = 0

tokenized_descriptions = []

for index, row in data_df.iterrows():
  description = row[summarized_description_field]
  tokenized_sentence = indo_lemmatizer.lemmatize(description)
  tokenized_descriptions.append(tokenized_sentence)

data_df[preprocessed_description_field] = tokenized_descriptions
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...,monumen nasional monas mulai bangun pada 17 ag...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...,kota tua jakarta dapat jadi tuju wisata kamu k...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...,wahana yang ada di kelompok jadi baca juga daf...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...,bisa bilang kalau taman mini indonesia indah a...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...,atlantis water adventures ajak para kunjung un...


In [10]:
data_df[city_field].unique()

array(['Jakarta', 'Yogyakarta', 'Bandung', 'Semarang', 'Surabaya'],
      dtype=object)

In [11]:
data_df[category_field].unique()

array(['Budaya', 'Taman Hiburan', 'Cagar Alam', 'Bahari',
       'Pusat Perbelanjaan', 'Tempat Ibadah'], dtype=object)

## Data Modelling

In [12]:
def pickle_dump(model, model_name):
  pickle.dump(model, open(model_name, 'wb'))

### Based on User Query

Make Machine Learning Model (KNN Recommendation System).

In [13]:
def train_description(description_data_df):
  from nltk.corpus import stopwords
  stopwords = stopwords.words("indonesian")
  tf_idf_vectorizer = TfidfVectorizer(stop_words = stopwords)
  vector_components = tf_idf_vectorizer.fit_transform(description_data_df)
  index_to_word_mapping = tf_idf_vectorizer.get_feature_names()

  pickle_dump(tf_idf_vectorizer, "tf_idf_vectorizer_descriptions.pkl")

  return vector_components

def train_model_for_user_query(data_df):
  dataset_df = data_df

  description_trained_vector_components = train_description(
    dataset_df[preprocessed_summarized_description_field]
  )


  nearest_neighbors = NearestNeighbors(n_neighbors = 10)
  nearest_neighbors.fit(description_trained_vector_components)
  pickle_dump(nearest_neighbors, "tourism_place_user_query_nearest_neighbors.pkl")

  return description_trained_vector_components

all_vector_components = train_model_for_user_query(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

Current Shape: (437, 6317)


  'stop_words.' % sorted(inconsistent))


### Based on User Location

Make Machine Learning Model (KNN Recommendation System).

In [14]:
MULTIPLIER = 1000
LOCATION_MULTIPLIER = 1

def train_city(city_df):
  count_vectorizer = CountVectorizer()
  city_vector_components = count_vectorizer.fit_transform(city_df.to_numpy()) * MULTIPLIER

  pickle_dump(count_vectorizer, "city_count_vectorizer.pkl")
  return city_vector_components

def train_categories(categories_df):
  categories_count_vectorizer = CountVectorizer()
  categories_vector_components = categories_count_vectorizer.fit_transform(categories_df.to_numpy()) * MULTIPLIER
  
  pickle_dump(categories_count_vectorizer, "categories_count_vectorizer.pkl")

  return categories_vector_components

def train_location(latitude_df, longitude_df):
  lat_df = latitude_df * LOCATION_MULTIPLIER
  long_df = longitude_df * LOCATION_MULTIPLIER
  
  return lat_df, long_df

def train_model_for_user_location(data_df):
  # Preprocess Dataset
  dataset_df = data_df
  # categories_trained_vector_components = train_categories(dataset_df[category_field])
  city_trained_vector_components = train_city(dataset_df[city_field])
  categories_trained_vector_components = train_categories(dataset_df[category_field])
  latitude_trained_vector_components, longitude_trained_vector_components = train_location(dataset_df[latitude_field], dataset_df[longitude_field])
  
  latitude_trained_vector_components = np.array([latitude_trained_vector_components]).T
  longitude_trained_vector_components = np.array([longitude_trained_vector_components]).T

  print(latitude_trained_vector_components.shape)
  print(longitude_trained_vector_components.shape)

  all_vector_components = hstack([city_trained_vector_components, 
                            categories_trained_vector_components,
                            longitude_trained_vector_components, 
                            latitude_trained_vector_components])

  # max_abs_scaler = MaxAbsScaler()
  # all_vector_components = max_abs_scaler.fit_transform(all_vector_components)
  # pickle_dump(max_abs_scaler, "max_abs_scaler_location.pkl")

  nearest_neighbors = NearestNeighbors(n_neighbors = 10)
  nearest_neighbors.fit(all_vector_components)
  pickle_dump(nearest_neighbors, "tourism_place_user_location_nearest_neighbors.pkl")
  
  return all_vector_components

all_vector_components = train_model_for_user_location(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

(437, 1)
(437, 1)
Current Shape: (437, 17)


### Based on Budget

In [15]:
MULTIPLIER = 100000000

def train_city(city_df):
  count_vectorizer = CountVectorizer()
  city_vector_components = count_vectorizer.fit_transform(city_df.to_numpy()) * MULTIPLIER

  pickle_dump(count_vectorizer, "city_count_vectorizer.pkl")
  return city_vector_components

def train_categories(categories_df):
  categories_count_vectorizer = CountVectorizer()
  categories_vector_components = categories_count_vectorizer.fit_transform(
      categories_df.to_numpy()) * MULTIPLIER
  
  pickle_dump(categories_count_vectorizer, "categories_count_vectorizer.pkl")

  return categories_vector_components

def train_budget(budget_df):
  return budget_df

def train_model_for_user_budget(data_df):
  # Preprocess Dataset
  dataset_df = data_df
  # categories_trained_vector_components = train_categories(dataset_df[category_field])
  city_trained_vector_components = train_city(dataset_df[city_field])
  categories_trained_vector_components = train_categories(dataset_df[category_field])
  budget_trained_vector_components = np.array([train_budget(dataset_df[budget_field])]).T

  all_vector_components = hstack([city_trained_vector_components, 
                            categories_trained_vector_components,
                            budget_trained_vector_components,])

  # max_abs_scaler = MaxAbsScaler()
  # all_vector_components = max_abs_scaler.fit_transform(all_vector_components)
  # pickle_dump(max_abs_scaler, "max_abs_scaler_budget.pkl")

  # print(scaled_all_vector_components.shape)

  nearest_neighbors = NearestNeighbors(n_neighbors = 10)
  nearest_neighbors.fit(all_vector_components)
  pickle_dump(nearest_neighbors, "tourism_place_user_budget_nearest_neighbors.pkl")
  
  return all_vector_components

all_vector_components = train_model_for_user_budget(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

Current Shape: (437, 16)


## Recommendation Time!

In [16]:
def pickle_load(file_name):
  return pickle.load(open(file_name, 'rb'))

def transform_to_vector(preprocessing_vector_model_name, data):
  vectorizer = pickle_load(preprocessing_vector_model_name)
  categories_vector_components = vectorizer.transform(data)
  return categories_vector_components

def recommend_travelling_places_using_knn(all_vector_components, model_name):
  dataset = data_df
  k_nearest_neighbors = pickle_load(model_name)
  k_nearest_neighbors_scores = k_nearest_neighbors.kneighbors(all_vector_components)
  
  return k_nearest_neighbors_scores

def get_top_n_recommendations_based_on_similarity_scores(df, top_n_indexes):
  top_n_df = df.iloc[top_n_indexes]
  return top_n_df

### User Query

Make a Sample Data

In [17]:
sample_description = "Taman Lalu Lintas Ade Irma Suryani Nasution"
sample_place_name = sample_description

In [18]:
def transform_description(sample_description):
  return transform_to_vector("tf_idf_vectorizer_descriptions.pkl", sample_description)
  
def transform(sample_place_name, sample_description):
  description_vector_component = transform_description([sample_description])
  # place_name_vector_component = transform_place_name([sample_place_name])

  # all_vector_components = hstack([place_name_vector_component,
  #                                 description_vector_component], format = 'csr')

  return description_vector_component

all_vector_components = transform(
    sample_place_name,
    sample_description,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components, 
    "tourism_place_user_query_nearest_neighbors.pkl"
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

print(top_n_distances)
print(top_n_indexes_ranking.flatten())

Current Shape: (1, 6317)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)
[[1.00111186 1.38371153 1.38446355 1.38539839 1.38581057 1.39211797
  1.39304842 1.39312526 1.3953358  1.39901876]]
[239 346 250 352 394 292 433 392  56 402]


In [19]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
239,240,240,Taman Lalu Lintas Ade Irma Suryani Nasution,Taman Lalu-lintas Ade Irma Suryani adalah sebu...,Taman Hiburan,Bandung,7000,4.4,60.0,"{'lat': -6.911211, 'lng': 107.6133389}",-6.911211,107.613339,Wisata Taman Ade Irma Suryani Nasution (Taman ...,Taman Lalu Lintas Ade Irma Suryani Nasution Ta...,wisata taman ade irma suryani nasution taman l...
346,349,347,Taman Pandanaran,"Dalam sejarah yang tercatat, dulunya tempat in...",Taman Hiburan,Semarang,0,4.4,45.0,"{'lat': -6.987793600000001, 'lng': 110.4172262}",-6.987794,110.417226,Taman Srigunting adalah salah satu yang paling...,Taman Pandanaran adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
250,252,251,Taman Lansia,Berlibur santai di akhir pekan cocok dilakukan...,Taman Hiburan,Bandung,0,4.4,,"{'lat': -6.9021326, 'lng': 107.6209387}",-6.902133,107.620939,Sedikit bergeser dari kota SBY dan masih di da...,Taman Lansia Sedikit bergeser dari SBY masih d...,sedikit geser dari kota sby dan masih di daera...
352,355,353,Taman Srigunting,Merupakan salah satu landmark di Kawasan Kota ...,Taman Hiburan,Semarang,0,4.7,,"{'lat': -6.9681728, 'lng': 110.4278262}",-6.968173,110.427826,Taman Srigunting adalah salah satu yang paling...,Taman Srigunting adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
394,398,395,Taman Prestasi,Taman Prestasi Surabaya merupakan salah satu t...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.2614722, 'lng': 112.7428284}",-7.261472,112.742828,Taman Prestasi merupakan taman kota yang terle...,Taman Prestasi merupakan yang terletak di Tang...,taman prestasi rupa taman kota yang letak di t...
292,295,293,Taman Badak,Taman Badak ini baru saja diresmikan pada tang...,Taman Hiburan,Bandung,0,4.5,,"{'lat': -6.9132752, 'lng': 107.6094908}",-6.913275,107.609491,Liburan ke Bandung nggak pas kalau berburu tam...,Taman Badak Liburan ke nggak pas kalau berburu...,libur ke bandung nggak pas kalau buru taman hi...
433,438,434,Taman Bungkul,Taman Bungkul adalah taman wisata kota yang te...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.291346799999999, 'lng': 112.7398218}",-7.291347,112.739822,"Tidak mengherankan, karena di sini pengunjung ...",Taman Bungkul Tidak mengherankan karena di sin...,tidak heran karena di sini kunjung dapat duduk...
392,396,393,Taman Harmoni Keputih,Tempat tersebut ialah Taman Hatmoni Keputih Su...,Cagar Alam,Surabaya,0,4.4,60.0,"{'lat': -7.2952211, 'lng': 112.8035603}",-7.295221,112.80356,Salah satu tempat bersantai di Surabaya yang b...,Taman Harmoni Keputih Salah satu tempat bersan...,salah satu tempat santai di surabaya yang bany...
56,56,57,Taman Lapangan Banteng,"Lapangan Banteng, dulu bernama Waterlooplein (...",Taman Hiburan,Jakarta,0,4.7,,"{'lat': -6.170554999999999, 'lng': 106.8350378}",-6.170555,106.835038,"Selanjutnya, bagi pengunjung yang membawa anak...",Taman Lapangan Banteng Selanjutnya bagi pengun...,lanjut bagi kunjung yang bawa anak bisa guna a...
402,406,403,Taman Barunawati,Taman Barunawati yang lokasinya berada di kota...,Taman Hiburan,Surabaya,0,4.2,30.0,"{'lat': -7.222055899999998, 'lng': 112.7319967}",-7.222056,112.731997,Tempat wisata di Surabaya ini memang memiliki ...,Taman Barunawati Tempat di ini memang memiliki...,tempat wisata di surabaya ini memang milik ars...


### User Location

Make a Sample Data

In [22]:
sample_categories = "Cagar Alam"
sample_cities = "Bandung"
sample_lat = -10.587055
sample_long = 106.90

In [23]:
def transform_categories(sample_categories):
  return transform_to_vector("categories_count_vectorizer.pkl", sample_categories) * MULTIPLIER

def transform_city(sample_cities):
  return transform_to_vector("city_count_vectorizer.pkl", sample_cities) * MULTIPLIER

def transform_long_and_lat(sample_longitude, sample_latitude):
#  lat_min_max_scaler = pickle_load("lat_min_max_scaler.pkl")
#  long_min_max_scaler = pickle_load("long_min_max_scaler.pkl")

#  sample_longitude_normalized = long_min_max_scaler.transform([[sample_longitude]])
#  sample_latitude_normalized = lat_min_max_scaler.transform([[sample_latitude]])

#  return sample_longitude_normalized, sample_latitude_normalized
  return sample_longitude * LOCATION_MULTIPLIER, sample_latitude * LOCATION_MULTIPLIER

def transform(sample_categories,
              sample_cities,
              sample_latitude, 
              sample_longitude):
  
  # categories_vector_component = transform_categories([sample_categories])
  city_vector_component = transform_city([sample_cities])
  categories_vector_component = transform_categories([sample_categories])
  
  sample_longitude, sample_latitude = transform_long_and_lat(sample_latitude, sample_longitude)

  all_vector_components = hstack([city_vector_component,
                                  categories_vector_component,
                                  sample_longitude,
                                  sample_latitude], format = 'csr')
  
  # max_abs_scaler_location = pickle_load("max_abs_scaler_location.pkl")
  # all_vector_components = max_abs_scaler_location.transform(all_vector_components)
  
  print(all_vector_components.shape)
  
  return all_vector_components

all_vector_components = transform(
    sample_categories,
    sample_cities,
    sample_lat,
    sample_long,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components,
    "tourism_place_user_location_nearest_neighbors.pkl",
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

(1, 17)
Current Shape: (1, 17)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)


In [24]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
322,325,323,Kebun Tanaman Obat Sari Alam,Kebun Tanaman Obat Sari Alam lebih terkenal de...,Cagar Alam,Bandung,0,4.9,90.0,"{'lat': -6.601320999999999, 'lng': 106.632734}",-6.601321,106.632734,Berbagai wahana dan permainan tersedia untuk m...,Kebun Tanaman Obat Sari Alam Berbagai wahana p...,bagai wahana dan main sedia untuk tunjang giat...
299,302,300,Sanghyang Heuleut,Danau yang satu ini memiliki air jernih bernua...,Cagar Alam,Bandung,10000,4.4,,"{'lat': -6.876513099999999, 'lng': 107.3422183}",-6.876513,107.342218,Pesona alamnya yang luar biasa membuat banyak ...,Sanghyang Heuleut alamnya yang luar biasa memb...,pesona alam yang luar biasa buat banyak sobat ...
315,318,316,Curug Malela,Curug Malela adalah salah satu destinasi alam ...,Cagar Alam,Bandung,10000,4.4,,"{'lat': -7.0182386, 'lng': 107.2072145}",-7.018239,107.207215,"Curug Malela berada di Kawasan Bandung Barat, ...",Curug Malela berada di Barat tepatnya berada d...,curug malela ada di kawasan bandung barat tepa...
298,301,299,Sendang Geulis Kahuripan,Wilayah ini lebih tepatnya berada di RPH Cikal...,Cagar Alam,Bandung,10000,4.3,120.0,"{'lat': -6.7495268, 'lng': 107.478658}",-6.749527,107.478658,Air yang ada di sini juga sangat jernih dan be...,Sendang Geulis Kahuripan yang ada di sini juga...,air yang ada di sini juga sangat jernih dan be...
327,330,328,Gua Pawon,Gua Pawon adalah sebuah gua alami dan situs pu...,Cagar Alam,Bandung,10000,4.5,90.0,"{'lat': -6.8231833, 'lng': 107.4371655}",-6.823183,107.437166,Wisata Gua Pawon â€“ Jawa Barat di Cipatat Ban...,Gua Pawon â€“ di Cipatat adalah salah satu tem...,wisata gua pawon jawa barat di cipatat bandung...
272,274,273,Curug Bugbrug,Curug Bugbrug Sesuai dengan namanya kata Bugbr...,Cagar Alam,Bandung,7500,4.3,120.0,"{'lat': -6.790388999999999, 'lng': 107.5777632}",-6.790389,107.577763,Curug Bugbrug merupakan destinasi wisata berup...,Curug Bugbrug merupakan destinasi berupa yang ...,curug bugbrug rupa destinasi wisata rupa air t...
245,247,246,Curug Tilu Leuwi Opat,Curug Tilu Leuwi Opat merupakan salah satu wis...,Cagar Alam,Bandung,10000,4.4,120.0,"{'lat': -6.790569199999999, 'lng': 107.5826388}",-6.790569,107.582639,"Tidak hanya trekking langsung ke curug tilu, d...",Curug Tilu Leuwi Opat Tidak hanya trekking lan...,tidak hanya trekking langsung ke curug tilu si...
288,290,289,Curug Aseupan,Curug Aseupan berada di kawasan Curug Tilu Leu...,Cagar Alam,Bandung,10000,4.7,,"{'lat': -6.7875652, 'lng': 107.5822844}",-6.787565,107.582284,Area curug ini terdiri dari beberapa air terju...,Curug Aseupan Area ini terdiri dari beberapa d...,area curug ini diri dari beberapa air terjun d...
260,262,261,Ciwangun Indah Camp Official,Ciwangun Indah Camp atau CIC adalah sebuah tem...,Cagar Alam,Bandung,10000,4.3,,"{'lat': -6.786939, 'lng': 107.5837331}",-6.786939,107.583733,Ciwangun Indah Camp atau CIC adalah sebuah tem...,Ciwangun Indah Camp Official atau CIC adalah s...,ciwangun indah camp atau cic adalah buah tempa...
210,211,211,GunungTangkuban perahu,Gunung Tangkuban Parahu adalah salah satu gunu...,Cagar Alam,Bandung,30000,4.5,,"{'lat': -6.759637700000001, 'lng': 107.6097807}",-6.759638,107.609781,Area Gunung dan Kawah Tangkuban Perahu memang ...,GunungTangkuban perahu Area Tangkuban memang i...,area gunung dan kawah tangkuban perahu memang ...


### User Budget

In [25]:
sample_categories = "Taman Hiburan"
sample_cities = "Bandung"
sample_budget = 100000

In [26]:
def transform_categories(sample_categories):
  return transform_to_vector("categories_count_vectorizer.pkl", sample_categories) * MULTIPLIER

def transform_city(sample_cities):
  return transform_to_vector("city_count_vectorizer.pkl", sample_cities) * MULTIPLIER

def transform_budget(sample_budget):
  return sample_budget

def transform(sample_categories,
              sample_cities, 
              sample_budget):
  
  # categories_vector_component = transform_categories([sample_categories])
  city_vector_component = transform_city([sample_cities])
  categories_vector_component = transform_categories([sample_categories])
  
  budget_vector_component = transform_budget(sample_budget)

  all_vector_components = hstack([city_vector_component,
                                  categories_vector_component,
                                  budget_vector_component], format = 'csr')
  
  # max_abs_scaler_budget = pickle_load("max_abs_scaler_budget.pkl")
  # all_vector_components = max_abs_scaler_budget.transform(all_vector_components)
  
  # print(all_vector_components.shape)
  
  return all_vector_components

all_vector_components = transform(
    sample_categories,
    sample_cities,
    sample_budget,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components,
    "tourism_place_user_budget_nearest_neighbors.pkl",
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

Current Shape: (1, 16)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)


In [27]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
248,250,249,Upside Down World Bandung,Upside Down World Bandung pertama kali dibuka ...,Taman Hiburan,Bandung,100000,4.0,30.0,"{'lat': -6.896300000000001, 'lng': 107.617}",-6.8963,107.617,Semua furnitur yang berada di Upside Down Worl...,Upside Down World Bandung Semua furnitur yang ...,semua furnitur yang ada di upside down world i...
236,237,237,Panghegar Waterboom Bandung,Panghegar Waterboom Bandung merupakan salah sa...,Taman Hiburan,Bandung,75000,4.3,,"{'lat': -6.961403199999999, 'lng': 107.6227321}",-6.961403,107.622732,Ada pun yang paling baru di Panghegar Waterboo...,Panghegar Waterboom Bandung Ada pun yang palin...,ada pun yang paling baru di panghegar waterboo...
295,298,296,Batununggal Indah Club,Kolam renang Batununggal merupakan salah satu ...,Taman Hiburan,Bandung,70000,4.4,,"{'lat': -6.963229999999999, 'lng': 107.626416}",-6.96323,107.626416,Jika ingin ke pusat belanja dan indoor theme-p...,Batununggal Indah Club Jika ingin ke pusat bel...,jika ingin ke pusat belanja dan indoor theme-p...
280,282,281,Tektona Waterpark,Tektona Waterpark sebuah wahana yang memberika...,Taman Hiburan,Bandung,60000,3.8,,"{'lat': -7.009602199999999, 'lng': 107.6062161}",-7.009602,107.606216,Kampung Batu Malakasari adalah sebuah tempat w...,Tektona Waterpark adalah sebuah tempat yang di...,kampung batu malakasari adalah buah tempat wis...
224,225,225,Orchid Forest Cikole,Orchid Forest Cikole Lembang sudah berdiri sej...,Taman Hiburan,Bandung,50000,4.6,150.0,"{'lat': -6.780493, 'lng': 107.637475}",-6.780493,107.637475,Untuk bisa masuk ke Orchid Forest wisatawan cu...,Orchid Forest Cikole Untuk bisa masuk ke wisat...,untuk bisa masuk ke orchid forest wisatawan cu...
290,292,291,Water Park Bandung Indah,Bandung Indah Waterpark merupakan salah satu k...,Taman Hiburan,Bandung,50000,4.0,,"{'lat': -6.980580700000001, 'lng': 107.5856567}",-6.980581,107.585657,Terdapat kolam renang untuk dewasa dan anak-an...,Water Park Bandung Indah Terdapat kolam renang...,dapat kolam renang untuk dewasa dan anak seper...
247,249,248,NuArt Sculpture Park,NuArt Sculpture Park ini merupakan sebuah gall...,Taman Hiburan,Bandung,50000,4.7,,"{'lat': -6.8778589, 'lng': 107.5720263}",-6.877859,107.572026,Setelah sukses menyelenggarakan dance camp dan...,NuArt Sculpture Park Setelah sukses menyelengg...,telah sukses selenggara dance camp dan pentas ...
333,336,334,Chingu Cafe Little Seoul,Selain populer karena memiliki pemandangan yan...,Taman Hiburan,Bandung,50000,4.5,,"{'lat': -6.901224099999999, 'lng': 107.6099853}",-6.901224,107.609985,"Namun, ternyata Bandung punya solusi dan alter...",Chingu Cafe Little Seoul Namun ternyata punya ...,namun nyata bandung punya solusi dan alternati...
249,251,250,Pesona Nirwana Waterpark & Cottages,Pesona Nirwana Waterpark merupakan sebuah obje...,Taman Hiburan,Bandung,50000,4.1,,"{'lat': -7.040915399999998, 'lng': 107.5269156}",-7.040915,107.526916,Untuk menikmati berbagai wahana permainan air ...,Pesona Nirwana Waterpark Cottages Untuk menik...,untuk nikmat bagai wahana main air di water pa...
266,268,267,Panama Park 825,Panama Park Bandung menjadi salah satu tempat ...,Taman Hiburan,Bandung,50000,4.4,,"{'lat': -6.9119768, 'lng': 107.569674}",-6.911977,107.569674,Bandoeng.co.id portal berbagi informasi tentan...,Panama Park 825 Bandoengcoid portal berbagi in...,bandoeng co id portal bagi informasi tentang t...
