# Recommendation System Item Based Collaborative Filtering

In [22]:
# ! pip install surprise
! pip install nlp-id

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Library Import

In [23]:
import pandas as pd
import numpy as np
import sklearn
import pickle
import string

from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from nlp_id.tokenizer import Tokenizer
from nlp_id.lemmatizer import Lemmatizer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from nltk.corpus import stopwords
from nltk.corpus import stopwords
from sklearn.neighbors import DistanceMetric
from scipy.sparse import hstack, vstack

import nltk
# import surprise

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Data Loading

Tourism Description dataset.

In [24]:
dataset_path = "tourism_summarized.csv"
data_df = pd.read_csv(dataset_path, delimiter = ';', decimal = ',')

data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...


In [25]:
data_df.describe()

Unnamed: 0,Index,Place_Id,Price,Lat,Long
count,437.0,437.0,437.0,437.0,437.0
mean,219.491991,219.0,24652.173913,-7.095438,109.160142
std,127.738024,126.295289,66446.374709,0.727241,1.962848
min,0.0,1.0,0.0,-8.197894,103.931398
25%,109.0,110.0,0.0,-7.74959,107.578369
50%,219.0,219.0,5000.0,-7.020524,110.237468
75%,330.0,328.0,20000.0,-6.829411,110.431869
max,441.0,437.0,900000.0,1.07888,112.821662


Attribute Variables

In [26]:
summarized_description_field = "Summarized_Description"
place_name_field = "Place_Name"
category_field = "Category"
preprocessed_description_field = "Preprocessed_Description"
preprocessed_summarized_description_field = "Preprocessed_Summarized_Description"
city_field = "City"
latitude_field = "Lat"
longitude_field = "Long"

## Data Preprocessing

Remove place_name from summarized_text_description

In [27]:
stopwords_list = data_df[place_name_field].str.split(' ').to_numpy()
place_name_stopwords = set(np.hstack(stopwords_list))
place_name_stopwords = [word.lower() for word in place_name_stopwords]

def remove_stopwords(txt):
    txt_nostop = ' '.join([word for word in txt.split(" ") if str(word).lower() not in place_name_stopwords])
    return txt_nostop

data_df[preprocessed_summarized_description_field] = data_df[summarized_description_field].apply(lambda x: remove_stopwords(x))
data_df.head()

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,(Monas) mulai dibangun pada 17 Agustus 1961 di...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,dapat dijadikan tujuan kamu ketika berada di D...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,"Bisa dibilang kalau adalah Indonesia, mulai da..."
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Adventures mengajak para pengunjung untuk mera...


Concatenate Place Name and Summarized_Description

In [28]:
concat_place_name_and_description = []

for index, row in data_df.iterrows():
  place_name_row = row[place_name_field]
  place_description_row = row[preprocessed_summarized_description_field]

  concat_row = place_name_row + " " + place_description_row
  concat_place_name_and_description.append(concat_row)
  
data_df[preprocessed_summarized_description_field] = concat_place_name_and_description
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah (TMII) Bisa dibilan...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Remove Punctuations

In [29]:
removed_punctuation_sentences = []

for index, row in data_df.iterrows():
  place_description_row = row[preprocessed_summarized_description_field]

  concat_row = place_description_row.translate(str.maketrans('', '', string.punctuation))
  removed_punctuation_sentences.append(concat_row)
  
data_df[preprocessed_summarized_description_field] = removed_punctuation_sentences
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Lemmatize Text.

In [30]:
indo_lemmatizer = Lemmatizer()

index = 0

tokenized_descriptions = []

for index, row in data_df.iterrows():
  description = row[summarized_description_field]
  tokenized_sentence = indo_lemmatizer.lemmatize(description)
  tokenized_descriptions.append(tokenized_sentence)

data_df[preprocessed_description_field] = tokenized_descriptions
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...,monumen nasional monas mulai bangun pada 17 ag...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...,kota tua jakarta dapat jadi tuju wisata kamu k...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...,wahana yang ada di kelompok jadi baca juga daf...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...,bisa bilang kalau taman mini indonesia indah a...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...,atlantis water adventures ajak para kunjung un...


In [31]:
data_df[city_field].unique()

array(['Jakarta', 'Yogyakarta', 'Bandung', 'Semarang', 'Surabaya'],
      dtype=object)

## Data Modelling

In [32]:
def pickle_dump(model, model_name):
  pickle.dump(model, open(model_name, 'wb'))

### Based on User Query

Make Machine Learning Model (KNN Recommendation System).

In [33]:
def train_description(description_data_df):
  from nltk.corpus import stopwords
  stopwords = stopwords.words("indonesian")
  tf_idf_vectorizer = TfidfVectorizer(stop_words = stopwords)
  vector_components = tf_idf_vectorizer.fit_transform(description_data_df)
  index_to_word_mapping = tf_idf_vectorizer.get_feature_names()

  pickle_dump(tf_idf_vectorizer, "tf_idf_vectorizer_descriptions.pkl")

  return vector_components

def train_model_for_user_query(data_df):
  dataset_df = data_df

  description_trained_vector_components = train_description(
    dataset_df[preprocessed_summarized_description_field]
  )


  nearest_neighbors = NearestNeighbors(n_neighbors = 10)
  nearest_neighbors.fit(description_trained_vector_components)
  pickle_dump(nearest_neighbors, "tourism_place_user_query_nearest_neighbors.pkl")

  return description_trained_vector_components

all_vector_components = train_model_for_user_query(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

Current Shape: (437, 6317)


  'stop_words.' % sorted(inconsistent))


### Based on User Location

Make Machine Learning Model (KNN Recommendation System).

In [94]:
def train_city(city_df):
  count_vectorizer = CountVectorizer()
  city_vector_components = count_vectorizer.fit_transform(city_df.to_numpy()) * 1000

  pickle_dump(count_vectorizer, "city_count_vectorizer.pkl")
  return city_vector_components

def train_categories(categories_df):
  categories_count_vectorizer = CountVectorizer()
  categories_vector_components = categories_count_vectorizer.fit_transform(categories_df.to_numpy()) * 1000
  
  pickle_dump(categories_count_vectorizer, "categories_count_vectorizer.pkl")

  return categories_vector_components

def train_location(latitude_df, longitude_df):
  lat_df = latitude_df
  long_df = longitude_df
  
  return lat_df, long_df

def train_model_for_user_location(data_df):
  # Preprocess Dataset
  dataset_df = data_df
  # categories_trained_vector_components = train_categories(dataset_df[category_field])
  city_trained_vector_components = train_city(dataset_df[city_field])
  categories_trained_vector_components = train_categories(dataset_df[category_field])
  latitude_trained_vector_components, longitude_trained_vector_components = train_location(dataset_df[latitude_field], dataset_df[longitude_field])
  latitude_trained_vector_components = np.array([latitude_trained_vector_components]).T
  longitude_trained_vector_components = np.array([longitude_trained_vector_components]).T

  print(latitude_trained_vector_components.shape)
  print(longitude_trained_vector_components.shape)

  all_vector_components = hstack([city_trained_vector_components, 
                            categories_trained_vector_components,
                            longitude_trained_vector_components, 
                            latitude_trained_vector_components])

  print(all_vector_components.shape)

  nearest_neighbors = NearestNeighbors(n_neighbors = 10)
  nearest_neighbors.fit(all_vector_components)
  pickle_dump(nearest_neighbors, "tourism_place_user_location_nearest_neighbors.pkl")
  
  return all_vector_components

all_vector_components = train_model_for_user_location(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

(437, 1)
(437, 1)
(437, 17)
Current Shape: (437, 17)


## Recommendation Time!

In [95]:
def pickle_load(file_name):
  return pickle.load(open(file_name, 'rb'))

def transform_to_vector(preprocessing_vector_model_name, data):
  vectorizer = pickle_load(preprocessing_vector_model_name)
  categories_vector_components = vectorizer.transform(data)
  return categories_vector_components

def recommend_travelling_places_using_knn(all_vector_components, model_name):
  dataset = data_df
  k_nearest_neighbors = pickle_load(model_name)
  k_nearest_neighbors_scores = k_nearest_neighbors.kneighbors(all_vector_components)
  
  return k_nearest_neighbors_scores

def get_top_n_recommendations_based_on_similarity_scores(df, top_n_indexes):
  top_n_df = df.iloc[top_n_indexes]
  return top_n_df

### User Query

Make a Sample Data

In [96]:
sample_description = "Taman Lalu Lintas Ade Irma Suryani Nasution"
sample_place_name = sample_description

In [97]:
def transform_description(sample_description):
  return transform_to_vector("tf_idf_vectorizer_descriptions.pkl", sample_description)
  
def transform(sample_place_name, sample_description):
  description_vector_component = transform_description([sample_description])
  # place_name_vector_component = transform_place_name([sample_place_name])

  # all_vector_components = hstack([place_name_vector_component,
  #                                 description_vector_component], format = 'csr')

  return description_vector_component

all_vector_components = transform(
    sample_place_name,
    sample_description,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components, 
    "tourism_place_user_query_nearest_neighbors.pkl"
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

print(top_n_distances)
print(top_n_indexes_ranking.flatten())

Current Shape: (1, 6317)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)
[[1.00111186 1.38371153 1.38446355 1.38539839 1.38581057 1.39211797
  1.39304842 1.39312526 1.3953358  1.39901876]]
[239 346 250 352 394 292 433 392  56 402]


  'stop_words.' % sorted(inconsistent))


In [98]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
239,240,240,Taman Lalu Lintas Ade Irma Suryani Nasution,Taman Lalu-lintas Ade Irma Suryani adalah sebu...,Taman Hiburan,Bandung,7000,4.4,60.0,"{'lat': -6.911211, 'lng': 107.6133389}",-6.911211,107.613339,Wisata Taman Ade Irma Suryani Nasution (Taman ...,Taman Lalu Lintas Ade Irma Suryani Nasution Ta...,wisata taman ade irma suryani nasution taman l...
346,349,347,Taman Pandanaran,"Dalam sejarah yang tercatat, dulunya tempat in...",Taman Hiburan,Semarang,0,4.4,45.0,"{'lat': -6.987793600000001, 'lng': 110.4172262}",-6.987794,110.417226,Taman Srigunting adalah salah satu yang paling...,Taman Pandanaran adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
250,252,251,Taman Lansia,Berlibur santai di akhir pekan cocok dilakukan...,Taman Hiburan,Bandung,0,4.4,,"{'lat': -6.9021326, 'lng': 107.6209387}",-6.902133,107.620939,Sedikit bergeser dari kota SBY dan masih di da...,Taman Lansia Sedikit bergeser dari SBY masih d...,sedikit geser dari kota sby dan masih di daera...
352,355,353,Taman Srigunting,Merupakan salah satu landmark di Kawasan Kota ...,Taman Hiburan,Semarang,0,4.7,,"{'lat': -6.9681728, 'lng': 110.4278262}",-6.968173,110.427826,Taman Srigunting adalah salah satu yang paling...,Taman Srigunting adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
394,398,395,Taman Prestasi,Taman Prestasi Surabaya merupakan salah satu t...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.2614722, 'lng': 112.7428284}",-7.261472,112.742828,Taman Prestasi merupakan taman kota yang terle...,Taman Prestasi merupakan yang terletak di Tang...,taman prestasi rupa taman kota yang letak di t...
292,295,293,Taman Badak,Taman Badak ini baru saja diresmikan pada tang...,Taman Hiburan,Bandung,0,4.5,,"{'lat': -6.9132752, 'lng': 107.6094908}",-6.913275,107.609491,Liburan ke Bandung nggak pas kalau berburu tam...,Taman Badak Liburan ke nggak pas kalau berburu...,libur ke bandung nggak pas kalau buru taman hi...
433,438,434,Taman Bungkul,Taman Bungkul adalah taman wisata kota yang te...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.291346799999999, 'lng': 112.7398218}",-7.291347,112.739822,"Tidak mengherankan, karena di sini pengunjung ...",Taman Bungkul Tidak mengherankan karena di sin...,tidak heran karena di sini kunjung dapat duduk...
392,396,393,Taman Harmoni Keputih,Tempat tersebut ialah Taman Hatmoni Keputih Su...,Cagar Alam,Surabaya,0,4.4,60.0,"{'lat': -7.2952211, 'lng': 112.8035603}",-7.295221,112.80356,Salah satu tempat bersantai di Surabaya yang b...,Taman Harmoni Keputih Salah satu tempat bersan...,salah satu tempat santai di surabaya yang bany...
56,56,57,Taman Lapangan Banteng,"Lapangan Banteng, dulu bernama Waterlooplein (...",Taman Hiburan,Jakarta,0,4.7,,"{'lat': -6.170554999999999, 'lng': 106.8350378}",-6.170555,106.835038,"Selanjutnya, bagi pengunjung yang membawa anak...",Taman Lapangan Banteng Selanjutnya bagi pengun...,lanjut bagi kunjung yang bawa anak bisa guna a...
402,406,403,Taman Barunawati,Taman Barunawati yang lokasinya berada di kota...,Taman Hiburan,Surabaya,0,4.2,30.0,"{'lat': -7.222055899999998, 'lng': 112.7319967}",-7.222056,112.731997,Tempat wisata di Surabaya ini memang memiliki ...,Taman Barunawati Tempat di ini memang memiliki...,tempat wisata di surabaya ini memang milik ars...


### User Location

Make a Sample Data

In [99]:
sample_categories = "Budaya Taman Hiburan Cagar Alam Bahari"
sample_cities = "Jakarta Bandung"
sample_lat = -10.587055
sample_long = 106.90

In [100]:
def transform_categories(sample_categories):
  return transform_to_vector("categories_count_vectorizer.pkl", sample_categories) * 1000

def transform_city(sample_cities):
  return transform_to_vector("city_count_vectorizer.pkl", sample_cities) * 1000

def transform_long_and_lat(sample_longitude, sample_latitude):
#  lat_min_max_scaler = pickle_load("lat_min_max_scaler.pkl")
#  long_min_max_scaler = pickle_load("long_min_max_scaler.pkl")

#  sample_longitude_normalized = long_min_max_scaler.transform([[sample_longitude]])
#  sample_latitude_normalized = lat_min_max_scaler.transform([[sample_latitude]])

#  return sample_longitude_normalized, sample_latitude_normalized
  return sample_longitude, sample_latitude

def transform(sample_categories,
              sample_cities, 
              sample_latitude, 
              sample_longitude):
  
  # categories_vector_component = transform_categories([sample_categories])
  city_vector_component = transform_city([sample_cities])
  categories_vector_component = transform_categories([sample_categories])
  
  sample_longitude, sample_latitude = transform_long_and_lat(sample_latitude, sample_longitude)

  all_vector_components = hstack([city_vector_component,
                                  categories_vector_component,
                                  sample_longitude,
                                  sample_latitude], format = 'csr')
  
  print(all_vector_components.shape)
  
  return all_vector_components

all_vector_components = transform(
    sample_categories,
    sample_cities,
    sample_lat,
    sample_long,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components,
    "tourism_place_user_location_nearest_neighbors.pkl",
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

(1, 17)
Current Shape: (1, 17)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)


In [101]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
30,30,31,Wisata Alam Mangrove Angke,Jangan anggap di Jakarta tidak ada wisata alam...,Cagar Alam,Jakarta,25000,4.3,,"{'lat': -6.105334, 'lng': 106.735584}",-6.105334,106.735584,Di daerah ini terdapat area wisata alam yaitu ...,Wisata Alam Mangrove Angke Di daerah ini terda...,di daerah ini dapat area wisata alam yaitu tam...
71,71,72,Waterboom PIK (Pantai Indah Kapuk),Waterbom Jakarta merupakan sebuah wahana perma...,Taman Hiburan,Jakarta,300000,4.4,,"{'lat': -6.113655899999999, 'lng': 106.7478596}",-6.113656,106.74786,Waterboom PIK bisa disebut-sebut sebagai salah...,Waterboom PIK Pantai Indah Kapuk bisa disebuts...,waterboom pik bisa sebut bagai salah satu waha...
66,66,67,Margasatwa Muara Angke,Suaka margasatwa Muara Angke adalah sebuah kaw...,Cagar Alam,Jakarta,25000,4.2,15.0,"{'lat': -6.1160075, 'lng': 106.7692016}",-6.116008,106.769202,Cagar Alam atau Suaka Margasatwa Muara Angke (...,Margasatwa Muara Angke Cagar atau Suaka SMMA m...,cagar alam atau suaka margasatwa muara angke s...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...,wahana yang ada di kelompok jadi baca juga daf...
7,7,8,Ocean Ecopark,Ocean Ecopark Salah satu zona rekreasi Ancol y...,Taman Hiburan,Jakarta,180000,4.0,,"{'lat': -6.125801699999999, 'lng': 106.8363249}",-6.125802,106.836325,Ecopark Ancol menjadi destinasi yang tepat unt...,Ocean Ecopark menjadi destinasi yang tepat unt...,ecopark ancol jadi destinasi yang tepat untuk ...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...,atlantis water adventures ajak para kunjung un...
44,44,45,Jakarta Aquarium dan Safari,"Jika telah mengunjungi Seaworld Ancol, mungkin...",Taman Hiburan,Jakarta,185000,4.6,,"{'lat': -6.1752647, 'lng': 106.7904684}",-6.175265,106.790468,Dengan mengunjungi Neo Soho Mall yang ada di J...,Jakarta Aquarium dan Safari Dengan mengunjungi...,dengan kunjung neo soho mall yang ada di jakar...
35,35,36,Skyrink - Mall Taman Anggrek,Salah satu arena ice skating Jakarta adalah Ic...,Taman Hiburan,Jakarta,110000,4.5,180.0,"{'lat': -6.178996, 'lng': 106.791941}",-6.178996,106.791941,"Namun, masyarakat Indonesia tetap dapat menikm...",Skyrink Mall Taman Anggrek Namun masyarakat t...,namun masyarakat indonesia tetap dapat nikmat ...
26,26,27,Sea World,Seaworld Indonesia adalah sebuah miniatur peso...,Taman Hiburan,Jakarta,115000,4.5,180.0,"{'lat': -6.126477500000001, 'lng': 106.842963}",-6.126478,106.842963,Objek Wisata Sea World Ancol di Pademangan DKI...,Sea World Objek di Pademangan DKI adalah salah...,objek wisata sea world ancol di pademangan dki...
77,77,78,Hutan Kota Srengseng,Selain Taman Hutan Mangrove dan Pantai Indah K...,Taman Hiburan,Jakarta,1000,4.3,,"{'lat': -6.210694499999999, 'lng': 106.7643954}",-6.210694,106.764395,Namun jangan salah Jakarta juga punya hutan ya...,Hutan Kota Srengseng Namun jangan salah juga p...,namun jangan salah jakarta juga punya hutan ya...
