# Recommendation System Item Based Collaborative Filtering

In [1]:
# ! pip install surprise
! pip install nlp-id

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nlp-id
  Downloading nlp_id-0.1.12.0.tar.gz (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 4.8 MB/s 
[?25hCollecting scikit-learn==0.22
  Downloading scikit_learn-0.22-cp37-cp37m-manylinux1_x86_64.whl (7.0 MB)
[K     |████████████████████████████████| 7.0 MB 33.9 MB/s 
[?25hCollecting nltk==3.4.5
  Downloading nltk-3.4.5.zip (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 58.5 MB/s 
[?25hCollecting wget==3.2
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: nlp-id, nltk, wget
  Building wheel for nlp-id (setup.py) ... [?25l[?25hdone
  Created wheel for nlp-id: filename=nlp_id-0.1.12.0-py3-none-any.whl size=8074104 sha256=04b473e1ace0ece4d5dc4a853609961fef5412795fdf0b21754524e7d3d3e545
  Stored in directory: /root/.cache/pip/wheels/b2/50/48/da59531125bd94f48dfe66140f41d8fd8a4f04062050375013
  Building wheel for nltk (setup.py

Library Import

In [308]:
import pandas as pd
import numpy as np
import sklearn
import pickle
import string

from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from nlp_id.tokenizer import Tokenizer
from nlp_id.lemmatizer import Lemmatizer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from nltk.corpus import stopwords
from nltk.corpus import stopwords
from sklearn.neighbors import DistanceMetric
from scipy.sparse import hstack, vstack

import nltk
# import surprise

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Data Loading

Tourism Description dataset.

In [206]:
dataset_path = "tourism_summarized.csv"
data_df = pd.read_csv(dataset_path, delimiter = ';', decimal = ',')

data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...


In [207]:
data_df.describe()

Unnamed: 0,Index,Place_Id,Price,Lat,Long
count,437.0,437.0,437.0,437.0,437.0
mean,219.491991,219.0,24652.173913,-7.095438,109.160142
std,127.738024,126.295289,66446.374709,0.727241,1.962848
min,0.0,1.0,0.0,-8.197894,103.931398
25%,109.0,110.0,0.0,-7.74959,107.578369
50%,219.0,219.0,5000.0,-7.020524,110.237468
75%,330.0,328.0,20000.0,-6.829411,110.431869
max,441.0,437.0,900000.0,1.07888,112.821662


Attribute Variables

In [208]:
summarized_description_field = "Summarized_Description"
place_name_field = "Place_Name"
category_field = "Category"
preprocessed_description_field = "Preprocessed_Description"
preprocessed_summarized_description_field = "Preprocessed_Summarized_Description"
city_field = "City"
latitude_field = "Lat"
longitude_field = "Long"

## Data Preprocessing

Remove place_name from summarized_text_description

In [209]:
stopwords_list = data_df[place_name_field].str.split(' ').to_numpy()
place_name_stopwords = set(np.hstack(stopwords_list))
place_name_stopwords = [word.lower() for word in place_name_stopwords]

def remove_stopwords(txt):
    txt_nostop = ' '.join([word for word in txt.split(" ") if str(word).lower() not in place_name_stopwords])
    return txt_nostop

data_df[preprocessed_summarized_description_field] = data_df[summarized_description_field].apply(lambda x: remove_stopwords(x))
data_df.head()

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,(Monas) mulai dibangun pada 17 Agustus 1961 di...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,dapat dijadikan tujuan kamu ketika berada di D...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,"Bisa dibilang kalau adalah Indonesia, mulai da..."
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Adventures mengajak para pengunjung untuk mera...


Concatenate Place Name and Summarized_Description

In [210]:
concat_place_name_and_description = []

for index, row in data_df.iterrows():
  place_name_row = row[place_name_field]
  place_description_row = row[preprocessed_summarized_description_field]

  concat_row = place_name_row + " " + place_description_row
  concat_place_name_and_description.append(concat_row)
  
data_df[preprocessed_summarized_description_field] = concat_place_name_and_description
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah (TMII) Bisa dibilan...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Remove Punctuations

In [211]:
removed_punctuation_sentences = []

for index, row in data_df.iterrows():
  place_description_row = row[preprocessed_summarized_description_field]

  concat_row = place_description_row.translate(str.maketrans('', '', string.punctuation))
  removed_punctuation_sentences.append(concat_row)
  
data_df[preprocessed_summarized_description_field] = removed_punctuation_sentences
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Lemmatize Text.

In [212]:
indo_lemmatizer = Lemmatizer()

index = 0

tokenized_descriptions = []

for index, row in data_df.iterrows():
  description = row[summarized_description_field]
  tokenized_sentence = indo_lemmatizer.lemmatize(description)
  tokenized_descriptions.append(tokenized_sentence)

data_df[preprocessed_description_field] = tokenized_descriptions
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...,monumen nasional monas mulai bangun pada 17 ag...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...,kota tua jakarta dapat jadi tuju wisata kamu k...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...,wahana yang ada di kelompok jadi baca juga daf...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...,bisa bilang kalau taman mini indonesia indah a...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...,atlantis water adventures ajak para kunjung un...


In [213]:
data_df[city_field].unique()

array(['Jakarta', 'Yogyakarta', 'Bandung', 'Semarang', 'Surabaya'],
      dtype=object)

## Data Modelling

In [214]:
def pickle_dump(model, model_name):
  pickle.dump(model, open(model_name, 'wb'))

### Based on User Query

Make Machine Learning Model (KNN Recommendation System).

In [215]:
def train_description(description_data_df):
  from nltk.corpus import stopwords
  stopwords = stopwords.words("indonesian")
  tf_idf_vectorizer = TfidfVectorizer(stop_words = stopwords)
  vector_components = tf_idf_vectorizer.fit_transform(description_data_df)
  index_to_word_mapping = tf_idf_vectorizer.get_feature_names()

  pickle_dump(tf_idf_vectorizer, "tf_idf_vectorizer_descriptions.pkl")

  return vector_components

def train_model_for_user_query(data_df):
  dataset_df = data_df

  description_trained_vector_components = train_description(
    dataset_df[preprocessed_summarized_description_field]
  )


  nearest_neighbors = NearestNeighbors(n_neighbors = 10)
  nearest_neighbors.fit(description_trained_vector_components)
  pickle_dump(nearest_neighbors, "tourism_place_user_query_nearest_neighbors.pkl")

  return description_trained_vector_components

all_vector_components = train_model_for_user_query(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

Current Shape: (437, 6317)


  'stop_words.' % sorted(inconsistent))


### Based on User Location

Make Machine Learning Model (KNN Recommendation System).

In [346]:
def train_city(city_df):
  one_hot_encoder = OneHotEncoder()
  city_vector_components = one_hot_encoder.fit_transform(city_df.to_numpy().reshape(-1, 1)) * 100
  print(city_vector_components)
  # print(city_vector_components.toarray())

  pickle_dump(one_hot_encoder, "city_count_vectorizer.pkl")
  return city_vector_components

def train_location(latitude_df, longitude_df):
  # lat_min_max_scaler = MinMaxScaler()
  # long_min_max_scaler = MinMaxScaler()

  # lat_df = data_df[latitude_field] 
  # lat_df = lat_min_max_scaler.fit_transform(np.array(lat_df).reshape(-1, 1)).T.flatten()
  # # print(lat_df)
  # pickle_dump(lat_min_max_scaler, "lat_min_max_scaler.pkl")

  # long_df = data_df[longitude_field]
  # long_df = long_min_max_scaler.fit_transform(np.array(long_df).reshape(-1, 1)).T.flatten()
  # pickle_dump(long_min_max_scaler, "long_min_max_scaler.pkl")
  lat_df = latitude_df
  long_df = longitude_df
  
  return lat_df, long_df

def train_model_for_user_location(data_df):
  # Preprocess Dataset
  dataset_df = data_df
  # categories_trained_vector_components = train_categories(dataset_df[category_field])
  city_trained_vector_components = train_city(dataset_df[city_field])
  latitude_trained_vector_components, longitude_trained_vector_components = train_location(dataset_df[latitude_field], dataset_df[longitude_field])
  latitude_trained_vector_components = np.array([latitude_trained_vector_components]).T
  longitude_trained_vector_components = np.array([longitude_trained_vector_components]).T

  print(latitude_trained_vector_components.shape)
  print(longitude_trained_vector_components.shape)

  all_vector_components = hstack([city_trained_vector_components, 
                            longitude_trained_vector_components, 
                            latitude_trained_vector_components])

  print(all_vector_components.shape)
  
  # min_max_scaler = MinMaxScaler()
  # all_vector_components = all_vector_components.toarray()
  # all_vector_components = min_max_scaler.fit_transform(all_vector_components)
  # pickle_dump(min_max_scaler, "min_max_scaler.pkl")

  # Train the Model
  print(sklearn.neighbors.VALID_METRICS['brute'])
  harvesine = DistanceMetric.get_metric('haversine')
  nearest_neighbors = NearestNeighbors(n_neighbors = 10, metric = 'manhattan')
  nearest_neighbors.fit(all_vector_components)
  pickle_dump(nearest_neighbors, "tourism_place_user_location_nearest_neighbors.pkl")
  
  return all_vector_components

all_vector_components = train_model_for_user_location(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

  (0, 1)	100.0
  (1, 1)	100.0
  (2, 1)	100.0
  (3, 1)	100.0
  (4, 1)	100.0
  (5, 1)	100.0
  (6, 1)	100.0
  (7, 1)	100.0
  (8, 1)	100.0
  (9, 1)	100.0
  (10, 1)	100.0
  (11, 1)	100.0
  (12, 1)	100.0
  (13, 1)	100.0
  (14, 1)	100.0
  (15, 1)	100.0
  (16, 1)	100.0
  (17, 1)	100.0
  (18, 1)	100.0
  (19, 1)	100.0
  (20, 1)	100.0
  (21, 1)	100.0
  (22, 1)	100.0
  (23, 1)	100.0
  (24, 1)	100.0
  :	:
  (412, 3)	100.0
  (413, 3)	100.0
  (414, 3)	100.0
  (415, 3)	100.0
  (416, 3)	100.0
  (417, 3)	100.0
  (418, 3)	100.0
  (419, 3)	100.0
  (420, 3)	100.0
  (421, 3)	100.0
  (422, 3)	100.0
  (423, 3)	100.0
  (424, 3)	100.0
  (425, 3)	100.0
  (426, 3)	100.0
  (427, 3)	100.0
  (428, 3)	100.0
  (429, 3)	100.0
  (430, 3)	100.0
  (431, 3)	100.0
  (432, 3)	100.0
  (433, 3)	100.0
  (434, 3)	100.0
  (435, 3)	100.0
  (436, 3)	100.0
(437, 1)
(437, 1)
(437, 7)
['cityblock', 'cosine', 'euclidean', 'haversine', 'l2', 'l1', 'manhattan', 'precomputed', 'nan_euclidean', 'braycurtis', 'canberra', 'chebyshev', 'corre

## Recommendation Time!

In [347]:
def pickle_load(file_name):
  return pickle.load(open(file_name, 'rb'))

def transform_to_vector(preprocessing_vector_model_name, data):
  vectorizer = pickle_load(preprocessing_vector_model_name)
  categories_vector_components = vectorizer.transform(data)
  return categories_vector_components

def recommend_travelling_places_using_knn(all_vector_components, model_name):
  dataset = data_df
  k_nearest_neighbors = pickle_load(model_name)
  k_nearest_neighbors_scores = k_nearest_neighbors.kneighbors(all_vector_components)
  
  return k_nearest_neighbors_scores

def get_top_n_recommendations_based_on_similarity_scores(df, top_n_indexes):
  top_n_df = df.iloc[top_n_indexes]
  return top_n_df

### User Query

Make a Sample Data

In [348]:
sample_description = "Taman Lalu Lintas Ade Irma Suryani Nasution"
sample_place_name = sample_description

In [349]:
def transform_description(sample_description):
  return transform_to_vector("tf_idf_vectorizer_descriptions.pkl", sample_description)
  
def transform(sample_place_name, sample_description):
  description_vector_component = transform_description([sample_description])
  # place_name_vector_component = transform_place_name([sample_place_name])

  # all_vector_components = hstack([place_name_vector_component,
  #                                 description_vector_component], format = 'csr')

  return description_vector_component

all_vector_components = transform(
    sample_place_name,
    sample_description,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components, 
    "tourism_place_user_query_nearest_neighbors.pkl"
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

print(top_n_distances)
print(top_n_indexes_ranking.flatten())

Current Shape: (1, 6317)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)
[[1.00111186 1.38371153 1.38446355 1.38539839 1.38581057 1.39211797
  1.39304842 1.39312526 1.3953358  1.39901876]]
[239 346 250 352 394 292 433 392  56 402]


  'stop_words.' % sorted(inconsistent))


In [350]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
239,240,240,Taman Lalu Lintas Ade Irma Suryani Nasution,Taman Lalu-lintas Ade Irma Suryani adalah sebu...,Taman Hiburan,Bandung,7000,4.4,60.0,"{'lat': -6.911211, 'lng': 107.6133389}",-6.911211,107.613339,Wisata Taman Ade Irma Suryani Nasution (Taman ...,Taman Lalu Lintas Ade Irma Suryani Nasution Ta...,wisata taman ade irma suryani nasution taman l...
346,349,347,Taman Pandanaran,"Dalam sejarah yang tercatat, dulunya tempat in...",Taman Hiburan,Semarang,0,4.4,45.0,"{'lat': -6.987793600000001, 'lng': 110.4172262}",-6.987794,110.417226,Taman Srigunting adalah salah satu yang paling...,Taman Pandanaran adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
250,252,251,Taman Lansia,Berlibur santai di akhir pekan cocok dilakukan...,Taman Hiburan,Bandung,0,4.4,,"{'lat': -6.9021326, 'lng': 107.6209387}",-6.902133,107.620939,Sedikit bergeser dari kota SBY dan masih di da...,Taman Lansia Sedikit bergeser dari SBY masih d...,sedikit geser dari kota sby dan masih di daera...
352,355,353,Taman Srigunting,Merupakan salah satu landmark di Kawasan Kota ...,Taman Hiburan,Semarang,0,4.7,,"{'lat': -6.9681728, 'lng': 110.4278262}",-6.968173,110.427826,Taman Srigunting adalah salah satu yang paling...,Taman Srigunting adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
394,398,395,Taman Prestasi,Taman Prestasi Surabaya merupakan salah satu t...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.2614722, 'lng': 112.7428284}",-7.261472,112.742828,Taman Prestasi merupakan taman kota yang terle...,Taman Prestasi merupakan yang terletak di Tang...,taman prestasi rupa taman kota yang letak di t...
292,295,293,Taman Badak,Taman Badak ini baru saja diresmikan pada tang...,Taman Hiburan,Bandung,0,4.5,,"{'lat': -6.9132752, 'lng': 107.6094908}",-6.913275,107.609491,Liburan ke Bandung nggak pas kalau berburu tam...,Taman Badak Liburan ke nggak pas kalau berburu...,libur ke bandung nggak pas kalau buru taman hi...
433,438,434,Taman Bungkul,Taman Bungkul adalah taman wisata kota yang te...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.291346799999999, 'lng': 112.7398218}",-7.291347,112.739822,"Tidak mengherankan, karena di sini pengunjung ...",Taman Bungkul Tidak mengherankan karena di sin...,tidak heran karena di sini kunjung dapat duduk...
392,396,393,Taman Harmoni Keputih,Tempat tersebut ialah Taman Hatmoni Keputih Su...,Cagar Alam,Surabaya,0,4.4,60.0,"{'lat': -7.2952211, 'lng': 112.8035603}",-7.295221,112.80356,Salah satu tempat bersantai di Surabaya yang b...,Taman Harmoni Keputih Salah satu tempat bersan...,salah satu tempat santai di surabaya yang bany...
56,56,57,Taman Lapangan Banteng,"Lapangan Banteng, dulu bernama Waterlooplein (...",Taman Hiburan,Jakarta,0,4.7,,"{'lat': -6.170554999999999, 'lng': 106.8350378}",-6.170555,106.835038,"Selanjutnya, bagi pengunjung yang membawa anak...",Taman Lapangan Banteng Selanjutnya bagi pengun...,lanjut bagi kunjung yang bawa anak bisa guna a...
402,406,403,Taman Barunawati,Taman Barunawati yang lokasinya berada di kota...,Taman Hiburan,Surabaya,0,4.2,30.0,"{'lat': -7.222055899999998, 'lng': 112.7319967}",-7.222056,112.731997,Tempat wisata di Surabaya ini memang memiliki ...,Taman Barunawati Tempat di ini memang memiliki...,tempat wisata di surabaya ini memang milik ars...


### User Location

Make a Sample Data

In [357]:
# sample_categories = "Taman Hiburan"
sample_cities = "Semarang"
sample_lat = -5.587055
sample_long = 106.90

In [362]:
def transform_city(sample_cities):
  return transform_to_vector("city_count_vectorizer.pkl", sample_cities)

def transform_long_and_lat(sample_longitude, sample_latitude):
#  lat_min_max_scaler = pickle_load("lat_min_max_scaler.pkl")
#  long_min_max_scaler = pickle_load("long_min_max_scaler.pkl")

#  sample_longitude_normalized = long_min_max_scaler.transform([[sample_longitude]])
#  sample_latitude_normalized = lat_min_max_scaler.transform([[sample_latitude]])

#  return sample_longitude_normalized, sample_latitude_normalized
  return sample_longitude, sample_latitude

def transform(sample_cities, 
              sample_latitude, 
              sample_longitude):
  
  # categories_vector_component = transform_categories([sample_categories])
  city_vector_component = transform_city([[sample_cities]]) * 100
  sample_longitude_normalized, sample_latitude_normalized = transform_long_and_lat(sample_latitude, sample_longitude)

  all_vector_components = hstack([city_vector_component,
                                  sample_longitude_normalized,
                                  sample_latitude_normalized], format = 'csr')
  
  print(all_vector_components.shape)
  

  
  # min_max_scaler = pickle_load("min_max_scaler.pkl")
  # all_vector_components = all_vector_components.toarray()
  # all_vector_components = min_max_scaler.transform(all_vector_components)
  
  return all_vector_components

all_vector_components = transform(
    # sample_categories,
    sample_cities,
    sample_lat,
    sample_long,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components,
    "tourism_place_user_location_nearest_neighbors.pkl",
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

# print(top_n_distances)
# print(top_n_indexes_ranking.flatten())

(1, 7)
Current Shape: (1, 7)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)


In [363]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
357,360,358,Masjid Kapal Semarang,Masjid Safinatun Najah atau pengunjung biasa m...,Tempat Ibadah,Semarang,0,4.1,,"{'lat': -7.018778199999999, 'lng': 110.2937134}",-7.018778,110.293713,Begitu pulalah yang berlaku untuk Masjid Kapal...,Masjid Kapal Semarang Begitu pulalah yang berl...,begitu pula yang laku untuk masjid kapal akpel...
348,351,349,Wisata Mangrove Tapak,Wisata hutan mangrove Semarang hampir tak pern...,Cagar Alam,Semarang,5000,4.3,90.0,"{'lat': -6.968561800000001, 'lng': 110.3459696}",-6.968562,110.34597,Lokasi hutan ini mudah dijangkau dari pusat ko...,Wisata Mangrove Tapak Lokasi ini mudah dijangk...,lokasi hutan ini mudah jangkau dari pusat kota...
362,365,363,Pantai Maron,Pantai Maron (bahasa Jawa: ê¦¥ê¦±ê¦¶ê¦±ê¦¶ê¦‚â...,Bahari,Semarang,5000,3.4,,"{'lat': -6.9545687, 'lng': 110.3600951}",-6.954569,110.360095,Sungai Maron merupakan salah satu objek wisata...,Pantai Maron merupakan salah satu objek yang c...,sungai maron rupa salah satu objek wisata yang...
343,346,344,Pantai Marina,Pantai Marina (bahasa Jawa: ê¦¥ê¦±ê¦¶ê¦±ê¦¶ê¦‚...,Bahari,Semarang,3000,4.1,,"{'lat': -6.948877, 'lng': 110.3893285}",-6.948877,110.389329,Pantai Marina Bantaeng merupakan salah satu pa...,Pantai Marina Bantaeng merupakan salah satu ya...,pantai marina bantaeng rupa salah satu pantai ...
342,345,343,Pantai Baruna,kunjungi Pantai Baruna yang berada di Kota Sem...,Bahari,Semarang,3000,4.0,,"{'lat': -6.945105199999999, 'lng': 110.3982398}",-6.945105,110.39824,Dan yang akan kita bahas saat ini adalah Panta...,Pantai Baruna yang akan kita bahas saat ini ad...,dan yang akan kita bahas saat ini adalah panta...
335,338,336,Grand Maerakaca,Masyarakat Jawa Tengah mungkin sudah tidak asi...,Taman Hiburan,Semarang,15000,4.4,,"{'lat': -6.9605225, 'lng': 110.3863941}",-6.960522,110.386394,Taman wisata ini merupakan â€œTaman Miniâ€ ve...,Grand Maerakaca ini merupakan â€œTaman Miniâ€...,taman wisata ini rupa taman mini versi jawa te...
349,352,350,Pantai Cipta,Pantai Cipta juga dikenal sebagai Pantai Petik...,Bahari,Semarang,5000,4.0,90.0,"{'lat': -6.9486402, 'lng': 110.4102923}",-6.94864,110.410292,Pantai Cirewang merupakan salah satu objek wis...,Pantai Cipta Cirewang merupakan salah satu obj...,pantai cirewang rupa salah satu objek wisata k...
364,367,365,Waduk Jatibarang,Waduk Jatibarang merupakan sebuah waduk yang b...,Cagar Alam,Semarang,2500,4.3,,"{'lat': -7.0370499, 'lng': 110.3477586}",-7.03705,110.347759,Fasilitas yang ada di sekitar Waduk Jatibarang...,Waduk Jatibarang Fasilitas yang ada di sekitar...,fasilitas yang ada di sekitar waduk jatibarang...
386,389,387,Obyek Wisata Goa Kreo,Goa Kreo Semarang yang berada di ibukota Jawa ...,Cagar Alam,Semarang,5500,4.3,60.0,"{'lat': -7.0372113, 'lng': 110.3476164}",-7.037211,110.347616,Goa Kreo Semarang yang berada di ibukota Jawa ...,Obyek Wisata Goa Kreo yang berada di ibukota i...,goa kreo sarang yang ada di ibukota jawa tenga...
380,383,381,Tugu Muda Semarang,"Tugu Muda (""Monumen Pemuda"" Indonesia) adalah ...",Budaya,Semarang,0,4.7,,"{'lat': -6.9843485, 'lng': 110.4093207}",-6.984349,110.409321,Wisata Tugu Muda di Sekayu Semarang Jawa Tenga...,Tugu Muda Semarang di Sekayu adalah salah satu...,wisata tugu muda di kayu sarang jawa tengah ad...
