# Recommendation System Item Based Collaborative Filtering

In [236]:
# ! pip install surprise
! pip install nlp-id

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Library Import

In [237]:
import pandas as pd
import numpy as np
import sklearn
import pickle
import string

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from nlp_id.tokenizer import Tokenizer
from nlp_id.lemmatizer import Lemmatizer
from nltk.corpus import stopwords
from nltk.corpus import stopwords
from scipy.sparse import hstack, vstack

import nltk
# import surprise

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Data Loading

Tourism Description dataset.

In [238]:
# The CSV is semicolon-separated and uses a comma as the decimal mark.
dataset_path = "tourism_summarized.csv"
data_df = pd.read_csv(dataset_path, sep=';', decimal=',')

# Preview the first five rows.
data_df.head()

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...


In [239]:
data_df.describe()

Unnamed: 0,Index,Place_Id,Price,Lat,Long
count,437.0,437.0,437.0,437.0,437.0
mean,219.491991,219.0,24652.173913,-7.095438,109.160142
std,127.738024,126.295289,66446.374709,0.727241,1.962848
min,0.0,1.0,0.0,-8.197894,103.931398
25%,109.0,110.0,0.0,-7.74959,107.578369
50%,219.0,219.0,5000.0,-7.020524,110.237468
75%,330.0,328.0,20000.0,-6.829411,110.431869
max,441.0,437.0,900000.0,1.07888,112.821662


Attribute Variables

In [240]:
# Column names of the tourism DataFrame, defined once so that later cells
# refer to columns through these variables instead of repeating literals.
summarized_description_field = "Summarized_Description"
place_name_field = "Place_Name"
category_field = "Category"
# The two "Preprocessed_" columns do not exist in the CSV; they are created
# by the preprocessing cells below.
preprocessed_description_field = "Preprocessed_Description"
preprocessed_summarized_description_field = "Preprocessed_Summarized_Description"
city_field = "City"
latitude_field = "Lat"
longitude_field = "Long"

## Data Preprocessing

Remove every word that appears in any place name from `Summarized_Description`.

In [241]:
# Split every place name on single spaces; the result is one word list per row.
stopwords_list = data_df[place_name_field].str.split(' ').to_numpy()
# Flatten the word lists and lower-case each word. A set de-duplicates words
# that differ only by case and gives constant-time membership tests when each
# description token is checked against it.
place_name_stopwords = {word.lower() for word in np.hstack(stopwords_list)}

def remove_stopwords(txt):
    """Return ``txt`` without the words found in ``place_name_stopwords``.

    The text is split on single spaces, every token is compared in lower case
    against ``place_name_stopwords``, and the remaining tokens are joined back
    together with single spaces.
    """
    kept_words = []
    for token in txt.split(" "):
        if str(token).lower() in place_name_stopwords:
            continue
        kept_words.append(token)
    return ' '.join(kept_words)

# Strip place-name words from each summarized description and keep the result
# in the preprocessed column.
data_df[preprocessed_summarized_description_field] = (
    data_df[summarized_description_field].apply(remove_stopwords)
)
data_df.head()

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,(Monas) mulai dibangun pada 17 Agustus 1961 di...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,dapat dijadikan tujuan kamu ketika berada di D...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Wahana yang ada di kelompokkan menjadi: Baca j...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,"Bisa dibilang kalau adalah Indonesia, mulai da..."
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Adventures mengajak para pengunjung untuk mera...


Concatenate Place Name and Summarized_Description

In [242]:
# Prepend each place's name to its preprocessed description, separated by one
# space. Column-wise string concatenation replaces the row-by-row loop.
# NOTE: this cell reads and overwrites the same column, so running it twice
# prepends the place name twice.
concat_place_name_and_description = (
    data_df[place_name_field] + " " + data_df[preprocessed_summarized_description_field]
)

data_df[preprocessed_summarized_description_field] = concat_place_name_and_description
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional (Monas) mulai dibangun pada 1...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah (TMII) Bisa dibilan...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Remove Punctuation

In [243]:
# Build the translation table once: it deletes every character listed in
# string.punctuation (nothing is inserted in its place).
punctuation_table = str.maketrans('', '', string.punctuation)

# Apply the table to the whole column instead of looping over rows.
removed_punctuation_sentences = (
    data_df[preprocessed_summarized_description_field].str.translate(punctuation_table)
)

data_df[preprocessed_summarized_description_field] = removed_punctuation_sentences
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...


Lemmatize Text.

In [244]:
indo_lemmatizer = Lemmatizer()

# Lemmatize the original summarized description of every place (not the
# preprocessed column) and store the result in its own column.
tokenized_descriptions = data_df[summarized_description_field].apply(
    indo_lemmatizer.lemmatize
)

data_df[preprocessed_description_field] = tokenized_descriptions
data_df.head(5)

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
0,0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,Monumen Nasional (Monas) mulai dibangun pada 1...,Monumen Nasional Monas mulai dibangun pada 17 ...,monumen nasional monas mulai bangun pada 17 ag...
1,1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,Kota Tua Jakarta dapat dijadikan tujuan wisata...,Kota Tua dapat dijadikan tujuan kamu ketika be...,kota tua jakarta dapat jadi tuju wisata kamu k...
2,2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,Wahana yang ada di kelompokkan menjadi: Baca j...,Dunia Fantasi Wahana yang ada di kelompokkan m...,wahana yang ada di kelompok jadi baca juga daf...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...,bisa bilang kalau taman mini indonesia indah a...
4,4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,Atlantis Water Adventures mengajak para pengun...,Atlantis Water Adventure Adventures mengajak p...,atlantis water adventures ajak para kunjung un...


In [245]:
data_df[city_field].unique()

array(['Jakarta', 'Yogyakarta', 'Bandung', 'Semarang', 'Surabaya'],
      dtype=object)

## Data Modelling

In [246]:
def pickle_dump(model, model_name):
  """Serialize ``model`` with pickle into the file at path ``model_name``.

  Args:
    model: Any picklable object (the notebook passes fitted vectorizers and
      nearest-neighbour models).
    model_name: Destination file path; the file is opened in ``'wb'`` mode,
      so an existing file is overwritten.
  """
  # The context manager closes (and therefore flushes) the file even when
  # pickle.dump raises, instead of leaving the handle open.
  with open(model_name, 'wb') as model_file:
    pickle.dump(model, model_file)

### Based on User Query

Make Machine Learning Model (KNN Recommendation System).

In [284]:
def train_description(description_data_df):
  """Fit a TF-IDF vectorizer on the descriptions and persist it.

  The fitted vectorizer is pickled to "tf_idf_vectorizer_descriptions.pkl"
  so that queries can later be transformed with the same vocabulary.

  Args:
    description_data_df: Iterable of description strings (the notebook passes
      a DataFrame column).

  Returns:
    The TF-IDF matrix produced by ``fit_transform`` on the descriptions.
  """
  # Uses the module-level ``stopwords`` import; a distinct local name avoids
  # shadowing that module inside the function.
  indonesian_stopwords = stopwords.words("indonesian")
  tf_idf_vectorizer = TfidfVectorizer(stop_words = indonesian_stopwords)
  vector_components = tf_idf_vectorizer.fit_transform(description_data_df)

  # The previous unused ``get_feature_names()`` lookup was dropped: its result
  # was never read and the method no longer exists in scikit-learn >= 1.2.
  pickle_dump(tf_idf_vectorizer, "tf_idf_vectorizer_descriptions.pkl")

  return vector_components

def train_model_for_user_query(data_df):
  """Train and persist the nearest-neighbour model used for text queries.

  The preprocessed summarized descriptions are vectorized by
  ``train_description``; a 10-neighbour ``NearestNeighbors`` model is fitted
  on the result and pickled to
  "tourism_place_user_query_nearest_neighbors.pkl".

  Args:
    data_df: DataFrame containing the preprocessed summarized description column.

  Returns:
    The matrix returned by ``train_description`` for the description column.
  """
  descriptions = data_df[preprocessed_summarized_description_field]
  description_trained_vector_components = train_description(descriptions)

  knn_model = NearestNeighbors(n_neighbors = 10)
  knn_model.fit(description_trained_vector_components)
  pickle_dump(knn_model, "tourism_place_user_query_nearest_neighbors.pkl")

  return description_trained_vector_components

all_vector_components = train_model_for_user_query(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

Current Shape: (437, 6317)


  'stop_words.' % sorted(inconsistent))


### Based on User Location

Make Machine Learning Model (KNN Recommendation System).

In [285]:
def train_city(city_df):
  """Fit a ``CountVectorizer`` on the city values and persist it.

  The fitted vectorizer is pickled to "city_count_vectorizer.pkl".

  Args:
    city_df: Iterable of city strings.

  Returns:
    The count matrix produced by ``fit_transform`` on ``city_df``.
  """
  city_vectorizer = CountVectorizer()
  encoded_cities = city_vectorizer.fit_transform(city_df)
  pickle_dump(city_vectorizer, "city_count_vectorizer.pkl")
  return encoded_cities

def train_location(latitude_df, longitude_df):
  """Return the latitude and longitude features unchanged.

  The coordinates need no fitting step, so this is a pass-through. It uses the
  values it is given rather than reading the global ``data_df``, so the result
  always matches the frame the caller is training on.

  Args:
    latitude_df: Latitude values, one per place.
    longitude_df: Longitude values, one per place.

  Returns:
    A ``(latitude_df, longitude_df)`` tuple.
  """
  return latitude_df, longitude_df

def train_model_for_user_location(data_df):
  """Train and persist the nearest-neighbour model used for location queries.

  The feature matrix stacks, column-wise, the city count vectors, the
  longitude and the latitude. A 10-neighbour ``NearestNeighbors`` model is
  fitted on it and pickled to
  "tourism_place_user_location_nearest_neighbors.pkl".

  Args:
    data_df: DataFrame containing the city, latitude and longitude columns.

  Returns:
    The stacked feature matrix in CSR format.
  """
  city_features = train_city(data_df[city_field])
  latitudes, longitudes = train_location(data_df[latitude_field], data_df[longitude_field])

  # Wrap each coordinate sequence in an outer list and transpose, which turns
  # a 1-D sequence into a single column.
  latitude_column = np.array([latitudes]).T
  longitude_column = np.array([longitudes]).T

  # Column order: city counts, longitude, latitude.
  feature_blocks = [city_features, longitude_column, latitude_column]
  all_vector_components = hstack(feature_blocks, format = 'csr')

  knn_model = NearestNeighbors(n_neighbors = 10)
  knn_model.fit(all_vector_components)
  pickle_dump(knn_model, "tourism_place_user_location_nearest_neighbors.pkl")

  return all_vector_components

all_vector_components = train_model_for_user_location(
    data_df
)

print(f"Current Shape: {all_vector_components.shape}")

Current Shape: (437, 7)


## Recommendation Time!

In [286]:
def pickle_load(file_name):
  """Load and return the object pickled in the file at ``file_name``.

  Args:
    file_name: Path of a file previously written with pickle.

  Returns:
    The unpickled object.
  """
  # NOTE: unpickling can execute arbitrary code; only load files that this
  # notebook (or another trusted source) produced.
  # The context manager closes the handle even when pickle.load raises.
  with open(file_name, 'rb') as model_file:
    return pickle.load(model_file)

def transform_to_vector(preprocessing_vector_model_name, data):
  """Transform ``data`` with a previously pickled vectorizer.

  Args:
    preprocessing_vector_model_name: Path of the pickled, fitted vectorizer.
    data: Input passed straight to the vectorizer's ``transform`` method.

  Returns:
    Whatever the loaded vectorizer's ``transform`` returns for ``data``.
  """
  fitted_vectorizer = pickle_load(preprocessing_vector_model_name)
  return fitted_vectorizer.transform(data)

def recommend_travelling_places_using_knn(all_vector_components, model_name):
  """Query a pickled nearest-neighbour model with the given feature vectors.

  Args:
    all_vector_components: Feature matrix of the query, laid out like the
      matrix the model was fitted on.
    model_name: Path of the pickled nearest-neighbour model.

  Returns:
    The result of the model's ``kneighbors`` call; callers in this notebook
    unpack it as ``(distances, indexes)``.
  """
  # The model file is all that is needed here; the function no longer binds
  # the global ``data_df`` to an unused local.
  k_nearest_neighbors = pickle_load(model_name)
  k_nearest_neighbors_scores = k_nearest_neighbors.kneighbors(all_vector_components)

  return k_nearest_neighbors_scores

def get_top_n_recommendations_based_on_similarity_scores(df, top_n_indexes):
  """Select rows of ``df`` by integer position, in the order given.

  Args:
    df: DataFrame to select from.
    top_n_indexes: Integer row positions, passed to ``df.iloc``.

  Returns:
    The selected rows as a DataFrame.
  """
  return df.iloc[top_n_indexes]

### User Query

Create sample data

In [331]:
sample_description = "Taman Lalu Lintas Ade Irma Suryani Nasution"
sample_place_name = sample_description

In [332]:
def transform_description(sample_description):
  """Vectorize ``sample_description`` with the pickled TF-IDF vectorizer.

  Loads "tf_idf_vectorizer_descriptions.pkl" through ``transform_to_vector``
  and returns the transformed input.
  """
  vectorizer_path = "tf_idf_vectorizer_descriptions.pkl"
  return transform_to_vector(vectorizer_path, sample_description)
  
def transform(sample_place_name, sample_description):
  """Turn a single query description into its TF-IDF vector.

  Args:
    sample_place_name: Currently unused; only the description is vectorized.
    sample_description: Query text.

  Returns:
    The result of ``transform_description`` for the one-item list
    ``[sample_description]``.
  """
  # The description is wrapped in a list so that it is treated as one document.
  documents = [sample_description]
  return transform_description(documents)

all_vector_components = transform(
    sample_place_name,
    sample_description,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components, 
    "tourism_place_user_query_nearest_neighbors.pkl"
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

print(top_n_distances)
print(top_n_indexes_ranking.flatten())

Current Shape: (1, 6317)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)
[[1.00111186 1.38371153 1.38446355 1.38539839 1.38581057 1.39211797
  1.39304842 1.39312526 1.3953358  1.39901876]]
[239 346 250 352 394 292 433 392  56 402]


  'stop_words.' % sorted(inconsistent))


In [333]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
239,240,240,Taman Lalu Lintas Ade Irma Suryani Nasution,Taman Lalu-lintas Ade Irma Suryani adalah sebu...,Taman Hiburan,Bandung,7000,4.4,60.0,"{'lat': -6.911211, 'lng': 107.6133389}",-6.911211,107.613339,Wisata Taman Ade Irma Suryani Nasution (Taman ...,Taman Lalu Lintas Ade Irma Suryani Nasution Ta...,wisata taman ade irma suryani nasution taman l...
346,349,347,Taman Pandanaran,"Dalam sejarah yang tercatat, dulunya tempat in...",Taman Hiburan,Semarang,0,4.4,45.0,"{'lat': -6.987793600000001, 'lng': 110.4172262}",-6.987794,110.417226,Taman Srigunting adalah salah satu yang paling...,Taman Pandanaran adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
250,252,251,Taman Lansia,Berlibur santai di akhir pekan cocok dilakukan...,Taman Hiburan,Bandung,0,4.4,,"{'lat': -6.9021326, 'lng': 107.6209387}",-6.902133,107.620939,Sedikit bergeser dari kota SBY dan masih di da...,Taman Lansia Sedikit bergeser dari SBY masih d...,sedikit geser dari kota sby dan masih di daera...
352,355,353,Taman Srigunting,Merupakan salah satu landmark di Kawasan Kota ...,Taman Hiburan,Semarang,0,4.7,,"{'lat': -6.9681728, 'lng': 110.4278262}",-6.968173,110.427826,Taman Srigunting adalah salah satu yang paling...,Taman Srigunting adalah salah satu yang paling...,taman srigunting adalah salah satu yang paling...
394,398,395,Taman Prestasi,Taman Prestasi Surabaya merupakan salah satu t...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.2614722, 'lng': 112.7428284}",-7.261472,112.742828,Taman Prestasi merupakan taman kota yang terle...,Taman Prestasi merupakan yang terletak di Tang...,taman prestasi rupa taman kota yang letak di t...
292,295,293,Taman Badak,Taman Badak ini baru saja diresmikan pada tang...,Taman Hiburan,Bandung,0,4.5,,"{'lat': -6.9132752, 'lng': 107.6094908}",-6.913275,107.609491,Liburan ke Bandung nggak pas kalau berburu tam...,Taman Badak Liburan ke nggak pas kalau berburu...,libur ke bandung nggak pas kalau buru taman hi...
433,438,434,Taman Bungkul,Taman Bungkul adalah taman wisata kota yang te...,Taman Hiburan,Surabaya,0,4.6,,"{'lat': -7.291346799999999, 'lng': 112.7398218}",-7.291347,112.739822,"Tidak mengherankan, karena di sini pengunjung ...",Taman Bungkul Tidak mengherankan karena di sin...,tidak heran karena di sini kunjung dapat duduk...
392,396,393,Taman Harmoni Keputih,Tempat tersebut ialah Taman Hatmoni Keputih Su...,Cagar Alam,Surabaya,0,4.4,60.0,"{'lat': -7.2952211, 'lng': 112.8035603}",-7.295221,112.80356,Salah satu tempat bersantai di Surabaya yang b...,Taman Harmoni Keputih Salah satu tempat bersan...,salah satu tempat santai di surabaya yang bany...
56,56,57,Taman Lapangan Banteng,"Lapangan Banteng, dulu bernama Waterlooplein (...",Taman Hiburan,Jakarta,0,4.7,,"{'lat': -6.170554999999999, 'lng': 106.8350378}",-6.170555,106.835038,"Selanjutnya, bagi pengunjung yang membawa anak...",Taman Lapangan Banteng Selanjutnya bagi pengun...,lanjut bagi kunjung yang bawa anak bisa guna a...
402,406,403,Taman Barunawati,Taman Barunawati yang lokasinya berada di kota...,Taman Hiburan,Surabaya,0,4.2,30.0,"{'lat': -7.222055899999998, 'lng': 112.7319967}",-7.222056,112.731997,Tempat wisata di Surabaya ini memang memiliki ...,Taman Barunawati Tempat di ini memang memiliki...,tempat wisata di surabaya ini memang milik ars...


### User Location

Create sample data

In [334]:
# sample_categories = "Taman Hiburan"
sample_cities = "Yogyakarta Jakarta"
sample_lat = -6.302
sample_long = 106.90

In [335]:
def transform_city(sample_cities):
  """Vectorize ``sample_cities`` with the pickled city count vectorizer.

  Loads "city_count_vectorizer.pkl" through ``transform_to_vector`` and
  returns the transformed input.
  """
  vectorizer_path = "city_count_vectorizer.pkl"
  return transform_to_vector(vectorizer_path, sample_cities)

def transform_description(sample_description):
  """Vectorize ``sample_description`` with the pickled TF-IDF vectorizer.

  NOTE(review): this repeats the ``transform_description`` definition from the
  "User Query" section with the same behaviour; the later definition replaces
  the earlier one when the notebook runs top to bottom.
  """
  tf_idf_path = "tf_idf_vectorizer_descriptions.pkl"
  return transform_to_vector(tf_idf_path, sample_description)

def transform(sample_cities,
              sample_latitude,
              sample_longitude):
  """Build the feature row for a location-based query.

  Args:
    sample_cities: City text, vectorized as a single document.
    sample_latitude: Latitude of the query location.
    sample_longitude: Longitude of the query location.

  Returns:
    A CSR matrix made of the city vector, then the longitude, then the
    latitude, stacked column-wise.
  """
  city_vector_component = transform_city([sample_cities])

  # Block order: city counts, longitude, latitude.
  feature_blocks = [city_vector_component, sample_longitude, sample_latitude]
  return hstack(feature_blocks, format = 'csr')

all_vector_components = transform(
    # sample_categories,
    sample_cities,
    sample_lat,
    sample_long,
)

top_n_distances, top_n_indexes_ranking = recommend_travelling_places_using_knn(
    all_vector_components,
    "tourism_place_user_location_nearest_neighbors.pkl",
)

print(f"Current Shape: {all_vector_components.shape}")
print(f"Top N Distances shape: {top_n_distances.shape}")
print(f"K nearest neighbors scores: {top_n_indexes_ranking.shape}")

# print(top_n_distances)
# print(top_n_indexes_ranking.flatten())

Current Shape: (1, 7)
Top N Distances shape: (1, 10)
K nearest neighbors scores: (1, 10)


In [336]:
get_top_n_recommendations_based_on_similarity_scores(data_df, top_n_indexes_ranking.flatten())

Unnamed: 0,Index,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Summarized_Description,Preprocessed_Summarized_Description,Preprocessed_Description
72,72,73,Museum Perangko,Museum perangko dibangun dengan bentuk banguna...,Budaya,Jakarta,5000,4.4,,"{'lat': -6.304184100000001, 'lng': 106.9017725}",-6.304184,106.901773,"Seperti halnya, museum perangko yang ada di ka...",Museum Perangko Seperti halnya yang ada di Min...,seperti hal museum perangko yang ada di kawasa...
3,3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,Bisa dibilang kalau Taman Mini Indonesia Indah...,Taman Mini Indonesia Indah TMII Bisa dibilang ...,bisa bilang kalau taman mini indonesia indah a...
61,61,62,Taman Legenda Keong Emas,Taman Legenda Keong Emas merupakan salah satu ...,Taman Hiburan,Jakarta,30000,4.5,,"{'lat': -6.303904200000001, 'lng': 106.8936219}",-6.303904,106.893622,Kawasan Petualangan Dinosaurus Taman Legenda K...,Taman Legenda Keong Emas Petualangan Dinosauru...,kawasan tualang dinosaurus taman legenda keong...
74,74,75,SnowBay Waterpark,Waterpark yang menghadirkan wahana imajinatif ...,Taman Hiburan,Jakarta,180000,4.3,,"{'lat': -6.299271800000001, 'lng': 106.89151}",-6.299272,106.89151,Wisata SnowBay Waterpark di Cipayung DKI Jakar...,SnowBay Waterpark di Cipayung DKI Timur adalah...,wisata snowbay waterpark di cipayung dki jakar...
53,53,54,Taman Agrowisata Cilangkap,Taman seluas sekitar 19 hektar ini dimiliki ol...,Taman Hiburan,Jakarta,0,4.2,,"{'lat': -6.340151399999999, 'lng': 106.9012561}",-6.340151,106.901256,Jika Anda merindukan sebuah taman yang luas le...,Taman Agrowisata Cilangkap Jika Anda merinduka...,jika anda rindu buah taman yang luas lengkap d...
27,27,28,Wisata Agro Edukatif Istana Susu Cibugary,Kawasan Wisata Agro Edukatif Istana Susu â€œCi...,Taman Hiburan,Jakarta,35000,4.5,,"{'lat': -6.3568093, 'lng': 106.9062371}",-6.356809,106.906237,Kali ini kami akan merekomendasikan beberapa d...,Wisata Agro Edukatif Istana Susu Cibugary ini ...,kali ini kami akan rekomendasi beberapa daftar...
36,36,37,Bumi Perkemahan Cibubur,Bumi Perkemahan dan Graha Wisata Pramuka Cibub...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.360248700000001, 'lng': 106.893842}",-6.360249,106.893842,Menyadari pentingnya fungsi RTH membuat Pemeri...,Bumi Perkemahan Cibubur Menyadari pentingnya f...,sadar penting fungsi rth buat perintah dki-jak...
70,70,71,Cibubur Garden Diary (Cibugary),Cibubur Garden Dairy atau biasa dikenal degan ...,Cagar Alam,Jakarta,50000,4.5,,"{'lat': -6.379996900000001, 'lng': 106.8949793}",-6.379997,106.894979,Cibubur Garden Diary merupakan objek wisata ed...,Cibubur Garden Diary Cibugary merupakan objek ...,cibubur garden diary rupa objek wisata edukasi...
78,78,79,Taman Spathodea,Objek Wisata Taman Spathodea di Jagakarsa DKI ...,Taman Hiburan,Jakarta,0,4.6,30.0,"{'lat': -6.3226133, 'lng': 106.8241871}",-6.322613,106.824187,Objek Wisata Taman Spathodea di Jagakarsa DKI ...,Taman Spathodea Objek di Jagakarsa DKI adalah ...,objek wisata taman spathodea di jagakarsa dki ...
76,76,77,Taman Hutan Tebet,"Taman Tebet dibangun tahun 1960, taman ini sem...",Taman Hiburan,Jakarta,0,4.4,60.0,"{'lat': -6.237140900000001, 'lng': 106.8526505}",-6.237141,106.85265,simak juga: tempat wisata di jakarta Rute menu...,Taman Hutan Tebet simak juga tempat di Rute me...,simak juga tempat wisata di jakarta rute tuju ...
