In [1]:
# library for data analysis
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics.pairwise import cosine_similarity 

import os 

## Data preparation

In [2]:
destination = pd.read_csv('../dataset/destination.csv')

In [3]:
# new variable for content based filtering 
place_id = destination['id'].unique().tolist()
place_name = destination['name_wisata'].unique().tolist()
place_category = destination['category'].tolist()

In [4]:
# dictionary for place_recommend
place_recommend = pd.DataFrame({
    'place_id' : place_id,
    'place_name' : place_name,
    'place_category' : place_category
})
place_recommend

Unnamed: 0,place_id,place_name,place_category
0,1,Monumen Nasional,Budaya
1,2,Kota Tua,Budaya
2,3,Dunia Fantasi,Taman Hiburan
3,4,Taman Mini Indonesia Indah (TMII),Taman Hiburan
4,5,Atlantis Water Adventure,Taman Hiburan
...,...,...,...
432,433,Museum Mpu Tantular,Budaya
433,434,Taman Bungkul,Taman Hiburan
434,435,Taman Air Mancur Menari Kenjeran,Taman Hiburan
435,436,Taman Flora Bratang Surabaya,Taman Hiburan


In [5]:
# data frame for content based filtering 
data_content = place_recommend
data_content

Unnamed: 0,place_id,place_name,place_category
0,1,Monumen Nasional,Budaya
1,2,Kota Tua,Budaya
2,3,Dunia Fantasi,Taman Hiburan
3,4,Taman Mini Indonesia Indah (TMII),Taman Hiburan
4,5,Atlantis Water Adventure,Taman Hiburan
...,...,...,...
432,433,Museum Mpu Tantular,Budaya
433,434,Taman Bungkul,Taman Hiburan
434,435,Taman Air Mancur Menari Kenjeran,Taman Hiburan
435,436,Taman Flora Bratang Surabaya,Taman Hiburan


In [6]:
tfidf= TfidfVectorizer()
tfidf.fit(data_content['place_name'])

In [7]:
# change data into matrix 
tfidf_matrix = tfidf.fit_transform(data_content['place_name'])
tfidf_matrix.shape

(437, 712)

In [8]:
# change vector TF-IDF into matrix 
tfidf_matrix.todense()

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [9]:
# calculate the cosine similairty between the place name and each entry in 
cosine_similarity = cosine_similarity(tfidf_matrix)
cosine_similarity

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.07709925,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.07709925, 1.        ,
        0.13442662],
       [0.        , 0.        , 0.        , ..., 0.        , 0.13442662,
        1.        ]])

In [10]:
df_cosine_similarity = pd.DataFrame(cosine_similarity, index = data_content['place_name'], columns = data_content['place_name'])
print('Shape : ', df_cosine_similarity.shape)

Shape :  (437, 437)


## Model

In [11]:
# recommendation function for content based filtering 

def content_rec(place_name, similarity_data = df_cosine_similarity, items = data_content[['place_name', 'place_category']], k = 5) :
  index = similarity_data.loc[:, place_name].to_numpy().argpartition(range(-1, -k, -1))

  closest = similarity_data.columns[index[-1:-(k+2): -1]]
  closest = closest.drop(place_name, errors = 'ignore')

  return pd.DataFrame(closest).merge(items).head(k)

## Output

In [12]:
# test the function 
result = content_rec('Alun-Alun Kota Bandung')

In [13]:
destination = destination.rename(columns={'name_wisata' : 'place_name'})

In [14]:
merged_df = pd.merge(result, destination, on='place_name')
merged_df 

Unnamed: 0,place_name,place_category,id,description_wisata,category,destination_photo,city,price,rating,time_minutes,coordinate,destination_lat,destination_long
0,Alun Alun Selatan Yogyakarta,Taman Hiburan,96,Alunalun Selatan atau yang sekarang lebih dike...,Taman Hiburan,lala.png,Yogyakarta,0,4.6,60.0,"{'lat': -7.8116719, 'lng': 110.363238}",-7.81167,110.3632
1,Alun-alun Utara Keraton Yogyakarta,Budaya,125,Alunalun utara atau dalam Bahasa Jawa disebut ...,Budaya,lala.png,Yogyakarta,0,4.6,60.0,"{'lat': -7.803897499999998, 'lng': 110.3644232}",-7.8039,110.3644
2,Taman Balai Kota Bandung,Taman Hiburan,236,Taman Balai Kota Bandung merupakan sebuah tama...,Taman Hiburan,lala.png,Bandung,0,4.6,45.0,"{'lat': -6.912966000000001, 'lng': 107.6096031}",-6.91297,107.6096
3,Kota Mini,Taman Hiburan,333,Destinasi yang sangat menarik bernuansa eropa ...,Taman Hiburan,lala.png,Bandung,20000,4.4,,"{'lat': -6.818688799999999, 'lng': 107.6169403}",-6.81869,107.6169
4,Kota Tua,Budaya,2,Kota tua di Jakarta yang juga bernama Kota Tua...,Budaya,lala.png,Jakarta,5000,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.13764,106.8171
