# Recommendation System: Item-Based Collaborative Filtering

In [9]:
! pip install surprise
! pip install nlp-id

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Library Import

In [10]:
import pandas as pd
import numpy as np
import sklearn

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nlp_id.tokenizer import Tokenizer
from nlp_id.lemmatizer import Lemmatizer

import surprise

## Data Loading

In [11]:
# Location of the tourism catalogue, relative to the notebook's working directory.
dataset_path = "tourism_with_id.csv"

# Parse the CSV and discard the "Unnamed: 11" / "Unnamed: 12" columns in one
# chained expression (pandas labels header-less columns "Unnamed: N").
data_df = pd.read_csv(dataset_path, sep=",").drop(
    columns=["Unnamed: 11", "Unnamed: 12"]
)

# Preview the first five rows.
data_df.head()

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data_df.describe()

Unnamed: 0,Place_Id,Price,Rating,Time_Minutes,Lat,Long
count,437.0,437.0,437.0,205.0,437.0,437.0
mean,219.0,24652.173913,4.442792,82.609756,-7.095438,109.160142
std,126.295289,66446.374709,0.208587,52.872339,0.727241,1.962848
min,1.0,0.0,3.4,10.0,-8.197894,103.931398
25%,110.0,0.0,4.3,45.0,-7.74959,107.578369
50%,219.0,5000.0,4.5,60.0,-7.020524,110.237468
75%,328.0,20000.0,4.6,120.0,-6.829411,110.431869
max,437.0,900000.0,5.0,360.0,1.07888,112.821662


Attribute Variables

In [19]:
# Column-name constants, kept in one place so the cells that use them do not
# hard-code the literal strings.
# Column holding the free-text description of each place.
description_field = "Description"
# Column holding the category label of each place.
category_field = "Category"
# Name of the column that receives the lemmatized descriptions.
tokenized_description_field = "Tokenized_Description"

## Data Preprocessing

Lemmatize each place description and store the result in a new column.

In [20]:
# Lemmatizer from the nlp-id package, applied to every description below.
indo_lemmatizer = Lemmatizer()

# Iterate over the description column directly instead of DataFrame.iterrows():
# iterrows() builds a full Series for every row just to read one field.
# The list keeps the same order as the rows of data_df.
tokenized_descriptions = [
    indo_lemmatizer.lemmatize(description)
    for description in data_df[description_field]
]

# Store the lemmatized text next to the original description.
data_df[tokenized_description_field] = tokenized_descriptions
data_df.head(5)

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Tokenized_text,Tokenized_Description
0,1,Monumen Nasional,Monumen Nasional atau yang populer disingkat d...,Budaya,Jakarta,20000,4.6,15.0,"{'lat': -6.1753924, 'lng': 106.8271528}",-6.175392,106.827153,gereja katolik lahir santa perawan maria rupa ...,monumen nasional atau yang populer singkat den...
1,2,Kota Tua,"Kota tua di Jakarta, yang juga bernama Kota Tu...",Budaya,Jakarta,0,4.6,90.0,"{'lat': -6.137644799999999, 'lng': 106.8171245}",-6.137645,106.817125,gereja katolik lahir santa perawan maria rupa ...,kota tua di jakarta yang juga nama kota tua pu...
2,3,Dunia Fantasi,Dunia Fantasi atau disebut juga Dufan adalah t...,Taman Hiburan,Jakarta,270000,4.6,360.0,"{'lat': -6.125312399999999, 'lng': 106.8335377}",-6.125312,106.833538,gereja katolik lahir santa perawan maria rupa ...,dunia fantasi atau sebut juga dufan adalah tem...
3,4,Taman Mini Indonesia Indah (TMII),Taman Mini Indonesia Indah merupakan suatu kaw...,Taman Hiburan,Jakarta,10000,4.5,,"{'lat': -6.302445899999999, 'lng': 106.8951559}",-6.302446,106.895156,gereja katolik lahir santa perawan maria rupa ...,taman mini indonesia indah rupa suatu kawasan ...
4,5,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,gereja katolik lahir santa perawan maria rupa ...,atlantis water adventure atau kenal dengan atl...


Transform Sentence into TF-IDF Matrix.