In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

In [2]:
rating=pd.read_csv('tourism_rating.csv')

In [3]:
rating

Unnamed: 0,User_Id,Place_Id,Place_Ratings,Location,Age
0,1,179,3,"Semarang, Jawa Tengah",20
1,1,344,2,"Semarang, Jawa Tengah",20
2,1,5,5,"Semarang, Jawa Tengah",20
3,1,373,3,"Semarang, Jawa Tengah",20
4,1,101,4,"Semarang, Jawa Tengah",20
...,...,...,...,...,...
9995,300,425,2,"Ponorogo, Jawa Timur",26
9996,300,64,4,"Ponorogo, Jawa Timur",26
9997,300,311,3,"Ponorogo, Jawa Timur",26
9998,300,279,4,"Ponorogo, Jawa Timur",26


#### group by user

In [4]:
placename=pd.read_csv('tourism_with_id.csv')
placename.sample(1)

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Unnamed: 11,Unnamed: 12
26,27,Sea World,Seaworld Indonesia adalah sebuah miniatur peso...,Taman Hiburan,Jakarta,115000,4.5,180.0,"{'lat': -6.126477500000001, 'lng': 106.842963}",-6.126478,106.842963,,27


In [5]:
rating_merged=pd.merge(rating, placename, how='left', on='Place_Id')
rating_merged.head(5)

Unnamed: 0,User_Id,Place_Id,Place_Ratings,Location,Age,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Unnamed: 11,Unnamed: 12
0,1,179,3,"Semarang, Jawa Tengah",20,Candi Ratu Boko,Situs Ratu Baka atau Candi Boko (Hanacaraka:ꦕꦤ...,Budaya,Yogyakarta,75000,4.6,90.0,"{'lat': -7.7705416, 'lng': 110.4894158}",-7.770542,110.489416,,179
1,1,344,2,"Semarang, Jawa Tengah",20,Pantai Marina,"Pantai Marina (bahasa Jawa: ꦥꦱꦶꦱꦶꦂ​ꦩꦫꦶꦤ, trans...",Bahari,Semarang,3000,4.1,,"{'lat': -6.948877, 'lng': 110.3893285}",-6.948877,110.389329,,344
2,1,5,5,"Semarang, Jawa Tengah",20,Atlantis Water Adventure,Atlantis Water Adventure atau dikenal dengan A...,Taman Hiburan,Jakarta,94000,4.5,60.0,"{'lat': -6.12419, 'lng': 106.839134}",-6.12419,106.839134,,5
3,1,373,3,"Semarang, Jawa Tengah",20,Museum Kereta Ambarawa,Museum Kereta Api Ambarawa (bahasa Inggris: In...,Budaya,Semarang,10000,4.5,60.0,"{'lat': -7.264598899999997, 'lng': 110.4046017}",-7.264599,110.404602,,373
4,1,101,4,"Semarang, Jawa Tengah",20,Kampung Wisata Sosro Menduran,Kampung wisata Sosromenduran merupakan kampung...,Budaya,Yogyakarta,0,4.0,,"{'lat': -7.792189999999999, 'lng': 110.362151}",-7.79219,110.362151,,101


In [6]:
placeratings = rating_merged.groupby('User_Id')['Place_Ratings']
placeratings = pd.DataFrame(placeratings.count())
placeratings

Unnamed: 0_level_0,Place_Ratings
User_Id,Unnamed: 1_level_1
1,30
2,29
3,30
4,26
5,33
...,...
296,36
297,36
298,30
299,39


In [7]:
train_data, test_data = train_test_split(rating, test_size =.20, random_state=42)
print(f'Data Train shape: {train_data.shape}')
print(f'Data Test shape: {test_data.shape}')

Data Train shape: (8000, 5)
Data Test shape: (2000, 5)


# Mean Ratings Predictions

In [8]:
class RatingsRecommender():
    def __init__(self, train_data, test_data, user_id, place_id):
        self.train_data = train_data
        self.test_data = test_data
        self.user_id = user_id
        self.place_id = place_id
        
    #Membuat model rekomendasi
    def model_fit(self):
        #Membuat score rekomendasi dari popularitas
        grouped_train = train_data.groupby([self.place_id]).agg({self.user_id: 'count'}).reset_index()
        grouped_train['score']=grouped_train['User_Id']
        grouped_train.drop('User_Id',axis=1,inplace=True)
        #Sorting berdasarkan score
        sorted_train = grouped_train.sort_values(['score', self.place_id], ascending = [0,1])
        #Ranking tempat berdasarkan score
        sorted_train['Rank'] = sorted_train['score'].rank(ascending=False, method='first')
        #Top places
        self.top_places = sorted_train.head(50)

    #Hasil Rekomendasi
    def recommendations(self, user_id, City, n):    
        #Filter hanya place yang dirating user
        ratedPlace = train_data[train_data[self.user_id] == user_id][self.place_id] 
        recom_result = self.top_places[~self.top_places[self.place_id].isin(ratedPlace)]
        recom_result.insert(0,'UserId',user_id)
        recom_result_merged=pd.merge(recom_result, placename, how='left', on='Place_Id')
        recom_result_merged= recom_result_merged.loc[recom_result_merged['City']==City]
        return recom_result_merged.head(n)
        
    def predictions(self):        
        ratings_mean = pd.DataFrame(train_data.groupby(self.place_id)['Place_Ratings'].mean())
        pred_ratings = []            
        #Membuat prediksi dengan rata-rata rating
        for df in self.test_data.values:
            if(df[1] in (ratings_mean.index)):
                pred_ratings.append(ratings_mean.loc[df[1]])
            else:
                pred_ratings.append(0)
        pred=pd.DataFrame(pred_ratings)
        self.predictions_result=pred
        return print(f'Prediction results:\n{pd.DataFrame(pred)}') 
    
    def eval(self):
        mse = mean_squared_error(self.test_data['Place_Ratings'], self.predictions_result)
        rmse = sqrt(mse)
        return print(f'MSE: {mse}\nRMSE: {rmse}')

In [9]:
result = RatingsRecommender(train_data=train_data, test_data=test_data, user_id='User_Id', place_id='Place_Id')

In [10]:
result.model_fit()

In [11]:
result.predictions()

Prediction results:
     Place_Ratings
207       2.809524
268       3.055556
103       3.000000
119       2.722222
288       2.750000
..             ...
47        3.041667
292       3.533333
336       3.360000
176       3.500000
222       2.857143

[2000 rows x 1 columns]


In [12]:
result.eval()

MSE: 1.974896680506201
RMSE: 1.4053101723485106


In [13]:
result.recommendations('1','Jakarta',10)

Unnamed: 0,UserId,Place_Id,score,Rank,Place_Name,Description,Category,City,Price,Rating,Time_Minutes,Coordinate,Lat,Long,Unnamed: 11,Unnamed: 12
5,1,55,28,6.0,Klenteng Jin De Yuan,"Kelenteng Kim Tek Le, atau yang lebih dikenal ...",Tempat Ibadah,Jakarta,0,4.5,,"{'lat': -6.144473700000001, 'lng': 106.8126018}",-6.144474,106.812602,,55
11,1,29,27,12.0,Wisata Kuliner Pecenongan,Pecenongan merupakan salah satu surga kuliner ...,Pusat Perbelanjaan,Jakarta,0,5.0,,"{'lat': -6.166788700000001, 'lng': 106.8265261}",-6.166789,106.826526,,29
12,1,56,27,13.0,Pantai Ancol,Pantai Ancol merupakan kawasan wisata yang mer...,Bahari,Jakarta,25000,4.4,,"{'lat': -6.1194215, 'lng': 106.8502435}",-6.119421,106.850244,,56
26,1,68,25,27.0,Pasar Petak Sembilan,"Pecinan Glodok, tepatnya di Pasar Petak Sembil...",Pusat Perbelanjaan,Jakarta,0,4.4,45.0,"{'lat': -6.143672200000001, 'lng': 106.8129398}",-6.143672,106.81294,,68
41,1,13,24,42.0,Pulau Pramuka,Pulau Pramuka merupakan salah satu pulau yang ...,Bahari,Jakarta,5000,4.2,,"{'lat': -5.745962, 'lng': 106.6136577}",-5.745962,106.613658,,13
42,1,39,24,43.0,Museum Macan (Modern and Contemporary Art in N...,Museum Seni Modern dan Kontemporer di Nusantar...,Budaya,Jakarta,100000,4.5,120.0,"{'lat': -6.190942, 'lng': 106.767622}",-6.190942,106.767622,,39
43,1,47,24,44.0,Taman Situ Lembang,Taman Situ Lembang adalah sebuah taman kota ya...,Taman Hiburan,Jakarta,0,4.5,,"{'lat': -6.1978861, 'lng': 106.8346708}",-6.197886,106.834671,,47
