# ITEM-BASED recommender system

# Packages

In [None]:
import pandas as pd 
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import seaborn as sns
import matplotlib.pyplot as plt
import sklearn

# Dataset

In [None]:
data_skripsi=pd.read_csv('FIX_DATASKRIPSI_NEW.CSV', encoding= 'unicode_escape')
data_skripsi

In [None]:
ratings=data_skripsi[['User_id','Item_id','Rating']]
ratings

In [None]:
hotel=data_skripsi[['Item_id','Item_name','PostDate', 'Bintang_Hotel']]
hotel

In [None]:
unique_user = ratings.User_id.nunique(dropna = True)
unique_hotel = ratings.Item_id.nunique(dropna = True)
print("number of unique user:")
print(unique_user)
print("number of unique hotel:")
print(unique_hotel)

In [None]:
#Sparsity Data
total_ratings = unique_user*unique_hotel
rating_present = ratings.shape[0]

ratings_not_provided = total_ratings - rating_present 

print("ratings not provided berarti beberapa user tidak menginap dan memberikan rating di beberapa hotel")
print(ratings_not_provided)
print("Nilai Sparsity dari data :")
print(ratings_not_provided / total_ratings)

# Exploratory Data Analysis (EDA)

In [None]:
# Deskripsikan dataset
data_skripsi.describe()

# Lihat informasi dataset
data_skripsi.info()

In [None]:
# 1)plot ratings count which gives information about which rating(on scale of 0 t0 5) is more frequent
rating_hotel = pd.DataFrame(ratings.groupby('Rating').size(),columns=['count'])
rating_hotel

In [None]:
# plot a bar plot to visualise the ratings
rating_hotel_for_vis = rating_hotel
ax = rating_hotel_for_vis.reset_index().rename(columns = {'index':'Rating'}).plot(
    x='Rating',
    y='count',
    logy = True,
    kind='bar',
    title='count for each rating',
    figsize=(12,6)
)
ax.set_xlabel('Rating')
ax.set_ylabel('count of each rating')
print("Jumlah untuk setiap rating")

In [None]:
# 2) plot rating frequency of each hotel(how many time a movie has been rated)
hotel_freq = pd.DataFrame(ratings.groupby('Item_id').size(),columns=['count'])
hotel_freq.head()

In [None]:
# plot rating freq
ax  = hotel_freq.sort_values('count',ascending=False).reset_index(drop=True).plot(
      title='rating freq by item',
      figsize=(12,6),
      
)
ax.set_xlabel("item")
ax.set_ylabel("rating frequency")

In [None]:
user_htl = pd.DataFrame(ratings.groupby('User_id').size(),columns=['count'])
user_htl.head()

In [None]:
# plot rating freq
ax  = user_htl.sort_values('count',ascending=False).reset_index(drop=True).plot(
      title='rating freq by user',
      figsize=(12,6),
      
)
ax.set_xlabel("users")
ax.set_ylabel("rating frequency")

In [None]:
n_ratings = len(ratings)
n_hotel= ratings['Item_id'].nunique()
n_users = ratings['User_id'].nunique()

print(f"Number of ratings: {n_ratings}")
print(f"Number of unique Item id: {n_hotel}")
print(f"Number of unique users: {n_users}")
print(f"Average number of ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average number of ratings per hotel: {round(n_ratings/n_hotel, 2)}")

In [None]:
user_freq = ratings[['User_id', 'Item_id']].groupby('User_id').count().reset_index()
user_freq.columns = ['User_id', 'n_ratings']
user_freq.head()

In [None]:
sns.set_style("whitegrid")
plt.figure(figsize=(35,10))
plt.subplot()
ax = sns.countplot(x="Rating", data=ratings, palette="viridis")
plt.title("Distribution of hotel ratings")


In [None]:
plt.figure(figsize=(35,5))
plt.subplot(1,2,1)
ax = sns.kdeplot(user_freq['n_ratings'], shade=True, legend=False)
plt.axvline(user_freq['n_ratings'].mean(), color="k", linestyle="--")
plt.xlabel("# ratings per user")
plt.ylabel("density")

plt.title("Number of hotel rated per user")
plt.show()

In [None]:
skirpsii=data_skripsi.drop_duplicates('Item_name')
skirpsii.head()

In [None]:
# 2) plot rating frequency of each hotel(how many time a movie has been rated)
bintang_freq = pd.DataFrame(skirpsii.groupby('Bintang_Hotel').size(),columns=['count'])
bintang_freq.head()

In [None]:

plt.figure(figsize=(10,5))
plt.subplot()
ax = sns.countplot(x="Bintang_Hotel", data=skirpsii, color="orange")
plt.title("Distribution of Bintang Hotel")

# MATRIX

In [76]:
#creating the user-item interaction matrix
data_matrix_kosong = data_skripsi.pivot_table(index='Item_name', columns='User_Name', values='Rating').fillna('Kosong')
data_matrix_kosong.head(10)

User_Name,A,A AGUSLIMI SHAFIRA PUTRI AP,A Dian Christianti,AA,AAN AMBARYATI,AB,ABDUL WAHAB ISKANDAR,AC,ACHMAT LUTHFI YAKIM,AD,...,yuri yah,yusuf satriyo,yuyun sutaji,yuzon erman,zahra azzahra,zainul arifin,zanni erdiawan saputra,zevina Liemahassana,ziah nur aisjah,zuraidah fitri lubis
Item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
@HOM Premiere Timoho,Kosong,Kosong,Kosong,5.0,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
ARTOTEL Suites Bianti - Yogyakarta,Kosong,Kosong,Kosong,5.0,Kosong,3.0,Kosong,Kosong,Kosong,4.0,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Abadi Hotel Malioboro Yogyakarta by Tritama Hospitality,5.0,Kosong,Kosong,3.6,Kosong,2.0,Kosong,Kosong,Kosong,Kosong,...,Kosong,Kosong,Kosong,4.0,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Adhisthana Hotel,Kosong,Kosong,Kosong,5.0,Kosong,Kosong,Kosong,Kosong,Kosong,1.6,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Airlangga Hotel,Kosong,Kosong,Kosong,4.0,Kosong,Kosong,4.0,Kosong,Kosong,Kosong,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Allstay Ecotel Yogyakarta,Kosong,Kosong,Kosong,5.0,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Amaris Hotel Malioboro,Kosong,Kosong,Kosong,5.0,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Arte Hotel Malioboro Yogyakarta,Kosong,Kosong,Kosong,4.0,Kosong,5.0,Kosong,Kosong,Kosong,Kosong,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Atrium Premiere Hotel Yogyakarta Ambarukmo,Kosong,Kosong,Kosong,4.0,Kosong,5.0,Kosong,Kosong,Kosong,Kosong,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong
Aveta Hotel Malioboro,Kosong,Kosong,Kosong,Kosong,Kosong,2.8,Kosong,Kosong,Kosong,3.6,...,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,Kosong,4.2,Kosong,Kosong


In [1]:
df = data_skripsi.pivot_table(index='Item_name', columns='User_Name', values='Rating').fillna(0)
df1 = df.copy()
df1.head()

NameError: name 'data_skripsi' is not defined

In [78]:
user_index = df1.columns.tolist()
user_index

['A',
 'A AGUSLIMI SHAFIRA PUTRI AP',
 'A Dian Christianti',
 'AA',
 'AAN AMBARYATI',
 'AB',
 'ABDUL WAHAB ISKANDAR',
 'AC',
 'ACHMAT LUTHFI YAKIM',
 'AD',
 'ADI PRANTA SEMBIRING KALOKO',
 'ADIADHA TRI ARIPRADINA pradina',
 'ADIANTO ADIANTO',
 'ADITYA APRIANTA',
 'ADNI MAIWASTIWI',
 'AE',
 'AF',
 'AFAN OKKY',
 'AFIAT NELLY',
 'AG',
 'AGENG PRATIWI',
 'AH',
 'AHMAD FIRDAUS',
 'AI',
 'AJ',
 'AJIANTO MULYAWAN',
 'AK',
 'AKBAR PRANANTA',
 'AL',
 'AL FATH FILSAFAH',
 'ALEX HENDRA SIREGAR',
 'ALFA SHALATINE TEWA',
 'AM',
 'AMC Hermawan Harry Nugroho',
 'AMELIA Dwi Rara Adistha',
 'AN',
 'ANDREANI SUCIANI TANZIHAH',
 'ANDRI SAPUTRO',
 'ANDRIANA KUMALA DEWI',
 'ANDRIE DARMA NUGROHO',
 'ANDY ANDY',
 'ANGGA KUSUMA WARDANA',
 'ANIS SETYANI',
 'ANITA DWI ASTUTI',
 'ANTONIUS FERNANDO KUSWARA',
 'ANWAR ANWAR',
 'AO',
 'AP',
 'AQ',
 'AQNI Hanifa',
 'AR',
 'ARIE SAEPULLOH',
 'ARIF BAYU AJI',
 'ARIFIN MAKFUL',
 'ARIS ARIYANTO',
 'ARNO ALAM',
 'ARTIKA Sutopo',
 'ARYA JABAR',
 'AS',
 'AT',
 'AU',
 'AULIA

# Cosine Similarity

In [138]:
# define the model
from sklearn.neighbors import NearestNeighbors

In [139]:
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(df.values)
distances, indices = knn.kneighbors(df.values, n_neighbors=10)

In [140]:
indices

array([[  0,  36,  61, ..., 101,  49,  40],
       [  1,  36,  52, ...,  27,   4,  12],
       [  2, 104,  36, ...,  11,  61,  76],
       ...,
       [107,   5,   8, ...,  41, 101,  70],
       [108,  52,   5, ...,  87,  23,  59],
       [109,  31,  26, ...,   3,   2,  60]], dtype=int64)

In [141]:
user_index = df.columns.tolist()
user_index

['A',
 'A AGUSLIMI SHAFIRA PUTRI AP',
 'A Dian Christianti',
 'AA',
 'AAN AMBARYATI',
 'AB',
 'ABDUL WAHAB ISKANDAR',
 'AC',
 'ACHMAT LUTHFI YAKIM',
 'AD',
 'ADI PRANTA SEMBIRING KALOKO',
 'ADIADHA TRI ARIPRADINA pradina',
 'ADIANTO ADIANTO',
 'ADITYA APRIANTA',
 'ADNI MAIWASTIWI',
 'AE',
 'AF',
 'AFAN OKKY',
 'AFIAT NELLY',
 'AG',
 'AGENG PRATIWI',
 'AH',
 'AHMAD FIRDAUS',
 'AI',
 'AJ',
 'AJIANTO MULYAWAN',
 'AK',
 'AKBAR PRANANTA',
 'AL',
 'AL FATH FILSAFAH',
 'ALEX HENDRA SIREGAR',
 'ALFA SHALATINE TEWA',
 'AM',
 'AMC Hermawan Harry Nugroho',
 'AMELIA Dwi Rara Adistha',
 'AN',
 'ANDREANI SUCIANI TANZIHAH',
 'ANDRI SAPUTRO',
 'ANDRIANA KUMALA DEWI',
 'ANDRIE DARMA NUGROHO',
 'ANDY ANDY',
 'ANGGA KUSUMA WARDANA',
 'ANIS SETYANI',
 'ANITA DWI ASTUTI',
 'ANTONIUS FERNANDO KUSWARA',
 'ANWAR ANWAR',
 'AO',
 'AP',
 'AQ',
 'AQNI Hanifa',
 'AR',
 'ARIE SAEPULLOH',
 'ARIF BAYU AJI',
 'ARIFIN MAKFUL',
 'ARIS ARIYANTO',
 'ARNO ALAM',
 'ARTIKA Sutopo',
 'ARYA JABAR',
 'AS',
 'AT',
 'AU',
 'AULIA

In [142]:
distances.tolist()

[[0.0,
  0.8571433438198809,
  0.8611054565210775,
  0.8688543818659077,
  0.869736824532931,
  0.8800017494097527,
  0.8804394100260928,
  0.887111988436904,
  0.8872870077667018,
  0.8879913881263506],
 [0.0,
  0.8184074682958716,
  0.8420453378080388,
  0.8421653995819647,
  0.8470463999481007,
  0.8486706261027888,
  0.8499382280285122,
  0.8516278059439173,
  0.8523673803994579,
  0.8530204218424149],
 [2.220446049250313e-16,
  0.8420931627571486,
  0.842219922379772,
  0.8533439500798518,
  0.8536747115237613,
  0.8566573819716634,
  0.8567425955801491,
  0.8570976009977654,
  0.8575038394576836,
  0.857945813856289],
 [0.0,
  0.8138970455317558,
  0.8165372562331302,
  0.8321408208958025,
  0.8326649136283187,
  0.8333722321573116,
  0.8370228499147171,
  0.8406237523812817,
  0.840940018367693,
  0.8416727615984255],
 [0.0,
  0.8191612613095052,
  0.8379684089174322,
  0.8504837787472995,
  0.8523673803994579,
  0.8544006011321377,
  0.8581760817407591,
  0.8599505147984631,
  

In [143]:
index_for_hotel = df.index.tolist()
index_for_hotel

[' @HOM Premiere Timoho',
 'ARTOTEL Suites Bianti - Yogyakarta',
 'Abadi Hotel Malioboro Yogyakarta by Tritama Hospitality',
 'Adhisthana Hotel ',
 'Airlangga Hotel',
 'Allstay Ecotel Yogyakarta',
 'Amaris Hotel Malioboro',
 'Arte Hotel Malioboro Yogyakarta',
 'Atrium Premiere Hotel Yogyakarta Ambarukmo',
 'Aveta Hotel Malioboro',
 'Banana Homestay Prawirodirjan',
 'Best City Hotel',
 'Brothers Inn Babarsari',
 'Burza Hotel Yogyakarta',
 'Cavinton Hotel Yogyakarta by Tritama Hospitality',
 'Chrissant Hotel',
 'Crystal Lotus Hotel Yogyakarta',
 "D'Senopati Malioboro Grand Hotel",
 'DKayon Hotel Demangan Yogyakarta',
 'Dem Ayem Heritage Guest House',
 'Density Living',
 'Dermaga Keluarga Hotel Wirobrajan',
 'EDU Hostel',
 'Eastparc Hotel Yogyakarta',
 'Eclipse Hotel',
 'Forriz Hotel Yogyakarta',
 'Front One Cabin Malioboro Yogyakarta',
 'GAIA Cosmo Hotel',
 'Grand Ambarrukmo Yogyakarta',
 'Grand Inna Malioboro',
 'Grand Kangen Hotel Urip Sumoharjo Yogyakarta',
 'Grand Keisha Yogyakarta',

In [153]:
index_for_hotel = df.index.tolist().index('Grand Zuri Malioboro Yogyakarta') # it returns 0
sim_hotel = indices[index_for_hotel].tolist() # make list for similar movies
hotel_similarity = distances[index_for_hotel].tolist() # the list for distances of similar movies
id_hotel = sim_hotel.index(index_for_hotel) # get the position of the movie itself in indices and distances
sim_hotel.remove(index_for_hotel) # remove the movie itself in indices
hotel_similarity.pop(id_hotel) # remove the movie itself in distances

print('The Nearest hotel to Grand Zuri Malioboro Yogyakarta:', sim_hotel)
print('The Similarity from Grand Zuri Malioboro Yogyakarta:', hotel_similarity)

The Nearest hotel to Grand Zuri Malioboro Yogyakarta: [22, 8, 14, 26, 5, 36, 55, 100, 102]
The Similarity from Grand Zuri Malioboro Yogyakarta: [0.8476903654193607, 0.8580900243793789, 0.8736078502904916, 0.8743211404596987, 0.8748282161212771, 0.8760537931912133, 0.8775212691631773, 0.8817595164651124, 0.8840326305395125]


In [145]:
hotel_distances

[0.8580900243793789,
 0.8736078502904916,
 0.8743211404596987,
 0.8748282161212771,
 0.8760537931912133,
 0.8775212691631773,
 0.8817595164651124,
 0.8840326305395125]

In [146]:
hotel_distances = [1-x for x in hotel_distances] # # inverse distance  
hotel_distances

[0.14190997562062113,
 0.1263921497095084,
 0.12567885954030134,
 0.12517178387872285,
 0.12394620680878665,
 0.1224787308368227,
 0.11824048353488759,
 0.11596736946048747]

In [147]:
sim_hotel

[22, 8, 14, 26, 5, 36, 55, 100, 102]

In [148]:
df.iloc[sim_hotel[0]]

User_Name
A                              3.0
A AGUSLIMI SHAFIRA PUTRI AP    0.0
A Dian Christianti             0.0
AA                             0.0
AAN AMBARYATI                  0.0
                              ... 
zainul arifin                  0.0
zanni erdiawan saputra         0.0
zevina Liemahassana            0.0
ziah nur aisjah                0.0
zuraidah fitri lubis           0.0
Name: EDU Hostel, Length: 6654, dtype: float64

Prediksi Grand Zuri oleh user AA

In [149]:
sum(hotel_similarity)

7.847904806029222

In [150]:
predicted_rating = (hotel_similarity[0]*df.iloc[sim_hotel[0],3] + 
                    hotel_similarity[1]*df.iloc[sim_hotel[1],3]+
                   hotel_similarity[2]*df.iloc[sim_hotel[2],3]+
                    hotel_similarity[3]*df.iloc[sim_hotel[3],3]
                   +hotel_similarity[4]*df.iloc[sim_hotel[4],3]+
                   hotel_similarity[5]*df.iloc[sim_hotel[5],3]+
                   hotel_similarity[6]*df.iloc[sim_hotel[6],3]+
                   hotel_similarity[7]*df.iloc[sim_hotel[7],3]+
                   hotel_similarity[8]*df.iloc[sim_hotel[8],3])/sum(hotel_similarity)
print(predicted_rating)

2.5778721400711517


In [151]:
rmse = (predicted_rating - 0)/12527
rmse

0.00020578527501166695

# Get the top-N recommendations For User

In [93]:
def recommend_hotels(user, num_recommended_hotel):

  print('Hotel yang sudah diberikan rating oleh {} yaitu \n'.format(user))

  for m in df[df[user] > 0][user].index.tolist():
    print(m)
  
  print('\n')

  recommended_hotel = []

  for m in df[df[user] == 0].index.tolist():

    index_df = df.index.tolist().index(m)
    predicted_rating = df1.iloc[index_df, df1.columns.tolist().index(user)]
    recommended_hotel.append((m, predicted_rating))

  sorted_rm = sorted(recommended_hotel, key=lambda x:x[1], reverse=True)
  
  print('Hasil rekomendasinya yaitu \n')
  rank = 1
  for recommended_hotel in sorted_rm[:num_recommended_hotel]:
    
    print('{}: {} - dengan prediksi rating: {}'.format(rank, recommended_hotel[0], recommended_hotel[1]))
    rank = rank + 1

In [94]:
def hotels_recommender(user, num_neighbors, num_recommendation):
  
  number_neighbors = num_neighbors

  knn = NearestNeighbors(metric='cosine', algorithm='brute')
  knn.fit(df.values)
  distances, indices = knn.kneighbors(df.values, n_neighbors=number_neighbors)

  user_index = df.columns.tolist().index(user)

  for m,t in list(enumerate(df.index)):
    if df.iloc[m, user_index] == 0:
      sim_hotels = indices[m].tolist()
      hotels_distances = distances[m].tolist()
    
      if m in sim_hotels:
        id_hotels = sim_hotels.index(m)
        sim_hotels.remove(m)
        hotels_distances.pop(id_hotels) 

      else:
        sim_hotels = sim_hotels[:num_neighbors-1]
        hotels_distances = hotels_distances[:num_neighbors-1]
           
      hotels_similarity = [1-x for x in hotels_distances]
      hotels_similarity_copy = hotels_similarity.copy()
      nominator = 0

      for s in range(0, len(hotels_similarity)):
        if df.iloc[sim_hotels[s], user_index] == 0:
          if len(hotels_similarity_copy) == (number_neighbors - 1):
            hotels_similarity_copy.pop(s)
          
          else:
            hotels_similarity_copy.pop(s-(len(hotels_similarity)-len(hotels_similarity_copy)))
            
        else:
          nominator = nominator + hotels_similarity[s]*df.iloc[sim_hotels[s],user_index]
          
      if len(hotels_similarity_copy) > 0:
        if sum(hotels_similarity_copy) > 0:
          predicted_r = nominator/sum(hotels_similarity_copy)
        
        else:
          predicted_r = 0

      else:
        predicted_r = 0
        
      df1.iloc[m,user_index] = predicted_r
  recommend_hotels(user,num_recommendation)

In [95]:
hotels_recommender('Adela Silfi Pritissiya', 10, 10)

Hotel yang sudah diberikan rating oleh Adela Silfi Pritissiya yaitu 

Allstay Ecotel Yogyakarta
Atrium Premiere Hotel Yogyakarta Ambarukmo
Hotel Indah Palace Yogyakarta


Hasil rekomendasinya yaitu 

1: OYO 1319 88 Exclusive Guesthouse - dengan prediksi rating: 3.4
2: Omah Qu Guesthouse Malioboro - dengan prediksi rating: 3.4
3: ARTOTEL Suites Bianti - Yogyakarta - dengan prediksi rating: 3.2
4: Cavinton Hotel Yogyakarta by Tritama Hospitality - dengan prediksi rating: 3.2
5: Grove Hotel Yogyakarta - dengan prediksi rating: 3.2
6: Horison Ultima Riss Malioboro Yogyakarta - dengan prediksi rating: 3.2
7: Hotel Kristina Malioboro - dengan prediksi rating: 3.2
8: Hotel Sumaryo - dengan prediksi rating: 3.2
9: KJ Hotel Yogyakarta - dengan prediksi rating: 3.2
10: Malioboro Palace Hotel - dengan prediksi rating: 3.2


# Evaluation with Root Mean Square Error (RMSE)

In [96]:
distances, indices = knn.kneighbors(df.values, n_neighbors=3)

In [97]:
indices

array([[  0,  36,  61],
       [  1,  36,  52],
       [  2, 104,  36],
       [  3,   5,  60],
       [  4,   5,  36],
       [  5,   8,  59],
       [  6,  36,  15],
       [  7, 101,  22],
       [  8,   5,  41],
       [  9,  31,  36],
       [ 10,  86,  19],
       [ 11,  12, 103],
       [ 12,  60, 101],
       [ 13,  20,  70],
       [ 14, 102,  59],
       [ 15,   5,   6],
       [ 16, 102,  14],
       [ 17,  78,  12],
       [ 18,   5,  22],
       [ 19,  18,  88],
       [ 20,  13,  61],
       [ 21,  36,  40],
       [ 22,  36,  18],
       [ 23,   5,  41],
       [ 24,  59,  36],
       [ 25,  60,  16],
       [ 26, 104,  64],
       [ 27, 102,  12],
       [ 28,  59,  12],
       [ 29,   5,  36],
       [ 30,  60,  41],
       [ 31,  36,  59],
       [ 32,  37,  36],
       [ 33,   5, 100],
       [ 34,  84,  67],
       [ 35,   5,  58],
       [ 36,  80,  61],
       [ 37,  96,   5],
       [ 38,  22,   8],
       [ 39,   8,  73],
       [ 40,  12, 102],
       [ 41,  36

In [98]:
distances

array([[0.00000000e+00, 8.57143344e-01, 8.61105457e-01],
       [0.00000000e+00, 8.18407468e-01, 8.42045338e-01],
       [2.22044605e-16, 8.42093163e-01, 8.42219922e-01],
       [0.00000000e+00, 8.13897046e-01, 8.16537256e-01],
       [0.00000000e+00, 8.19161261e-01, 8.37968409e-01],
       [1.11022302e-16, 7.89028240e-01, 8.02324090e-01],
       [0.00000000e+00, 8.54896584e-01, 8.56962184e-01],
       [0.00000000e+00, 8.44505915e-01, 8.48751884e-01],
       [0.00000000e+00, 7.89028240e-01, 8.06766072e-01],
       [0.00000000e+00, 8.39067391e-01, 8.65763885e-01],
       [0.00000000e+00, 8.84273828e-01, 8.98088913e-01],
       [0.00000000e+00, 8.30688528e-01, 8.32608988e-01],
       [0.00000000e+00, 7.74516902e-01, 7.82355044e-01],
       [0.00000000e+00, 8.55938673e-01, 8.83356736e-01],
       [0.00000000e+00, 8.24268325e-01, 8.26317838e-01],
       [1.11022302e-16, 8.40481552e-01, 8.56962184e-01],
       [0.00000000e+00, 8.20852101e-01, 8.33244332e-01],
       [0.00000000e+00, 8.37648

In [99]:
#Rumus Distances=1-SimilarityCosine(A,B)
distances_hotel =1-distances
print(distances_hotel)

[[1.         0.14285666 0.13889454]
 [1.         0.18159253 0.15795466]
 [1.         0.15790684 0.15778008]
 [1.         0.18610295 0.18346274]
 [1.         0.18083874 0.16203159]
 [1.         0.21097176 0.19767591]
 [1.         0.14510342 0.14303782]
 [1.         0.15549409 0.15124812]
 [1.         0.21097176 0.19323393]
 [1.         0.16093261 0.13423611]
 [1.         0.11572617 0.10191109]
 [1.         0.16931147 0.16739101]
 [1.         0.2254831  0.21764496]
 [1.         0.14406133 0.11664326]
 [1.         0.17573167 0.17368216]
 [1.         0.15951845 0.14303782]
 [1.         0.1791479  0.16675567]
 [1.         0.16235197 0.14631377]
 [1.         0.18145156 0.16828168]
 [1.         0.16757159 0.15240879]
 [1.         0.14406133 0.14218302]
 [1.         0.16344479 0.15506687]
 [1.         0.1845055  0.16828168]
 [1.         0.14689322 0.14412334]
 [1.         0.11838548 0.11780352]
 [1.         0.12907396 0.12547038]
 [1.         0.19650441 0.15980044]
 [1.         0.15528272 0.15

In [100]:
np.array([np.abs(distances_hotel.T).sum(axis=1)]).T

array([[110.        ],
       [ 17.75968015],
       [ 16.74418793]])

In [101]:
distances_hotel.T.dot(df.values)

array([[74.8       ,  3.        ,  4.        , ...,  4.2       ,
         3.        ,  5.        ],
       [12.53848068,  0.34594169,  0.60923854, ...,  0.67591696,
         0.53918646,  0.53682857],
       [11.96023854,  0.34370734,  0.5676399 , ...,  0.56379168,
         0.53767572,  0.50080662]])

In [102]:
prediksi=distances_hotel.T.dot(df.values)/np.array([np.abs(distances_hotel.T).sum(axis=1)]).T
print(prediksi)

[[0.68       0.02727273 0.03636364 ... 0.03818182 0.02727273 0.04545455]
 [0.70600825 0.01947905 0.03430459 ... 0.03805907 0.03036014 0.03022738]
 [0.71429194 0.02052696 0.03390071 ... 0.03367089 0.03211119 0.02990928]]


In [103]:
ground_truth = df.T.values[distances_hotel.argsort()[0]]
ground_truth

array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 4. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 3. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. ,

In [104]:
from sklearn.metrics import mean_squared_error
from math import sqrt

In [105]:
def rmse(prediksi,ground_truth):
    prediksi=prediksi[ground_truth.nonzero()].flatten()
    ground_truth=ground_truth[ground_truth.nonzero()].flatten()
    return sqrt(mean_squared_error(prediksi,ground_truth))

In [106]:
error_rate=rmse(prediksi,ground_truth)
print("Accuracy: {:.3f}".format(100-error_rate))
print("RMSE: {:.5f}".format(error_rate))

Accuracy: 96.176
RMSE: 3.82449


In [107]:
def mae(prediksi,ground_truth):
    prediksi=prediksi[ground_truth.nonzero()].flatten()
    ground_truth=ground_truth[ground_truth.nonzero()].flatten()
    return mean_squared_error(prediksi,ground_truth)

In [108]:
error_rate=mae(prediksi,ground_truth)
print("Accuracy: {:.3f}".format(100-error_rate))
print("RMSE: {:.5f}".format(error_rate))

Accuracy: 85.373
RMSE: 14.62669
