In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import tensorflow as tf
import tensorflow_ranking as tfr
from sklearn.utils import shuffle

2022-06-01 08:51:02.435377: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-06-01 08:51:02.435418: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# What is GAM

Interpretable Learning-to-Rank
Transparency and interpretability are important factors in deploying LTR models in ranking systems that can be involved in determining the outcomes of processes such as loan eligibility assessment, advertisement targeting, or guiding medical treatment decisions. In such cases, the contribution of each individual feature to the final ranking should be examinable and understandable to ensure transparency, accountability and fairness of the outcomes.

One possible way to achieve this is using generalized additive models (GAMs) — intrinsically interpretable machine learning models that are linearly composed of smooth functions of individual features. However, while GAMs have been extensively studied on regression and classification tasks, it is less clear how to apply them in a ranking setting. For instance, while GAMs can be straightforwardly applied to model each individual item in the list, modeling both item interactions and the context in which these items are ranked is a more challenging research problem. To this end, we have developed a neural ranking GAM — an extension of generalized additive models to ranking problems.

Unlike standard GAMs, a neural ranking GAM can take into account both the features of the ranked items and the context features (e.g., query or user profile) to derive an interpretable, compact model. This ensures that not only the contribution of each item-level feature is interpretable, but also the contribution of the context features. For example, in the figure below, using a neural ranking GAM makes visible how distance, price, and relevance, in the context of a given user device, contribute to the final ranking of the hotel. Neural ranking GAMs are now available as a part of TF-Ranking,

# Input
Items, Contexts (Features)

# Generalized Generative Model

Facilities + Prize + Distance 

# Output
Hotels Ranking

1. Understanding data (EDA)
2. Build Ranking Dataset
3. Build tfr.keras.layers.GAMLayer 


# Preprocessing

## Raw Data

In [3]:
df = pd.read_csv("hotels3.csv")


## Count Each Facilities and Acomodation

In [4]:
facilities_columns = ['Food and Drinks','Hotel Services','In-room Facilities', 'Business Facilities', 'Nearby Facilities', 'Public Facilities', 'General', 'Things to Do', 'Accessibilty', 'Connectivity', 'Transportation', 'Kids and Pets', 'Sports and Recreations', 'Shuttle Service']

facilities_columns.reverse()

for index , row in df.iterrows():
    
    # split per fasil and akomod
    arr = row['Facil + Akomod'].splitlines() 
    #iterate over fasil and akomod

    i = 0
    count = 0

    for  item in reversed(arr):
      count += 1
      if item in facilities_columns:
        df.at[index,item ] = count
        count = 0
        i += 1

In [5]:
df = df.fillna(0)

In [6]:
for index , row in df.iterrows():
    
    # split per fasil and akomod
    arr = row['Places Nearby'].splitlines() 
    
    #iterate over fasil and akomod

    i = 0
    count = 0
    
    for ind, item in enumerate(arr):
        itemsplits = item.split()
        for x in itemsplits:
            if x.isdigit():
                if itemsplits[1] == "km":
                    meters = itemsplits[0] * 1000
                else:
                    meters = itemsplits[0]
                df.at[index,arr[ind-1]] = meters

In [18]:
print(df.columns)

Index(['Hotel', 'Star', 'Rating', 'Reviews', 'Harga', 'Places Nearby',
       'Facil + Akomod', 'Shuttle Service', 'Sports and Recreations',
       'Kids and Pets', 'Transportation', 'Connectivity', 'Accessibilty',
       'Things to Do', 'General', 'Public Facilities', 'Nearby Facilities',
       'Business Facilities', 'In-room Facilities', 'Hotel Services',
       'Food and Drinks', 'Fast Food', 'Shop & Gifts', 'Business',
       'Transportation Hub', '', 'Casual Dining', 'Nightlife', 'Park & Zoo',
       'Public Service', 'Arts & Sciences', 'Fine Dining', 'Sport',
       'Quick Bites', 'Education', 'Street Food', 'Activity & Games', 'Cafe',
       'Entertainment', '359 m', 'Food Court', '32 m', '1.16 km',
       'Sight & Landmark', '214 m'],
      dtype='object')


In [8]:


df.Harga = df['Harga'].str.replace('.','', regex = True)
df.Harga = df['Harga'].str.replace(',','.', regex = True)
df.Harga = df['Harga'].astype(float).astype(int)


df.Reviews = df['Reviews'].str.replace('.','', regex = True)
df.Reviews = df['Reviews'].str.replace(',','.', regex = True)
df.Reviews = df['Reviews'].astype(float).astype(int)

In [9]:
df.dtypes

Hotel                      object
Star                      float64
Rating                    float64
Reviews                     int64
Harga                       int64
Places Nearby              object
Facil + Akomod             object
Shuttle Service           float64
Sports and Recreations    float64
Kids and Pets             float64
Transportation            float64
Connectivity              float64
Accessibilty              float64
Things to Do              float64
General                   float64
Public Facilities         float64
Nearby Facilities         float64
Business Facilities       float64
In-room Facilities        float64
Hotel Services            float64
Food and Drinks           float64
Fast Food                  object
Shop & Gifts               object
Business                   object
Transportation Hub         object
                           object
Casual Dining              object
Nightlife                  object
Park & Zoo                 object
Public Service

In [10]:
df.describe()

Unnamed: 0,Star,Rating,Reviews,Harga,Shuttle Service,Sports and Recreations,Kids and Pets,Transportation,Connectivity,Accessibilty,Things to Do,General,Public Facilities,Nearby Facilities,Business Facilities,In-room Facilities,Hotel Services,Food and Drinks
count,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0,89.0
mean,4.168539,8.555056,2939.516854,1597875.0,1.741573,2.651685,1.224719,4.269663,3.808989,3.707865,6.314607,8.202247,11.044944,6.168539,6.404494,9.157303,11.617978,10.955056
std,0.828964,0.249094,3582.740905,1606771.0,1.695771,2.751451,1.952579,2.941722,1.870078,2.962655,3.854146,2.598862,3.10392,3.064592,3.121477,3.893173,4.698901,8.812324
min,0.0,7.8,1.0,382400.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0
25%,4.0,8.5,9.0,675000.0,0.0,0.0,0.0,3.0,3.0,0.0,4.0,6.0,9.0,4.0,4.0,8.0,8.0,4.0
50%,4.0,8.6,1755.0,847000.0,2.0,3.0,0.0,4.0,4.0,4.0,6.0,8.0,12.0,7.0,6.0,10.0,11.0,7.0
75%,5.0,8.7,4678.0,1757813.0,3.0,4.0,3.0,6.0,4.0,6.0,8.0,10.0,13.0,9.0,9.0,12.0,15.0,16.0
max,5.0,9.0,18892.0,7327822.0,10.0,11.0,9.0,13.0,10.0,10.0,18.0,13.0,17.0,11.0,13.0,16.0,24.0,37.0


In [11]:
df

Unnamed: 0,Hotel,Star,Rating,Reviews,Harga,Places Nearby,Facil + Akomod,Shuttle Service,Sports and Recreations,Kids and Pets,...,Street Food,Activity & Games,Cafe,Entertainment,359 m,Food Court,32 m,1.16 km,Sight & Landmark,214 m
0,Hotel Indonesia Kempinski Jakarta,5.0,8.9,4363,2480500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Food and Drinks\nA la carte breakfast\nA la ca...,2.0,3.0,4.0,...,,,,,,,,,,
1,"The Langham, Jakarta",5.0,8.8,238,3823600,Nearby Places\n\nPig Me Up! - Ashta District 8...,Food and Drinks\nA la carte dinner\nA la carte...,0.0,2.0,3.0,...,,,,,,,,,,
2,Manhattan Hotel,5.0,8.5,8405,6231500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Hotel Services\nBellhop\nConcierge\nMoney chan...,2.0,3.0,3.0,...,,,,,,,,,,
3,Aloft South Jakarta,4.0,8.9,424,762300,Nearby Places\n\nSouthside Rooftop Bar & Loung...,Public Facilities\nParking\nCoffee shop\nEleva...,0.0,2.0,0.0,...,,,,,,,,,,
4,"The Mayflower, Jakarta - Marriott Executive Ap...",5.0,9.0,393,1756254,Nearby Places\n\nSudirman Plaza\nBusiness\n2 m...,Food and Drinks\nA la carte breakfast\nA la ca...,2.0,3.0,7.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,POP! Hotel Airport Jakarta,2.0,7.9,11026,385200,Nearby Places\r\n\r\nsTREATs Restaurant - Ibis...,Hotel Services\r\nBellhop\r\n24-hour security\...,2.0,0.0,0.0,...,,,,,,,,,,
85,Sheraton Grand Jakarta Gandaria City Hotel,5.0,8.9,1511,2420000,Nearby Places\r\n\r\nAnigre at Sheraton Gandar...,Food and Drinks\r\nA la carte breakfast\r\nA l...,2.0,0.0,0.0,...,,,,,,,,,,
86,Horison Suites & Residences Rasuna Jakarta,4.0,7.8,517,688000,Nearby Places\r\n\r\nMeZZa Restaurant at Aston...,General\r\nAC\r\nBallroom\r\nBanquet\r\nFamily...,0.0,0.0,0.0,...,,,,,,,,,,
87,grandkemang Hotel,4.0,8.4,2717,431250,Nearby Places\r\n\r\nSparca Lounge at grandkem...,Food and Drinks\r\nA la carte dinner\r\nA la c...,2.0,3.0,0.0,...,,,,,,,,,,


In [12]:
training = df.iloc[:20,:]
evaluation = shuffle(df.iloc[20:,:])

# Understanding the Data (EDA)

# Build Ranking Dataset

In [13]:
def parsingfn(a):
    return a['Star','Rating','Reviews','Harga']


training = tfr.data.build_ranking_dataset_with_parsing_fn(
    training,
    parsingfn,
    10,
    reader=tfr.keras.pipeline.DatasetHparams.dataset_reader,
    reader_args=None,
    num_epochs=None,
    shuffle=True,
    shuffle_buffer_size=10000,
    shuffle_seed=None,
    prefetch_buffer_size=tf.data.experimental.AUTOTUNE,
    reader_num_threads=tf.data.experimental.AUTOTUNE,
    sloppy_ordering=False,
    drop_final_batch=False,
    num_parser_threads=tf.data.experimental.AUTOTUNE
)

2022-06-01 08:51:04.642913: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-06-01 08:51:04.642961: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-06-01 08:51:04.642990: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (gusanwa): /proc/driver/nvidia/version does not exist
2022-06-01 08:51:04.643570: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

training = tfr.data.build_ranking_dataset_with_parsing_fn(
    training,
    parsingfn,
    10,
    reader=tfr.keras.pipeline.DatasetHparams.dataset_reader,
    reader_args=None,
    num_epochs=None,
    shuffle=True,
    shuffle_buffer_size=10000,
    shuffle_seed=None,
    prefetch_buffer_size=tf.data.experimental.AUTOTUNE,
    reader_num_threads=tf.data.experimental.AUTOTUNE,
    sloppy_ordering=False,
    drop_final_batch=False,
    num_parser_threads=tf.data.experimental.AUTOTUNE
)

# Build tfr.keras.layers.GAMLayer