In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# What is GAM

Interpretable Learning-to-Rank
Transparency and interpretability are important factors in deploying LTR models in ranking systems that can be involved in determining the outcomes of processes such as loan eligibility assessment, advertisement targeting, or guiding medical treatment decisions. In such cases, the contribution of each individual feature to the final ranking should be examinable and understandable to ensure transparency, accountability and fairness of the outcomes.

One possible way to achieve this is using generalized additive models (GAMs) — intrinsically interpretable machine learning models that are linearly composed of smooth functions of individual features. However, while GAMs have been extensively studied on regression and classification tasks, it is less clear how to apply them in a ranking setting. For instance, while GAMs can be straightforwardly applied to model each individual item in the list, modeling both item interactions and the context in which these items are ranked is a more challenging research problem. To this end, we have developed a neural ranking GAM — an extension of generalized additive models to ranking problems.

Unlike standard GAMs, a neural ranking GAM can take into account both the features of the ranked items and the context features (e.g., query or user profile) to derive an interpretable, compact model. This ensures that not only the contribution of each item-level feature is interpretable, but also the contribution of the context features. For example, in the figure below, using a neural ranking GAM makes visible how distance, price, and relevance, in the context of a given user device, contribute to the final ranking of the hotel. Neural ranking GAMs are now available as a part of TF-Ranking,

# Input
Items, Contexts (Features)

# Generalized Generative Model

Facilities + Prize + Distance 

# Output
Hotels RankingIn-room Facilities
Cable TV
Desk
In-room safe
Refrigerator
Shower
TV
￼
Hotel Services
Bellhop
Concierge
Front desk
24-hour Receptionist
24-hour security
Luggage storage
￼
General
AC
Banquet
Non-smoking room
Swimming pool
Terrace
￼
Business Facilities
Business center
Meeting facilities
Photocopier
Business centre
￼
Things to Do
Children play area
Fitness center
Sauna
￼
Public Facilities
Parking
Safety deposit box
WiFi in public area
￼
Nearby Facilities
ATM/Banking
Supermarket
￼
Sports and Recreations
Fitness center
Table tennis
￼
Family-friendly Facilities
Children pool
￼
Transportation
Secure parking
￼
Connectivity
Free WiFi

1. Understanding data (EDA)
2. Build Ranking Dataset
3. Build tfr.keras.layers.GAMLayer 


# Preprocessing

## Raw Data

In [2]:
df = pd.read_csv("hotels.csv")
df

Unnamed: 0,Hotel,Star,Rating,Reviews,Harga,Places Nearby,Facil + Akomod
0,Hotel Indonesia Kempinski Jakarta,5.0,8.9,4363,2.480.500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Food and Drinks\nA la carte breakfast\nA la ca...
1,"The Langham, Jakarta",5.0,8.8,238,3.823.600,Nearby Places\n\nPig Me Up! - Ashta District 8...,Food and Drinks\nA la carte dinner\nA la carte...
2,Manhattan Hotel,5.0,8.5,8405,6.231.500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Hotel Services\nBellhop\nConcierge\nMoney chan...
3,Aloft South Jakarta,4.0,8.9,424,762.300,Nearby Places\n\nSouthside Rooftop Bar & Loung...,Public Facilities\nParking\nCoffee shop\nEleva...
4,"The Mayflower, Jakarta - Marriott Executive Ap...",5.0,9.0,393,1.756.254,Nearby Places\n\nSudirman Plaza\nBusiness\n2 m...,Food and Drinks\nA la carte breakfast\nA la ca...
...,...,...,...,...,...,...,...
84,POP! Hotel Airport Jakarta,2.0,7.9,11026,"385.200,00",Nearby Places\r\n\r\nsTREATs Restaurant - Ibis...,Hotel Services\r\nBellhop\r\n24-hour security\...
85,Sheraton Grand Jakarta Gandaria City Hotel,5.0,8.9,1511,"2.420.000,00",Nearby Places\r\n\r\nAnigre at Sheraton Gandar...,Food and Drinks\r\nA la carte breakfast\r\nA l...
86,Horison Suites & Residences Rasuna Jakarta,4.0,7.8,517,"688.000,00",Nearby Places\r\n\r\nMeZZa Restaurant at Aston...,General\r\nAC\r\nBallroom\r\nBanquet\r\nFamily...
87,grandkemang Hotel,4.0,8.4,2717,"431.250,00",Nearby Places\r\n\r\nSparca Lounge at grandkem...,Food and Drinks\r\nA la carte dinner\r\nA la c...


## Count Each Facilities and Acomodation

In [3]:
facilities_columns = ['Food and Drinks','Hotel Services','In-room Facilities', 'Business Facilities', 'Nearby Facilities', 'Public Facilities', 'General', 'Things to Do', 'Accessibilty', 'Connectivity', 'Transportation', 'Kids and Pets', 'Sports and Recreations', 'Shuttle Service']

facilities_columns.reverse()

for index , row in df.iterrows():
    
    # split per fasil and akomod
    arr = row['Facil + Akomod'].splitlines() 
    
    #iterate over fasil and akomod

    i = 0
    count = 0

    for  item in reversed(arr):
      count += 1
      if item in facilities_columns:
        df.at[index,item ] = count
        count = 0
        i += 1

In [4]:
df

Unnamed: 0,Hotel,Star,Rating,Reviews,Harga,Places Nearby,Facil + Akomod,Shuttle Service,Sports and Recreations,Kids and Pets,...,Connectivity,Accessibilty,Things to Do,General,Public Facilities,Nearby Facilities,Business Facilities,In-room Facilities,Hotel Services,Food and Drinks
0,Hotel Indonesia Kempinski Jakarta,5.0,8.9,4363,2.480.500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Food and Drinks\nA la carte breakfast\nA la ca...,2.0,3.0,4.0,...,5.0,6.0,8.0,9.0,10.0,10.0,10.0,12.0,14.0,26.0
1,"The Langham, Jakarta",5.0,8.8,238,3.823.600,Nearby Places\n\nPig Me Up! - Ashta District 8...,Food and Drinks\nA la carte dinner\nA la carte...,,2.0,3.0,...,3.0,,10.0,7.0,11.0,8.0,7.0,13.0,12.0,14.0
2,Manhattan Hotel,5.0,8.5,8405,6.231.500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Hotel Services\nBellhop\nConcierge\nMoney chan...,2.0,3.0,3.0,...,6.0,8.0,4.0,10.0,16.0,10.0,12.0,13.0,20.0,16.0
3,Aloft South Jakarta,4.0,8.9,424,762.300,Nearby Places\n\nSouthside Rooftop Bar & Loung...,Public Facilities\nParking\nCoffee shop\nEleva...,,2.0,,...,,3.0,5.0,,9.0,,3.0,,5.0,5.0
4,"The Mayflower, Jakarta - Marriott Executive Ap...",5.0,9.0,393,1.756.254,Nearby Places\n\nSudirman Plaza\nBusiness\n2 m...,Food and Drinks\nA la carte breakfast\nA la ca...,2.0,3.0,7.0,...,3.0,,12.0,8.0,13.0,6.0,4.0,16.0,15.0,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,POP! Hotel Airport Jakarta,2.0,7.9,11026,"385.200,00",Nearby Places\r\n\r\nsTREATs Restaurant - Ibis...,Hotel Services\r\nBellhop\r\n24-hour security\...,2.0,,,...,3.0,3.0,3.0,5.0,6.0,7.0,6.0,7.0,8.0,
85,Sheraton Grand Jakarta Gandaria City Hotel,5.0,8.9,1511,"2.420.000,00",Nearby Places\r\n\r\nAnigre at Sheraton Gandar...,Food and Drinks\r\nA la carte breakfast\r\nA l...,2.0,,,...,3.0,8.0,3.0,9.0,16.0,10.0,11.0,12.0,16.0,25.0
86,Horison Suites & Residences Rasuna Jakarta,4.0,7.8,517,"688.000,00",Nearby Places\r\n\r\nMeZZa Restaurant at Aston...,General\r\nAC\r\nBallroom\r\nBanquet\r\nFamily...,,,,...,3.0,,,7.0,6.0,,4.0,6.0,5.0,4.0
87,grandkemang Hotel,4.0,8.4,2717,"431.250,00",Nearby Places\r\n\r\nSparca Lounge at grandkem...,Food and Drinks\r\nA la carte dinner\r\nA la c...,2.0,3.0,,...,4.0,3.0,4.0,8.0,12.0,7.0,6.0,8.0,14.0,14.0


In [5]:
for index , row in df.iterrows():
    
    # split per fasil and akomod
    arr = row['Places Nearby'].splitlines() 
    
    #iterate over fasil and akomod

    i = 0
    count = 0
    
    for ind, item in enumerate(arr):
        itemsplits = item.split()
        for x in itemsplits:
            if x.isdigit():
                df.at[index,arr[ind-1]] = item

In [6]:
df

Unnamed: 0,Hotel,Star,Rating,Reviews,Harga,Places Nearby,Facil + Akomod,Shuttle Service,Sports and Recreations,Kids and Pets,...,Street Food,Activity & Games,Cafe,Entertainment,359 m,Food Court,32 m,1.16 km,Sight & Landmark,214 m
0,Hotel Indonesia Kempinski Jakarta,5.0,8.9,4363,2.480.500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Food and Drinks\nA la carte breakfast\nA la ca...,2.0,3.0,4.0,...,,,,,,,,,,
1,"The Langham, Jakarta",5.0,8.8,238,3.823.600,Nearby Places\n\nPig Me Up! - Ashta District 8...,Food and Drinks\nA la carte dinner\nA la carte...,,2.0,3.0,...,,,,,,,,,,
2,Manhattan Hotel,5.0,8.5,8405,6.231.500,Nearby Places\n\nJia Jia - Grand Indonesia (De...,Hotel Services\nBellhop\nConcierge\nMoney chan...,2.0,3.0,3.0,...,,,,,,,,,,
3,Aloft South Jakarta,4.0,8.9,424,762.300,Nearby Places\n\nSouthside Rooftop Bar & Loung...,Public Facilities\nParking\nCoffee shop\nEleva...,,2.0,,...,,,,,,,,,,
4,"The Mayflower, Jakarta - Marriott Executive Ap...",5.0,9.0,393,1.756.254,Nearby Places\n\nSudirman Plaza\nBusiness\n2 m...,Food and Drinks\nA la carte breakfast\nA la ca...,2.0,3.0,7.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,POP! Hotel Airport Jakarta,2.0,7.9,11026,"385.200,00",Nearby Places\r\n\r\nsTREATs Restaurant - Ibis...,Hotel Services\r\nBellhop\r\n24-hour security\...,2.0,,,...,,,,,,,,,,
85,Sheraton Grand Jakarta Gandaria City Hotel,5.0,8.9,1511,"2.420.000,00",Nearby Places\r\n\r\nAnigre at Sheraton Gandar...,Food and Drinks\r\nA la carte breakfast\r\nA l...,2.0,,,...,,,,,,,,,,
86,Horison Suites & Residences Rasuna Jakarta,4.0,7.8,517,"688.000,00",Nearby Places\r\n\r\nMeZZa Restaurant at Aston...,General\r\nAC\r\nBallroom\r\nBanquet\r\nFamily...,,,,...,,,,,,,,,,
87,grandkemang Hotel,4.0,8.4,2717,"431.250,00",Nearby Places\r\n\r\nSparca Lounge at grandkem...,Food and Drinks\r\nA la carte dinner\r\nA la c...,2.0,3.0,,...,,,,,,,,,,


# Understanding the Data (EDA)

# Build Ranking Dataset

# Build tfr.keras.layers.GAMLayer