##### Copyright 2021 The TensorFlow Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Recommend hotels for travelers with TensorFlow Ranking

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/ranking/tutorials/quickstart"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/ranking/blob/master/docs/tutorials/quickstart.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/ranking/blob/master/docs/tutorials/quickstart.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/ranking/docs/tutorials/quickstart.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

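In this tutorial, we build a simple multi-tower ranking model with TF-Ranking. The notebook is adapted from the TF-Ranking quickstart, which uses the [MovieLens 100K dataset](https://grouplens.org/datasets/movielens/100k/); here the data is a set of hotel reviews instead. We can use this model to rank and recommend hotels for a given traveler context (travel purpose, gender, and device) according to their predicted ratings.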

## Setup

Install and import the TF-Ranking library:

In [None]:
!pip install -q tensorflow-ranking
# !pip install -q --upgrade tensorflow-datasets
!pip install -q tensorflow-recommenders
# !pip install -q tf-nightly

[K     |████████████████████████████████| 141 kB 5.1 MB/s 
[K     |████████████████████████████████| 511.7 MB 4.8 kB/s 
[K     |████████████████████████████████| 438 kB 40.2 MB/s 
[K     |████████████████████████████████| 5.8 MB 42.8 MB/s 
[K     |████████████████████████████████| 1.6 MB 36.6 MB/s 
[K     |████████████████████████████████| 4.3 MB 5.1 MB/s 
[K     |████████████████████████████████| 98 kB 7.0 MB/s 
[K     |████████████████████████████████| 85 kB 2.8 MB/s 
[?25h

In [None]:
from typing import Dict, Tuple

import tensorflow as tf

import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs

In [None]:
# Report the versions of the libraries this notebook runs against.
for label, module in (("TF ", tf), ("TFR ", tfr)):
  print(label, module.__version__)

TF  2.9.1
TFR  0.5.0.dev
tfds  4.6.0


## Read the data

Prepare to train a model by loading the hotel reviews and the hotels tables. Use `travel_purpose`, `gender`, and `desktop` as the query (context) input features, `hotel_id` as the document input feature, and `rating` as the label to train the ranking model.

In [None]:
# %%capture --no-display
# # Ratings data.
# ratings = tfds.load('movielens/100k-ratings', split="train")
# # Features of all the available movies.
# movies = tfds.load('movielens/100k-movies', split="train")

# from google.colab import drive
# drive.mount('/content/drive')

# %cd 'drive/MyDrive/Company-Based Capstone/ML'

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1K1cEMf4s0TjuhlSZB6hFFrZv1OmrI8LI/Company-Based Capstone/ML


In [None]:
import pandas as pd

# Read both id columns with object dtype so they stay as text instead of being
# parsed as numbers.
convert_dtype = dict.fromkeys(('hotel_id', 'user_id'), object)

hotels_df = pd.read_csv('./data/hotels.csv', encoding='windows-1254')
reviews_df = pd.read_csv('./data/combine-review.csv', dtype=convert_dtype)
reviews_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8054 entries, 0 to 8053
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   user_id         8054 non-null   object 
 1   hotel_id        8054 non-null   object 
 2   name            8054 non-null   object 
 3   rating          8054 non-null   float64
 4   review          8054 non-null   object 
 5   travel_purpose  8054 non-null   object 
 6   gender          8054 non-null   int64  
 7   desktop         8054 non-null   int64  
dtypes: float64(1), int64(2), object(5)
memory usage: 503.5+ KB


In [None]:
# Store hotel ids as strings so they have the same type as the review ids.
hotel_ids_as_text = hotels_df['id'].astype(str)
hotels_df['id'] = hotel_ids_as_text
hotels_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 256 entries, 0 to 255
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            256 non-null    object 
 1   hotel_name    256 non-null    object 
 2   hotel_city    256 non-null    object 
 3   hotel_price   256 non-null    object 
 4   hotel_type    256 non-null    object 
 5   hotel_rating  256 non-null    float64
 6   total_review  256 non-null    int64  
dtypes: float64(1), int64(1), object(5)
memory usage: 14.1+ KB


In [None]:
# Coerce ratings to numbers (values that cannot be parsed become NaN), then
# drop every row that has a missing value in any column.
numeric_rating = pd.to_numeric(reviews_df["rating"], errors='coerce')
reviews_df["rating"] = numeric_rating
reviews_df = reviews_df.dropna()

In [None]:
# Keep only rows whose hotel_id is not the literal text 'hotel_id'
# (presumably header lines repeated inside the combined CSV — TODO confirm).
is_data_row = reviews_df['hotel_id'] != 'hotel_id'
reviews_df = reviews_df[is_data_row]

In [None]:
def _frame_to_dataset(frame):
  """Returns a dataset with one element per row, keyed by column name."""
  columns = {name: frame[name] for name in frame.columns}
  return tf.data.Dataset.from_tensor_slices(columns)


hotels = _frame_to_dataset(hotels_df)
reviews = _frame_to_dataset(reviews_df)

In [None]:
import pprint

# Pretty-print the first review element to inspect its feature names and values.
first_review = reviews.take(1)
for record in first_review.as_numpy_iterator():
  pprint.pprint(record)

{'desktop': 1,
 'gender': 1,
 'hotel_id': b'3000010039024',
 'name': b'Mochamad B. N.',
 'rating': 8.3,
 'review': b'Bad Services, Bad cleanliness, Actual Condition too far from Pho'
           b'tos. Not recommended.',
 'travel_purpose': b'Holiday',
 'user_id': b'125987653'}


In [None]:
# Columns kept for modelling; every other review column is discarded.
_MODEL_COLUMNS = ("hotel_id", "travel_purpose", "gender", "desktop", "rating")


def _select_model_columns(x):
  """Returns a dict holding only the `_MODEL_COLUMNS` entries of `x`."""
  return {column: x[column] for column in _MODEL_COLUMNS}


reviews = reviews.map(_select_model_columns)

Build vocabularies to convert all travel purposes and all hotel ids into integer indices for embedding layers:

In [None]:
# Per-feature string streams used to fit the lookup layers.
hotels = reviews.map(lambda x: x["hotel_id"])
travels = reviews.map(lambda x: x["travel_purpose"])

# Lookup layers are created without a mask token and then fitted on the data.
travel_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
hotels_vocabulary = tf.keras.layers.StringLookup(mask_token=None)

travel_vocabulary.adapt(travels.batch(256))
hotels_vocabulary.adapt(hotels.batch(16))

In [None]:
# Show the integer index the fitted vocabulary assigns to 'Staycation'.
travel_vocabulary('Staycation')

<tf.Tensor: shape=(), dtype=int64, numpy=3>

In [None]:
import numpy as np


def _batched_column(column):
  """Returns `reviews` in batches of 1000, keeping only `column`."""
  return reviews.batch(1000).map(lambda x: x[column])


def _distinct_values(batches):
  """Concatenates all batches and returns their unique values."""
  return np.unique(np.concatenate(list(batches)))


hotel_id = _batched_column("hotel_id")
travel_purpose = _batched_column("travel_purpose")
gender = _batched_column("gender")
desktop = _batched_column("desktop")

unique_hotel_id = _distinct_values(hotel_id)
unique_travel_purpose = _distinct_values(travel_purpose)
unique_gender = _distinct_values(gender)
unique_desktop = _distinct_values(desktop)


In [None]:
# Display every distinct travel purpose found in the reviews.
unique_travel_purpose

array([b'Backpacking', b'Business', b'Business travel',
       b'Culture Sightseeings', b'Family vacation', b'Holiday',
       b'Leisure', b'Medical', b'Medical Travel', b'Romantic Vacation',
       b'Shopping & Culinary', b'Staycation', b'Transit'], dtype=object)

Group by `travel_purpose` to form lists for ranking models:


In [None]:
def key_func(x):
  """Returns the vocabulary index of the example's travel purpose (the group key)."""
  return travel_vocabulary(x["travel_purpose"])


def reduce_func(key, dataset):
  """Turns one window of same-key examples into a single batch of up to 100."""
  return dataset.batch(100)


ds_train = reviews.group_by_window(
    key_func=key_func, reduce_func=reduce_func, window_size=100)

In [None]:
# Inspect the first grouped list: shape and first five values of each feature.
for grouped_list in ds_train.take(1):
  for name, tensor in grouped_list.items():
    print(f"Shape of {name}: {tensor.shape}")
    print(f"Example values of {name}: {tensor[:5].numpy()}")
    print()

Shape of hotel_id: (100,)
Example values of hotel_id: [b'3000010039024' b'3000010039024' b'3000010039024' b'3000010039024'
 b'3000010039024']

Shape of travel_purpose: (100,)
Example values of travel_purpose: [b'Family vacation' b'Family vacation' b'Family vacation'
 b'Family vacation' b'Family vacation']

Shape of gender: (100,)
Example values of gender: [1 1 1 1 0]

Shape of desktop: (100,)
Example values of desktop: [1 0 0 0 1]

Shape of rating: (100,)
Example values of rating: [6.6 5.3 8.1 3.6 9.7]



Generate batched features and labels:

In [None]:
def _features_and_labels(
    x: Dict[str, tf.Tensor]) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
  """Splits one grouped example into model inputs and ranking labels.

  Args:
    x: Mapping from feature name to tensor; must contain a "rating" entry.

  Returns:
    A `(features, labels)` tuple. `labels` is `x["rating"]`; `features` holds
    every other entry of `x`, in the same order.

  Raises:
    KeyError: If `x` has no "rating" entry.
  """
  labels = x["rating"]
  # Build a new dict rather than popping from `x`, so the mapping passed in by
  # the caller is left unchanged.
  features = {name: value for name, value in x.items() if name != "rating"}
  return features, labels


# Split every grouped list into (features, labels), then pack 32 lists per
# batch as ragged tensors.
to_ragged_batches = tf.data.experimental.dense_to_ragged_batch(batch_size=32)
ds_train = ds_train.map(_features_and_labels).apply(to_ragged_batches)

The feature tensors generated in `ds_train` (for example `hotel_id` and `travel_purpose`) are of shape `[32, None]`, where the second dimension is 100 in most cases, except for batches in which fewer than 100 items were grouped into a list. A model working on ragged tensors is thus used.

In [None]:
# Inspect the first ragged batch: shape and a 3x3 corner of every feature and
# of the label.
for batch_features, batch_labels in ds_train.take(1):
  for name, tensor in batch_features.items():
    print(f"Shape of {name}: {tensor.shape}")
    print(f"Example values of {name}: {tensor[:3, :3].numpy()}")
    print()
  print(f"Shape of label: {batch_labels.shape}")
  print(f"Example values of label: {batch_labels[:3, :3].numpy()}")

Shape of hotel_id: (32, None)
Example values of hotel_id: [[b'3000010039024' b'3000010039024' b'3000010039024']
 [b'4843' b'4843' b'4843']
 [b'3000010039024' b'3000010039024' b'3000010039024']]

Shape of travel_purpose: (32, None)
Example values of travel_purpose: [[b'Family vacation' b'Family vacation' b'Family vacation']
 [b'Family vacation' b'Family vacation' b'Family vacation']
 [b'Business travel' b'Business travel' b'Business travel']]

Shape of gender: (32, None)
Example values of gender: [[1 1 1]
 [1 1 1]
 [0 0 1]]

Shape of desktop: (32, None)
Example values of desktop: [[1 0 0]
 [0 1 1]
 [1 1 0]]

Shape of label: (32, None)
Example values of label: [[6.6 5.3 8.1]
 [4.  8.6 4. ]
 [4.  9.9 8.4]]


## Define a model

Define a ranking model by inheriting from `tf.keras.Model` and implementing the `call` method:

In [None]:
class HotelRankingModel(tf.keras.Model):
  """Ranking model that scores hotels for a traveler context.

  Each of the four input features (`hotel_id`, `travel_purpose`, `gender`,
  `desktop`) goes through its own tower: an embedding table followed by a
  `tanh` dense projection. The score of a list item is the sum, over the
  projected dimension, of the element-wise product of the four tower outputs.
  """

  def __init__(self,
               hotel_ids=None,
               travel_purposes=None,
               embedding_dim: int = 128,
               projection_dim: int = 256):
    """Builds the four feature towers.

    Args:
      hotel_ids: Sequence of hotel id strings used as the lookup vocabulary.
        Defaults to the notebook-level `unique_hotel_id`.
      travel_purposes: Sequence of travel purpose strings used as the lookup
        vocabulary. Defaults to the notebook-level `unique_travel_purpose`.
      embedding_dim: Width of every embedding table.
      projection_dim: Output width of every dense projection. All towers share
        it because their outputs are multiplied element-wise.
    """
    super().__init__()

    # Fall back to the vocabularies computed earlier in the notebook so that
    # `HotelRankingModel()` keeps working without arguments.
    if hotel_ids is None:
      hotel_ids = unique_hotel_id
    if travel_purposes is None:
      travel_purposes = unique_travel_purpose

    # Compute embeddings for hotel id.
    self.hotel_embeddings = self._string_tower(
        hotel_ids, embedding_dim, projection_dim)

    # Compute embeddings for travel purpose.
    self.travel_embeddings = self._string_tower(
        travel_purposes, embedding_dim, projection_dim)

    # Compute embeddings for gender (two possible values).
    self.gender_embeddings = self._binary_tower(embedding_dim, projection_dim)

    # Compute embeddings for the desktop/device flag (two possible values).
    self.device_embeddings = self._binary_tower(embedding_dim, projection_dim)

  @staticmethod
  def _string_tower(vocabulary, embedding_dim, projection_dim):
    """Returns a string lookup -> embedding -> tanh dense tower."""
    return tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=vocabulary, mask_token=None),
      # The table has one row more than the vocabulary (presumably for the
      # lookup layer's out-of-vocabulary index — TODO confirm).
      tf.keras.layers.Embedding(len(vocabulary) + 1, embedding_dim),
      tf.keras.layers.Dense(projection_dim, activation="tanh"),
    ])

  @staticmethod
  def _binary_tower(embedding_dim, projection_dim):
    """Returns an embedding -> tanh dense tower for a feature with two values."""
    return tf.keras.Sequential([
      tf.keras.layers.Embedding(2, embedding_dim),
      tf.keras.layers.Dense(projection_dim, activation="tanh"),
    ])

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    """Computes one ranking score per list item.

    Args:
      features: Mapping with the keys "travel_purpose", "hotel_id", "gender"
        and "desktop". The training batches in this notebook index each value
        as [batch, list position].

    Returns:
      The tower outputs multiplied element-wise and summed over axis 2, i.e.
      one score per [batch, list position] entry.
    """
    travel_embed = self.travel_embeddings(features["travel_purpose"])
    hotel_embed = self.hotel_embeddings(features["hotel_id"])
    gender_embed = self.gender_embeddings(features["gender"])
    device_embed = self.device_embeddings(features["desktop"])

    return tf.reduce_sum(travel_embed * gender_embed * device_embed * hotel_embed,  axis=2)

Create the model, and then compile it with ranking `tfr.keras.losses` and `tfr.keras.metrics`, which are the core of the TF-Ranking package. 

This example uses a ranking-specific **softmax loss**, which is a listwise loss introduced to promote all relevant items in the ranking list with better chances on top of the irrelevant ones. In contrast to the softmax loss in the multi-class classification problem, where only one class is positive and the rest are negative, the TF-Ranking library supports multiple relevant documents in a query list and non-binary relevance labels.

For ranking metrics, this example specifically uses **Normalized Discounted Cumulative Gain (NDCG)** and **Mean Reciprocal Rank (MRR)**, which calculate the user utility of a ranked query list with position discounts. For more details about ranking metrics, review evaluation measures [offline metrics](https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Offline_metrics).

In [None]:
# Build the ranker and compile it with the ranking softmax loss and the NDCG
# and MRR ranking metrics; the loss and metrics are configured for ragged
# inputs.
model = HotelRankingModel()
eval_metrics = [
    tfr.keras.metrics.get(
        key=metric_key, name=f"metric/{metric_key}", ragged=True)
    for metric_key in ("ndcg", "mrr")
]
loss = tfr.keras.losses.get(
    loss=tfr.keras.losses.RankingLossKey.SOFTMAX_LOSS, ragged=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0018)
model.compile(optimizer=optimizer, loss=loss, metrics=eval_metrics)

## Train and evaluate the model

Train the model with `model.fit`.

In [None]:
# Train on the ragged list-wise batches in `ds_train` for 50 epochs.
model.fit(ds_train, epochs=50)

Epoch 1/50


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fd4b10d1f90>

In [None]:
# Print the per-tower output shapes and parameter counts.
model.summary()

Model: "hotel_ranking_model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_20 (Sequential)  (None, None, 256)         62080     
                                                                 
 sequential_21 (Sequential)  (None, None, 256)         34816     
                                                                 
 sequential_22 (Sequential)  (None, None, 256)         33280     
                                                                 
 sequential_23 (Sequential)  (None, None, 256)         33280     
                                                                 
Total params: 163,456
Trainable params: 163,456
Non-trainable params: 0
_________________________________________________________________


Generate predictions and evaluate.

In [None]:


# Build one query that scores every known hotel for the context
# travel_purpose="Staycation", gender=1, desktop=1. Each context value is
# repeated once per candidate hotel and given a leading batch dimension of 1.
num_candidates = unique_hotel_id.shape[0]
candidate_ids = tf.expand_dims(unique_hotel_id, axis=0)
inputs = {
    "travel_purpose":
        tf.expand_dims(tf.repeat("Staycation", repeats=num_candidates), axis=0),
    "gender":
        tf.expand_dims(tf.repeat(1, repeats=num_candidates), axis=0),
    "desktop":
        tf.expand_dims(tf.repeat(1, repeats=num_candidates), axis=0),
    "hotel_id":
        candidate_ids
}

# Order the hotel ids by their model score and show the first five.
# NOTE: the "user 42" wording in the label does not correspond to any user id
# in `inputs`; the query is defined by the context values above.
scores = model(inputs)
titles = tfr.utils.sort_by_scores(scores, [candidate_ids])[0]
print(f"Top 5 recommendations for user 42: {titles[0, :5]}")

Top 5 recommendations for user 42: [b'3000010002158' b'49722' b'239433' b'62310' b'305023']


In [None]:
# Show the raw model scores of the first five candidates (in input order, not
# in ranked order).
scores[0, :5]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-0.07833742, -0.14340425, -0.39685816,  0.24377899, -0.521583  ],
      dtype=float32)>

In [None]:


# Build one query that scores every known hotel for the context
# travel_purpose="Backpacking", gender=1, desktop=0.
# The purpose must be spelled exactly as in `unique_travel_purpose`: the
# previous value "Backpacker" is not in that vocabulary.
num_candidates = unique_hotel_id.shape[0]
candidate_ids = tf.expand_dims(unique_hotel_id, axis=0)
inputs = {
    "travel_purpose":
        tf.expand_dims(tf.repeat("Backpacking", repeats=num_candidates), axis=0),
    "gender":
        tf.expand_dims(tf.repeat(1, repeats=num_candidates), axis=0),
    "desktop":
        tf.expand_dims(tf.repeat(0, repeats=num_candidates), axis=0),
    "hotel_id":
        candidate_ids
}

# Order the hotel ids by their model score; show both ends of the ranking.
scores = model(inputs)
titles = tfr.utils.sort_by_scores(scores, [candidate_ids])[0]
print(f"Top 5 recommendations for user 42: {titles[0, :5]}")
# The last five entries are the lowest-ranked hotels, so label them as such.
print(f"Bottom 5 recommendations for user 42: {titles[0, -5:]}")

Top 5 recommendations for user 42: [b'1000000471401' b'2000000218205' b'1' b'3000010014429' b'625695']
Top 5 recommendations for user 42: [b'344819' b'3000010040418' b'2000000364013' b'3000010005459'
 b'3000020017037']


In [None]:


# Build one query that scores every known hotel for the context
# travel_purpose="Backpacking", gender=1, desktop=0.
num_candidates = unique_hotel_id.shape[0]
candidate_ids = tf.expand_dims(unique_hotel_id, axis=0)
inputs = {
    "travel_purpose":
        tf.expand_dims(tf.repeat("Backpacking", repeats=num_candidates), axis=0),
    "gender":
        tf.expand_dims(tf.repeat(1, repeats=num_candidates), axis=0),
    "desktop":
        tf.expand_dims(tf.repeat(0, repeats=num_candidates), axis=0),
    "hotel_id":
        candidate_ids
}

# Order the hotel ids by their model score; show both ends of the ranking.
scores = model(inputs)
titles = tfr.utils.sort_by_scores(scores, [candidate_ids])[0]
print(f"Top 5 recommendations for user 42: {titles[0, :5]}")
# The last five entries are the lowest-ranked hotels, so label them as such.
print(f"Bottom 5 recommendations for user 42: {titles[0, -5:]}")

Top 5 recommendations for user 42: [b'3000020007139' b'3000010010313' b'3000020018650' b'409115' b'4755']
Top 5 recommendations for user 42: [b'624034' b'239433' b'1000000496074' b'3000010041546' b'3000010023653']


In [None]:


# Build one query that scores every known hotel for the context
# travel_purpose="Leisure", gender=1, desktop=0.
num_candidates = unique_hotel_id.shape[0]
candidate_ids = tf.expand_dims(unique_hotel_id, axis=0)
inputs = {
    "travel_purpose":
        tf.expand_dims(tf.repeat("Leisure", repeats=num_candidates), axis=0),
    "gender":
        tf.expand_dims(tf.repeat(1, repeats=num_candidates), axis=0),
    "desktop":
        tf.expand_dims(tf.repeat(0, repeats=num_candidates), axis=0),
    "hotel_id":
        candidate_ids
}

# Order the hotel ids by their model score; show both ends of the ranking.
scores = model(inputs)
titles = tfr.utils.sort_by_scores(scores, [candidate_ids])[0]
print(f"Top 5 recommendations for user 42: {titles[0, :5]}")
# The last five entries are the lowest-ranked hotels, so label them as such.
print(f"Bottom 5 recommendations for user 42: {titles[0, -5:]}")

Top 5 recommendations for user 42: [b'295675' b'3000010005459' b'9000000413948' b'344819' b'3000010036106']
Top 5 recommendations for user 42: [b'149045' b'3000010041546' b'182170' b'62311' b'1000000471346']


In [None]:
# Hand-picked hotel ids used as a small candidate list, kept both as a plain
# list and as a NumPy array.
hotel_ex = [
    '3000020003597',
    '544602',
    '239433',
    '8650',
    '564300',
]
hotel_ex1 = np.array(hotel_ex)

In [None]:


# Rank only the hand-picked hotels in `hotel_ex` for the context
# travel_purpose="Leisure", gender=1, desktop=0.
# The repeat count follows the candidate list instead of a hard-coded 5, so
# the context features always line up with `hotel_ex`.
num_candidates = len(hotel_ex)
candidate_ids = tf.expand_dims(hotel_ex, axis=0)
inputs = {
    "travel_purpose":
        tf.expand_dims(tf.repeat("Leisure", repeats=num_candidates), axis=0),
    "gender":
        tf.expand_dims(tf.repeat(1, repeats=num_candidates), axis=0),
    "desktop":
        tf.expand_dims(tf.repeat(0, repeats=num_candidates), axis=0),
    "hotel_id":
        candidate_ids
}

# Order the hotel ids by their model score; show both ends of the ranking.
# With five or fewer candidates both slices contain the whole ranking.
scores = model(inputs)
titles = tfr.utils.sort_by_scores(scores, [candidate_ids])[0]
print(f"Top 5 recommendations for user 42: {titles[0, :5]}")
# The last five entries are the lowest-ranked hotels, so label them as such.
print(f"Bottom 5 recommendations for user 42: {titles[0, -5:]}")

Top 5 recommendations for user 42: [b'544602' b'3000020003597' b'8650' b'564300' b'239433']
Top 5 recommendations for user 42: [b'544602' b'3000020003597' b'8650' b'564300' b'239433']


In [None]:
# Ranked hotel ids of the first (only) query as a plain Python list.
titles.numpy()[0].tolist()

[b'544602', b'3000020003597', b'8650', b'564300', b'239433']

In [None]:
# Save the trained model's weights under the prefix ./data/model_weight2.
model.save_weights('./data/model_weight2')

In [None]:
# Create a fresh, untrained model instance that will receive the saved weights.
loaded = HotelRankingModel()

In [None]:
# Restore the weights saved from `model` into the fresh instance.
loaded.load_weights('./data/model_weight2')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fd4ba5f2f90>

In [None]:
# Confirm the class of the restored model object.
type(loaded)

__main__.HotelRankingModel

In [None]:


# Score every known hotel with the restored model for the context
# travel_purpose="Backpacking", desktop=0, gender=1, to compare against the
# output of the originally trained model.
num_candidates = unique_hotel_id.shape[0]
candidate_ids = tf.expand_dims(unique_hotel_id, axis=0)
inputs = {
    "travel_purpose":
        tf.expand_dims(tf.repeat("Backpacking", repeats=num_candidates), axis=0),
    "desktop":
        tf.expand_dims(tf.repeat(0, repeats=num_candidates), axis=0),
    "gender":
        tf.expand_dims(tf.repeat(1, repeats=num_candidates), axis=0),
    "hotel_id":
        candidate_ids
}

# Order the hotel ids by their model score; show both ends of the ranking.
scores = loaded(inputs)
titles = tfr.utils.sort_by_scores(scores, [candidate_ids])[0]
print(f"Top 5 recommendations for user 42: {titles[0, :5]}")
# The last five entries are the lowest-ranked hotels, so label them as such.
print(f"Bottom 5 recommendations for user 42: {titles[0, -5:]}")

Top 5 recommendations for user 42: [b'3000020007139' b'3000010010313' b'3000020018650' b'409115' b'4755']
Top 5 recommendations for user 42: [b'624034' b'239433' b'1000000496074' b'3000010041546' b'3000010023653']


In [None]:


# Score every known hotel with the restored model for the context
# travel_purpose="Backpacking", desktop=False, gender=True. The two flags are
# passed as booleans here instead of 0/1 integers.
num_candidates = unique_hotel_id.shape[0]
candidate_ids = tf.expand_dims(unique_hotel_id, axis=0)
inputs = {
    "travel_purpose":
        tf.expand_dims(tf.repeat("Backpacking", repeats=num_candidates), axis=0),
    "desktop":
        tf.expand_dims(tf.repeat(False, repeats=num_candidates), axis=0),
    "gender":
        tf.expand_dims(tf.repeat(True, repeats=num_candidates), axis=0),
    "hotel_id":
        candidate_ids
}

# Order the hotel ids by their model score; show both ends of the ranking.
scores = loaded(inputs)
titles = tfr.utils.sort_by_scores(scores, [candidate_ids])[0]
print(f"Top 5 recommendations for user 42: {titles[0, :5]}")
# The last five entries are the lowest-ranked hotels, so label them as such.
print(f"Bottom 5 recommendations for user 42: {titles[0, -5:]}")

Top 5 recommendations for user 42: [b'3000020007139' b'3000010010313' b'3000020018650' b'409115' b'4755']
Top 5 recommendations for user 42: [b'624034' b'239433' b'1000000496074' b'3000010041546' b'3000010023653']


In [None]:
# Save the hotel id vocabulary so it can be reloaded without the review data.
np.save('data/unique_hotel_id', unique_hotel_id)

In [None]:
# Reload the hotel id vocabulary from the .npy file.
# NOTE(review): allow_pickle=True unpickles the file contents; only load files
# that this notebook wrote itself.
a = np.load('data/unique_hotel_id.npy', allow_pickle=True)

In [None]:
# Save the travel purpose vocabulary so it can be reloaded without the review data.
np.save('data/unique_travel_purpose', unique_travel_purpose)

In [None]:
# Reload the travel purpose vocabulary from the .npy file.
# NOTE(review): allow_pickle=True unpickles the file contents; only load files
# that this notebook wrote itself.
b = np.load('data/unique_travel_purpose.npy', allow_pickle=True)

In [None]:
# Display the reloaded travel purpose vocabulary.
b

array([b'Backpacking', b'Business', b'Business travel',
       b'Culture Sightseeings', b'Family vacation', b'Holiday',
       b'Leisure', b'Medical', b'Medical Travel', b'Romantic Vacation',
       b'Shopping & Culinary', b'Staycation', b'Transit'], dtype=object)

In [None]:
# Confirm the reloaded vocabulary is a NumPy array.
type(b)

numpy.ndarray