In [2]:
!pip install -q tensorflow-recommenders

In [2]:
from typing import Dict, Text
import os
import pprint
import tempfile
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
import pandas as pd
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'tensorflow_datasets'

In [3]:
# Catalogue of all the available griya.
griya = pd.read_csv("/content/sample_data/Datasetgriya - Griya.csv")
# User ratings of those griya.
ratings = pd.read_csv("/content/sample_data/Datasetgriya - Ratings.csv")



In [4]:
# Display the griya catalogue as this cell's output.
griya

Unnamed: 0,id_griya,nama_griya
0,gr01,Kost Uluwatu Stay
1,gr02,Kos 168
2,gr03,Lakuna House
3,gr04,Awana Apartments
4,gr05,Rumah Una Hotel
5,gr06,Kost Pondok EFATA Jimbaran
6,gr07,Wika Bali Beach House
7,gr08,Kost Ibu Made Taman Griya
8,gr09,Griya Amerta Kost & Home Stay
9,gr10,Kos PuriNaga at Jimbaran Bukit perum Purigadin...


In [5]:
# Display the ratings table as this cell's output.
ratings

Unnamed: 0,id_griya,nama_griya,alamat_griya,latitude,longitude,rating,user_id
0,gr01,Kost Uluwatu Stay,Jimbaran,-8.797208,115.162319,4,1
1,gr02,Kos 168,Gg. Buanasari Uluwatu 2,-8.795778,115.159935,3,2
2,gr03,Lakuna House,Jalan Muding Mekar Taman Muding Mekar,-8.788545,115.182526,5,3
3,gr04,Awana Apartments,Jl. Arwana No.6,-8.778635,115.176744,4,4
4,gr05,Rumah Una Hotel,656H+MRQ,-8.788037,115.179516,2,5
5,gr06,Kost Pondok EFATA Jimbaran,Jimbaran,-8.793385,115.165669,5,6
6,gr07,Wika Bali Beach House,Perumahan Terrace Campus Jalan Perumahan Terra...,-8.787962,115.172461,5,7
7,gr08,Kost Ibu Made Taman Griya,Jl. Taman Griya No.15,-8.784103,115.185916,4,8
8,gr09,Griya Amerta Kost & Home Stay,65CR+GW6,-8.778157,115.192303,5,9
9,gr10,Kos PuriNaga at Jimbaran Bukit perum Purigadin...,perum Puri Gading,-8.801855,115.155358,5,10


In [6]:
# Turn the DataFrame columns into tf.data pipelines of per-row tuples.
# User ids are cast to strings so they can be fed to a StringLookup layer.
ratings_dataset = tf.data.Dataset.from_tensor_slices(
    (
        ratings["id_griya"],
        ratings["nama_griya"],
        ratings["rating"],
        ratings["user_id"].astype(str),
    )
)
griya_dataset = tf.data.Dataset.from_tensor_slices(
    (griya["id_griya"], griya["nama_griya"])
)

In [7]:
# Peek at the first catalogue element to confirm its (id, name) structure.
for first_griya in griya_dataset.take(1).as_numpy_iterator():
  pprint.pprint(first_griya)

(b'gr01', b'Kost Uluwatu Stay')


In [8]:
# Peek at the first rating element to confirm its tuple structure.
for first_rating in ratings_dataset.take(1).as_numpy_iterator():
  pprint.pprint(first_rating)

(b'gr01', b'Kost Uluwatu Stay', 4, b'1')


In [9]:
# Keep only the features the retrieval model consumes: the griya name and the
# user id for ratings, and just the name for the catalogue.
# NOTE: both names are rebound to differently-shaped datasets here, so this
# cell is not idempotent; re-run the dataset-construction cell before it.
ratings_dataset = ratings_dataset.map(
    lambda _griya_id, griya_name, _rating, rater_id: {
        "nama_griya": griya_name,
        "user_id": rater_id,
    }
)
griya_dataset = griya_dataset.map(lambda _griya_id, griya_name: griya_name)


In [10]:
# Fix the global TensorFlow seed so the split below is reproducible.
tf.random.set_seed(42)

# Derive the shuffle buffer and the split sizes from the data instead of
# hard-coding 10 / 8 / 2: a buffer smaller than the dataset would not shuffle
# across all rows, and fixed take/skip counts would silently drop every row
# past the tenth. For the current 10-row table this yields the same 8/2 split.
num_ratings = len(ratings)
num_train = num_ratings * 8 // 10  # 80% train; the remainder is the test set

# reshuffle_each_iteration=False keeps one fixed order, so `train` and `test`
# are drawn from the same permutation and never overlap.
shuffled = ratings_dataset.shuffle(
    max(num_ratings, 1), seed=42, reshuffle_each_iteration=False)

train = shuffled.take(num_train)
test = shuffled.skip(num_train)

In [11]:
# Collect the distinct griya names and user ids that serve as the lookup
# vocabularies of the two embedding towers.

# Batched views of the raw feature values (one name per batch for griya, a
# single large batch of user ids for ratings).
nama_griya = griya_dataset.batch(1)
user_ids = ratings_dataset.batch(1_000_000).map(lambda x: x["user_id"])

# Flatten the batches into one array and de-duplicate it.
unique_griya_name = np.unique(np.concatenate(list(nama_griya)))
# User ids are converted to str and then stored as an object-dtype array (see
# the displayed value of `unique_user_ids` in the next cell).
unique_user_ids = np.unique(np.concatenate(list(user_ids))).astype(str)
unique_user_ids = unique_user_ids.astype(object)

# NOTE(review): these two lookup layers are not referenced by any later cell
# visible in this notebook; the towers below build their own StringLookup
# layers from the same vocabularies. Confirm whether they are still needed.
unique_griya_name_tensor = tf.keras.layers.StringLookup(
    vocabulary=unique_griya_name, mask_token=None
)

unique_user_ids_tensor = tf.keras.layers.StringLookup(
    vocabulary=unique_user_ids, mask_token=None
)

# Show the first ten griya names as the cell output.
unique_griya_name[:10]

array([b'Awana Apartments', b'Griya Amerta Kost & Home Stay', b'Kos 168',
       b'Kos PuriNaga at Jimbaran Bukit perum Purigading Cluster Palem',
       b'Kost Ibu Made Taman Griya', b'Kost Pondok EFATA Jimbaran',
       b'Kost Uluwatu Stay', b'Lakuna House', b'Rumah Una Hotel',
       b'Wika Bali Beach House'], dtype=object)

In [12]:
unique_user_ids

array(['1', '10', '2', '3', '4', '5', '6', '7', '8', '9'], dtype=object)

In [13]:
# Output size of the Embedding layer in both the user and the griya tower.
embedding_dimension = 32


In [56]:
# Query tower: user-id string -> vocabulary index -> embedding vector.
user_model = tf.keras.Sequential()
user_model.add(
    tf.keras.layers.StringLookup(vocabulary=unique_user_ids, mask_token=None)
)
# One extra embedding row accounts for unknown (out-of-vocabulary) tokens.
user_model.add(
    tf.keras.layers.Embedding(
        input_dim=len(unique_user_ids) + 1,
        output_dim=embedding_dimension,
    )
)

In [57]:
# Candidate tower: griya-name string -> vocabulary index -> embedding vector.
griya_model = tf.keras.Sequential()
griya_model.add(
    tf.keras.layers.StringLookup(vocabulary=unique_griya_name, mask_token=None)
)
# One extra embedding row accounts for unknown (out-of-vocabulary) tokens.
griya_model.add(
    tf.keras.layers.Embedding(
        input_dim=len(unique_griya_name) + 1,
        output_dim=embedding_dimension,
    )
)

In [58]:
# Embed every catalogue entry with the griya tower; these embeddings are the
# candidate set handed to the top-K retrieval metric.
candidate_embeddings = griya_dataset.batch(1).map(griya_model)
metrics = tfrs.metrics.FactorizedTopK(candidates=candidate_embeddings)


In [59]:
# Retrieval task that is given the top-K metrics defined above.
task = tfrs.tasks.Retrieval(metrics=metrics)

In [60]:
class griyalensModel(tfrs.Model):
  """Two-tower retrieval model pairing a user tower with a griya tower.

  The loss is produced by a retrieval task layer that receives the user
  embeddings and the embeddings of the griya each user rated.
  """

  def __init__(self, user_model, griya_model, retrieval_task=None):
    """Stores the two towers and the task layer used to compute the loss.

    Args:
      user_model: Model mapping a batch of user ids to embeddings.
      griya_model: Model mapping a batch of griya names to embeddings.
      retrieval_task: Optional task layer called with the two embedding
        batches. When omitted, the notebook-level `task` object is used,
        which is what the constructor previously did unconditionally.
    """
    super().__init__()
    self.griya_model: tf.keras.Model = griya_model
    self.user_model: tf.keras.Model = user_model
    # Accepting the task explicitly removes the hidden dependency on a global
    # while keeping two-argument construction working unchanged.
    self.task: tf.keras.layers.Layer = (
        task if retrieval_task is None else retrieval_task)

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Computes the retrieval loss for one batch.

    Args:
      features: Batch dictionary; the "user_id" and "nama_griya" entries are
        read.
      training: Accepted for the framework's call signature; not used here.

    Returns:
      The value returned by the task layer for this batch.
    """
    user_embeddings = self.user_model(features["user_id"])
    positive_griya_embeddings = self.griya_model(features["nama_griya"])
    return self.task(user_embeddings, positive_griya_embeddings)


In [61]:
# Assemble the two towers into the trainable model and attach the optimizer.
model = griyalensModel(user_model=user_model, griya_model=griya_model)
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.1)
model.compile(optimizer=adagrad)

In [62]:
# Batch the two splits and cache them for reuse across epochs / evaluation.
cached_train = (
    train
    .shuffle(buffer_size=100_000)
    .batch(batch_size=2)
    .cache()
)
cached_test = test.batch(batch_size=1).cache()

In [63]:
# Train for three epochs on the cached training batches.
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f83804385e0>

In [64]:
# Evaluate on the held-out batches; return_dict=True yields a name -> value dict.
model.evaluate(cached_test, return_dict=True)




{'factorized_top_k/top_1_categorical_accuracy': 0.0,
 'factorized_top_k/top_5_categorical_accuracy': 0.5,
 'factorized_top_k/top_10_categorical_accuracy': 1.0,
 'factorized_top_k/top_50_categorical_accuracy': 1.0,
 'factorized_top_k/top_100_categorical_accuracy': 1.0,
 'loss': 0.0,
 'regularization_loss': 0,
 'total_loss': 0.0}

In [65]:
# Create a model that takes in raw query features and recommends griya out of
# the entire griya dataset.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
# Index each griya name together with its embedding from the griya tower.
index.index_from_dataset(
  tf.data.Dataset.zip((griya_dataset.batch(100), griya_dataset.batch(100).map(model.griya_model)))
)

# Get recommendations. The id is kept in one variable so the printed label
# always matches the user actually queried (it previously said "user 42"
# while querying user "2").
query_user_id = "2"
_, titles = index(tf.constant([query_user_id]))
print(f"Recommendations for user {query_user_id}: {titles[0, :3]}")

Recommendations for user 42: [b'Kos 168' b'Kost Uluwatu Stay' b'Lakuna House']


In [1]:
# Export the query model.
# NOTE: this cell relies on `tempfile`, `os`, `tf` and `index` from the cells
# above, so it must run in the same kernel session as them. The recorded
# execution count of 1 and the NameError below suggest it was last run on a
# fresh kernel.
with tempfile.TemporaryDirectory() as tmp:
  path = os.path.join(tmp, "model")

  # Save the index.
  tf.saved_model.save(index, path)

  # Load it back; can also be done in TensorFlow Serving.
  loaded = tf.saved_model.load(path)

  # Pass a user id in, get top predicted griya titles back. Use an id that is
  # in the training vocabulary (ids "1".."10"); the previous "42" is not, so
  # it only exercised the out-of-vocabulary embedding.
  query_user_id = "2"
  scores, titles = loaded([query_user_id])

  print(f"Recommendations: {titles[0][:3]}")



NameError: name 'tempfile' is not defined