In [7]:
!pip install -q tensorflow-recommenders
!pip install -q scann

In [6]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

from sklearn.model_selection import train_test_split

In [38]:
# this is just a simulation, the real data must be obtain by a call to the database api
# returns all the activities available in that slot!
activities = [
    {'id': 'A1', 'name': 'football', 'class': 'sports'},
    {'id': 'A2', 'name': 'chess', 'class': 'boardgame'},
    {'id': 'B1', 'name': 'jenga', 'class': 'boardgame'},
    {'id': 'C1', 'name': 'salsa', 'class': 'dancing'}
    # .... all activities
]

In [65]:
unique_activity_names = set(activity['name'] for activity in activities)
unique_activity_names = np.array(list(map(str.encode, unique_activity_names)))
unique_activity_names

array([b'jenga', b'salsa', b'football', b'chess'], dtype='|S8')

In our retrieval system the user acts as implicit feedback to the model since:

*   Choosing the activity is a positive feedback
*   Not choosing an activity is a negative feedback

It is not explicit since they don't tell us **how much** they like the activity.


In [55]:
# Implicit feedback: counter of the times the user has picked or not the activity
# picked        +1
# not picked    +0
user_feedback = {
    "user1":{'A1': 2, 'A2': 0, 'B1': 1,'C1': 0 }, # User 1 picked activity A1 twice (positive feedback). but has never picked activity A2 (negative feedback),...
    "user2":{'A1': 1, 'A2': 0, 'B1': 0,'C1': 1 },
    "user3":{'A1': 0, 'A2': 0, 'B1': 1,'C1': 0 },
    # Todo: Append feedback when recieving
}

In [59]:
unique_user_ids = list(user_feedback.keys())
# convert to bytes (tensorflow requires)
unique_user_ids = np.array(list(map(str.encode, unique_user_ids)))
unique_user_ids

array([b'user1', b'user2', b'user3'], dtype='|S5')

# The model

In [62]:
embedding_dimension = 32
# Higher values will correspond to models that may be more accurate,
# but will also be slower to fit and more prone to overfitting.

## Query tower = user

In [63]:
user_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_user_ids, mask_token=None),
  # We add an additional embedding to account for unknown tokens.
  tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
])

## Candidate tower = activity

In [66]:
movie_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_activity_names, mask_token=None),
  tf.keras.layers.Embedding(len(unique_activity_names) + 1, embedding_dimension)
])

In [None]:
def recommend_activities(user_id, model, activities):
    user_vector = np.array([1 if activity['id'] in [activity_id for activity_id, _ in user_feedback_dict[user_id]] else 0 for activity in activities], dtype=np.float32)

    # Predecir el feedback implícito para actividades no realizadas por el usuario
    predictions = model.predict(user_vector.reshape(1, -1)).flatten()

    # Ordenar actividades por las predicciones (mayor probabilidad de feedback positivo primero)
    sorted_indices = np.argsort(predictions)[::-1][:5]
    recommended_activities = [activities[i]['name'] for i in sorted_indices]

    return recommended_activities

