In [1]:
# import setup.py
import tensorflow as tf
import pandas as pd
import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds
from pprint import pprint




In [2]:
def load_data(filepath):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents, using ``pd.read_csv`` defaults.
    """
    return pd.read_csv(filepath)

In [3]:
# Read the full user-ratings table from disk.
df_user = load_data('../data/user_ratings_small.csv')

# Keep only the last 50000 rows as a smaller working set.
df_user_short = df_user.tail(50000)
print(df_user_short.shape)

(50000, 3)


In [6]:
# Turn the selected DataFrame columns into a TensorFlow Dataset whose
# elements are dicts keyed by column name.
rating_columns = ('user', 'game_id', 'rating')
ratings = tf.data.Dataset.from_tensor_slices(
    {column: df_user_short[column].values for column in rating_columns}
)

In [11]:
# Pretty-print a single element of the TensorFlow dataset as a sanity check.
first_element = ratings.take(1)
for x in first_element.as_numpy_iterator():
    pprint(x)

{'game_id': 16273, 'rating': 5.5, 'user': b'JCDINIZ'}


In [None]:
# Lookup layer that maps user-name strings to integer indices.
user_lookup = tf.keras.layers.StringLookup()

# Learn the vocabulary from the "user" field of every rating.
user_names = ratings.map(lambda row: row["user"])
user_lookup.adapt(user_names)

# Print the first few vocabulary entries as a quick check.
vocab_preview = user_lookup.get_vocabulary()[:3]
print(f"Vocabulary: {vocab_preview}")

# Embedding table with one row per vocabulary entry reported by the
# lookup layer, each row being a 32-dimensional vector.
user_vocab_size = user_lookup.vocabulary_size()
user_embedding = tf.keras.layers.Embedding(
    input_dim=user_vocab_size,
    output_dim=32,
)

# User model: string -> index -> embedding vector.
user_model = tf.keras.Sequential([
    user_lookup,
    user_embedding,
])



Vocabulary: ['[UNK]', 'loopoocat', 'The Eraser']


In [None]:
# Look up the embedding vector for a single example user.
sample_user = ['The Eraser']
user_model(sample_user)

<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[ 0.00918783, -0.01237703, -0.00634197, -0.0384252 , -0.01999358,
        -0.01638033,  0.02652749, -0.03374199, -0.04453336,  0.0280096 ,
        -0.02744752, -0.02172756, -0.02860097,  0.03531753, -0.04737209,
         0.03760756, -0.03413975, -0.04031839, -0.01528304,  0.00454368,
         0.03994949, -0.04935266,  0.04528964,  0.02020602, -0.03575091,
         0.04675219, -0.03119127, -0.04859721, -0.00635438,  0.04307124,
         0.02605419,  0.04420147]], dtype=float32)>

In [None]:
# Lookup layer that maps integer game ids to contiguous integer indices.
game_id_lookup = tf.keras.layers.IntegerLookup()

# Learn the vocabulary from the "game_id" field of every rating.
game_ids = ratings.map(lambda row: row["game_id"])
game_id_lookup.adapt(game_ids)

# Print the first few vocabulary entries as a quick check.
vocab_preview = game_id_lookup.get_vocabulary()[:3]
print(f"Vocabulary: {vocab_preview}")

# Embedding table with one row per vocabulary entry reported by the
# lookup layer, each row being a 32-dimensional vector.
game_id_vocab_size = game_id_lookup.vocabulary_size()
game_id_embedding = tf.keras.layers.Embedding(
    input_dim=game_id_vocab_size,
    output_dim=32,
)

# Game-id model: raw id -> index -> embedding vector.
game_id_model = tf.keras.Sequential([
    game_id_lookup,
    game_id_embedding,
])

In [None]:
# Look up the embedding vector for a single example game id.
sample_game_id = [265381]
game_id_model(sample_game_id)