In [None]:
#FOR THE FUTURE
#NEURAL COLLABORATIVE FILTERING?

#NEURAL COLLABORATIVE FILTERING
from keras.models import Model, Sequential
from keras.layers import Embedding, Flatten, Input, Dot, concatenate, Dropout, Dense, BatchNormalization, StringLookup
from keras.optimizers import Adam
from tensorflow import tensordot

# Width of every embedding vector (shared by the MLP and the MF branch).
latent_dim = 10
# Drop probability used by each Dropout layer of the MLP branch.
mlp_dropout_rate = 0.2


def _embed_and_flatten(id_input, vocab_size, embedding_name, flatten_name):
    """Embed an id input and flatten it; returns (embedding_output, flattened_vector)."""
    embedded = Embedding(input_dim=vocab_size, output_dim=latent_dim, name=embedding_name)(id_input)
    flattened = Flatten(name=flatten_name)(embedded)
    return embedded, flattened


# Symbolic inputs: a single id per example, for the book and for the user.
book_input = Input(shape=(1,), name='book-input')
user_input = Input(shape=(1,), name='user-input')

# Optional string-to-index lookups for books and users (currently disabled):
#book_lookup = StringLookup(vocabulary=vocabulary_titles, mask_token=None)(book_input)
#user_lookup = StringLookup(vocabulary=vocabulary_users, mask_token=None)(user_input)

# MLP branch: embeddings that feed the dense layers (the non-linear part).
book_embedding_mlp, book_vec_mlp = _embed_and_flatten(
    book_input, num_books + 1, 'book-embedding-mlp', 'flatten-book-mlp')
user_embedding_mlp, user_vec_mlp = _embed_and_flatten(
    user_input, num_users + 1, 'user-embedding-mlp', 'flatten-user-mlp')

# MF branch: separate embeddings whose dot product is the matrix-factorization score.
book_embedding_mf, book_vec_mf = _embed_and_flatten(
    book_input, num_books + 1, 'book-embedding-mf', 'flatten-book-mf')
user_embedding_mf, user_vec_mf = _embed_and_flatten(
    user_input, num_users + 1, 'user-embedding-mf', 'flatten-user-mf')

# MLP stack: concatenate -> dropout -> (dense -> batch norm -> dropout) twice.
# Layers are created in this exact order so the unnamed Dropout layers keep
# the same auto-generated names.
concat = concatenate([book_vec_mlp, user_vec_mlp], name='concatenate_embedd_mlp')
concat_dropout = Dropout(mlp_dropout_rate)(concat)

fc_1 = Dense(units=100, activation='relu', name='fc-1')(concat_dropout)
fc_1_bn = BatchNormalization(name='batch-norm-1')(fc_1)
fc_1_dropout = Dropout(mlp_dropout_rate)(fc_1_bn)

fc_2 = Dense(units=50, activation='relu', name='fc-2')(fc_1_dropout)
fc_2_bn = BatchNormalization(name='batch-norm-2')(fc_2)
fc_2_dropout = Dropout(mlp_dropout_rate)(fc_2_bn)

# Per-branch predictions, joined into one feature vector.
pred_mlp = Dense(units=10, activation='relu', name='pred-mlp')(fc_2_dropout)
pred_mf = Dot(axes=1, name='pred-mf')([book_vec_mf, user_vec_mf])  # dot product = MF score
combine_mlp_mf = concatenate([pred_mf, pred_mlp], name='combine-mlp-mf')

# Output head: a single sigmoid unit.
result = Dense(units=1, activation='sigmoid', name='result')(combine_mlp_mf)

# Note the input order expected at fit/predict time: [user ids, book ids].
model = Model(inputs=[user_input, book_input], outputs=result)
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.01))
model.summary()

In [None]:
#For the future
#Extended book vector in retrieval?
from typing import Dict, Text

class BookTower(tf.keras.layers.Layer):
  """Book-side tower: looks up a book's title and string side features in one shared embedding table.

  Each feature has its own `StringLookup` (one OOV slot at index 0, no mask
  token), so every lookup produces ids in `[0, len(vocabulary)]`. Because all
  features share a single `Embedding`, each feature's ids are shifted by a
  fixed offset so that the features address disjoint row ranges of the table.
  Without the shift, title id 3 and mood id 3 (for example) would read the
  same row and most of the table would never be used.

  Args:
    embedding_dimension: size of each embedding vector.

  The vocabularies (`titles_vocabulary_2`, `mood_vocabulary`,
  `emotion_vocabulary`, `genres_vocabulary`, `keyword_vocabulary`,
  `users_vocabulary_2`) are read from module scope.
  """

  def __init__(self, embedding_dimension):
    super().__init__()

    self.embedding_dimension = embedding_dimension

    # Not used by call(); kept so the attribute remains available to existing code.
    self.identifier_user = tf.keras.layers.StringLookup(
        vocabulary=users_vocabulary_2, mask_token=None)
    self.identifier_book = tf.keras.layers.StringLookup(
        vocabulary=titles_vocabulary_2, mask_token=None)
    self.moods = tf.keras.layers.StringLookup(
        vocabulary=mood_vocabulary, mask_token=None)
    self.genres = tf.keras.layers.StringLookup(
        vocabulary=genres_vocabulary, mask_token=None)
    self.emotions = tf.keras.layers.StringLookup(
        vocabulary=emotion_vocabulary, mask_token=None)
    self.keywords = tf.keras.layers.StringLookup(
        vocabulary=keyword_vocabulary, mask_token=None)#, output_mode='multi_hot')

    # Row ranges inside the shared table, in the order the ids are concatenated
    # in call(): title, mood, emotion, genre, keyword. Each range holds the
    # vocabulary plus one row for the lookup's OOV index.
    self._mood_offset = len(titles_vocabulary_2) + 1
    self._emotion_offset = self._mood_offset + len(mood_vocabulary) + 1
    self._genre_offset = self._emotion_offset + len(emotion_vocabulary) + 1
    self._keyword_offset = self._genre_offset + len(genres_vocabulary) + 1
    total_rows = self._keyword_offset + len(keyword_vocabulary) + 1

    self.embedding_book = tf.keras.layers.Embedding(total_rows, self.embedding_dimension)

  def call(self, list_features):
    """Embed one batch of book features.

    Args:
      list_features: indexable of string tensors in the order
        [title, keyword, mood, emotion, genre].

    Returns:
      The embeddings of the concatenated ids (title ids first, then mood,
      emotion, genre and keyword ids).
    """
    # Shift every non-title feature into its own row range of the shared table.
    book_lookup = self.identifier_book(list_features[0])
    keyword_lookup = self.keywords(list_features[1]) + self._keyword_offset
    mood_lookup = self.moods(list_features[2]) + self._mood_offset
    emotion_lookup = self.emotions(list_features[3]) + self._emotion_offset
    genre_lookup = self.genres(list_features[4]) + self._genre_offset

    # NOTE(review): the ids are joined along axis 0, so the five id tensors are
    # laid end to end and the output has one embedding per (feature, example)
    # rather than one pooled vector per book. Kept as in the original; confirm
    # this is the intended layout for the retrieval task.
    concats = tf.concat([book_lookup, mood_lookup, emotion_lookup, genre_lookup, keyword_lookup], axis=0)
    return self.embedding_book(concats)

class UsersBooksModel2(tfrs.Model):
  """Two-tower retrieval model: user-id embeddings against `BookTower` embeddings.

  Args:
    embedding_dimension: size of the user and book embedding vectors.
    features: accepted for backward compatibility; not used by the constructor.
    layers: optional sequence of dense layer sizes. All but the last layer use
      ReLU; the last layer has no activation. When omitted, a module-level
      variable named `layers` is used if one exists (the previous behaviour);
      otherwise no dense layers are built.

  `users_vocabulary_2` and `TFdata_books_features` are read from module scope.
  """

  def __init__(self, embedding_dimension, features: Dict[Text, tf.Tensor], layers=None):
    super().__init__()

    if layers is None:
        # The original code read an undeclared global called `layers`; keep
        # honouring it when present, but no longer fail when it is missing.
        layers = globals().get("layers")

    self.embedding_dimension = embedding_dimension
    self.layers_size = layers

    self.book_tower = BookTower(self.embedding_dimension)

    # User embeddings: string id -> index -> vector (+1 row for the OOV index).
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=users_vocabulary_2, mask_token=None),
      tf.keras.layers.Embedding(len(users_vocabulary_2) + 1, self.embedding_dimension),
    ])

    # NOTE(review): the metric's candidates are produced by book_tower alone,
    # whereas compute_loss also applies dense_layers when `layers` is given.
    # Confirm the candidates should not go through dense_layers as well.
    self.task = tfrs.tasks.Retrieval(
      metrics=tfrs.metrics.FactorizedTopK(
        candidates=
            TFdata_books_features.batch(128).cache().map(self.book_tower)
      )
    )

    if layers is not None:
        self.dense_layers = tf.keras.Sequential()

        for layer_size in layers[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))

        # No activation for the last layer.
        for layer_size in layers[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Compute the retrieval loss for one batch.

    Args:
      features: mapping with keys 'person_id', 'title', 'keyword_string',
        'mood_string', 'emotion_string' and 'genre_string'.
      training: when True, metric computation is skipped.

    Returns:
      The value returned by the retrieval task for this batch.
    """
    use_dense = self.layers_size is not None

    user_embeddings = self.user_embeddings(features['person_id'])
    if use_dense:
        # The same dense stack is applied to both towers, as in the original.
        user_embeddings = self.dense_layers(user_embeddings)

    # BookTower.call takes ONE list argument; the dense branch used to pass the
    # five tensors positionally, which raised a TypeError.
    book_features = [
        features['title'],
        features['keyword_string'],
        features['mood_string'],
        features['emotion_string'],
        features['genre_string'],
    ]
    book_embeddings = self.book_tower(book_features)
    if use_dense:
        book_embeddings = self.dense_layers(book_embeddings)

    return self.task(user_embeddings, book_embeddings, compute_metrics = not training)

In [None]:
#Extended user vector with personalized layer

class UserTower(tf.keras.layers.Layer):
  """User-side tower: embeds a user id, optionally averaged with genre embeddings.

  User ids and genre values share one embedding table. Both lookups reserve
  index 0 for out-of-vocabulary values, so genre ids are shifted past the user
  rows; otherwise genre id k would read user k's row (and could exceed the
  table when there are more genres than users).

  Args:
    user_inputs: vocabulary of user identifiers.
    embedding_dimension: size of each embedding vector.
    genre_features: optional list with the genre vocabulary. When given, call()
      expects extra genre inputs and returns the average embedding.

  Raises:
    TypeError: if `genre_features` is given but is not a list.
  """

  def __init__(self, user_inputs, embedding_dimension, genre_features=None): # user_inputs is the user vocabulary
    super().__init__()
    self.genre_features = genre_features
    self.embedding_dimension = embedding_dimension
    self.user_inputs = user_inputs
    self.list_lookups = [tf.keras.layers.StringLookup(vocabulary=self.user_inputs, mask_token=None)]

    # Rows [0, len(user_inputs)] belong to the user lookup (index 0 = OOV);
    # genre ids start right after them.
    self._genre_offset = len(self.user_inputs) + 1
    table_rows = self._genre_offset

    if genre_features is not None:
        if not isinstance(genre_features, list):
            # TypeError is still an Exception, so existing handlers keep working.
            raise TypeError("genre_features deve essere una lista")
        self.list_lookups.append(
            tf.keras.layers.StringLookup(vocabulary=genre_features, mask_token=None))
        self.global_pooling = tf.keras.layers.GlobalAveragePooling1D()
        table_rows += len(genre_features) + 1

    self.embed = tf.keras.layers.Embedding(table_rows, self.embedding_dimension)

  def call(self, x):
    """Embed one batch.

    Args:
      x: indexable whose first element holds the user ids; any further
        elements are genre inputs (used only when a genre vocabulary was
        given to the constructor).

    Returns:
      The user embeddings when no genre vocabulary was configured; otherwise
      the average of the user and genre embeddings.
    """
    user_ids = self.list_lookups[0](x[0])
    if self.genre_features is None:
        return self.embed(user_ids)

    genre_lookup = self.list_lookups[1]
    lookups = [user_ids]
    for genre_values in x[1:]:
        # Shift genre ids into their own row range of the shared table.
        lookups.append(genre_lookup(genre_values) + self._genre_offset)

    # Stack along a new leading axis, then transpose before embedding
    # (same tensor the original built with tf.concat([[...]], axis=0)).
    stacked = tf.stack(lookups, axis=0)
    embedded = self.embed(tf.transpose(stacked))
    return self.global_pooling(embedded)

# Demo: build a UserTower over `unique_users` with 4-dimensional embeddings and the
# `unique_books_genres` vocabulary (UserTower raises unless that vocabulary is a list),
# then embed one hand-written example and print the result.
x = UserTower(unique_users, 4, unique_books_genres) #USE OF USER TOWER
# NOTE(review): the argument is a list holding a single inner list, so UserTower.call
# receives all four strings as x[0] (looked up as user ids) and x[1:] is empty; the
# genre lookup is never exercised. If 'Alessandro' is the user and '1', '2', '3' are
# genres, the input was presumably meant to be [['Alessandro'], ['1'], ['2'], ['3']].
# Confirm against the intended input format.
emb = x([['Alessandro', '1', '2', '3']])
print(emb)