In [None]:
import pprint
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs
import mysql.connector
import pandas as pd
from getpass import getpass
from mysql.connector import connect, Error

# Book Pre-Processing
Note: the ranking is listwise, meaning each training example pairs one user with a list of 5 books (5:1).

In [None]:
# Book feature preprocessing layers.
# For now the book features we are doing are book_id, book_title, num_pages
# (a rating normaliser is also prepared at the end of this cell).
# NOTE(review): book_ids, book_titles, book_num_pages and book_ratings are not
# defined in the cells shown here; they must exist before this cell runs.

# book_id: string lookup whose vocabulary is learned from book_ids
book_id_lookup = tf.keras.layers.StringLookup()
book_id_lookup.adapt(book_ids) # list of all book ids, as strings

# Book title: string lookup whose vocabulary is learned from book_titles
book_title_lookup = tf.keras.layers.StringLookup()
book_title_lookup.adapt(book_titles)# should be the unique book titles, as strings


# num pages: axis=None makes the layer learn a single scalar mean/variance.
# NOTE(review): the original author was unsure about this feature.
num_pages_lookup = tf.keras.layers.Normalization(axis=None)
num_pages_lookup.adapt(book_num_pages.astype(np.float32)) # list of all page counts

# rating: same scalar normalisation as num pages.
# NOTE(review): the original author flagged this as possibly wrong.
rating_lookup = tf.keras.layers.Normalization(axis=None)
rating_lookup.adapt(book_ratings.astype(np.float32)) # list of all ratings; source values are doubles


In [None]:
class BookModel(tf.keras.Model):
    """Turns raw book features into one dense feature vector per book.

    The model is built on the module-level preprocessing layers
    ``book_id_lookup``, ``book_title_lookup``, ``num_pages_lookup`` and
    ``rating_lookup``; they must already be adapted when the model is created.

    Args:
        embedding_dimension: Width of the learned id and title embeddings.
    """

    def __init__(self, embedding_dimension=32):
        super().__init__()

        # book_id: string -> vocabulary index -> learned embedding.
        self.book_id_embedding = tf.keras.Sequential([
            book_id_lookup,
            tf.keras.layers.Embedding(
                book_id_lookup.vocabulary_size(), embedding_dimension),
        ])

        # book_title: string -> vocabulary index -> learned embedding.
        self.book_title_embedding = tf.keras.Sequential([
            book_title_lookup,
            tf.keras.layers.Embedding(
                book_title_lookup.vocabulary_size(), embedding_dimension),
        ])

        # num pages: normalised scalar feature.
        self.num_pages_embedding = tf.keras.Sequential([
            num_pages_lookup,
        ])

        # rating: normalised scalar feature.
        # TODO: not part of call() yet, because the visible caller does not
        # supply a book-rating feature; wire it in once the input key is settled.
        self.rating_embedding = tf.keras.Sequential([
            rating_lookup,
        ])

    def call(self, inputs):
        """Embed and concatenate the book features.

        Args:
            inputs: Dict with the keys ``"book_id"`` and ``"book_title"``
                (string tensors) and ``"num_pages"`` (numeric tensor).
                Assumes all three tensors share the same shape, e.g. ``(batch,)``
                or ``(batch, list_size)`` -- TODO confirm against the dataset.

        Returns:
            A float tensor with one extra trailing axis of size
            ``2 * embedding_dimension + 1``.
        """
        num_pages = self.num_pages_embedding(
            tf.cast(inputs["num_pages"], tf.float32))

        # Concatenate on the last (feature) axis so the same code works for
        # pointwise and listwise inputs; the scalar feature needs its own
        # trailing axis to line up with the embeddings.
        return tf.concat([
            self.book_id_embedding(inputs["book_id"]),
            self.book_title_embedding(inputs["book_title"]),
            tf.expand_dims(num_pages, axis=-1),
        ], axis=-1)

# User Pre-Processing

In [None]:
# User feature preprocessing layers. For now the features are user_id and sex.
# NOTE(review): user_ids and user_age are not defined in the cells shown here;
# they must exist before this cell runs.

# User ID: string lookup whose vocabulary is learned from user_ids
user_id_lookup = tf.keras.layers.StringLookup()
user_id_lookup.adapt(user_ids)

# User age: axis=None makes the layer learn a single scalar mean/variance.
# TODO: get all user ages as an array; as written, user_age is wrapped in a
# one-element list before being passed to adapt().
user_age_lookup = tf.keras.layers.Normalization(axis=None)
user_age_lookup.adapt(np.array([user_age])) 

In [None]:
class UserModel(tf.keras.Model):
    """Embeds a user's id and sex and joins them into one feature vector.

    Depends on the module-level ``user_id_lookup`` layer, which has to be
    adapted before the model is instantiated (it is not passed in).
    """

    def __init__(self):
        super().__init__()

        # user_id: string -> vocabulary index -> 32-wide learned embedding.
        id_table = tf.keras.layers.Embedding(
            user_id_lookup.vocabulary_size(), 32)
        self.user_id_embedding = tf.keras.Sequential([user_id_lookup, id_table])

        # sex: embedded from a two-entry table, assuming it is encoded as 0 or 1.
        self.user_sex_embedding = tf.keras.layers.Embedding(2, 32)

    def call(self, inputs):
        """Return the id and sex embeddings concatenated on axis 1.

        Args:
            inputs: Dict providing ``"user_id"`` and ``"sex"``.
        """
        sex_index = tf.cast(inputs["sex"], tf.int32)
        feature_parts = [
            self.user_id_embedding(inputs["user_id"]),
            self.user_sex_embedding(sex_index),
        ]
        return tf.concat(feature_parts, axis=1)

In [None]:
class RankingModel(tfrs.Model):
    """Listwise ranking model that scores a list of candidate books per user.

    Args:
        loss: Ranking loss handed to ``tfrs.tasks.Ranking``
            (for example ``tfr.keras.losses.ListMLELoss()``).
    """

    def __init__(self, loss):
        super().__init__()

        # Feature towers. call() reads exactly these attribute names.
        self.user_model = UserModel()
        self.book_model = BookModel()

        # Scores one (user, book) feature vector; Dense acts on the last axis,
        # so it is applied independently to every book in the list.
        self.score_model = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(1)
        ])

        self.task = tfrs.tasks.Ranking(
            loss=loss,
            metrics=[
                tfr.keras.metrics.NDCGMetric(name="ndcg_metric"),
                tf.keras.metrics.RootMeanSquaredError()
            ]
        )

    def call(self, features):
        """Score every book in each user's candidate list.

        Args:
            features: Dict with ``"user_id"``, ``"user_sex"``, ``"book_id"``,
                ``"book_title"`` and ``"num_pages"``. Assumes user features
                are shaped ``(batch,)`` and book features
                ``(batch, list_size)`` -- TODO confirm against the dataset.

        Returns:
            Scores with a trailing axis of size 1, one per (user, book) pair.
        """
        user_embeddings = self.user_model({
            'user_id': features['user_id'],
            'sex': features['user_sex'],  # Adjust field name as necessary
        })

        # Adjust field names as necessary to match BookModel's expected input.
        book_embeddings = self.book_model({
            'book_id': features['book_id'],
            'book_title': features['book_title'],
            'num_pages': features['num_pages'],
        })

        # Prefer the static list length; fall back to the dynamic one when the
        # dataset does not fix it.
        list_length = book_embeddings.shape[1]
        if list_length is None:
            list_length = tf.shape(book_embeddings)[1]

        # tf.concat needs every non-concat axis to match, so the single user
        # vector is repeated once per book in the list.
        user_embeddings_repeated = tf.repeat(
            tf.expand_dims(user_embeddings, 1), list_length, axis=1)

        combined_embeddings = tf.concat(
            [user_embeddings_repeated, book_embeddings], axis=-1)

        return self.score_model(combined_embeddings)

    def compute_loss(self, features, training=False):
        """Compute the ranking loss for one batch.

        Args:
            features: Dict of model inputs plus the ``"user_rating"`` labels.
            training: Accepted for the TFRS interface; not used here.

        Returns:
            The loss returned by the ranking task.
        """
        # Work on a shallow copy so the caller's dict keeps its label entry.
        features = dict(features)
        labels = features.pop("user_rating")
        scores = self(features)
        return self.task(
            labels=labels,
            predictions=tf.squeeze(scores, axis=-1),
        )

In [None]:
# Build the listwise ranker with the ListMLE loss, then train it with Adagrad.
listwise_model = RankingModel(loss=tfr.keras.losses.ListMLELoss())
listwise_model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)
listwise_model.fit(
    cached_train,
    epochs=100,
    verbose=True,
)