In [1]:

import pprint
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_ranking as tfr
import tensorflow_recommenders as tfrs
import mysql.connector
import pandas as pd
from getpass import getpass
from mysql.connector import connect, Error
import collections
from typing import Optional, List
# Connect to database
try:
    with connect(
        host="localhost",
        user="root",
        password="mysql",
        database="Sprint1BasicEComDb"
    ) as connection:
        print(connection)
except Error as e:
    print(e)
    
def save_to_csv(data, filename, header):
    df = pd.DataFrame(data).set_axis(header, axis=1)
    df.to_csv(filename, index=False, )




  from .autonotebook import tqdm as notebook_tqdm



<mysql.connector.connection_cext.CMySQLConnection object at 0x0000019C2348CF50>


In [2]:
def _sample_list(feature_lists, num_examples_per_list, book_features, random_state):
    indices = random_state.choice(
        range(len(feature_lists['book_id'])),
        size=num_examples_per_list,
        replace=False
    )
    return {feature: [feature_lists[feature][i] for i in indices] for feature in book_features}, [feature_lists["user_rating"][i] for i in indices]


def sample_listwise(
    rating_dataset: tf.data.Dataset,
    user_features: List[str],
    book_features: List[str],
    num_list_per_user: int = 50,
    num_examples_per_list: int = 5,
    seed: Optional[int] = None,
) -> tf.data.Dataset:
    """Function for converting a dataset to a listwise dataset with user and book features.
    Args:
        ... (arguments remain the same)
    Returns:
        A tf.data.Dataset containing list examples with additional user and book features.
    """
    random_state = np.random.RandomState(seed)
    example_lists_by_user = collections.defaultdict(
        lambda: collections.defaultdict(list))

    for example in rating_dataset.as_numpy_iterator():
        user_id = example["user_id"]
        for feature in user_features + book_features + ["user_rating"]:
            example_lists_by_user[user_id][feature].append(example[feature])

    # Initialize tensor_slices with empty lists for each feature
    tensor_slices = {feature: []
                     for feature in user_features + book_features + ["user_rating"]}

    for user_id, feature_lists in example_lists_by_user.items():
        for _ in range(num_list_per_user):
            if len(feature_lists["book_id"]) < num_examples_per_list:
                continue

            sampled_indices = random_state.choice(
                len(feature_lists["book_id"]),
                size=num_examples_per_list,
                replace=False
            )

            for feature in book_features:
                tensor_slices[feature].extend(
                    [feature_lists[feature][i] for i in sampled_indices])

            tensor_slices["user_rating"].extend(
                [feature_lists["user_rating"][i] for i in sampled_indices])

            for feature in user_features:
                tensor_slices[feature].extend(
                    [feature_lists[feature][0]] * num_examples_per_list)

    # Convert lists to numpy arrays or Tensors
    tensor_slices = {k: tf.convert_to_tensor(
        np.array(v)) for k, v in tensor_slices.items()}

    # Finally, return a tf.data.Dataset object
    return tf.data.Dataset.from_tensor_slices(tensor_slices)


def sample_listwise_2(
    rating_dataset: tf.data.Dataset,
    user_features: List[str],
    book_features: List[str],
    num_list_per_user: int = 50,
    num_examples_per_list: int = 5,
    seed: Optional[int] = None
) -> tf.data.Dataset:
    """Convert a dataset to a listwise dataset with user and book features."""
    random_state = np.random.RandomState(seed)
    example_lists_by_user = collections.defaultdict(lambda: collections.defaultdict(list))

    for example in rating_dataset.as_numpy_iterator():
        user_id = example["user_id"]
        for feature in user_features + book_features + ["user_rating"]:
            example_lists_by_user[user_id][feature].append(example[feature])

    # Initialize tensor_slices with empty lists for structured data
    structured_data = {feature: [] for feature in user_features + book_features + ["user_rating"]}

    for user_id, feature_lists in example_lists_by_user.items():
        for _ in range(num_list_per_user):
            if len(feature_lists["book_id"]) < num_examples_per_list:
                # Skip users with fewer books than required for a full list
                continue

            # Sample indices for books, ensuring exactly num_examples_per_list are chosen
            sampled_indices = random_state.choice(
                len(feature_lists["book_id"]),
                size=num_examples_per_list,
                replace=False
            )

            # For each feature, gather the sampled data
            for feature in book_features + ["user_rating"]:
                structured_data[feature].extend(
                    [feature_lists[feature][i] for i in sampled_indices])

            # Replicate user features across the examples in a list
            for feature in user_features:
                structured_data[feature].extend(
                    [feature_lists[feature][0]] * num_examples_per_list)

    # Convert the structured data lists into tensors, preserving the shape expectation
    tensor_slices = {k: tf.convert_to_tensor(np.array(v)) for k, v in structured_data.items()}

    # Adjust the shape of tensors to ensure compatibility with expected input structure
    for feature, tensor in tensor_slices.items():
        # This reshaping ensures that book_id and book_title are structured correctly
        if feature in book_features + ["user_rating"]:
            # Reshape to have -1 lists of size num_examples_per_list
            desired_shape = (-1, num_examples_per_list) if tensor.ndim > 1 else (-1,)
            tensor_slices[feature] = tf.reshape(tensor, desired_shape)

    # Return a tf.data.Dataset from the structured tensor_slices
    return tf.data.Dataset.from_tensor_slices(tensor_slices)
# Assuming the rating_dataset is preloaded and structured correctly.


# Here's how you would call the function with the specific features:

In [3]:
def get_all_books_user_and_ratings(): #TODO: like get All Books, add attributes to the query
    query = """SELECT 
    ubr.userid, 
    u.birthdate, 
    u.sex, 
    u.genrePreference, 
    u.authorPreference,
    ubr.bookid,
    b.book,
    b.description,
    b.num_page,
    b.pre_rating,
    b.publication,
    b.authorid,
    b.genreid,
    ubr.rating AS user_rating
FROM userbookratings ubr
JOIN (
    -- User Details and Preferences Subquery
    SELECT 
        u.id AS userid, 
        u.birthdate, 
        u.sex, 
        bp.genrePreference, 
        bp.authorPreference
    FROM users u
    JOIN bookpreferences bp ON u.id = bp.userID
) u ON ubr.userid = u.userid
JOIN (
    -- Book Details Subquery
    SELECT 
        bk.id AS bookid, 
        bk.book,
        bk.description,
        bk.numPages AS num_page,
        bk.rating AS pre_rating,
        bk.publication,
        GROUP_CONCAT(DISTINCT ba.authorId ORDER BY ba.authorId ASC) AS authorid,
        GROUP_CONCAT(DISTINCT bg.genreId ORDER BY bg.genreId ASC) AS genreid
    FROM bookitems bk
    LEFT JOIN bookauthors ba ON ba.bookId = bk.id
    LEFT JOIN bookgenres bg ON bg.bookId = bk.id
    GROUP BY bk.id
) b ON ubr.bookid = b.bookid; """
    # incase connection is lost, reconnect
    connection.reconnect(attempts=3, delay=5)
    mydb = connection.cursor()
    mydb.execute(query)
    user_ratings = mydb.fetchall()
    return user_ratings


print()
save_to_csv(get_all_books_user_and_ratings(), '../User_book_Ratings.csv', ['user_id','birth_date','sex','genre_preference','author_preference','book_id','book_title','description','num_pages','pre_rating','publication','author_id','genre_id','user_rating'])

rating_rank = pd.read_csv('../User_book_Ratings.csv')





In [4]:
# all the ones I want to use
user_features = ['userid', 'birthdate', 'sex',
                 'genrePreference', 'authorPreference']
book_features = ['book_id', 'book_title', 'description',
                 'num_page', 'pre_rating', 'publication', 'authorids', 'genreids']
# the ones I'm ac
user_features = ['user_id', 'sex']
book_features = ['book_id', 'book_title', 'num_pages']


# convert dataframe to tf.data.Dataset
rating_rank = tf.data.Dataset.from_tensor_slices(dict(rating_rank))# LINE CAN ONLY BE USED 1NCE

# TODO: Add more features to the dataset
rating_rank = rating_rank.map(lambda x: {
    "user_id": x["user_id"],
    "sex": x['sex'],
    "book_id": x['book_id'],
    "book_title": x['book_title'],
    "num_pages": x['num_pages'],
    "user_rating": x["user_rating"],
}
)

tf.random.set_seed(42)
#######################################
# NUMBER TAKINGS NEEDS TO BE ADJUSTED #
#######################################
shuffled = rating_rank.shuffle(100_000, seed=42, reshuffle_each_iteration=False)
train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)


#################################################################
# Tensor Shape doesn't look right book != TensorSpec(shape=(5,) #
#################################################################
train = sample_listwise_2(train, user_features, book_features,
                              num_list_per_user=50, num_examples_per_list=5, seed=42)

print(train.element_spec)



ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).