In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras

2024-06-17 20:59:40.585898: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-17 20:59:41.441260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/regy/miniconda3/envs/tf/lib/
2024-06-17 20:59:41.441378: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/regy/miniconda3/envs/tf/lib/


In [3]:
# Load data
book_df = pd.read_csv('books_data_clean_with_id.csv')
rating_df = pd.read_csv('books_rating_clean_with_book_id.csv')
merged_df = pd.merge(rating_df, book_df, left_on='book_id', right_on='id')

# Map user ID to a "user vector" via an embedding matrix
user_ids = merged_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}

# Map books ID to a "books vector" via an embedding matrix
book_ids = merged_df["book_id"].unique().tolist()
book2book_encoded = {x: i for i, x in enumerate(book_ids)}
book_encoded2book = {i: x for i, x in enumerate(book_ids)}

merged_df["user"] = merged_df["user_id"].map(user2user_encoded)
merged_df["book"] = merged_df["book_id"].map(book2book_encoded)

num_users = len(user2user_encoded)
num_books = len(book_encoded2book)
merged_df['rating'] = merged_df['review/score'].values.astype(np.float32)

# min and max ratings will be used to normalize the ratings later
min_rating = min(merged_df["review/score"])
max_rating = max(merged_df["review/score"])

print(f"Number of users: {num_users}, Number of books: {num_books}, Min Rating: {min_rating}, Max Rating: {max_rating}")

# Shuffle data
merged_df = merged_df.sample(frac=1, random_state=42)
x = merged_df[["user", "book"]].values

# Normalizing the targets between 0 and 1. Makes it easy to train.
y = merged_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values

# Split data into training and validation sets
train_indices = int(0.9 * merged_df.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

EMBEDDING_SIZE = 64

@keras.utils.register_keras_serializable(package="RecommenderPackage")
class RecommenderNet(keras.Model):
    def __init__(self, num_users, num_books, embedding_size, dropout_rate=0.2, **kwargs):
        super(RecommenderNet, self).__init__(**kwargs)
        self.num_users = num_users
        self.num_books = num_books
        self.embedding_size = embedding_size
        self.dropout_rate = dropout_rate
        
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.book_embedding = layers.Embedding(
            num_books,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6)
        )
        self.book_bias = layers.Embedding(num_books, 1)
        
        self.dropout = layers.Dropout(dropout_rate)
        self.batch_norm = layers.BatchNormalization()
    
    @tf.function 
    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        book_vector = self.book_embedding(inputs[:, 1])
        book_bias = self.book_bias(inputs[:, 1])
        dot_user_book = tf.tensordot(user_vector, book_vector, 2)
        # Add all the components (including bias)
        x = dot_user_book + user_bias + book_bias
        
        x = self.dropout(x)
        x = self.batch_norm(x)
        
        # The sigmoid activation forces the rating to be between 0 and 11
        return tf.nn.sigmoid(x)
    
    def get_config(self):
        config = super().get_config()
        config.update({
            "num_users": self.num_users,
            "num_books": self.num_books,
            "embedding_size": self.embedding_size,
            "dropout_rate": self.dropout_rate,
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)

model = RecommenderNet(num_users, num_books, EMBEDDING_SIZE)

# Load the previously saved model
# model = tf.keras.models.load_model('Colab_User', custom_objects={'RecommenderNet': RecommenderNet(num_users, num_books, EMBEDDING_SIZE)})

# Compile the loaded model


Number of users: 138933, Number of books: 9909, Min Rating: 1.0, Max Rating: 5.0


2024-06-17 21:00:30.766584: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:00:30.896023: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:00:30.896162: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:00:30.897642: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, 

In [6]:
model.load_weights('Colab_User.h5')

ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.

In [5]:
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['mse', 'accuracy']
)

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras

# Load data
book_df = pd.read_csv('books_data_clean_with_id.csv')
rating_df = pd.read_csv('books_rating_clean_with_book_id.csv')
merged_df = pd.merge(rating_df, book_df, left_on='book_id', right_on='id')

# Map user ID to a "user vector" via an embedding matrix
user_ids = merged_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}

# Map books ID to a "books vector" via an embedding matrix
book_ids = merged_df["book_id"].unique().tolist()
book2book_encoded = {x: i for i, x in enumerate(book_ids)}
book_encoded2book = {i: x for i, x in enumerate(book_ids)}

merged_df["user"] = merged_df["user_id"].map(user2user_encoded)
merged_df["book"] = merged_df["book_id"].map(book2book_encoded)

num_users = len(user2user_encoded)
num_books = len(book_encoded2book)
merged_df['rating'] = merged_df['review/score'].values.astype(np.float32)

# min and max ratings will be used to normalize the ratings later
min_rating = min(merged_df["review/score"])
max_rating = max(merged_df["review/score"])

print(f"Number of users: {num_users}, Number of books: {num_books}, Min Rating: {min_rating}, Max Rating: {max_rating}")

# Shuffle data
merged_df = merged_df.sample(frac=1, random_state=42)
x = merged_df[["user", "book"]].values

# Normalizing the targets between 0 and 1. Makes it easy to train.
y = merged_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values

# Split data into training and validation sets
train_indices = int(0.9 * merged_df.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

EMBEDDING_SIZE = 64

@keras.utils.register_keras_serializable(package="RecommenderPackage")
class RecommenderNet(keras.Model):
    def __init__(self, num_users, num_books, embedding_size, dropout_rate=0.2, **kwargs):
        super(RecommenderNet, self).__init__(**kwargs)
        self.num_users = num_users
        self.num_books = num_books
        self.embedding_size = embedding_size
        self.dropout_rate = dropout_rate
        
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.book_embedding = layers.Embedding(
            num_books,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6)
        )
        self.book_bias = layers.Embedding(num_books, 1)
        
        self.dropout = layers.Dropout(dropout_rate)
        self.batch_norm = layers.BatchNormalization()
    
    @tf.function 
    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        book_vector = self.book_embedding(inputs[:, 1])
        book_bias = self.book_bias(inputs[:, 1])
        dot_user_book = tf.tensordot(user_vector, book_vector, 2)
        # Add all the components (including bias)
        x = dot_user_book + user_bias + book_bias
        
        x = self.dropout(x)
        x = self.batch_norm(x)
        
        # The sigmoid activation forces the rating to be between 0 and 11
        return tf.nn.sigmoid(x)
    
    def get_config(self):
        config = super().get_config()
        config.update({
            "num_users": self.num_users,
            "num_books": self.num_books,
            "embedding_size": self.embedding_size,
            "dropout_rate": self.dropout_rate,
        })
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)

# Initialize the model
model = RecommenderNet(num_users, num_books, EMBEDDING_SIZE)

# Load the weights
model.load_weights('Colab_User.h5')

# Compile the model
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['mse', 'accuracy']
)

# Now the model is ready for prediction or further training


2024-06-17 21:03:18.665369: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-17 21:03:19.707648: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/regy/miniconda3/envs/tf/lib/
2024-06-17 21:03:19.707819: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/regy/miniconda3/envs/tf/lib/


Number of users: 138933, Number of books: 9909, Min Rating: 1.0, Max Rating: 5.0


2024-06-17 21:03:22.833841: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:03:22.862081: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:03:22.862165: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:03:22.862857: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, 

ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.

2024-06-17 21:05:43.756170: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-17 21:05:44.696791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/regy/miniconda3/envs/tf/lib/
2024-06-17 21:05:44.696944: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/regy/miniconda3/envs/tf/lib/


Number of users: 138933, Number of books: 9909, Min Rating: 1.0, Max Rating: 5.0


2024-06-17 21:05:47.898112: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:05:47.930037: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:05:47.930108: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-17 21:05:47.931019: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, 