Load in Parameters & data

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam

In [11]:
# Parameters
embedding_dim = 50    # Dimension for user/item embeddings
hidden_units = 128    # Number of units in LSTM/GRU layer
sequence_length = 10  # Length of interaction sequences
learning_rate = 0.001

df_children_books_final_encoded = pd.read_csv('df_children_books_final_encoded.csv')
df_interactions_final_merged = pd.read_csv('df_interactions_final_merged.csv')[:100000]

  df_children_books_final_encoded = pd.read_csv('df_children_books_final_encoded.csv')


In [None]:
df_children_books_final_encoded.head()

In [12]:
df_interactions_final_merged.head()

Unnamed: 0,user_id,book_id,review_id,rating,review_text_incomplete,date_added,n_votes,review_age,processed_review,sentiment
0,8842281e1d1347389f2ab93d60773d4d,23310161,f4b4b050f4be00e9283c92a814af2670,4,Fun sequel to the original.,2015-11-17 19:37:35+00:00,7.0,3247,fun sequel original,0.6808
1,8842281e1d1347389f2ab93d60773d4d,18296097,bc9cff98f54be3b2b8c5b34598a7546c,5,,2015-09-21 15:16:57+00:00,,3304,,0.574139
2,8842281e1d1347389f2ab93d60773d4d,817720,75fd46041466ceb406b7fd69b089b9c5,5,,2015-05-21 04:29:23+00:00,,3428,,0.574139
3,8842281e1d1347389f2ab93d60773d4d,502362,be1ad51fa3d519e39050d2a61ffab534,5,,2015-04-01 03:00:12+00:00,,3478,,0.574139
4,8842281e1d1347389f2ab93d60773d4d,1969280,5809d5592ee32745e048a9c67ac27100,5,,2014-11-08 16:56:58+00:00,,3621,,0.574139


In [4]:
book_to_index = {book_id: i for i, book_id in enumerate(df_children_books_final_encoded['book_id'].unique())}
num_books = len(book_to_index)

user_to_index = {user_id: i for i, user_id in enumerate(df_interactions_final_merged['user_id'].unique())}
num_users = len(user_to_index)

# Map book IDs in interactions to indices
df_interactions_final_merged['book_index'] = df_interactions_final_merged['book_id'].map(book_to_index)

In [5]:
# Sort interactions by user and timestamp to maintain chronological order
user_book_interactions = df_interactions_final_merged.sort_values(by=['user_id', 'date_added'])

# Create sequences of interactions for each user
user_sequences = user_book_interactions.groupby('user_id')['book_index'].apply(list).tolist()

In [10]:
print(num_books)

111158


In [8]:
# Labels (Assume binary classification for simplicity)
# 1 if the user interacts with the item, else 0
labels = np.random.randint(2, size=num_users)

# Model Definition
model = Sequential([
    # Embedding layer for item embeddings
    Embedding(input_dim=num_books + 1, output_dim=embedding_dim, input_length=sequence_length),
    # Recurrent layer (LSTM)
    LSTM(hidden_units, return_sequences=False),
    # FC layer to output probability of interaction
    Dense(1, activation='sigmoid')
])

padded_sequences = pad_sequences(user_sequences, padding='post', dtype='float32')

# Step 2: Convert labels to numpy array if they are in a list
labels = np.array(labels)
model.compile(optimizer=Adam(learning_rate), loss='binary_crossentropy', metrics=['AUC'])
model.fit(padded_sequences, labels, epochs=10, batch_size=32, validation_split=0.2)




# top_5_recommendations for user
def recommend(user_history, top_n=5):
    # Pad user history to match the sequence length expected by the model
    user_history_padded = pad_sequences([user_history], maxlen=sequence_length, padding='pre')
    # Predict interaction probabilities for each item
    scores = model.predict(user_history_padded)[0]
    # Rank items by their scores and get top 5 items
    recommended_items = np.argsort(scores)[-top_n:][::-1]
    return recommended_items



Epoch 1/10


ValueError: in user code:

    File "/Users/seanpbteo/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/Users/seanpbteo/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/seanpbteo/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/Users/seanpbteo/opt/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 889, in train_step
        y_pred = self(x, training=True)
    File "/Users/seanpbteo/opt/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/seanpbteo/opt/anaconda3/lib/python3.9/site-packages/keras/engine/input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 10), found shape=(None, 1634)


In [None]:
user_history = [np.random.randint(1, num_books) for _ in range(sequence_length)]
recommended_items = recommend(user_history)
print("Recommended items:", recommended_items)