Load in Parameters & data

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam

In [12]:
# Parameters
embedding_dim = 50    # Dimension for user/item embeddings
hidden_units = 128    # Number of units in LSTM/GRU layer
sequence_length = 10  # Length of interaction sequences
learning_rate = 0.001

df_children_books_final_encoded = pd.read_csv('df_children_books_final_encoded.csv')
df_interactions_final_merged = pd.read_csv('df_interactions_final_merged.csv')

  df_children_books_final_encoded = pd.read_csv('df_children_books_final_encoded.csv')


In [7]:
df_children_books_final_encoded.head()

Unnamed: 0,isbn,isbn13,book_id,title,authors,num_pages,description,is_ebook,popular_shelves,average_rating,...,lang_tha,lang_tur,lang_ukr,lang_vie,lang_vls,lang_yid,lang_zho,length_long,length_medium,length_short
0,1599150603,9781599150604,287141,The Aeneid for Boys and Girls,"[{'author_id': '3041852', 'role': ''}]",162.0,"Relates in vigorous prose the tale of Aeneas, ...",False,"[{'count': '56', 'name': 'to-read'}, {'count':...",4.13,...,0,0,0,0,0,0,0,0,1,0
1,1934876569,9781934876565,6066812,All's Fairy in Love and War (Avalon: Web of Ma...,"[{'author_id': '19158', 'role': ''}]",216.0,"To Kara's astonishment, she discovers that a p...",False,"[{'count': '515', 'name': 'to-read'}, {'count'...",4.22,...,0,0,0,0,0,0,0,0,1,0
2,590417010,9780590417013,89378,Dog Heaven,"[{'author_id': '5411', 'role': ''}]",40.0,In Newbery Medalist Cynthia Rylant's classic b...,False,"[{'count': '450', 'name': 'to-read'}, {'count'...",4.43,...,0,0,0,0,0,0,0,0,0,1
3,915190575,9780915190577,3209312,"Moths and Mothers, Feathers and Fathers: A Sto...","[{'author_id': '589328', 'role': ''}, {'author...",,,False,"[{'count': '8', 'name': 'to-read'}, {'count': ...",4.29,...,0,0,0,0,0,0,0,1,0,0
4,1416904999,9781416904991,1698376,What Do You Do?,"[{'author_id': '169159', 'role': ''}]",24.0,WHAT DO YOU DO?\nA hen lays eggs...\nA cow giv...,False,"[{'count': '8', 'name': 'to-read'}, {'count': ...",3.57,...,0,0,0,0,0,0,0,0,0,1


In [8]:
df_interactions_final_merged.head()

Unnamed: 0,user_id,book_id,review_id,rating,review_text_incomplete,date_added,n_votes,review_age,processed_review,sentiment
0,8842281e1d1347389f2ab93d60773d4d,23310161,f4b4b050f4be00e9283c92a814af2670,4,Fun sequel to the original.,2015-11-17 19:37:35+00:00,7.0,3247,fun sequel original,0.6808
1,8842281e1d1347389f2ab93d60773d4d,18296097,bc9cff98f54be3b2b8c5b34598a7546c,5,,2015-09-21 15:16:57+00:00,,3304,,0.574139
2,8842281e1d1347389f2ab93d60773d4d,817720,75fd46041466ceb406b7fd69b089b9c5,5,,2015-05-21 04:29:23+00:00,,3428,,0.574139
3,8842281e1d1347389f2ab93d60773d4d,502362,be1ad51fa3d519e39050d2a61ffab534,5,,2015-04-01 03:00:12+00:00,,3478,,0.574139
4,8842281e1d1347389f2ab93d60773d4d,1969280,5809d5592ee32745e048a9c67ac27100,5,,2014-11-08 16:56:58+00:00,,3621,,0.574139


In [14]:
book_to_index = {book_id: i for i, book_id in enumerate(df_children_books_final_encoded['book_id'].unique())}
num_items = len(book_to_index)

user_to_index = {user_id: i for i, user_id in enumerate(df_interactions_final_merged['user_id'].unique())}
num_users = len(user_to_index)

# Map book IDs in interactions to indices
df_interactions_final_merged['book_index'] = df_interactions_final_merged['book_id'].map(book_to_index)



In [10]:
# Sort interactions by user and timestamp to maintain chronological order
user_book_interactions = df_interactions_final_merged.sort_values(by=['user_id', 'date_added'])

# Create sequences of interactions for each user
user_sequences = user_book_interactions.groupby('user_id')['book_index'].apply(list).tolist()

In [None]:
# Labels (Assume binary classification for simplicity)
# 1 if the user interacts with the item, else 0
labels = np.random.randint(2, size=num_users)

# Model Definition
model = Sequential([
    # Embedding layer for item embeddings
    Embedding(input_dim=num_items + 1, output_dim=embedding_dim, input_length=sequence_length),
    # Recurrent layer (choose LSTM or GRU)
    LSTM(hidden_units, return_sequences=False),
    # Fully connected layer to output probability of interaction
    Dense(1, activation='sigmoid')
])

# Compile Model
model.compile(optimizer=Adam(learning_rate), loss='binary_crossentropy', metrics=['accuracy'])

# Training the model
model.fit(user_sequences, labels, epochs=10, batch_size=32, validation_split=0.2)

# Example recommendation for a user
def recommend(user_history, top_n=5):
    # Pad user history to match the sequence length expected by the model
    user_history_padded = pad_sequences([user_history], maxlen=sequence_length, padding='pre')
    # Predict interaction probabilities for each item
    scores = model.predict(user_history_padded)[0]
    # Rank items by their scores and get top N items
    recommended_items = np.argsort(scores)[-top_n:][::-1]
    return recommended_items



2024-11-06 19:55:14.974824: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Example usage
user_history = [np.random.randint(1, num_items) for _ in range(sequence_length)]
recommended_items = recommend(user_history)
print("Recommended items:", recommended_items)