# Recurrent Neural Network (RNN)

## Import Packages

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam

## Load Data

In [None]:
# Parameters
embedding_dim = 50    # Dimension for user/item embeddings
hidden_units = 128    # Number of units in LSTM/GRU layer
sequence_length = 10  # Length of interaction sequences
learning_rate = 0.001

df_children_books_final_encoded = pd.read_csv('df_books_final.csv')
df_interactions_train = pd.read_csv('train_interactions.csv')
df_interactions_test = pd.read_csv('test_interactions.csv')

In [3]:
df_children_books_final_encoded.head()

Unnamed: 0,isbn,isbn13,book_id,title,authors,num_pages,language_code,description,format,is_ebook,popular_shelves,average_rating,ratings_count,text_reviews_count,isbn_combined,book_length,top_popular_shelves,author_ids,popular_shelf_names,shelf_names
0,1599150603,9781599150604,287141,The Aeneid for Boys and Girls,"[{'author_id': '3041852', 'role': ''}]",162.0,,"Relates in vigorous prose the tale of Aeneas, ...",Paperback,False,"[{'count': '56', 'name': 'to-read'}, {'count':...",4.13,46,7,1599150603,medium,"[{'count': '4', 'name': 'history'}, {'count': ...",['3041852'],[],"['history', 'classics', 'level-4to5']"
1,1934876569,9781934876565,6066812,All's Fairy in Love and War (Avalon: Web of Ma...,"[{'author_id': '19158', 'role': ''}]",216.0,,"To Kara's astonishment, she discovers that a p...",Paperback,False,"[{'count': '515', 'name': 'to-read'}, {'count'...",4.22,98,6,1934876569,medium,"[{'count': '9', 'name': 'favorites'}, {'count'...",['19158'],[],"['favorites', 'magic', 'avalon']"
2,590417010,9780590417013,89378,Dog Heaven,"[{'author_id': '5411', 'role': ''}]",40.0,eng,In Newbery Medalist Cynthia Rylant's classic b...,Hardcover,False,"[{'count': '450', 'name': 'to-read'}, {'count'...",4.43,1331,193,590417010,short,"[{'count': '8', 'name': 'pets'}, {'count': '8'...",['5411'],[],"['pets', 'children-s-lit', 'fiction']"
3,915190575,9780915190577,3209312,"Moths and Mothers, Feathers and Fathers: A Sto...","[{'author_id': '589328', 'role': ''}, {'author...",,,,,False,"[{'count': '8', 'name': 'to-read'}, {'count': ...",4.29,11,4,915190575,long,"[{'count': '1', 'name': 'kids-bookshelf'}, {'c...","['589328', '2608283']",[],"['kids-bookshelf', 'school', 'books-to-buy-loo..."
4,1416904999,9781416904991,1698376,What Do You Do?,"[{'author_id': '169159', 'role': ''}]",24.0,,WHAT DO YOU DO?\nA hen lays eggs...\nA cow giv...,Board Book,False,"[{'count': '8', 'name': 'to-read'}, {'count': ...",3.57,23,4,1416904999,short,"[{'count': '4', 'name': 'board-books'}, {'coun...",['169159'],[],"['board-books', 'animals', 'board-book']"


In [8]:
df_interactions_train.head()

Unnamed: 0,user_id,book_id,rating,n_votes,review_age,sentiment,book_index
0,1,39343,2,0.0,3434,0.8511,
1,1,21648,0,,3435,0.406046,
2,2,7794,4,0.0,3046,-0.4215,107090.0
3,3,8637,3,,3343,0.40686,
4,3,50719,4,,3367,0.527973,


## Encode Book & User IDs

In [None]:
book_to_index = {book_id: i for i, book_id in enumerate(df_children_books_final_encoded['book_id'].unique())}
num_books = len(book_to_index)

user_to_index = {user_id: i for i, user_id in enumerate(df_interactions_train['user_id'].unique())}
num_users = len(user_to_index)

# Map book IDs in interactions to indices
df_interactions_train['book_index'] = df_interactions_train['book_id'].map(book_to_index)
df_interactions_test['book_index'] = df_interactions_test['book_id'].map(book_to_index)

# Sort interactions by user and timestamp to maintain chronological order
user_book_interactions_train = df_interactions_train.sort_values(by=['user_id', 'review_age'])
user_book_interactions_test = df_interactions_test.sort_values(by=['user_id', 'review_age'])

In [None]:
user_book_interactions_train.fillna(0, inplace=True)
user_book_interactions_test.fillna(0, inplace=True)

In [None]:
user_sequences_train = user_book_interactions_train.groupby('user_id')['book_index'].apply(list).tolist()

## Model Training

In [None]:
# Labels 
# 1 if the user interacts with the item, else 0
labels = np.random.randint(2, size=num_users)
sequence_length = max(len(seq) for seq in user_sequences_train)
# Model Definition
model = Sequential([
    # Embedding layer for item embeddings
    Embedding(input_dim=num_books + 1, output_dim=embedding_dim, input_length=sequence_length),
    # Recurrent layer (LSTM)
    LSTM(hidden_units, return_sequences=False),
    # FC layer to output probability of interaction
    Dense(1, activation='sigmoid')
])

padded_sequences_train = pad_sequences(user_sequences_train, padding='post', dtype='int32', maxlen=sequence_length)

#Convert labels to numpy array if they are in a list
labels = np.array(labels)
model.compile(optimizer=Adam(learning_rate), loss='binary_crossentropy', metrics=['AUC'])
model.fit(padded_sequences_train, labels, epochs=5, batch_size=32, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Recommend Books

In [None]:
# top_5_recommendations for user
def recommend(user_history, top_n=5):
    # Pad user history to match the sequence length expected by the model
    user_history_padded = pad_sequences([user_history], maxlen=sequence_length, padding='pre')
    # Predict interaction probabilities for each item
    scores = model.predict(user_history_padded)[0]
    # Rank items by their scores and get top 5 items
    recommended_items = np.argsort(scores)[-top_n:][::-1]
    return recommended_items

In [20]:
user_sequences_test = user_book_interactions_test.groupby('user_id')['book_index'].apply(list).tolist()
print(user_sequences_test)

[[0.0], [0.0], [0.0, 0.0], [0.0, 0.0, 0.0], [0.0], [0.0, 0.0, 0.0], [0.0, 0.0], [0.0], [0.0], [0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0], [0.0, 0.0, 0.0], [0.0], [0.0], [0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5644.0, 0.0, 0.0, 0.0, 40837.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 62072.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0], [0.0, 0.0, 0.0], [0.0], [0.0, 0.0, 0.0], [0.0, 0.0], [0.0], [0.0, 0.0], [0.0], [0.0, 0.0], [0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.

In [25]:
user_history = [np.random.randint(1, num_books) for _ in range(sequence_length)]
recommended_items = recommend(user_history)
print("Recommended items:", recommended_items)

Recommended items: [0]


## Remarks

Epoch 1/5<br>
385/385 [==============================] - 1648s 4s/step - loss: 0.6934 - auc: 0.4980 - val_loss: 0.6936 - val_auc: 0.5000<br>
Epoch 2/5<br>
385/385 [==============================] - 3329s 9s/step - loss: 0.6933 - auc: 0.4957 - val_loss: 0.6934 - val_auc: 0.5000<br>
Epoch 3/5<br>
385/385 [==============================] - 4784s 12s/step - loss: 0.6932 - auc: 0.4905 - val_loss: 0.6932 - val_auc: 0.5000<br>
Epoch 4/5<br>
385/385 [==============================] - 2690s 7s/step - loss: 0.6932 - auc: 0.4941 - val_loss: 0.6932 - val_auc: 0.5000<br>
Epoch 5/5<br>
385/385 [==============================] - 1852s 5s/step - loss: 0.6933 - auc: 0.4908 - val_loss: 0.6933 - val_auc: 0.5000<br>


We chose not to proceed with this model as we were getting very low accuracies from the training set and each iteration of model training takes a few hours as we lack computation capacity.