In [2]:
#from collections import deque

import numpy as np
import pandas as pd

# To evaluate model predictions
from sklearn.metrics import mean_squared_error

# To create deep learning models
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Regression network: six identical 64-unit ReLU hidden layers followed by a
# single-unit output layer with no activation.
myModel = keras.Sequential([
    *(layers.Dense(64, activation='relu') for _ in range(6)),
    layers.Dense(1),
])

# Optimise mean absolute error with Adam at a learning rate of 0.001.
myModel.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error',
)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Load the 'Books and Book-Titles' catalogue.
# `usecols` keeps pandas from parsing columns that are never used; the explicit
# column selection afterwards pins the column order independently of the file.
df_books_and_titles = pd.read_csv('books.csv', usecols=['book_id', 'title'])
df_books_and_titles = df_books_and_titles[['book_id', 'title']]
print("\n\nSample of the 'Books and Book-Titles' dataframe\n")
print(df_books_and_titles.sample(5))

# Create a dictionary mapping 'book_id' to 'title'.
# Built in a single pass over the two columns rather than one `.loc` lookup per
# row, which is faster and no longer relies on the frame having a 0..n-1 index.
# If a book_id occurs more than once, the last title wins.
di_book_title = dict(zip(df_books_and_titles['book_id'],
                         df_books_and_titles['title']))

# Load the 'User-ID ratings of Book-ID'
df_user_book_ratings = pd.read_csv('ratings-books.csv')
print("\n\nSample of the 'User-ID ratings of Book-ID' dataframe\n")
print(df_user_book_ratings.sample(5))


print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Filter sparse books: keep ids with strictly more than `min_book_ratings` ratings
min_book_ratings = 100
book_rating_counts = df_user_book_ratings['book_id'].value_counts()
filter_books = book_rating_counts[book_rating_counts > min_book_ratings].index

# Filter sparse users: keep ids with strictly more than `min_user_ratings` ratings
min_user_ratings = 100
user_rating_counts = df_user_book_ratings['user_id'].value_counts()
filter_users = user_rating_counts[user_rating_counts > min_user_ratings].index

# Actual filtering: a rating row survives only if BOTH its book and its user
# passed their threshold.
keep_rows = (df_user_book_ratings['book_id'].isin(filter_books)
             & df_user_book_ratings['user_id'].isin(filter_users))
df_filtered = df_user_book_ratings[keep_rows]

# Drop the helpers so they do not linger in the notebook namespace
del filter_books, filter_users, min_book_ratings, min_user_ratings
del book_rating_counts, user_rating_counts, keep_rows
print('Shape User-Ratings unfiltered:\t{}'.format(df_user_book_ratings.shape))
print('Shape User-Ratings filtered:\t{}'.format(df_filtered.shape))

# nunique() counts the distinct ids directly; there is no need to aggregate
# every group of the filtered frame just to take the length of the result.
print("\n unique user_id counts:", df_filtered['user_id'].nunique())
print("\n unique book_id counts:", df_filtered['book_id'].nunique())

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Number of trailing rows held out for testing
n = 100000

# Positional split: the last n rows form the test set, everything before
# them is used for training.
# NOTE(review): rows are taken in their existing order, with no shuffling
# beforehand -- confirm that this is the intended hold-out strategy.
df_train, df_test = df_filtered.iloc[:-n], df_filtered.iloc[-n:]
print(df_train.shape, df_test.shape)
print(df_train.sample(25))

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Build user-id and book-id lookup tables that re-enumerate the raw ids as
# consecutive integers 0, 1, 2, ... following the order of the unique ids
# found in df_filtered.
unique_user_ids = df_filtered['user_id'].unique()
user_id_mapping = dict(zip(unique_user_ids, range(len(unique_user_ids))))

unique_book_ids = df_filtered['book_id'].unique()
book_id_mapping = dict(zip(unique_book_ids, range(len(unique_book_ids))))

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Translate the raw user and book ids of the training rows into their
# re-enumerated ids via the lookup tables.
train_user_data = df_train['user_id'].map(user_id_mapping)
train_book_data = df_train['book_id'].map(book_id_mapping)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Two-column feature frame for the network: column "0" is the user,
# column "1" is the book.
data_feed_into_ann = pd.DataFrame({
    "0": train_user_data,
    "1": train_book_data,
})
print(type(data_feed_into_ann))
print(data_feed_into_ann.head(25))
print(data_feed_into_ann.sample(25))

# Plain array form of the feature frame, as passed to the model
data_values = data_feed_into_ann.to_numpy()
print(data_values)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Training hyper-parameters
batch_size, epochs, validation_split = 1024, 5, 0.1

# Fit the network on the (user, book) feature pairs against the ratings,
# with the given fraction of the training rows set aside for validation.
myModel.fit(
    x=data_values,
    y=df_train['rating'].values,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=validation_split,
    verbose=1,
)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Translate the raw ids of the held-out rows with the user/book lookup tables
test_user_data = df_test['user_id'].map(user_id_mapping)
test_book_data = df_test['book_id'].map(book_id_mapping)

# Two-column feature frame (column "0" = user, column "1" = book) and its
# plain array form for the model
test_data_feed_into_ann = pd.DataFrame({
    "0": test_user_data,
    "1": test_book_data,
})
test_data_values = test_data_feed_into_ann.to_numpy()

print(test_data_values)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Test model by making predictions on test data.
# The predictions are used as returned by the model: they are NOT clipped
# into the 1.0-5.0 range before scoring.
y_pred = myModel.predict(test_data_values)

# get true labels
y_true = df_test['rating'].values

# Compute RMSE: square root of the mean squared error between the true
# ratings and the predictions
rmse = np.sqrt(mean_squared_error(y_true=y_true, y_pred=y_pred))
print('\n\nTesting Result: {:.4f} RMSE'.format(rmse))

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Raw model output for the test rows
print(y_pred)

# Round the predictions to three decimals, then pull the first (index 0)
# column out into a flat list of scalars.
y_pred_temp = np.round(y_pred, 3)
y_pred_temp1 = list(y_pred_temp[:, 0])

# Side-by-side table of predicted and actual ratings. The two id columns hold
# the re-enumerated ids (the mapped values), not the raw ids from the CSV.
results_df = pd.DataFrame(
    {
        'User ID': test_user_data.values,
        'Book ID': test_book_data.values,
        'Predicted Rating': y_pred_temp1,
        'Actual Rating': y_true,
    }
)

print(results_df.head(20))
print(results_df.sample(25))



print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#







- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


Sample of the 'Books and Book-Titles' dataframe

      book_id                                              title
771       772                                       The Republic
7949     7950  Hot, Flat, and Crowded: Why We Need a Green Re...
4565     4566         With the Old Breed: At Peleliu and Okinawa
7692     7693                                   How Google Works
6564     6565                             Where Things Come Back


Sample of the 'User-ID ratings of Book-ID' dataframe

         user_id  book_id  rating
2584865    33493     3464       4
407313      8305       48       4
3379423    40520     2666       4
3404641    38066     7025       2
2767751    35249      665       3

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Shape User-Ratings unfiltered:	(5976479, 3)
Shape User-Ratings filtered:	(4508993, 3)

 unique use