In [42]:
# To create deep learning models
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import pandas as pd
#from collections import deque

import numpy as np

from datetime import datetime

from sklearn.utils import shuffle


def PrintCurrentTime():
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print("\n- - - - - - - - - - - - - - -")
    print("| Current Time = ", current_time," |")
    print("- - - - - - - - - - - - - - -")
    
#----------------------------------------------------------------------#
PrintCurrentTime()

myModel = keras.Sequential([
      layers.Dense(128, activation='relu'),
      layers.Dense(32, activation='relu'),
      layers.Dense(5,activation='softmax')
  ])

myModel.compile(loss='categorical_crossentropy',
                optimizer=tf.keras.optimizers.Adam(0.001),
                metrics=['accuracy'])

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Load the 'Books and Book-Titles' 
df_books_and_titles = pd.read_csv('dataset\\books.csv')
df_books_and_titles = df_books_and_titles[['book_id','title']]
print("\n\nSample of the 'Books and Book-Titles' dataframe\n")
print(df_books_and_titles.sample(5))

# Create a dictionary mapping 'book_id' to 'title'
di_book_title = {}
for i in range(len(df_books_and_titles)):
    bk_id = df_books_and_titles.loc[i,'book_id']
    title = df_books_and_titles.loc[i,'title']
    di_book_title[bk_id] = title

rev_di_book_title = {value : key for (key, value) in di_book_title.items()} # 'title' to 'book_id'

# Load the 'User-ID ratings of Book-ID'
df_user_book_ratings = pd.read_csv('dataset\\ratings-books.csv')
df_user_book_ratings = shuffle(df_user_book_ratings)

print("\n\nSample of the 'User-ID ratings of Book-ID' dataframe\n")
print(df_user_book_ratings.sample(5))


print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Filter sparse books
min_book_ratings = 100
filter_books = (df_user_book_ratings['book_id'].value_counts()>min_book_ratings)
filter_books = filter_books[filter_books].index.tolist()

# Filter sparse users
min_user_ratings = 100
filter_users = (df_user_book_ratings['user_id'].value_counts()>min_user_ratings)
filter_users = filter_users[filter_users].index.tolist()

# Actual filtering
df_filtered = df_user_book_ratings[(df_user_book_ratings['book_id'].isin(filter_books)) & \
                                   (df_user_book_ratings['user_id'].isin(filter_users))]

del filter_books, filter_users, min_book_ratings, min_user_ratings
print('Shape User-Ratings unfiltered:\t{}'.format(df_user_book_ratings.shape))
print('Shape User-Ratings filtered:\t{}'.format(df_filtered.shape))

print("\n unique user_id counts:", len(df_filtered.groupby(['user_id']).count()))
print("\n unique book_id counts:", len(df_filtered.groupby(['book_id']).count()))

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Testingsize
n = 100000

# Split train- & testset
df_train = df_filtered[:-n]
df_test = df_filtered[-n:]
print("df_train shape = ",df_train.shape, "df_test shape = ",df_test.shape)
print("sample of df_train follows:")
print(df_train.sample(5))

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# Create user and movie-id mapping to convert to numbers
user_id_mapping = {id:i for i, id in enumerate(df_filtered['user_id'].unique())}
#print(user_id_mapping) # user_id_mapping is a dictionary that simply re-enumerates userIDs to sequential numbers 0,1,2,3
book_id_mapping = {id:i for i, id in enumerate(df_filtered['book_id'].unique())}

# get the reverse of these mappings
rev_user_id_mapping = {value : key for (key, value) in user_id_mapping.items()}
rev_book_id_mapping = {value : key for (key, value) in book_id_mapping.items()}

book_names = [name for idx,name in di_book_title.items()]
book_names.sort()

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

# use dataframe map function to map users & movies to mapped ids based on above mapping
train_user_data = df_train['user_id'].map(user_id_mapping)
train_book_data = df_train['book_id'].map(book_id_mapping)
#print("\ntype(train_user_data) = ",type(train_user_data))

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

data_feed_into_ann = pd.DataFrame({"user_id":train_user_data,"book_id":train_book_data})
#print("\ntype(data_feed_into_ann) = ",type(data_feed_into_ann))
#print("sample of data_feed_into_ann follows:")
#print(data_feed_into_ann.sample(10))

data_feed_into_ann_values = data_feed_into_ann.values
print("\nThe 'Two Features' that will be fed into the ANN (prior to scaling) follows:\n")
print(data_feed_into_ann_values)

#----------------------------------------------------------------------#

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
data_feed_into_ann_values_scaled = sc.fit_transform(data_feed_into_ann_values)
#print("\ntype(data_feed_into_ann_values_scaled) = ",type(data_feed_into_ann_values_scaled))
print("\nThe 'Two Features' that will be fed into the ANN (after scaling) follows:\n")
print(data_feed_into_ann_values_scaled)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

PrintCurrentTime()
print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")








- - - - - - - - - - - - - - -
| Current Time =  13:40:13  |
- - - - - - - - - - - - - - -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


Sample of the 'Books and Book-Titles' dataframe

      book_id                                              title
849       850  Mr. Penumbra's 24-Hour Bookstore (Mr. Penumbra...
8401     8402               Archangel's Blade (Guild Hunter, #4)
4716     4717  The Shunning (The Heritage of Lancaster County...
4455     4456                      Fallen in Love (Fallen, #3.5)
3938     3939                                     History of Art


Sample of the 'User-ID ratings of Book-ID' dataframe

         user_id  book_id  rating
1099208    16770     4994       5
796026     12991       53       4
4415989    32567     3138       3
2472504     7042       48       5
3342606    28007      172       4

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Shape User-Ratings unfiltered:	(5976479, 3)
Shape User-Ratings filtered:	(

In [43]:
print(type(book_names))
print(book_names[0:50])
print(book_names[1000:1050])

<class 'list'>
[' Angels (Walsh Family, #3)', '"حكايات فرغلي المستكاوي "حكايتى مع كفر السحلاوية', '#GIRLBOSS', "'Salem's Lot", "'Salem's Lot", "'Tis (Frank McCourt, #2)", '1,000 Places to See Before You Die', '1/4 جرام', '10% Happier: How I Tamed the Voice in My Head, Reduced Stress Without Losing My Edge, and Found Self-Help That Actually Works', '100 Bullets, Vol. 1: First Shot, Last Call', '100 Love Sonnets', '100 Selected Poems', "10th Anniversary (Women's Murder Club, #10)", '11 Birthdays (Willow Falls, #1)', '11/22/63', "11th Hour (Women's Murder Club, #11)", "12th of Never (Women's Murder Club, #12)", '13 Gifts (Willow Falls, #3)', '13 Hours: The Inside Account of What Really Happened In Benghazi', '13 Little Blue Envelopes (Little Blue Envelope, #1)', '14', '1421: The Year China Discovered America', '1491: New Revelations of the Americas Before Columbus', '1493: Uncovering the New World Columbus Created', '14th Deadly Sin (Women’s Murder Club, #14)', "15th Affair (Women's Murde

In [44]:

PrintCurrentTime()

#"""
# One Hot Encoding the "rating" column
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [2])], remainder='passthrough')
x1 = ct.fit_transform(df_train)
#print("\ntype(x1) = ",type(x1),"shape= ",x1.shape,"x1 is df_train after OneHotEncoder for rating column")

#x2 = np.array(x1)
#print("\ntype(x2) = ",type(x2),"shape= ",x1.shape)
#print(x2[:15])
x2 = x1.astype(np.int32)
#print("\ntype(x2) = ",type(x2),"shape= ",x1.shape, "x2 is x1 just converting to integer types")
#print("First 5 columns are 'rating', then user_id, then book_id")
#print(x2[:15])

x_hotencode = x2[:,0:5]
#print("\ntype(x_hotencode) = ",type(x_hotencode),"shape= ",x1.shape)
print("\ny_train after onehotencoding of the 'ratings' column (first 15 rows are displayed)\n")
print(x_hotencode[:15])



PrintCurrentTime()



- - - - - - - - - - - - - - -
| Current Time =  13:40:29  |
- - - - - - - - - - - - - - -

y_train after onehotencoding of the 'ratings' column (first 15 rows are displayed)

[[0 0 0 1 0]
 [0 0 1 0 0]
 [0 0 0 0 1]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 1 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 1 0]
 [0 0 0 1 0]
 [0 1 0 0 0]
 [0 0 0 1 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 1]]

- - - - - - - - - - - - - - -
| Current Time =  13:40:29  |
- - - - - - - - - - - - - - -


In [45]:
#----------------------------------------------------------------------#
PrintCurrentTime()

batch_size = 1024
epochs = 5
validation_split = 0.1

myModel.fit(data_feed_into_ann_values_scaled, x_hotencode,
            batch_size=batch_size, 
            epochs=epochs,
            validation_split=validation_split,
            verbose=1)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
PrintCurrentTime()



- - - - - - - - - - - - - - -
| Current Time =  13:40:30  |
- - - - - - - - - - - - - - -
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - - - - - - - - - -
| Current Time =  13:40:58  |
- - - - - - - - - - - - - - -


In [46]:
import pickle
#pickle.dump(myModel, open("MyPicklefile", 'wb'))
# Get error 'cannot pickle '_thread.RLock' object'

#loaded_model = pickle.load(open(MyPicklefile, 'rb'))

# Use keras to save the model to folder 'SavedModels'
# It created file 'saved_model.pb' and also two subfolders 'assets' and 'variables'
myModel.save("SavedModels")
#sc.save("my_sc.p")
pickle.dump(sc, open("my_sc.p", 'wb'))
pickle.dump(book_names, open("my_book_names.p", 'wb'))
pickle.dump(rev_di_book_title, open("my_rev_di_book_title.p", 'wb'))
pickle.dump(book_id_mapping, open("my_book_id_mapping.p", 'wb'))
pickle.dump(user_id_mapping, open("my_user_id_mapping.p", 'wb'))

#myModel = "hey-hey"  # just set it to some dummie value

#myModel = keras.models.load_model("SavedModels")  #WORKS!!!

INFO:tensorflow:Assets written to: SavedModels\assets


In [47]:
#----------------------------------------------------------------------#
# Now repeat exactly what we did to prepare the data (i.e. the train data) 
# that gets fed into the ANN, but now do it for the test data

test_user_data = df_test['user_id'].map(user_id_mapping)
test_book_data = df_test['book_id'].map(book_id_mapping)

test_data_feed_into_ann = pd.DataFrame({"user_id":test_user_data,"book_id":test_book_data})
test_data_feed_into_ann_values = test_data_feed_into_ann.values

test_data_feed_into_ann_values_scaled = sc.transform(test_data_feed_into_ann_values)

print("test data that will get fed into the 'predict' method")
print("first column is the scaled 'user_id', while second column is scaled 'book_id'")
print(test_data_feed_into_ann_values_scaled)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
PrintCurrentTime()

test data that will get fed into the 'predict' method
first column is the scaled 'user_id', while second column is scaled 'book_id'
[[ 1.64972196  0.63688379]
 [-0.46396415  2.32600679]
 [ 0.52285813  0.32003888]
 ...
 [-0.52207467 -0.95292962]
 [ 0.84981385 -0.23626684]
 [-1.05092842  1.26799414]]

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - - - - - - - - - -
| Current Time =  13:41:02  |
- - - - - - - - - - - - - - -


In [48]:
y_pred = myModel.predict(test_data_feed_into_ann_values_scaled)
#y_pred = list(map(lambda x: 1.0 if x < 1 else 5.0 if x > 5.0 else x, y_pred))

print("\ny_pred follows:type(y_pred)=",type(y_pred))
print("Each prediction consists of 5 probabilities")
print(y_pred)

def GetWeightedValue(arr):
    weighted_sum = 0.0
    for idx in range(len(arr)):
        rating = idx + 1
        weighted_sum = weighted_sum + rating * arr[idx]
    return weighted_sum

y_pred_arr = []
        
for i in range(len(y_pred)):
    wt_sum = GetWeightedValue(y_pred[i])
    y_pred_arr.append(wt_sum)

y_pred_arr = np.array(y_pred_arr)

# get true labels
y_true = df_test['rating'].values

PrintCurrentTime()


y_pred follows:type(y_pred)= <class 'numpy.ndarray'>
Each prediction consists of 5 probabilities
[[0.01745463 0.0605141  0.24508111 0.36896268 0.30798742]
 [0.01768521 0.06099328 0.25550786 0.36669752 0.2991161 ]
 [0.01936655 0.06236345 0.2491574  0.36416897 0.30494365]
 ...
 [0.0239382  0.06036066 0.22266977 0.3482039  0.3448275 ]
 [0.01985889 0.06323113 0.2462273  0.36292318 0.30775946]
 [0.0179888  0.06108334 0.25308672 0.36558148 0.30225962]]

- - - - - - - - - - - - - - -
| Current Time =  13:41:06  |
- - - - - - - - - - - - - - -


In [49]:

#----------------------------------------------------------------------#

from sklearn.metrics import mean_squared_error

# Test model by making predictions on test data




#  Compute RMSE
rmse = np.sqrt(mean_squared_error(y_pred=y_pred_arr, y_true=y_true))
print('\n\nTesting Result: {:.4f} RMSE'.format(rmse))

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

#print("test_user_data stuff follows:", type(test_user_data))
#print(test_user_data)
#print(test_user_data.values)

#print("\n\ntest_book_data stuff follows:", type(test_book_data))
#print(test_book_data)
#print(test_book_data.values)

y_pred_temp = np.round(y_pred_arr, 2)

# go back to the original user id
orig_user_id = [rev_user_id_mapping[test_user_data.values[i]] for i in range(len(test_user_data.values))]

# go back to the original book id
orig_book_id = [rev_book_id_mapping[test_book_data.values[i]] for i in range(len(test_book_data.values))]


vals = test_book_data.values
test_book_names = [di_book_title[orig_book_id[i]][0:30] for i in range(len(orig_book_id))]
    
print(test_book_names[:5])
print("\n\n")

results_df = pd.DataFrame({
    'User ID': orig_user_id,
    'Book ID': orig_book_id,
    'Book Name': test_book_names,    
    'Predicted': y_pred_temp,
    'Actual': y_true
})

print("\nFirst 20 rows of results_df\n")
print(results_df.head(20))
print("\nRandom sample of 25 rows of results_df\n")
print(results_df.sample(25))



print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")

#----------------------------------------------------------------------#

PrintCurrentTime()






Testing Result: 0.9889 RMSE

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
['The Regulators', 'The Strange Library', 'Gone for Good', 'A Light in the Attic', 'Into Thin Air: A Personal Acco']




First 20 rows of results_df

    User ID  Book ID                       Book Name  Predicted  Actual
0      7966     2352                  The Regulators       3.89       3
1     39399     4516             The Strange Library       3.87       4
2     44001     3277                   Gone for Good       3.87       5
3     44987      278            A Light in the Attic       3.91       5
4      2860      236  Into Thin Air: A Personal Acco       3.91       4
5     27767     1168                 The Tao of Pooh       3.96       4
6     47869     1096  America (The Book): A Citizen'       3.86       3
7     48414     9042                     The Painter       3.87       3
8     41754      615                    This Lullaby       3.93       5
9     47839     3020  The Metamor

In [50]:
print(di_book_title[23])
print(di_book_title[3517])


Harry Potter and the Chamber of Secrets (Harry Potter, #2)
The 8th Confession (Women's Murder Club, #8)


In [51]:
# Test using user_id = 9106 and book_id 136
test_user_data = [user_id_mapping[9106]]
test_book_data = [book_id_mapping[136]]
print(test_user_data)
print(test_book_data)
print(di_book_title[book_id_mapping[136]])
print(rev_di_book_title[di_book_title[book_id_mapping[136]]])

number_of_user_id = len(user_id_mapping)
number_of_book_id = len(book_id_mapping)
print("Number of User ID: ",number_of_user_id, "Number of Book ID: ",number_of_book_id)
print(rev_user_id_mapping[35])

myS = "U R D Man"
if myS in rev_di_book_title:
    print("yep")
else:
    print("nope")
myS = "The Wise Man's Fear (The Kingkiller Chronicle, #2)"
if myS in rev_di_book_title:
    print("yep")
    print(f"book id for book {myS} = ",rev_di_book_title[myS])
else:
    print("nope")

# Note to self: The # of user-id is from 0 to 36198, but to "track" that back 
# to the DB, need to look at rev_user_id_mapping.
# By way of an example, although id = 35 (prior to scaling of course) may be fed into the ANN,
# it may NOT appear in the DB!  What does appear in the DB is rev_user_id_mapping[35], 
# which turns out to be 36705

[689]
[2057]
The Litigators
2057
Number of User ID:  36199 Number of Book ID:  9466
6466
nope
yep
book id for book The Wise Man's Fear (The Kingkiller Chronicle, #2) =  307


In [52]:
# Now do what we did for the TruncatedSVD project.
# That is let someone pick a "user_id/book_id" pair and we tell them how this user_id 
# would rate this particular book_id

# 36828     5010      Inca Gold (Dirk Pitt, #12) 3.89

# Test using user_id = 9106 and book_id 136
# First, what are the valid user_id?
number_of_user_id = len(user_id_mapping)
actual_uid = 36828
uid = user_id_mapping[actual_uid]
actual_bid = 5010
number_of_book_id = len(book_id_mapping)
bid = book_id_mapping[actual_bid]

test_user_data = [uid]
test_book_data = [bid]
print(test_user_data)
print(test_book_data)


test_data_feed_into_ann = pd.DataFrame({"user_id":test_user_data,"book_id":test_book_data})
test_data_feed_into_ann_values = test_data_feed_into_ann.values

test_data_feed_into_ann_values_scaled = sc.transform(test_data_feed_into_ann_values)

print("test data that will get fed into the 'predict' method")
print("first column is the scaled 'user_id', while second column is scaled 'book_id'")
print(test_data_feed_into_ann_values_scaled)

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -")
PrintCurrentTime()

# get the rating for this book
y_pred = myModel.predict(test_data_feed_into_ann_values_scaled)
#y_pred = list(map(lambda x: 1.0 if x < 1 else 5.0 if x > 5.0 else x, y_pred))

print("\ny_pred follows:type(y_pred)=",type(y_pred))
print("Each prediction consists of 5 probabilities")
print(y_pred)



y_pred_arr = []
        
for i in range(len(y_pred)):
    wt_sum = GetWeightedValue(y_pred[i])
    y_pred_arr.append(wt_sum)

y_pred_arr = np.array(y_pred_arr)

print("Prediction: ",y_pred_arr)

PrintCurrentTime()

[4513]
[4169]
test data that will get fed into the 'predict' method
first column is the scaled 'user_id', while second column is scaled 'book_id'
[[-1.28577176  0.75381977]]

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

- - - - - - - - - - - - - - -
| Current Time =  13:41:18  |
- - - - - - - - - - - - - - -

y_pred follows:type(y_pred)= <class 'numpy.ndarray'>
Each prediction consists of 5 probabilities
[[0.01904849 0.06266943 0.2519381  0.36317402 0.30317003]]
Prediction:  [3.86874789]

- - - - - - - - - - - - - - -
| Current Time =  13:41:18  |
- - - - - - - - - - - - - - -


In [53]:
# sort a list of tuples on the 2nd element
def Sort_Tuple(tup): 
  
    # reverse = None (Sorts in Ascending order) 
    # key is set to sort using second element of 
    # sublist lambda has been used 
    tup.sort(key = lambda x: x[1],reverse = False) 
    #tup.sort(key=sortSecond,reverse = False)
    return tup 

# book_name ==> index into book_names list
def get_index_for_book(book_name):
    for i in range(num_books_in_list):
        if book_names[i] == book_name:
            return i

    return -1

# book_name ==> ordered list of tuples, with descending
# values of cosine-similarity (each ordered tuple is of the form
# (book name,cos-sim value)
def get_ordered_list_for_book(book_name):
    ordered_list = []
    idx = get_index_for_book(book_name)
    if (idx < 0):
        return ordered_list
    unordered_list = [(book_names[i],corr_mat[idx,i]) for i in range(len(book_names))]
    
    ordered_list = Sort_Tuple(unordered_list)

    return ordered_list

# book_name ==> index of a book in book_names that is closest
# to book_name
def get_first_index_closes_to_name(book_name):
    short = book_name[0:7].upper()
    for idx in range(len(book_names)):
        if book_names[idx][0:7].upper().find(short) >= 0:
            return idx

    short = book_name[0:6].upper()
    for idx in range(len(book_names)):
        if book_names[idx][0:6].upper().find(short) >= 0:
            return idx

    short = book_name[0:5].upper()
    for idx in range(len(book_names)):
        if book_names[idx][0:5].upper().find(short) >= 0:
            return idx

    short = book_name[0:4].upper()
    for idx in range(len(book_names)):
        if book_names[idx][0:4].upper().find(short) >= 0:
            return idx

    short = book_name[0:3].upper()
    for idx in range(len(book_names)):
        if book_names[idx][0:3].upper().find(short) >= 0:
            return idx

    short = book_name[0:2].upper()
    for idx in range(len(book_names)):
        if book_names[idx][0:2].upper().find(short) >= 0:
            return idx

    short = book_name[0:1].upper()
    for idx in range(len(book_names)):
        if book_names[idx][0:1].upper().find(short) >= 0:
            return idx

    
    return -1

# book_name ==> get list of 20 books that are before book_name and
# 20 books that are after book_name
def get_books_close_by_in_name(book_name):
    idx = get_first_index_closes_to_name(book_name)
    the_list = []
    if idx < 0:
        return the_list
    
    lower = idx - int(max_books_to_display/2)
    if lower < 0:
        lower = 0
    higher = idx + int(max_books_to_display/2)
    if higher >= num_books_in_list:
        higher = num_books_in_list - 1
    for i in range(lower,higher):
        the_list.append(book_names[i])

    return the_list

def GetUserPredRatingOfBook(uid,bid):
    # I'm trying to be consistent - when I write uid or user_id I mean the 
    # user id in the DB, while user_index is what user_id gets mapped to 
    # (i.e. consecutive numbers from 0 thru 36198).
    # Same for bid - this is the number that appears in the DB, while book_index 
    # is what bid gets mapped to (i.e. consecutive numbers from 0 to 9465)
    test_user_data = [uid]
    test_book_data = [bid]


    test_data_feed_into_ann = pd.DataFrame({"user_id":test_user_data,"book_id":test_book_data})
    test_data_feed_into_ann_values = test_data_feed_into_ann.values

    test_data_feed_into_ann_values_scaled = sc.transform(test_data_feed_into_ann_values)

    # get the rating for this book
    y_pred = myModel.predict(test_data_feed_into_ann_values_scaled)

    wt_sum = GetWeightedValue(y_pred[0])
    return wt_sum


In [54]:
# Test using 'saved off files'
myModel = keras.models.load_model("SavedModels")

sc = pickle.load( open( "my_sc.p", "rb" ) )
book_names = pickle.load( open( "my_book_names.p", "rb" ) )
rev_di_book_title = pickle.load( open( "my_rev_di_book_title.p", "rb" ) )
book_id_mapping = pickle.load( open( "my_book_id_mapping.p", "rb" ) )
user_id_mapping = pickle.load(open("my_user_id_mapping.p", 'rb'))


max_books_to_display = 20
num_books_in_list = len(book_names)

def GetMainMenuOption():
    print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n")
    print("1. Enter '1' to enter a user_id and bookname in order to get predicted rating")
    print("2. Enter '2' to see if a book is in the list")
    print("3. Enter '3' to quit")
    answer = int(input("\nEnter 1, 2, or 3: "))
    if answer < 1 or answer > 3:
        answer = 3
    return answer

    

answer = GetMainMenuOption()
while answer != 3:
    if answer == 1:
        print("\n")
        user_index = int(input(f"\nEnter a user index (0 thru {number_of_user_id-1}): "))
        book_name = input(f"\nEnter a book name: ");
        if (user_index >= 0) and (user_index < number_of_user_id) and (book_name in rev_di_book_title):
            #uid = rev_user_id_mapping[user_index]
            book_id = rev_di_book_title[book_name]
            book_index = book_id_mapping[book_id]
            
            pred_rating = GetUserPredRatingOfBook(user_index,book_index)          

            print(f"\n\nUser Index: {user_index} (user id: {uid}), book: '{book_name}' (book id: {book_id}), Predicted Rating: {pred_rating:.3f}")                    
        else:
            if user_index < 0:
                print("\nuser index must be >= 0")
            elif user_index >= number_of_user_id:
                print(f"\nuser index must be <= {number_of_user_id-1}")
            else:
                print(f"\nThe book name you entered '{book_name}' is not in our database")

                
    elif answer == 2:
        print("\n")
        title = input("\nEnter book title to search for: ")
        if title in rev_di_book_title:
            print(f"\nbook title '{title}' is in our database")
        else:
            the_list = get_books_close_by_in_name(title)
            print(f"\nThe book '{title}' is NOT in our database, here are books that are close-by\n")
            for i in range(len(the_list)):
                print(the_list[i])
            print("---------------------")
    
    answer = GetMainMenuOption()           
    

print("\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n")
print("DONE!")


- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

1. Enter '1' to enter a user_id and bookname in order to get predicted rating
2. Enter '2' to see if a book is in the list
3. Enter '3' to quit

Enter 1, 2, or 3: 1



Enter a user index (0 thru 36198): 149

Enter a book name: Are you ok

The book name you entered 'Are you ok' is not in our database

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

1. Enter '1' to enter a user_id and bookname in order to get predicted rating
2. Enter '2' to see if a book is in the list
3. Enter '3' to quit

Enter 1, 2, or 3: 2



Enter book title to search for: Are You ok

The book 'Are You ok' is NOT in our database, here are books that are close-by

Appointment with Death (Hercule Poirot, #19)
April 1865: The Month That Saved America
Apt Pupil
Arcadia
Arcadia
Arch of Triumph: A Novel of a Man Without a Country
Archangel's Blade (Guild Hunter, #4)
Archangel's Consor