In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from tqdm import tqdm
from sklearn.neighbors import NearestNeighbors
import numpy as np
import random
import pickle 

def scrape_goodreads_ratings(user_id, max_pages=10):
    """
    Scrape a user's star ratings from Goodreads.
    
    Args:
    - user_id (str): Goodreads user ID or profile suffix.
    - max_pages (int): Maximum number of pages to scrape (each page contains ~30 books).
    
    Returns:
    - pd.DataFrame: A DataFrame containing book titles and ratings.
    """
    base_url = f"https://www.goodreads.com/review/list/{user_id}?shelf=read"
    headers = {"User-Agent": "Mozilla/5.0"}
    books = []

    for page in range(1, max_pages + 1):
        url = f"{base_url}&page={page}"
        response = requests.get(url, headers=headers)
        
        if response.status_code != 200:
            print(f"Failed to fetch page {page}. Status code: {response.status_code}")
            break

        soup = BeautifulSoup(response.content, "html.parser")
        
        # Find all book entries in the table
        rows = soup.find_all("tr", class_="bookalike review")
        if not rows:
            print("No more data found.")
            break

        for row in rows:
            try:
                title = row.find("td", class_="field title").a.text.strip()
                rating_element = row.find("td", class_="field rating")
                rating = rating_element.find("span", class_="staticStars").get("title", "No rating")
                stars = map_rating(rating)
                books.append({"Title": title, "Rating": stars, "User_id": user_id})
#                 print(title, rating, stars)
            except AttributeError:
                # Handle rows with missing data
                continue

        print(f"Page {page} scraped successfully.")
        time.sleep(random.uniform(1, 5))  # Be kind to the server and avoid being blocked

    # Return data as a pandas DataFrame
    return pd.DataFrame(books)



In [2]:
def map_rating(phrase):
    rating_map = {
        "liked it": 3,
        "really liked it": 4,
        "it was ok": 2, 
        "it was amazing": 5, 
        "did not like it": 1,
    }
    
    return rating_map.get(phrase, "Invalid rating")  # Default to "Invalid rating" if the phrase isn't in the dictionary


In [3]:
# if __name__ == "__main__":
# #     user_id = "6688207"  # Replace with the Goodreads user ID or profile suffix
# #     for user_id in tqdm(['30181442', '75009563', '11345366', '110912303', '113964939', '11215896', '53701594', '4622890', '93628736', '176180116']):
# #     for user_id in tqdm(['2974095', '4622890', '28953843', '16174645', '4159922', '4125660', '54886546', '16912659', '260116', '4685500', '21865425']):
# #     for user_id in tqdm(['53701594', '27709782', '7566229', '16652861', '30817744', '56259255', '4125660', '60964126', 
# #                          '176167767', '28510930', '1029975', '131020767', '28862120', '88713906', '160141433', '41097916', 
# #                          '20809863', '69519261', '24017481', '7376365', '75941333', '13571407', '106618742', '17792052',
# #                          '3534528', '130656897', '7474475', '4125412', '6336365', '6026811', '3438047']):
#     for user_id in ['169695556']:
#         print("User_id = ", user_id)
#         max_pages = 30  # Adjust based on expected data
#         ratings_data = scrape_goodreads_ratings(user_id, max_pages)

#         if not ratings_data.empty:
# #             print(ratings_data.head())
# #             ratings_data.to_csv("goodreads_ratings.csv", index=False)
#             ratings_data.to_csv('goodreads_ratings.csv', mode='a', header=False, index=False)
#             print("Data saved to goodreads_ratings.csv.")
#         else:
#             print("No data retrieved.")


In [4]:
df = pd.read_csv('goodreads_ratings_series.csv')
print(df.shape)
df = df.drop_duplicates()
# Print the entire DataFrame
print(df)

duplicate_count = df['Title'].duplicated().sum()
print("Number of books with at least two people rating it:", duplicate_count)
duplicate_counts_per_value = df['Title'].value_counts()
print(duplicate_counts_per_value)
print("Number of unique books: ", df['Title'].nunique())
num_users = df['User_id'].nunique()
user_ids = list(df['User_id'].unique())
print("number of users is: ", num_users)
print("user_ids = ", user_ids)

(187988, 6)
                                                    Title Rating    User_id  \
0                                       I Am Watching You      3  169695558   
1       Three to Get Deadly\n        (Stephanie Plum, #3)      3  169695558   
2       Before the Coffee Gets Cold\n        (Before t...      4  169695558   
3       Dark Sacred Night\n        (Renée Ballard, #2;...      4  169695558   
4         Two for the Dough\n        (Stephanie Plum, #2)      4  169695558   
...                                                   ...    ...        ...   
187983                           A Streetcar Named Desire      5    7519276   
187984                                   The Great Gatsby      3    7519276   
187985                             The Catcher in the Rye      5    7519276   
187986                              To Kill a Mockingbird      4    7519276   
187987                                Pride and Prejudice      4    7519276   

        Series  First  Suggest  
0     

In [5]:
# # Get a list of top titles in order
# top_titles = duplicate_counts_per_value.index.tolist()
# top_100 = top_titles[:100]

# for title in top_100:
#     print(title)
    
# with open("top_100.pkl", "wb") as file:
#     pickle.dump(top_100, file)

In [6]:
# threshold = 5#num_users * 0.1
# pop_titles = list(duplicate_counts_per_value[duplicate_counts_per_value > threshold].index)
# my_titles = df.loc[df["User_id"] == 169695558, "Title"].tolist()
# # print(my_titles)

# print("pop titles len = ", len(pop_titles))
# print(pop_titles)
# print("my titles len = ", len(my_titles))
# titles = list(set(pop_titles))# + my_titles))

# # #remove Harry Potter titles:
# # titles = [s for s in titles if "Harry Potter" not in s]

# num_titles = len(titles)


# print(titles)
# # print(titles)
# print("num_titles =", num_titles)

# # ratings = np.full((num_users, num_titles), None)
# ratings = np.zeros((num_users, num_titles))

# for index, row in df.iterrows():
#     if row['Title'] in titles:
#         try:
#             ratings[user_ids.index(row['User_id']), titles.index(row["Title"])] = int(row["Rating"])
# #             print("found ", row["Title"])
#         except:
#             pass
        
# print("ratings size = ", ratings.shape)
# ratings = ratings[~np.all(ratings == 0, axis=1)]
# print("ratings size = ", ratings.shape)
# # Save the list to a file
# with open("titles.pkl", "wb") as file:
#     pickle.dump(titles, file)

In [7]:
# # print(df['Suggest'])

# suggest = list(df['Suggest'])
# print(suggest)

# with open("suggest.pkl", "wb") as file:
#     suggest.dump(suggest, file)

In [8]:
# ratings_df = pd.DataFrame(ratings)
# print(ratings_df.shape)
# #delete users that don't have any of these ratings
# # ratings_df = ratings_df.loc[~(ratings_df == 0).all(axis=1)]
# ratings_df = ratings_df.loc[(ratings_df != 0).sum(axis=1) >= 4] #need at least 4 entries to stay
# print(ratings_df.shape)

# # Calculate percentage of non-zero elements
# percentage_nonzero = (np.count_nonzero(ratings_df) / ratings_df.size) * 100
# print("percentage_nonzero =", round(percentage_nonzero, 1), '%')

# # Save the list to a file
# with open("ratings_df.pkl", "wb") as file:
#     pickle.dump(ratings_df, file)


In [9]:
with open("titles.pkl", "rb") as file:
    titles = pickle.load(file)
    
with open("top_100.pkl", "rb") as file:
    top_100 = pickle.load(file)
    
with open("suggest.pkl", "rb") as file:
    suggest = pickle.load(file)
    
num_titles = len(titles)

# Load the list from the file
with open("ratings_df.pkl", "rb") as file:
    ratings_df = pickle.load(file)
    ratings = ratings_df.to_numpy()


In [10]:
# Convert the filled ratings data into a numpy array
ratings_matrix = ratings_df.values

# Initialize KNN (using user-based KNN)
import math
knn = NearestNeighbors(n_neighbors=50, metric='cosine')  # Using cosine similarity  math.ceil(num_users/10)
knn.fit(ratings_matrix)

with open("knn_model.pkl", "wb") as file:
    pickle.dump(knn, file)

# Example: Predict rating for User 0 on Item 2
user_id = 0  # Index of user in the matrix
item_id = 2  # Index of item to predict rating for

# Get the nearest neighbors for user 0 (excluding the user itself)
distances, indices = knn.kneighbors([ratings_matrix[user_id]])

pred_ratings_list = np.array([])
rankings_list = np.array([])
for item_id in range(num_titles):
    # Get the ratings for the neighbors on item 2
    neighbor_ratings = np.array([ratings_matrix[i, item_id] for i in indices[0] if not np.isnan(ratings_matrix[i, item_id])])

    predicted_rating = np.mean(neighbor_ratings[np.nonzero(neighbor_ratings)])
    rankings = np.sum(neighbor_ratings[np.nonzero(neighbor_ratings)])

#     print(predicted_rating)

    pred_ratings_list = np.append(pred_ratings_list, predicted_rating)
    rankings_list = np.append(rankings_list, rankings)
#     print(f"Predicted rating for User {user_id} on Item {item_id}: {predicted_rating}")
    
best_book_rating = np.max(pred_ratings_list)
best_book_idx = np.argmax(pred_ratings_list)

sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
n = 1
for i, idx in enumerate(sorted_indices): 
    if (ratings[0, idx] > 0) or (np.isnan(pred_ratings_list[idx])):
        pass
    else:
        print("#", (n) , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))
        n+=1

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Top books are:
# 1 Dead Ever After
        (Sookie Stackhouse, #13) Rating: 5.0
# 2 Miss Peregrine's Home for Peculiar Children
        (Miss Peregrine's Peculiar Children, #1) Rating: 5.0
# 3 Xenocide
        (Ender's Saga, #3) Rating: 5.0
# 4 Vision in White
        (Bride Quartet, #1) Rating: 5.0
# 5 The Holy Bible: King James Version Rating: 5.0
# 6 Freedom Rating: 5.0
# 7 The Raven Boys
        (The Raven Cycle, #1) Rating: 5.0
# 8 The High King
        (The Chronicles of Prydain, #5) Rating: 5.0
# 9 The Gunslinger
        (The Dark Tower, #1) Rating: 5.0
# 10 Magyk
        (Septimus Heap, #1) Rating: 5.0
# 11 Shadow Puppets
        (The Shadow Series, #3) Rating: 5.0
# 12 A Little Princess Rating: 5.0
# 13 My Ántonia Rating: 5.0
# 14 The Camel Club
        (The Camel Club, #1) Rating: 5.0
# 15 The Little Prince Rating: 5.0
# 16 Franny and Zooey Rating: 5.0
# 17 The Mouse and the Motorcycle
        (Ralph S. Mouse, #1) Rating: 5.0
# 18 Love You Forever Rating: 5.0
# 19 A Bend in t

        (Percy Jackson and the Olympians, #5) Rating: 4.5
# 300 The Other Boleyn Girl
        (The Plantagenet and Tudor Novels, #9) Rating: 4.5
# 301 Divine Secrets of the Ya-Ya Sisterhood Rating: 4.5
# 302 Anya's Ghost Rating: 4.5
# 303 Written in My Own Heart's Blood
        (Outlander, #8) Rating: 4.5
# 304 Wings
        (Wings, #1) Rating: 4.5
# 305 We Should All Be Feminists Rating: 4.5
# 306 A Short History of Nearly Everything Rating: 4.5
# 307 Slaughterhouse-Five Rating: 4.5
# 308 The Icebound Land
        (Ranger's Apprentice, #3) Rating: 4.5
# 309 The Longest Ride Rating: 4.5
# 310 The Hunger Games Trilogy Boxset
        (The Hunger Games, #1-3) Rating: 4.5
# 311 When You Are Engulfed in Flames Rating: 4.5
# 312 Passion
        (Fallen, #3) Rating: 4.5
# 313 Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones Rating: 4.5
# 314 Bridge to Terabithia Rating: 4.5
# 315 The Lost Symbol
        (Robert Langdon, #3) Rating: 4.5
# 316 The Lost City of Faar
    

# 540 Verity Rating: 4.0
# 541 Sing You Home Rating: 4.0
# 542 Dragons of Autumn Twilight
        (Dragonlance: Chronicles, #1) Rating: 4.0
# 543 The Picture of Dorian Gray Rating: 4.0
# 544 How the Grinch Stole Christmas! Rating: 4.0
# 545 The Color of Water: A Black Man's Tribute to His White Mother Rating: 4.0
# 546 Thinner Rating: 4.0
# 547 Baby Proof Rating: 4.0
# 548 Death Note, Vol. 1: Boredom
        (Death Note, #1) Rating: 4.0
# 549 The Innocent Rating: 4.0
# 550 A Midsummer Night’s Dream Rating: 4.0
# 551 A ​Court of Silver Flames
        (A Court of Thorns and Roses, #4) Rating: 4.0
# 552 The Cricket in Times Square
        (Chester Cricket and His Friends, #1) Rating: 4.0
# 553 The Sanatorium
        (Detective Elin Warner, #1) Rating: 4.0
# 554 Wonder
        (Wonder, #1) Rating: 4.0
# 555 One Fish, Two Fish, Red Fish, Blue Fish Rating: 4.0
# 556 The Tao of Pooh Rating: 4.0
# 557 The Year of Living Biblically: One Man's Humble Quest to Follow the Bible as Literally as Pos

# 777 Seabiscuit: An American Legend Rating: 3.0
# 778 And the Mountains Echoed Rating: 3.0
# 779 Flat-Out Love
        (Flat-Out Love, #1) Rating: 3.0
# 780 Snow Falling on Cedars Rating: 3.0
# 781 Lean In: Women, Work, and the Will to Lead Rating: 3.0
# 782 The Girls Rating: 3.0
# 783 Prep Rating: 3.0
# 784 Under the Banner of Heaven: A Story of Violent Faith Rating: 3.0
# 785 The Story of Ferdinand Rating: 3.0
# 786 The Hitchhikerâ€™s Guide to the Galaxy
        (Hitchhiker's Guide to the Galaxy, #1) Rating: 3.0
# 787 Dead Poets Society Rating: 3.0
# 788 The Lottery and Other Stories Rating: 3.0
# 789 An Abundance of Katherines Rating: 3.0
# 790 Memoirs of a Teenage Amnesiac Rating: 3.0
# 791 Jemima J Rating: 3.0
# 792 Dolores Claiborne Rating: 3.0
# 793 A Prayer for Owen Meany Rating: 3.0
# 794 A Widow for One Year Rating: 3.0
# 795 The Portrait of a Lady Rating: 3.0
# 796 Forever... Rating: 3.0
# 797 The Adventures of Huckleberry Finn Rating: 3.0
# 798 Educated Rating: 3.0
# 799 J

In [11]:
#make knn for the similar user part
# Initialize KNN (using user-based KNN)
# import math
knn_30 = NearestNeighbors(n_neighbors=50, metric='cosine')  # Using cosine similarity
knn_30.fit(ratings_matrix)

with open("knn_model_30.pkl", "wb") as file:
    pickle.dump(knn_30, file)

In [12]:
rankings_list

best_book_rating = np.max(rankings_list)
best_book_idx = np.argmax(rankings_list)

sorted_indices = np.argsort(rankings_list)[::-1]
print("Top books are:")
for i, idx in enumerate(sorted_indices): 
    print("#", (i+1) , titles[idx], "Rating:", round(pred_ratings_list[idx], 1), ". Ranking:", rankings_list[idx])

Top books are:
# 1 Harry Potter and the Goblet of Fire
        (Harry Potter, #4) Rating: 4.8 . Ranking: 212.0
# 2 Harry Potter and the Prisoner of Azkaban
        (Harry Potter, #3) Rating: 4.7 . Ranking: 211.0
# 3 Harry Potter and the Deathly Hallows
        (Harry Potter, #7) Rating: 4.7 . Ranking: 208.0
# 4 Harry Potter and the Half-Blood Prince
        (Harry Potter, #6) Rating: 4.6 . Ranking: 208.0
# 5 Harry Potter and the Order of the Phoenix
        (Harry Potter, #5) Rating: 4.5 . Ranking: 192.0
# 6 Harry Potter and the Chamber of Secrets
        (Harry Potter, #2) Rating: 4.3 . Ranking: 189.0
# 7 Harry Potter and the Sorcerer's Stone
        (Harry Potter, #1) Rating: 4.6 . Ranking: 189.0
# 8 The Hunger Games
        (The Hunger Games, #1) Rating: 4.7 . Ranking: 155.0
# 9 Catching Fire
        (The Hunger Games, #2) Rating: 4.6 . Ranking: 123.0
# 10 Mockingjay
        (The Hunger Games, #3) Rating: 4.3 . Ranking: 116.0
# 11 Twilight
        (The Twilight Saga, #1) Rating: 4.0

        (Gallagher Girls, #1) Rating: 5.0 . Ranking: 10.0
# 208 The Mark of Athena
        (The Heroes of Olympus, #3) Rating: 5.0 . Ranking: 10.0
# 209 Miss Peregrine's Home for Peculiar Children
        (Miss Peregrine's Peculiar Children, #1) Rating: 5.0 . Ranking: 10.0
# 210 Divine Rivals
        (Letters of Enchantment, #1) Rating: 5.0 . Ranking: 10.0
# 211 Inheritance
        (The Inheritance Cycle, #4) Rating: 5.0 . Ranking: 10.0
# 212 Can You Keep a Secret? Rating: 3.3 . Ranking: 10.0
# 213 Mere Christianity Rating: 5.0 . Ranking: 10.0
# 214 The Thorn Birds Rating: 5.0 . Ranking: 10.0
# 215 A Tree Grows in Brooklyn Rating: 5.0 . Ranking: 10.0
# 216 The Merchant of Death
        (Pendragon, #1) Rating: 5.0 . Ranking: 10.0
# 217 Something Blue
        (Darcy & Rachel, #2) Rating: 3.3 . Ranking: 10.0
# 218 Eighth Grade Bites
        (The Chronicles of Vladimir Tod, #1) Rating: 5.0 . Ranking: 10.0
# 219 The Magicianâ€™s Nephew
        (Chronicles of Narnia, #6) Rating: 4.5 . Rankin

# 404 Bedtime for Frances Rating: 5.0 . Ranking: 5.0
# 405 The Hero and the Crown
        (Damar, #2) Rating: 5.0 . Ranking: 5.0
# 406 Key of Valor
        (Key Trilogy, #3) Rating: 5.0 . Ranking: 5.0
# 407 The Drawing of the Three
        (The Dark Tower, #2) Rating: 5.0 . Ranking: 5.0
# 408 From Blood and Ash
        (Blood and Ash, #1) Rating: 5.0 . Ranking: 5.0
# 409 Howlâ€™s Moving Castle
        (Howlâ€™s Moving Castle, #1) Rating: 5.0 . Ranking: 5.0
# 410 Corduroy Rating: 5.0 . Ranking: 5.0
# 411 A Kingdom of Flesh and Fire
        (Blood and Ash, #2) Rating: 5.0 . Ranking: 5.0
# 412 Mary Poppins
        (Mary Poppins, #1) Rating: 5.0 . Ranking: 5.0
# 413 Marley and Me: Life and Love With the Worldâ€™s Worst Dog Rating: 5.0 . Ranking: 5.0
# 414 Shadow Puppets
        (The Shadow Series, #3) Rating: 5.0 . Ranking: 5.0
# 415 The Name of the Wind
        (The Kingkiller Chronicle, #1) Rating: 5.0 . Ranking: 5.0
# 416 Vision in White
        (Bride Quartet, #1) Rating: 5.0 . Ranking

        (Amelia Bedelia, #1) Rating: 4.0 . Ranking: 4.0
# 621 The Innocent Rating: 4.0 . Ranking: 4.0
# 622 I'll Be Gone in the Dark: One Woman's Obsessive Search for the Golden State Killer Rating: 4.0 . Ranking: 4.0
# 623 The Tao of Pooh Rating: 4.0 . Ranking: 4.0
# 624 Where She Went
        (If I Stay, #2) Rating: 4.0 . Ranking: 4.0
# 625 The Year of Living Biblically: One Man's Humble Quest to Follow the Bible as Literally as Possible Rating: 4.0 . Ranking: 4.0
# 626 Lord of Chaos
        (The Wheel of Time, #6) Rating: 4.0 . Ranking: 4.0
# 627 Leaving Time Rating: 4.0 . Ranking: 4.0
# 628 The Color of Water: A Black Man's Tribute to His White Mother Rating: 4.0 . Ranking: 4.0
# 629 Midnight Sun
        (The Twilight Saga, #5) Rating: 4.0 . Ranking: 4.0
# 630 Northanger Abbey Rating: 4.0 . Ranking: 4.0
# 631 The Iliad Rating: 2.0 . Ranking: 4.0
# 632 Looking for Alaska Rating: 4.0 . Ranking: 4.0
# 633 The Fifth Witness
        (The Lincoln Lawyer, #4; Harry Bosch Universe, #23) Ra

# 842 Thirteen Reasons Why Rating: 4.0 . Ranking: 4.0
# 843 Ross Poldark
        (Poldark, #1) Rating: 4.0 . Ranking: 4.0
# 844 Check & Mate Rating: 4.0 . Ranking: 4.0
# 845 Something in the Water Rating: 4.0 . Ranking: 4.0
# 846 Boy: Tales of Childhood
        (Roald Dahl's Autobiography, #1) Rating: 3.0 . Ranking: 3.0
# 847 Fall of Giants
        (The Century Trilogy, #1) Rating: 3.0 . Ranking: 3.0
# 848 Where the Heart Is Rating: 3.0 . Ranking: 3.0
# 849 Are You There God? It’s Me, Margaret Rating: 3.0 . Ranking: 3.0
# 850 The Thirteenth Tale Rating: 3.0 . Ranking: 3.0
# 851 The Last to Vanish Rating: 3.0 . Ranking: 3.0
# 852 Narrative of the Life of Frederick Douglass Rating: 3.0 . Ranking: 3.0
# 853 The City of Ember
        (Book of Ember, #1) Rating: 3.0 . Ranking: 3.0
# 854 Normal People Rating: 3.0 . Ranking: 3.0
# 855 The Remains of the Day Rating: 3.0 . Ranking: 3.0
# 856 The Story of Babar
        (Babar, #1) Rating: 3.0 . Ranking: 3.0
# 857 Starship Troopers Rating: 3.0 . 

# 1067 The Sugar Queen Rating: nan . Ranking: 0.0
# 1068 For Whom the Bell Tolls Rating: nan . Ranking: 0.0
# 1069 The Druid of Shannara
        (Heritage of Shannara, #2) Rating: nan . Ranking: 0.0
# 1070 Midnight at the Bright Ideas Bookstore Rating: nan . Ranking: 0.0
# 1071 The Therapist Rating: nan . Ranking: 0.0
# 1072 Redhead by the Side of the Road Rating: nan . Ranking: 0.0
# 1073 Lover Reborn
        (Black Dagger Brotherhood, #10) Rating: nan . Ranking: 0.0
# 1074 We Are All Completely Beside Ourselves Rating: nan . Ranking: 0.0
# 1075 The Hero of Ages
        (Mistborn, #3) Rating: nan . Ranking: 0.0
# 1076 The Truth: Stage Adaptation Rating: nan . Ranking: 0.0
# 1077 Towers of Midnight
        (The Wheel of Time, #13) Rating: nan . Ranking: 0.0
# 1078 A Great Deliverance
        (Inspector Lynley, #1) Rating: nan . Ranking: 0.0
# 1079 The Golem and the Jinni
        (The Golem and the Jinni, #1) Rating: nan . Ranking: 0.0
# 1080 Love Redesigned
        (Lakefront Billionai

# 1245 Apeirogon Rating: nan . Ranking: 0.0
# 1246 To Paradise Rating: nan . Ranking: 0.0
# 1247 Gold Coast Rating: nan . Ranking: 0.0
# 1248 Ivanhoe Rating: nan . Ranking: 0.0
# 1249 One of Us Is Next
        (One of Us Is Lying, #2) Rating: nan . Ranking: 0.0
# 1250 Anna and the French Kiss
        (Anna and the French Kiss, #1) Rating: nan . Ranking: 0.0
# 1251 Dial A for Aunties
        (Aunties, #1) Rating: nan . Ranking: 0.0
# 1252 Broken Harbor
        (Dublin Murder Squad, #4) Rating: nan . Ranking: 0.0
# 1253 Magic Bites
        (Kate Daniels, #1) Rating: nan . Ranking: 0.0
# 1254 The Time Keeper Rating: nan . Ranking: 0.0
# 1255 Ink Exchange
        (Wicked Lovely, #2) Rating: nan . Ranking: 0.0
# 1256 The Wrong Family Rating: nan . Ranking: 0.0
# 1257 Cloud Cuckoo Land Rating: nan . Ranking: 0.0
# 1258 I Hope This Finds You Well Rating: nan . Ranking: 0.0
# 1259 The Family Remains
        (The Family Upstairs, #2) Rating: nan . Ranking: 0.0
# 1260 The True Love Experiment Ra

# 1460 The Dispossessed: An Ambiguous Utopia Rating: nan . Ranking: 0.0
# 1461 Rule of Wolves
        (King of Scars, #2) Rating: nan . Ranking: 0.0
# 1462 Dark Force Rising
        (Star Wars: The Thrawn Trilogy, #2) Rating: nan . Ranking: 0.0
# 1463 Two Nights in Lisbon Rating: nan . Ranking: 0.0
# 1464 Cruel Winter with You
        (Under the Mistletoe Collection, #1) Rating: nan . Ranking: 0.0
# 1465 Gabriel's Inferno
        (Gabriel's Inferno, #1) Rating: nan . Ranking: 0.0
# 1466 Invisible Cities Rating: nan . Ranking: 0.0
# 1467 Just for the Summer
        (Part of Your World, #3) Rating: nan . Ranking: 0.0
# 1468 Skeleton Crew Rating: nan . Ranking: 0.0
# 1469 It's Always the Husband Rating: nan . Ranking: 0.0
# 1470 The Midnight Line
        (Jack Reacher, #22) Rating: nan . Ranking: 0.0
# 1471 The Rumor Rating: nan . Ranking: 0.0
# 1472 Losing It
        (Losing It, #1) Rating: nan . Ranking: 0.0
# 1473 Legacy Rating: nan . Ranking: 0.0
# 1474 The Switch Rating: nan . Rankin

# 1686 Every Last Fear Rating: nan . Ranking: 0.0
# 1687 My Sister's Grave
        (Tracy Crosswhite, #1) Rating: nan . Ranking: 0.0
# 1688 The Diamond Throne
        (The Elenium, #1) Rating: nan . Ranking: 0.0
# 1689 Inheritance
        (The Lost Bride Trilogy, #1) Rating: nan . Ranking: 0.0
# 1690 The No-Show Rating: nan . Ranking: 0.0
# 1691 The Atlantis Complex
        (Artemis Fowl, #7) Rating: nan . Ranking: 0.0
# 1692 Life After Life
        (Todd Family, #1) Rating: nan . Ranking: 0.0
# 1693 Euphoria Rating: nan . Ranking: 0.0
# 1694 The Merchant of Venice Rating: nan . Ranking: 0.0
# 1695 Empire of the Summer Moon Rating: nan . Ranking: 0.0
# 1696 Catherine the Great: Portrait of a Woman Rating: nan . Ranking: 0.0
# 1697 The Chamber Rating: nan . Ranking: 0.0
# 1698 Sybil: The Classic True Story of a Woman Possessed by Sixteen Personalities Rating: nan . Ranking: 0.0
# 1699 Alice’s Adventures in Wonderland / Through the Looking-Glass Rating: nan . Ranking: 0.0
# 1700 Infinite

# 1917 The Nazi Officer's Wife: How One Jewish Woman Survived the Holocaust Rating: nan . Ranking: 0.0
# 1918 The Immortalists Rating: nan . Ranking: 0.0
# 1919 The Kitchen God's Wife Rating: nan . Ranking: 0.0
# 1920 Fever Rating: nan . Ranking: 0.0
# 1921 Women Rating: nan . Ranking: 0.0
# 1922 Hollow City
        (Miss Peregrine's Peculiar Children, #2) Rating: nan . Ranking: 0.0
# 1923 The Circle
        (The Circle, #1) Rating: nan . Ranking: 0.0
# 1924 The Last Town
        (Wayward Pines, #3) Rating: nan . Ranking: 0.0
# 1925 Songs of the Humpback Whale Rating: nan . Ranking: 0.0
# 1926 The Client Rating: nan . Ranking: 0.0
# 1927 In an Instant Rating: nan . Ranking: 0.0
# 1928 Pride and Prejudice and Zombies
        (Pride and Prejudice and Zombies, #1) Rating: nan . Ranking: 0.0
# 1929 8th Confession
        (Women's Murder Club, #8) Rating: nan . Ranking: 0.0
# 1930 The Lost Daughter Rating: nan . Ranking: 0.0
# 1931 The School of Essential Ingredients Rating: nan . Ranking: 

# 2142 Transcendent Kingdom Rating: nan . Ranking: 0.0
# 2143 Villette Rating: nan . Ranking: 0.0
# 2144 The Killing Dance
        (Anita Blake, Vampire Hunter, #6) Rating: nan . Ranking: 0.0
# 2145 The Replacement Rating: nan . Ranking: 0.0
# 2146 Journey to the Center of the Earth Rating: nan . Ranking: 0.0
# 2147 Bury My Heart at Wounded Knee: An Indian History of the American West Rating: nan . Ranking: 0.0
# 2148 The Bird Hotel Rating: nan . Ranking: 0.0
# 2149 Y: The Last Man, Vol. 7: Paper Dolls Rating: nan . Ranking: 0.0
# 2150 I Know Who You Are Rating: nan . Ranking: 0.0
# 2151 The Tortilla Curtain Rating: nan . Ranking: 0.0
# 2152 Jude the Obscure Rating: nan . Ranking: 0.0
# 2153 Ignite Me
        (Shatter Me, #3) Rating: nan . Ranking: 0.0
# 2154 A Good Marriage Rating: nan . Ranking: 0.0
# 2155 Bury Your Dead
        (Chief Inspector Armand Gamache, #6) Rating: nan . Ranking: 0.0
# 2156 The Boy in the Field Rating: nan . Ranking: 0.0
# 2157 The Night We Lost Him Rating: n

        (Amgash, #5) Rating: nan . Ranking: 0.0
# 2368 Home Fire Rating: nan . Ranking: 0.0
# 2369 The End
        (A Series of Unfortunate Events, #13) Rating: nan . Ranking: 0.0
# 2370 When All Is Said Rating: nan . Ranking: 0.0
# 2371 The Silent Wife Rating: nan . Ranking: 0.0
# 2372 The Chaperone Rating: nan . Ranking: 0.0
# 2373 Magnolia Parks
        (Magnolia Parks Universe, #1) Rating: nan . Ranking: 0.0
# 2374 Last Sacrifice
        (Vampire Academy, #6) Rating: nan . Ranking: 0.0
# 2375 The Grownup Rating: nan . Ranking: 0.0
# 2376 I Know This Much Is True Rating: nan . Ranking: 0.0
# 2377 Faking Christmas Rating: nan . Ranking: 0.0
# 2378 The Well of Ascension
        (Mistborn, #2) Rating: nan . Ranking: 0.0
# 2379 Open House Rating: nan . Ranking: 0.0
# 2380 The Happiness Project Rating: nan . Ranking: 0.0
# 2381 Pachinko Rating: nan . Ranking: 0.0
# 2382 House of Earth and Blood
        (Crescent City, #1) Rating: nan . Ranking: 0.0
# 2383 The Stand: Captain Trips Rating:

        (Silo, #2) Rating: nan . Ranking: 0.0
# 2584 Elantris
        (Elantris, #1) Rating: nan . Ranking: 0.0
# 2585 The Battle of Corrin
        (Legends of Dune, #3) Rating: nan . Ranking: 0.0
# 2586 The Eyre Affair
        (Thursday Next, #1) Rating: nan . Ranking: 0.0
# 2587 Bird by Bird Rating: nan . Ranking: 0.0
# 2588 The Three-Body Problem
        (Remembrance of Earthâ€™s Past, #1) Rating: nan . Ranking: 0.0
# 2589 The Affair
        (Jack Reacher, #16) Rating: nan . Ranking: 0.0
# 2590 Bone Crossed
        (Mercy Thompson, #4) Rating: nan . Ranking: 0.0
# 2591 The Other Woman Rating: nan . Ranking: 0.0
# 2592 The Life We Bury
        (Joe Talbert, #1; Detective Max Rupert, #1) Rating: nan . Ranking: 0.0
# 2593 War Rating: nan . Ranking: 0.0
# 2594 Alone
        (Detective D.D. Warren, #1) Rating: nan . Ranking: 0.0
# 2595 Chain-Gang All-Stars Rating: nan . Ranking: 0.0
# 2596 Manhattan Beach Rating: nan . Ranking: 0.0
# 2597 The Decameron Rating: nan . Ranking: 0.0
# 2598 C

# 2806 I Must Betray You Rating: nan . Ranking: 0.0
# 2807 Gone Tomorrow
        (Jack Reacher, #13) Rating: nan . Ranking: 0.0
# 2808 Bloody Bones
        (Anita Blake, Vampire Hunter, #5) Rating: nan . Ranking: 0.0
# 2809 One Plus One Rating: nan . Ranking: 0.0
# 2810 The Last One at the Wedding Rating: nan . Ranking: 0.0
# 2811 Troubles in Paradise
        (Paradise, #3) Rating: nan . Ranking: 0.0
# 2812 The Latecomer Rating: nan . Ranking: 0.0
# 2813 One Grave at a Time
        (Night Huntress, #6) Rating: nan . Ranking: 0.0
# 2814 Rising Sun Rating: nan . Ranking: 0.0
# 2815 Iona Iverson's Rules for Commuting Rating: nan . Ranking: 0.0
# 2816 Antony and Cleopatra Rating: nan . Ranking: 0.0
# 2817 Strange Sally Diamond Rating: nan . Ranking: 0.0
# 2818 Escape from Warsaw Rating: nan . Ranking: 0.0
# 2819 Stiff: The Curious Lives of Human Cadavers Rating: nan . Ranking: 0.0
# 2820 Ringworld
        (Ringworld, #1) Rating: nan . Ranking: 0.0
# 2821 Deacon King Kong Rating: nan . Rank

        (To All the Boys I've Loved Before, #2) Rating: nan . Ranking: 0.0
# 3028 Get a Life, Chloe Brown
        (The Brown Sisters, #1) Rating: nan . Ranking: 0.0
# 3029 Jitterbug Perfume Rating: nan . Ranking: 0.0
# 3030 Special Topics in Calamity Physics Rating: nan . Ranking: 0.0
# 3031 A Study in Scarlet
        (Sherlock Holmes, #1) Rating: nan . Ranking: 0.0
# 3032 Parable of the Talents
        (Earthseed, #2) Rating: nan . Ranking: 0.0
# 3033 I'll Give You the Sun Rating: nan . Ranking: 0.0
# 3034 The Light We Lost Rating: nan . Ranking: 0.0
# 3035 Rikki-Tikki-Tavi Rating: nan . Ranking: 0.0
# 3036 Dark Desires After Dusk
        (Immortals After Dark, #5) Rating: nan . Ranking: 0.0
# 3037 The Book of Doors Rating: nan . Ranking: 0.0
# 3038 The Lacuna Rating: nan . Ranking: 0.0
# 3039 The Guest Rating: nan . Ranking: 0.0
# 3040 The Moon Is a Harsh Mistress Rating: nan . Ranking: 0.0
# 3041 Dreamcatcher Rating: nan . Ranking: 0.0
# 3042 I See You Rating: nan . Ranking: 0.0
# 3

# 3252 Collected Stories Rating: nan . Ranking: 0.0
# 3253 The Deal
        (Off-Campus, #1) Rating: nan . Ranking: 0.0
# 3254 We Begin at the End Rating: nan . Ranking: 0.0
# 3255 Steelheart
        (The Reckoners, #1) Rating: nan . Ranking: 0.0
# 3256 Between Shades of Gray Rating: nan . Ranking: 0.0
# 3257 The Cuckoo's Calling
        (Cormoran Strike, #1) Rating: nan . Ranking: 0.0
# 3258 The Secret
        (The Secret, #1) Rating: nan . Ranking: 0.0
# 3259 Escape from Camp 14: One Man's Remarkable Odyssey from North Korea to Freedom in the West Rating: nan . Ranking: 0.0
# 3260 Nickel and Dimed: On (Not) Getting By in America Rating: nan . Ranking: 0.0
# 3261 The Broken Girls Rating: nan . Ranking: 0.0
# 3262 Song of Solomon Rating: nan . Ranking: 0.0
# 3263 Oh, the Places Youâ€™ll Go! Rating: nan . Ranking: 0.0
# 3264 The Valley of Amazement Rating: nan . Ranking: 0.0
# 3265 The Soulmate Rating: nan . Ranking: 0.0
# 3266 Strega Nona Rating: nan . Ranking: 0.0
# 3267 The History o

# 3486 The Last Days of Night Rating: nan . Ranking: 0.0
# 3487 The Pearl Rating: nan . Ranking: 0.0
# 3488 Saints for All Occasions Rating: nan . Ranking: 0.0
# 3489 We'll Always Have Summer
        (Summer #3) Rating: nan . Ranking: 0.0
# 3490 Farmer Boy
        (Little House, #2) Rating: nan . Ranking: 0.0
# 3491 Sycamore Row
        (Jake Brigance, #2) Rating: nan . Ranking: 0.0
# 3492 The Happy Ever After Playlist
        (The Friend Zone, #2) Rating: nan . Ranking: 0.0
# 3493 Frost Burned
        (Mercy Thompson, #7) Rating: nan . Ranking: 0.0
# 3494 Swamplandia! Rating: nan . Ranking: 0.0
# 3495 Pretty Girls Rating: nan . Ranking: 0.0
# 3496 French Women Don't Get Fat: The Secret of Eating for Pleasure Rating: nan . Ranking: 0.0
# 3497 The Song of Achilles Rating: nan . Ranking: 0.0
# 3498 If You Would Have Told Me Rating: nan . Ranking: 0.0
# 3499 Unlikely Animals Rating: nan . Ranking: 0.0
# 3500 Someone We Know Rating: nan . Ranking: 0.0
# 3501 Olive, Again
        (Olive Kit

# 3716 The Night Watchman Rating: nan . Ranking: 0.0
# 3717 'Tis Rating: nan . Ranking: 0.0
# 3718 How to Eat Fried Worms Rating: nan . Ranking: 0.0
# 3719 The Search Rating: nan . Ranking: 0.0
# 3720 Not My Father's Son Rating: nan . Ranking: 0.0
# 3721 The Black Echo
        (Harry Bosch, #1; Harry Bosch Universe, #1) Rating: nan . Ranking: 0.0
# 3722 Look Alive Twenty-Five
        (Stephanie Plum, #25) Rating: nan . Ranking: 0.0
# 3723 The Day the World Came to Town: 9/11 in Gander, Newfoundland Rating: nan . Ranking: 0.0
# 3724 The Collector Rating: nan . Ranking: 0.0
# 3725 The Warm Hands of Ghosts Rating: nan . Ranking: 0.0
# 3726 Hunting Adeline
        (Cat and Mouse, #2) Rating: nan . Ranking: 0.0
# 3727 Fool's Errand
        (Tawny Man, #1) Rating: nan . Ranking: 0.0
# 3728 Fables, Vol. 6: Homelands Rating: nan . Ranking: 0.0
# 3729 Two Little Girls in Blue Rating: nan . Ranking: 0.0
# 3730 The Indigo Girl Rating: nan . Ranking: 0.0
# 3731 The Magician's Assistant Rating: nan

# 3945 The German Wife Rating: nan . Ranking: 0.0
# 3946 The Firm
        (The Firm, #1) Rating: nan . Ranking: 0.0
# 3947 Winter Garden Rating: nan . Ranking: 0.0
# 3948 Utopia Rating: nan . Ranking: 0.0
# 3949 The Long Walk Rating: nan . Ranking: 0.0
# 3950 Inheritance: A Memoir of Genealogy, Paternity, and Love Rating: nan . Ranking: 0.0
# 3951 Twelve Years a Slave Rating: nan . Ranking: 0.0
# 3952 Dear Child Rating: nan . Ranking: 0.0
# 3953 Hop On Pop Rating: nan . Ranking: 0.0
# 3954 Eleven Minutes Rating: nan . Ranking: 0.0
# 3955 Soulless
        (Parasol Protectorate, #1) Rating: nan . Ranking: 0.0
# 3956 In Five Years Rating: nan . Ranking: 0.0
# 3957 The Man Who Died Twice
        (Thursday Murder Club, #2) Rating: nan . Ranking: 0.0
# 3958 Orphan X
        (Orphan X, #1) Rating: nan . Ranking: 0.0
# 3959 The Book of Speculation Rating: nan . Ranking: 0.0
# 3960 Good to Great: Why Some Companies Make the Leap... and Others Don't Rating: nan . Ranking: 0.0
# 3961 Severance Ra

# 4174 Chronicle of a Death Foretold Rating: nan . Ranking: 0.0
# 4175 The Penultimate Peril
        (A Series of Unfortunate Events, #12) Rating: nan . Ranking: 0.0
# 4176 Cryptonomicon Rating: nan . Ranking: 0.0
# 4177 Cathedral Rating: nan . Ranking: 0.0
# 4178 The Lost Boy
        (Dave Pelzer #2) Rating: nan . Ranking: 0.0
# 4179 Day Rating: nan . Ranking: 0.0
# 4180 Edge of Eternity
        (The Century Trilogy, #3) Rating: nan . Ranking: 0.0
# 4181 Know My Name Rating: nan . Ranking: 0.0
# 4182 Crossroads of Twilight
        (The Wheel of Time, #10) Rating: nan . Ranking: 0.0
# 4183 Frostbite
        (Vampire Academy, #2) Rating: nan . Ranking: 0.0
# 4184 Lady in the Lake Rating: nan . Ranking: 0.0
# 4185 Gabriel's Rapture
        (Gabriel's Inferno, #2) Rating: nan . Ranking: 0.0
# 4186 Jar of Hearts Rating: nan . Ranking: 0.0
# 4187 Cross Fire
        (Alex Cross, #17) Rating: nan . Ranking: 0.0
# 4188 The Canterbury Tales Rating: nan . Ranking: 0.0
# 4189 Exit West Rating: na

# 4400 The Lions of Fifth Avenue Rating: nan . Ranking: 0.0
# 4401 A Constellation of Vital Phenomena Rating: nan . Ranking: 0.0
# 4402 Aquarium Rating: nan . Ranking: 0.0
# 4403 The Epic of Gilgamesh Rating: nan . Ranking: 0.0
# 4404 Swimsuit Rating: nan . Ranking: 0.0
# 4405 People of the Lie: The Hope for Healing Human Evil Rating: nan . Ranking: 0.0
# 4406 How the Light Gets In
        (Chief Inspector Armand Gamache, #9) Rating: nan . Ranking: 0.0
# 4407 The Five Love Languages: How to Express Heartfelt Commitment to Your Mate Rating: nan . Ranking: 0.0
# 4408 The Blue Bistro Rating: nan . Ranking: 0.0
# 4409 The Book Woman of Troublesome Creek
        (The Book Woman of Troublesome Creek, #1) Rating: nan . Ranking: 0.0
# 4410 Chronicles, Volume One Rating: nan . Ranking: 0.0
# 4411 The One Minute Manager Rating: nan . Ranking: 0.0
# 4412 Kushiel's Chosen
        (PhÃ¨dre's Trilogy, #2) Rating: nan . Ranking: 0.0
# 4413 Morality for Beautiful Girls
        (No. 1 Ladies' Detective

        (Dollanganger, #3) Rating: nan . Ranking: 0.0
# 4628 The Judge's List
        (The Whistler, #2) Rating: nan . Ranking: 0.0
# 4629 Lover Eternal
        (Black Dagger Brotherhood, #2) Rating: nan . Ranking: 0.0
# 4630 Make Your Bed: Little Things That Can Change Your Life...And Maybe the World Rating: nan . Ranking: 0.0
# 4631 Kitchen Rating: nan . Ranking: 0.0
# 4632 Under the Dome Rating: nan . Ranking: 0.0
# 4633 Lasher
        (Lives of the Mayfair Witches, #2) Rating: nan . Ranking: 0.0
# 4634 Where Men Win Glory: The Odyssey of Pat Tillman Rating: nan . Ranking: 0.0
# 4635 The Nothing Man Rating: nan . Ranking: 0.0
# 4636 Camino Island
        (Camino Island, #1) Rating: nan . Ranking: 0.0
# 4637 The Maid's Diary Rating: nan . Ranking: 0.0
# 4638 The Stonekeeper
        (Amulet, #1) Rating: nan . Ranking: 0.0
# 4639 The Stranger Beside Me: Ted Bundy: The Shocking Inside Story Rating: nan . Ranking: 0.0
# 4640 Birthday Girl Rating: nan . Ranking: 0.0
# 4641 The Beach House

In [13]:
#give a list sorted out with books you've already read:
# sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
list_num = 1
for idx in sorted_indices: 
#     print("ratings_matrix[user_id, idx]= ", ratings_matrix[user_id, idx])
    if (ratings_matrix[user_id, idx] > 0) or (np.isnan(pred_ratings_list[idx])):
        continue
    print("#", list_num , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))
    list_num += 1

Top books are:
# 1 Harry Potter and the Sorcerer's Stone
        (Harry Potter, #1) Rating: 4.6
# 2 The da Vinci Code
        (Robert Langdon, #2) Rating: 4.2
# 3 Eclipse
        (The Twilight Saga, #3) Rating: 4.2
# 4 Pride and Prejudice Rating: 4.3
# 5 The Great Gatsby Rating: 3.5
# 6 Breaking Dawn
        (The Twilight Saga, #4) Rating: 4.1
# 7 The Hobbit, or There and Back Again
        (The Lord of the Rings, #0) Rating: 4.5
# 8 The Kite Runner Rating: 4.5
# 9 The Help Rating: 4.8
# 10 The Diary of a Young Girl Rating: 4.7
# 11 The Fellowship of the Ring
        (The Lord of the Rings, #1) Rating: 4.0
# 12 Lord of the Flies Rating: 3.3
# 13 Of Mice and Men Rating: 3.6
# 14 1984 Rating: 3.9
# 15 The Lion, the Witch and the Wardrobe
        (Chronicles of Narnia, #1) Rating: 4.3
# 16 The Lovely Bones Rating: 3.4
# 17 Angels & Demons
        (Robert Langdon, #1) Rating: 4.4
# 18 Little Women Rating: 3.4
# 19 The Host
        (The Host, #1) Rating: 4.7
# 20 The Time Traveler's Wife Ra

# 192 Clockwork Angel
        (The Infernal Devices, #1) Rating: 4.5
# 193 Summer Sisters Rating: 4.5
# 194 Passion
        (Fallen, #3) Rating: 4.5
# 195 Little House on the Prairie
        (Little House, #3) Rating: 4.5
# 196 World War Z: An Oral History of the Zombie War Rating: 4.5
# 197 The Icebound Land
        (Ranger's Apprentice, #3) Rating: 4.5
# 198 Where the Wild Things Are Rating: 4.5
# 199 The Lost City of Faar
        (Pendragon, #2) Rating: 4.5
# 200 My Sisterâ€™s Keeper Rating: 4.5
# 201 The Other Boleyn Girl
        (The Plantagenet and Tudor Novels, #9) Rating: 4.5
# 202 Persuasion Rating: 4.5
# 203 The Alchemyst
        (The Secrets of the Immortal Nicholas Flamel, #1) Rating: 4.5
# 204 Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones Rating: 4.5
# 205 We Should All Be Feminists Rating: 4.5
# 206 Anya's Ghost Rating: 4.5
# 207 A Short History of Nearly Everything Rating: 4.5
# 208 The Color Purple Rating: 3.0
# 209 Girl in Translation Rating

# 387 A Wizard of Earthsea
        (Earthsea Cycle, #1) Rating: 5.0
# 388 Days Gone Bye
        (The Walking Dead, #1) Rating: 5.0
# 389 The Silence of the Lambs
        (Hannibal Lecter, #2) Rating: 5.0
# 390 Dead Ever After
        (Sookie Stackhouse, #13) Rating: 5.0
# 391 The High King
        (The Chronicles of Prydain, #5) Rating: 5.0
# 392 The Mouse and the Motorcycle
        (Ralph S. Mouse, #1) Rating: 5.0
# 393 Maus: A Survivor's Tale II: And Here My Troubles Began
        (Maus, #2) Rating: 5.0
# 394 Wish You Well Rating: 5.0
# 395 Anne of the Island
        (Anne of Green Gables, #3) Rating: 5.0
# 396 Dracula Rating: 5.0
# 397 Anne of Green Gables
        (Anne of Green Gables, #1) Rating: 5.0
# 398 Cujo Rating: 5.0
# 399 The Long Winter
        (Little House, #6) Rating: 5.0
# 400 Go Ask Alice Rating: 5.0
# 401 Stone Cold
        (The Camel Club, #3) Rating: 5.0
# 402 Ruthless Vows
        (Letters of Enchantment, #2) Rating: 5.0
# 403 Blueberries for Sal Rating: 5.0
# 404

# 678 Song of Susannah
        (The Dark Tower, #6) Rating: 4.0
# 679 J.R.R. Tolkien 4-Book Boxed Set: The Hobbit and The Lord of the Rings Rating: 4.0
# 680 Sex, Drugs, and Cocoa Puffs: A Low Culture Manifesto Rating: 4.0
# 681 The Cask of Amontillado Rating: 4.0
# 682 Savannah Blues
        (Weezie and Bebe Mysteries, #1) Rating: 4.0
# 683 Treasure Island Rating: 4.0
# 684 Chicka Chicka Boom Boom Rating: 4.0
# 685 The Importance of Being Earnest Rating: 4.0
# 686 Shanghai Girls
        (Shanghai Girls, #1) Rating: 4.0
# 687 If You Give a Mouse a Cookie Rating: 4.0
# 688 Gone for Good Rating: 4.0
# 689 Little Dorrit Rating: 4.0
# 690 Shadow and Bone
        (The Shadow and Bone Trilogy, #1) Rating: 4.0
# 691 The 6th Target
        (Women's Murder Club, #6) Rating: 4.0
# 692 My Ãntonia Rating: 4.0
# 693 Handle with Care Rating: 4.0
# 694 Into the Water Rating: 4.0
# 695 And Then There Were None Rating: 4.0
# 696 The Last Straw
        (Diary of a Wimpy Kid, #3) Rating: 4.0
# 697 Cold 

In [14]:
idx = 8
this_ratings = ratings_matrix[indices[0,idx]]
print(this_ratings)

my_ratings = ratings_matrix[indices[0,0]]
print(my_ratings)

for i, rating in enumerate(this_ratings):
    if rating > 0 and my_ratings[i]>0:
        print("-", titles[i], ", their Rating:", rating, " My Rating:", my_ratings[i])

# print(distances, indices[-1,-1])

[0. 5. 0. ... 0. 0. 0.]
[0. 5. 0. ... 0. 0. 0.]
- Divergent
        (Divergent, #1) , their Rating: 5.0  My Rating: 5.0
- Catching Fire
        (The Hunger Games, #2) , their Rating: 5.0  My Rating: 5.0
- Harry Potter and the Half-Blood Prince
        (Harry Potter, #6) , their Rating: 5.0  My Rating: 5.0
- Princess Academy
        (Princess Academy, #1) , their Rating: 5.0  My Rating: 4.0
- Harry Potter and the Prisoner of Azkaban
        (Harry Potter, #3) , their Rating: 5.0  My Rating: 5.0
- Harry Potter and the Deathly Hallows
        (Harry Potter, #7) , their Rating: 5.0  My Rating: 5.0
- Twilight
        (The Twilight Saga, #1) , their Rating: 5.0  My Rating: 3.0
- Mockingjay
        (The Hunger Games, #3) , their Rating: 4.0  My Rating: 5.0
- New Moon
        (The Twilight Saga, #2) , their Rating: 5.0  My Rating: 3.0
- Harry Potter and the Chamber of Secrets
        (Harry Potter, #2) , their Rating: 5.0  My Rating: 5.0
- Harry Potter and the Order of the Phoenix
        (Har

In [15]:
#find most similar books using cosine similarity
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

similarity_matrix = cosine_similarity(ratings_matrix.T)

similarity_df = pd.DataFrame(similarity_matrix, index=titles, columns=titles)

# Function to get k nearest neighbors for a movie
def get_similar_book(book_name, k=3):
    similar_book = similarity_df[book_name].sort_values(ascending=False)[1:k+1]
    return similar_book

book_name = 'First Lie Wins'
print("\nTop 5 similar book to", book_name, ":")
print(get_similar_book(book_name, k=5))


Top 5 similar book to First Lie Wins :
Listen for the Lie             0.627657
Darling Girls                  0.627624
She's Not Sorry                0.616339
The Last One at the Wedding    0.572488
The Fury                       0.567387
Name: First Lie Wins, dtype: float64


In [16]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.impute import SimpleImputer

# Example user rating data (rows = users, columns = items)
ratings_df = pd.DataFrame(ratings)

# Step 1: Handle missing values using imputation (we will fill missing values with the mean rating)
imputer = SimpleImputer(strategy='mean')
ratings_filled = imputer.fit_transform(ratings_df)

# Step 2: Apply KMeans clustering
# We will use 2 clusters as an example
kmeans = KMeans(n_clusters=int(num_users/10), random_state=42)
clusters = kmeans.fit_predict(ratings_filled)

# Step 3: Add the cluster labels to the original DataFrame
ratings_df['Cluster'] = clusters

# # Print the user ratings with cluster assignments
# print("\nUser Ratings with Clusters:")
# print(ratings_df)

# # Step 4: Print the cluster centers (the centroid of each cluster)
# print("\nCluster Centers (Centroids):")
# print(kmeans.cluster_centers_)


user_id = 0
# print(clusters)

cluster_this_user = clusters[user_id]
# print(cluster_this_user)

pred_ratings_list = kmeans.cluster_centers_[user_id]
# print(pred_ratings_list)
# for i in range(len())

#give a list sorted out with books you've already read:
sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
list_num = 1
for idx in sorted_indices: 
#     print("ratings_matrix[user_id, idx]= ", ratings_matrix[user_id, idx])
    if (ratings_matrix[user_id, idx] > 0) or (np.isnan(pred_ratings_list[idx])) :
        continue
    print("#", list_num , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))
    list_num += 1



Top books are:
# 1 Small Great Things Rating: 5.0
# 2 The Family Upstairs
        (The Family Upstairs, #1) Rating: 5.0
# 3 Unbroken: A World War II Story of Survival, Resilience and Redemption Rating: 5.0
# 4 The Great Alone Rating: 5.0
# 5 Where the Crawdads Sing Rating: 5.0
# 6 The Good Daughter
        (Good Daughter, #1) Rating: 5.0
# 7 The Simple Wild
        (Wild, #1) Rating: 5.0
# 8 Too Late Rating: 5.0
# 9 The Unhoneymooners
        (Unhoneymooners, #1) Rating: 5.0
# 10 The Stolen Marriage Rating: 5.0
# 11 The Life We Bury
        (Joe Talbert, #1; Detective Max Rupert, #1) Rating: 5.0
# 12 Big Little Lies Rating: 5.0
# 13 The Extraordinary Life of Sam Hell Rating: 5.0
# 14 The Pact Rating: 5.0
# 15 Untamed Rating: 5.0
# 16 The Last Mrs. Parrish Rating: 5.0
# 17 The One Hundred Years of Lenni and Margot Rating: 5.0
# 18 I Found You Rating: 5.0
# 19 The Red Tent Rating: 5.0
# 20 28 Summers Rating: 5.0
# 21 Then She Was Gone Rating: 5.0
# 22 Heart Bones Rating: 5.0
# 23 Ugly Lo

        (Tracy Crosswhite, #1) Rating: 0.0
# 263 The Diamond Throne
        (The Elenium, #1) Rating: 0.0
# 264 Inheritance
        (The Lost Bride Trilogy, #1) Rating: 0.0
# 265 The Mayor of Casterbridge Rating: 0.0
# 266 Catch-22 Rating: 0.0
# 267 Brave New World and Brave New World Revisited Rating: 0.0
# 268 The Shadows Rating: 0.0
# 269 Tweak: Growing Up On Methamphetamines Rating: 0.0
# 270 The No-Show Rating: 0.0
# 271 The Atlantis Complex
        (Artemis Fowl, #7) Rating: 0.0
# 272 Life After Life
        (Todd Family, #1) Rating: 0.0
# 273 Euphoria Rating: 0.0
# 274 Midnight Sun [2008 Draft] Rating: 0.0
# 275 The Merchant of Venice Rating: 0.0
# 276 Empire of the Summer Moon Rating: 0.0
# 277 The Serpent and the Wings of Night
        (Crowns of Nyaxia, #1) Rating: 0.0
# 278 Catherine the Great: Portrait of a Woman Rating: 0.0
# 279 A Grief Observed Rating: 0.0
# 280 The Chalk Man Rating: 0.0
# 281 The Path of Daggers
        (The Wheel of Time, #8) Rating: 0.0
# 282 The Midn

        (Shatter Me, #1.5) Rating: 0.0
# 538 After Annie Rating: 0.0
# 539 Under the Whispering Door Rating: 0.0
# 540 Thinking, Fast and Slow Rating: 0.0
# 541 Flowers in the Attic
        (Dollanganger, #1) Rating: 0.0
# 542 Kiss an Angel Rating: 0.0
# 543 Shelter Rating: 0.0
# 544 Bone: The Complete Edition Rating: 0.0
# 545 Tell No One Rating: 0.0
# 546 The Prophet Rating: 0.0
# 547 Crying in H Mart Rating: 0.0
# 548 Major Pettigrew's Last Stand Rating: 0.0
# 549 Labor Day Rating: 0.0
# 550 The Sentence Rating: 0.0
# 551 Anne's House of Dreams
        (Anne of Green Gables, #5) Rating: 0.0
# 552 The Valley of Horses
        (Earth's Children, #2) Rating: 0.0
# 553 The Murmur of Bees Rating: 0.0
# 554 With Any Luck
        (The Improbable Meet-Cute, #5) Rating: 0.0
# 555 The Sellout Rating: 0.0
# 556 Terms and Conditions
        (Dreamland Billionaires, #2) Rating: 0.0
# 557 The Wind-Up Bird Chronicle Rating: 0.0
# 558 Ragtime Rating: 0.0
# 559 Blackwood Farm
        (The Vampire Ch

        (On the Island, #1) Rating: 0.0
# 841 The Complete Persepolis Rating: 0.0
# 842 Eye of the Needle Rating: 0.0
# 843 One Foot in the Grave
        (Night Huntress, #2) Rating: 0.0
# 844 Are You There, Vodka? It's Me, Chelsea Rating: 0.0
# 845 Destiny of the Republic: A Tale of Madness, Medicine and the Murder of a President Rating: 0.0
# 846 Sword of Destiny
        (The Witcher, #0.7) Rating: 0.0
# 847 The Beginner's Goodbye Rating: 0.0
# 848 Before the Fall Rating: 0.0
# 849 David Copperfield Rating: 0.0
# 850 The Only Woman in the Room Rating: 0.0
# 851 We Were the Lucky Ones Rating: 0.0
# 852 Fried Green Tomatoes at the Whistle Stop Cafe Rating: 0.0
# 853 Dragonsong
        (Harper Hall, #1) Rating: 0.0
# 854 The Three-Body Problem
        (Remembrance of Earth’s Past, #1) Rating: 0.0
# 855 Crossed
        (Matched, #2) Rating: 0.0
# 856 The Wide Window
        (A Series of Unfortunate Events, #3) Rating: 0.0
# 857 What the Wind Knows Rating: 0.0
# 858 Gone Girl Rating: 0.0


# 1149 The Metamorphosis and Other Stories Rating: 0.0
# 1150 Heart of the Matter Rating: 0.0
# 1151 Narcissus in Chains
        (Anita Blake, Vampire Hunter, #10) Rating: 0.0
# 1152 Unseen
        (Will Trent, #7) Rating: 0.0
# 1153 The Castle of Llyr
        (The Chronicles of Prydain, #3) Rating: 0.0
# 1154 The Thousand Autumns of Jacob de Zoet Rating: 0.0
# 1155 Far From the Madding Crowd Rating: 0.0
# 1156 Plum Island
        (John Corey, #1) Rating: 0.0
# 1157 The Leftovers Rating: 0.0
# 1158 The House We Grew Up In Rating: 0.0
# 1159 The Origin of Species Rating: 0.0
# 1160 It's One of Us Rating: 0.0
# 1161 Fool's Fate
        (Tawny Man, #3) Rating: 0.0
# 1162 The Outsider Rating: 0.0
# 1163 Fahrenheit 451 Rating: 0.0
# 1164 The Word is Murder
        (Hawthorne & Horowitz, #1) Rating: 0.0
# 1165 All That Remains
        (Kay Scarpetta, #3) Rating: 0.0
# 1166 Desperation Rating: 0.0
# 1167 The Lunatic Cafe
        (Anita Blake, Vampire Hunter, #4) Rating: 0.0
# 1168 Never Lie R

# 1466 Robin Rating: 0.0
# 1467 Fall of Giants
        (The Century Trilogy, #1) Rating: 0.0
# 1468 The Lost Colony
        (Artemis Fowl, #5) Rating: 0.0
# 1469 Daughter of Fortune Rating: 0.0
# 1470 For One More Day Rating: 0.0
# 1471 The Story of Babar
        (Babar, #1) Rating: 0.0
# 1472 The Sociopath Next Door Rating: 0.0
# 1473 Eat, Pray, Love Rating: 0.0
# 1474 Prodigal Summer Rating: 0.0
# 1475 The Angel Maker Rating: 0.0
# 1476 The Best of Me Rating: 0.0
# 1477 The Lake House Rating: 0.0
# 1478 The Book of Laughter and Forgetting Rating: 0.0
# 1479 Flyte
        (Septimus Heap, #2) Rating: 0.0
# 1480 The Elegance of the Hedgehog Rating: 0.0
# 1481 The Glass Castle Rating: 0.0
# 1482 A Darker Shade of Magic
        (Shades of Magic, #1) Rating: 0.0
# 1483 Spirit Bound
        (Vampire Academy, #5) Rating: 0.0
# 1484 Inkheart
        (Inkworld, #1) Rating: 0.0
# 1485 The Handmaid’s Tale
        (The Handmaid's Tale, #1) Rating: 0.0
# 1486 Look Closer Rating: 0.0
# 1487 Worth D

# 1792 The Reptile Room
        (A Series of Unfortunate Events, #2) Rating: 0.0
# 1793 The Birth House Rating: 0.0
# 1794 The Collectors
        (The Camel Club, #2) Rating: 0.0
# 1795 The Shining Rating: 0.0
# 1796 Preacher, Volume 1: Gone to Texas Rating: 0.0
# 1797 The Twilight Saga Rating: 0.0
# 1798 Thunderhead
        (Nora Kelly, #0A) Rating: 0.0
# 1799 The Borrowers
        (The Borrowers, #1) Rating: 0.0
# 1800 The Exiles Rating: 0.0
# 1801 I Shall Wear Midnight
        (Discworld, #38; Tiffany Aching, #4) Rating: 0.0
# 1802 Daughter of No Worlds
        (The War of Lost Hearts, #1) Rating: 0.0
# 1803 The German Midwife Rating: 0.0
# 1804 Earth Awakens
        (The First Formic War, #3) Rating: 0.0
# 1805 The Apprentice
        (Rizzoli & Isles, #2) Rating: 0.0
# 1806 Seveneves Rating: 0.0
# 1807 Call Your Daughter Home Rating: 0.0
# 1808 The Elfstones of Shannara
        (The Original Shannara Trilogy, #2) Rating: 0.0
# 1809 The Iron Daughter
        (The Iron Fey, #2) Ratin

# 2092 The Enchanted Rating: 0.0
# 2093 Beauty Queens Rating: 0.0
# 2094 Charlie and the Chocolate Factory
        (Charlie Bucket, #1) Rating: 0.0
# 2095 The Deep Rating: 0.0
# 2096 The Witch Elm Rating: 0.0
# 2097 Winter in Paradise
        (Paradise, #1) Rating: 0.0
# 2098 The Metamorphosis Rating: 0.0
# 2099 The Cruelest Month
        (Chief Inspector Armand Gamache, #3) Rating: 0.0
# 2100 Binti
        (Binti, #1) Rating: 0.0
# 2101 Weapons of Math Destruction: How Big Data Increases Inequality and Threatens Democracy Rating: 0.0
# 2102 Intermezzo Rating: 0.0
# 2103 Secretly Yours
        (A Vine Mess, #1) Rating: 0.0
# 2104 The Gathering Rating: 0.0
# 2105 Matrix Rating: 0.0
# 2106 The Berry Pickers Rating: 0.0
# 2107 Sam Rating: 0.0
# 2108 Anne Rice's The Vampire Lestat: A Graphic Novel Rating: 0.0
# 2109 Side Jobs
        (The Dresden Files, #12.5) Rating: 0.0
# 2110 Say You're Sorry
        (Morgan Dane, #1) Rating: 0.0
# 2111 A Bend in the Road Rating: 0.0
# 2112 All American

# 2400 Mayflower: A Story of Courage, Community, and War Rating: 0.0
# 2401 Demon from the Dark
        (Immortals After Dark, #9) Rating: 0.0
# 2402 A Widow for One Year Rating: 0.0
# 2403 North Woods Rating: 0.0
# 2404 The Book of Longings Rating: 0.0
# 2405 Ward D Rating: 0.0
# 2406 Wonder
        (Wonder, #1) Rating: 0.0
# 2407 Dawn on a Distant Shore
        (Wilderness, #2) Rating: 0.0
# 2408 Make Me
        (Jack Reacher, #20) Rating: 0.0
# 2409 The Guilt Trip Rating: 0.0
# 2410 Walking Disaster
        (Beautiful, #2) Rating: 0.0
# 2411 The Chain Rating: 0.0
# 2412 Last Argument of Kings
        (The First Law, #3) Rating: 0.0
# 2413 Sweetbitter Rating: 0.0
# 2414 The Doors of Perception & Heaven and Hell Rating: 0.0
# 2415 The Man Who Mistook His Wife for a Hat and Other Clinical Tales Rating: 0.0
# 2416 Through the Looking-Glass and What Alice Found There
        (Alice's Adventures in Wonderland, #2) Rating: 0.0
# 2417 On a Quiet Street Rating: 0.0
# 2418 The Sanatorium
    

# 2714 Tick Tock
        (Michael Bennett, #4) Rating: 0.0
# 2715 Live from New York: An Uncensored History of Saturday Night Live Rating: 0.0
# 2716 The Goal: A Process of Ongoing Improvement Rating: 0.0
# 2717 Marley and Me: Life and Love With the Worldâ€™s Worst Dog Rating: 0.0
# 2718 The Boy from the Woods
        (Wilde, #1) Rating: 0.0
# 2719 The Orphan Master's Son Rating: 0.0
# 2720 Betsy-Tacy
        (Betsy-Tacy, #1) Rating: 0.0
# 2721 The Silence of the Lambs
        (Hannibal Lecter, #2) Rating: 0.0
# 2722 Worst Case
        (Michael Bennett, #3) Rating: 0.0
# 2723 Oh, the Places You’ll Go! Rating: 0.0
# 2724 Clear Rating: 0.0
# 2725 Walden Rating: 0.0
# 2726 Geisha, a Life Rating: 0.0
# 2727 The Appeal Rating: 0.0
# 2728 My Murder Rating: 0.0
# 2729 Count Zero
        (Sprawl, #2) Rating: 0.0
# 2730 Fallen
        (Will Trent, #5) Rating: 0.0
# 2731 The Girl with the Louding Voice Rating: 0.0
# 2732 Eight Hundred Grapes Rating: 0.0
# 2733 Black-Eyed Susans Rating: 0.0
# 273

# 3022 Being Mortal: Medicine and What Matters in the End Rating: 0.0
# 3023 Snow Rating: 0.0
# 3024 Someone Knows My Name Rating: 0.0
# 3025 Th1rt3en
        (Eddie Flynn, #4) Rating: 0.0
# 3026 Raise High the Roof Beam, Carpenters & Seymour: An Introduction Rating: 0.0
# 3027 Like Water for Chocolate Rating: 0.0
# 3028 Rainbow Six
        (John Clark, #2; Jack Ryan Universe, #10) Rating: 0.0
# 3029 The Winter Soldier Rating: 0.0
# 3030 Drive Your Plow Over the Bones of the Dead Rating: 0.0
# 3031 Illusions: The Adventures of a Reluctant Messiah Rating: 0.0
# 3032 Chainfire
        (Sword of Truth, #9) Rating: 0.0
# 3033 The Untethered Soul: The Journey Beyond Yourself Rating: 0.0
# 3034 The Tell-Tale Heart and Other Writings Rating: 0.0
# 3035 I'm Glad My Mom Died Rating: 0.0
# 3036 The Chamber Rating: 0.0
# 3037 Clap When You Land Rating: 0.0
# 3038 Incidents Around the House Rating: 0.0
# 3039 The Indian in the Cupboard
        (The Indian in the Cupboard, #1) Rating: 0.0
# 3040 Lo

# 3342 The Hobbit Rating: 0.0
# 3343 Pop Goes the Weasel
        (Alex Cross, #5) Rating: 0.0
# 3344 The Wrong Side of Goodbye
        (Harry Bosch, #19; Harry Bosch Universe, #29) Rating: 0.0
# 3345 And Every Morning the Way Home Gets Longer and Longer Rating: 0.0
# 3346 The Tale of the Body Thief
        (The Vampire Chronicles, #4) Rating: 0.0
# 3347 Remarkable Creatures Rating: 0.0
# 3348 The Drawing of the Three
        (The Dark Tower, #2) Rating: 0.0
# 3349 The Merchant of Death
        (Pendragon, #1) Rating: 0.0
# 3350 Pictures of You Rating: 0.0
# 3351 A Princess of Mars
        (Barsoom, #1) Rating: 0.0
# 3352 Await Your Reply Rating: 0.0
# 3353 Muse of Nightmares
        (Strange the Dreamer, #2) Rating: 0.0
# 3354 The Complete Works Rating: 0.0
# 3355 After That Night
        (Will Trent, #11) Rating: 0.0
# 3356 The Lowland Rating: 0.0
# 3357 White Noise Rating: 0.0
# 3358 The Girls Rating: 0.0
# 3359 Absolute Power Rating: 0.0
# 3360 Razorblade Tears Rating: 0.0
# 3361 Th

        (Myron Bolitar, #2) Rating: 0.0
# 3672 Selected Poems Rating: 0.0
# 3673 Weyward Rating: 0.0
# 3674 The Stories of John Cheever Rating: 0.0
# 3675 Wow, No Thank You.: Essays Rating: 0.0
# 3676 At Home in Mitford
        (Mitford Years, #1) Rating: 0.0
# 3677 Finlay Donovan Jumps the Gun
        (Finlay Donovan, #3) Rating: 0.0
# 3678 The Ugly Truth
        (Diary of a Wimpy Kid, #5) Rating: 0.0
# 3679 The Elite
        (The Selection, #2) Rating: 0.0
# 3680 Ghost Story Rating: 0.0
# 3681 The Problem of Pain Rating: 0.0
# 3682 If You Ask Me Rating: 0.0
# 3683 The House Across the Lake Rating: 0.0
# 3684 A Dance with Dragons
        (A Song of Ice and Fire, #5) Rating: 0.0
# 3685 The Family Across the Street Rating: 0.0
# 3686 Wool
        (Wool, #1) Rating: 0.0
# 3687 The Nature of Fragile Things Rating: 0.0
# 3688 The Ghost Brigades
        (Old Man's War, #2) Rating: 0.0
# 3689 No One Can Know Rating: 0.0
# 3690 Knuffle Bunny: A Cautionary Tale
        (Knuffle Bunny, #1) Rati

# 3996 Lasher
        (Lives of the Mayfair Witches, #2) Rating: 0.0
# 3997 Under the Dome Rating: 0.0
# 3998 Fifty Shades Darker
        (Fifty Shades, #2) Rating: 0.0
# 3999 Notes on an Execution Rating: 0.0
# 4000 The Story of My Life Rating: 0.0
# 4001 Tess of the Dâ€™Urbervilles Rating: 0.0
# 4002 1776 Rating: 0.0
# 4003 These Silent Woods Rating: 0.0
# 4004 Good Omens: The Nice and Accurate Prophecies of Agnes Nutter, Witch Rating: 0.0
# 4005 Upgrade Rating: 0.0
# 4006 Lamb: The Gospel According to Biff, Christâ€™s Childhood Pal Rating: 0.0
# 4007 Anna Dressed in Blood
        (Anna, #1) Rating: 0.0
# 4008 Death of a Salesman Rating: 0.0
# 4009 A Stranger in the House Rating: 0.0
# 4010 Predictably Irrational: The Hidden Forces That Shape Our Decisions Rating: 0.0
# 4011 It's Not Summer Without You
        (Summer, #2) Rating: 0.0
# 4012 Bittersweet Rating: 0.0
# 4013 Born a Crime: Stories From a South African Childhood Rating: 0.0
# 4014 The Silver Chair
        (Chronicles of N

# 4304 The Strange Case of Dr. Jekyll and Mr. Hyde and Other Tales of Terror Rating: 0.0
# 4305 The Sirens of Titan Rating: 0.0
# 4306 The Christmas Guest Rating: 0.0
# 4307 Gideon the Ninth
        (The Locked Tomb, #1) Rating: 0.0
# 4308 The Color of Water: A Black Man's Tribute to His White Mother Rating: 0.0
# 4309 A Town Called Solace Rating: 0.0
# 4310 The Guncle
        (The Guncle, #1) Rating: 0.0
# 4311 Nothing to Envy: Ordinary Lives in North Korea Rating: 0.0
# 4312 A Painted House Rating: 0.0
# 4313 White Night
        (The Dresden Files, #9) Rating: 0.0
# 4314 Belladonna
        (Belladonna, #1) Rating: 0.0
# 4315 Childhoodâ€™s End Rating: 0.0
# 4316 Lock In
        (Lock In, #1) Rating: 0.0
# 4317 Clockwork Angel
        (The Infernal Devices, #1) Rating: 0.0
# 4318 The Mysterious Affair at Styles
        (Hercule Poirot, #1) Rating: 0.0
# 4319 The Emperor's Soul Rating: 0.0
# 4320 Small Mercies Rating: 0.0
# 4321 Possible Side Effects Rating: 0.0
# 4322 The Rime of the A

        (Space Odyssey, #1) Rating: 0.0
# 4616 The Power Rating: 0.0
# 4617 Rendezvous with Rama
        (Rama, #1) Rating: 0.0
# 4618 Ghost World Rating: 0.0
# 4619 Escape from Camp 14: One Man's Remarkable Odyssey from North Korea to Freedom in the West Rating: 0.0
# 4620 Forever Odd
        (Odd Thomas, #2) Rating: 0.0
# 4621 The Radium Girls: The Dark Story of Americaâ€™s Shining Women Rating: 0.0
# 4622 Predator
        (Kay Scarpetta, #14) Rating: 0.0
# 4623 Fluke: Or, I Know Why the Winged Whale Sings Rating: 0.0
# 4624 Are You My Mother? Rating: 0.0
# 4625 Shuggie Bain Rating: 0.0
# 4626 The Bookseller of Kabul Rating: 0.0
# 4627 Wings
        (Wings, #1) Rating: 0.0
# 4628 John Dies at the End
        (John Dies at the End, #1) Rating: 0.0
# 4629 What the Dog Saw and Other Adventures Rating: 0.0
# 4630 Ghosted Rating: 0.0
# 4631 Proven Guilty
        (The Dresden Files, #8) Rating: 0.0
# 4632 The Mistake
        (Off-Campus, #2) Rating: 0.0
# 4633 Golden Fool
        (Tawny Ma

In [17]:
#doing masked autoencoder
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, TensorDataset, random_split

# Mask for observed values (1 for observed, 0 for missing)
ratings_torch = torch.tensor(ratings).float()
mask = (ratings_torch != 0).float()
mask_tensor = torch.tensor(mask)


#Define autoencoder
class SparseAutoencoder(nn.Module):
    def __init__(self, num_items, latent_dim):
        super(SparseAutoencoder, self).__init__()
        self.encoder = nn.Linear(num_items, latent_dim)
        self.decoder = nn.Linear(latent_dim, num_items)
        
    def forward(self, x):
        encoded = torch.relu(self.encoder(x))
        decoded = self.decoder(encoded)
        # Scale sigmoid output to [1, 5]
        return 1 + 4 * torch.sigmoid(decoded)
        return decoded

    
#initialize the model
num_users, num_items = ratings_torch.shape
np.save("num_users.npy", np.array(num_users))
np.save("num_items.npy", np.array(num_items))
latent_dim = 100#int(num_items/4) # Number of latent features

model = SparseAutoencoder(num_items, latent_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Use MSE loss but only consider observed values
def masked_mse_loss(reconstructed, original, mask):
    loss = ((reconstructed - original) ** 2) * mask
    return loss.sum() / mask.sum()

#break up data into train and val
dataset = TensorDataset(ratings_torch, mask_tensor) #keeping the mask
print("ratings_torch shape =", ratings_torch.shape)
print(len(dataset))
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print("len(train_loader) = ", len(train_loader))
print("len(val_loader) = ", len(val_loader))

#train the model
epochs = 5000
best_loss = 10e10
counter = 0
for epoch in range(epochs):
    train_loss = 0.0
    for inputs, mask in train_loader:
        model.train()
        optimizer.zero_grad()

        # Forward pass
        reconstructed = model(inputs)
        loss = masked_mse_loss(reconstructed, inputs, mask)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()

    train_loss /= len(train_loader)
    
    if (epoch + 1) % 10 == 0:
#         print(f"Epoch {epoch + 1}/{epochs}, Loss: {train_loss}")
        #check validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, mask in val_loader:
                outputs = model(inputs)
                loss = masked_mse_loss(outputs, inputs, mask)
                val_loss += loss.item()
                

        val_loss /= len(val_loader)
        

        print(f"Epoch {epoch + 1} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}")
        if val_loss < best_loss: #if improve then save
            #save model 
            torch.save(model.state_dict(), "model{}.pkl".format(latent_dim))
            best_loss = val_loss
            print("Model saved to model{}.pkl.".format(latent_dim))
            counter = 0
        else:
            counter += 10
            
    if counter > 200:
        print("Done training because of no improvement.")
        break
        
            


  mask_tensor = torch.tensor(mask)


ratings_torch shape = torch.Size([1188, 4726])
1188
len(train_loader) =  30
len(val_loader) =  8
Epoch 10 - Train Loss: 0.6830 - Val Loss: 1.2267
Model saved to model100.pkl.
Epoch 20 - Train Loss: 0.5487 - Val Loss: 1.1266
Model saved to model100.pkl.
Epoch 30 - Train Loss: 0.5986 - Val Loss: 1.0935
Model saved to model100.pkl.
Epoch 40 - Train Loss: 0.5854 - Val Loss: 1.1137
Epoch 50 - Train Loss: 0.6006 - Val Loss: 1.1159
Epoch 60 - Train Loss: 0.6215 - Val Loss: 1.1016
Epoch 70 - Train Loss: 0.6226 - Val Loss: 1.1013
Epoch 80 - Train Loss: 0.6243 - Val Loss: 1.0933
Model saved to model100.pkl.
Epoch 90 - Train Loss: 0.6495 - Val Loss: 1.0995
Epoch 100 - Train Loss: 0.6306 - Val Loss: 1.0936
Epoch 110 - Train Loss: 0.6597 - Val Loss: 1.0935
Epoch 120 - Train Loss: 0.6531 - Val Loss: 1.0960
Epoch 130 - Train Loss: 0.6444 - Val Loss: 1.0937
Epoch 140 - Train Loss: 0.6498 - Val Loss: 1.0851
Model saved to model100.pkl.
Epoch 150 - Train Loss: 0.6548 - Val Loss: 1.0931
Epoch 160 - Train

In [18]:
num_users, num_items

(1188, 4726)

In [19]:
print(sum(sum(mask)))
print(mask.shape)
print(mask.shape[0] * mask.shape[1])
print(sum(sum(mask)) / (mask.shape[0] * mask.shape[1]))



tensor(608.)
torch.Size([14, 4726])
66164
tensor(0.0092)


In [20]:
#Evaulating the model
model.eval()
with torch.no_grad():
    reconstructed = model(ratings_torch)

# Fill missing values in the original matrix
filled_data = ratings_torch.clone()
filled_data[mask_tensor == 0] = reconstructed[mask_tensor == 0]

print("Original Data:\n", ratings_torch)
print("Reconstructed Data:\n", reconstructed)
print("Filled Data:\n", filled_data)


Original Data:
 tensor([[0., 5., 0.,  ..., 0., 0., 0.],
        [0., 0., 4.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 4., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
Reconstructed Data:
 tensor([[4.6432, 5.0000, 1.4816,  ..., 1.0023, 5.0000, 4.8582],
        [3.6250, 3.7326, 3.4404,  ..., 2.7761, 4.6675, 3.4678],
        [3.6250, 3.7326, 3.4404,  ..., 2.7761, 4.6675, 3.4678],
        ...,
        [3.6250, 3.7326, 3.4404,  ..., 2.7761, 4.6675, 3.4678],
        [2.6608, 4.1373, 3.4384,  ..., 2.8527, 4.8805, 3.6008],
        [3.4228, 4.9446, 3.4852,  ..., 2.3691, 4.8673, 3.6917]])
Filled Data:
 tensor([[4.6432, 5.0000, 1.4816,  ..., 1.0023, 5.0000, 4.8582],
        [3.6250, 3.7326, 4.0000,  ..., 2.7761, 4.6675, 3.4678],
        [3.6250, 3.7326, 3.4404,  ..., 2.7761, 4.6675, 3.4678],
        ...,
        [3.6250, 4.0000, 3.4404,  ..., 2.7761, 4.6675, 3.4678],
        [2.6608, 4.1373, 3.4

In [21]:
print(reconstructed[0].numpy())
print(ratings_torch[0].numpy())

print((reconstructed[0].numpy()-ratings_torch[0].numpy())/ratings_torch[0].numpy())

[4.643169  5.        1.4816296 ... 1.0022721 4.9999537 4.858164 ]
[0. 5. 0. ... 0. 0. 0.]
[inf  0. inf ... inf inf inf]


  print((reconstructed[0].numpy()-ratings_torch[0].numpy())/ratings_torch[0].numpy())


In [22]:
pred_ratings_list = reconstructed[0].detach().numpy()

#give a list sorted out with books you've already read:
sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
list_num = 1
for idx in sorted_indices: 
#     print("ratings_matrix[user_id, idx]= ", ratings_matrix[user_id, idx])
    if  (ratings_matrix[user_id, idx] > 0) or(np.isnan(pred_ratings_list[idx])) :
        continue
    print("#", list_num , titles[idx], " - Predicted Rating:", round(pred_ratings_list[idx], 1))
    list_num += 1

Top books are:
# 1 The Diamond Eye  - Predicted Rating: 5.0
# 2 Daughter of Smoke & Bone
        (Daughter of Smoke & Bone, #1)  - Predicted Rating: 5.0
# 3 The Foot Book: Dr. Seuss's Wacky Book of Opposites  - Predicted Rating: 5.0
# 4 Love's Executioner and Other Tales of Psychotherapy  - Predicted Rating: 5.0
# 5 The Hunger Games Trilogy Boxset
        (The Hunger Games, #1-3)  - Predicted Rating: 5.0
# 6 The Guernsey Literary and Potato Peel Pie Society  - Predicted Rating: 5.0
# 7 Where the Red Fern Grows  - Predicted Rating: 5.0
# 8 Lover Revealed
        (Black Dagger Brotherhood, #4)  - Predicted Rating: 5.0
# 9 The Color of Water: A Black Man's Tribute to His White Mother  - Predicted Rating: 5.0
# 10 Club Dead
        (Sookie Stackhouse, #3)  - Predicted Rating: 5.0
# 11 Summers at the Saint  - Predicted Rating: 5.0
# 12 Stay  - Predicted Rating: 5.0
# 13 As Good As Dead
        (A Good Girl's Guide to Murder, #3)  - Predicted Rating: 5.0
# 14 David and Goliath: Underdogs, Mi

# 168 The Partner  - Predicted Rating: 5.0
# 169 Between Two Kingdoms: A Memoir of a Life Interrupted  - Predicted Rating: 5.0
# 170 The Devil in the White City  - Predicted Rating: 5.0
# 171 The Perks of Being a Wallflower  - Predicted Rating: 5.0
# 172 The Black Book
        (Billy Harney, #1)  - Predicted Rating: 5.0
# 173 A Time for Mercy
        (Jake Brigance, #3)  - Predicted Rating: 5.0
# 174 The Girl Who Played with Fire
        (Millennium, #2)  - Predicted Rating: 5.0
# 175 Where the Sidewalk Ends  - Predicted Rating: 5.0
# 176 The Zahir  - Predicted Rating: 5.0
# 177 Divine Rivals
        (Letters of Enchantment, #1)  - Predicted Rating: 5.0
# 178 A ​Court of Silver Flames
        (A Court of Thorns and Roses, #4)  - Predicted Rating: 5.0
# 179 Vanishing Acts  - Predicted Rating: 5.0
# 180 Water for Elephants  - Predicted Rating: 5.0
# 181 The Chain  - Predicted Rating: 5.0
# 182 Half the Sky: Turning Oppression into Opportunity for Women Worldwide  - Predicted Rating: 5.0


# 319 Manâ€™s Search for Meaning  - Predicted Rating: 5.0
# 320 The Road  - Predicted Rating: 5.0
# 321 White Fang  - Predicted Rating: 5.0
# 322 A Walk in the Woods: Rediscovering America on the Appalachian Trail  - Predicted Rating: 5.0
# 323 The Fountainhead  - Predicted Rating: 5.0
# 324 And the Mountains Echoed  - Predicted Rating: 5.0
# 325 'Tis  - Predicted Rating: 5.0
# 326 The Maze Runner
        (The Maze Runner, #1)  - Predicted Rating: 5.0
# 327 Where the Crawdads Sing  - Predicted Rating: 5.0
# 328 The Big Short: Inside the Doomsday Machine  - Predicted Rating: 5.0
# 329 The Empire of Gold
        (The Daevabad Trilogy, #3)  - Predicted Rating: 5.0
# 330 The Color Purple  - Predicted Rating: 5.0
# 331 This Time Tomorrow  - Predicted Rating: 5.0
# 332 Holiday Romance
        (Fitzpatrick Christmas, #1)  - Predicted Rating: 5.0
# 333 Mary Jane  - Predicted Rating: 5.0
# 334 A Feast for Crows
        (A Song of Ice and Fire, #4)  - Predicted Rating: 5.0
# 335 A Woman Is No Ma

        (The Inheritance Games, #1)  - Predicted Rating: 5.0
# 481 Something Borrowed
        (Darcy & Rachel, #1)  - Predicted Rating: 5.0
# 482 Shantaram  - Predicted Rating: 5.0
# 483 Bye, Baby  - Predicted Rating: 5.0
# 484 Contagious
        (Infected, #2)  - Predicted Rating: 5.0
# 485 Confessions  - Predicted Rating: 5.0
# 486 Magnolia Parks
        (Magnolia Parks Universe, #1)  - Predicted Rating: 5.0
# 487 Shiver
        (The Wolves of Mercy Falls, #1)  - Predicted Rating: 5.0
# 488 Treasure Island  - Predicted Rating: 5.0
# 489 Talking as Fast as I Can: From Gilmore Girls to Gilmore Girls, and Everything in Between  - Predicted Rating: 5.0
# 490 The Templar Legacy
        (Cotton Malone, #1)  - Predicted Rating: 5.0
# 491 Shopaholic Takes Manhattan
        (Shopaholic, #2)  - Predicted Rating: 5.0
# 492 Cress
        (The Lunar Chronicles, #3)  - Predicted Rating: 5.0
# 493 Harriet the Spy  - Predicted Rating: 5.0
# 494 The Absolutely True Diary of a Part-Time Indian  - Pred

# 704 Guess How Much I Love You  - Predicted Rating: 5.0
# 705 The Restaurant at the End of the Universe
        (The Hitchhiker's Guide to the Galaxy, #2)  - Predicted Rating: 5.0
# 706 We Solve Murders
        (We Solve Murders, #1)  - Predicted Rating: 5.0
# 707 The Testaments
        (The Handmaid's Tale, #2)  - Predicted Rating: 5.0
# 708 March  - Predicted Rating: 5.0
# 709 The Last Anniversary  - Predicted Rating: 5.0
# 710 Project Hail Mary  - Predicted Rating: 5.0
# 711 A Court of Silver Flames
        (A Court of Thorns and Roses, #4)  - Predicted Rating: 5.0
# 712 The Uncommon Reader  - Predicted Rating: 5.0
# 713 Nightmares and Dreamscapes  - Predicted Rating: 5.0
# 714 Muse of Nightmares
        (Strange the Dreamer, #2)  - Predicted Rating: 5.0
# 715 Little House in the Big Woods
        (Little House, #1)  - Predicted Rating: 5.0
# 716 What I Loved  - Predicted Rating: 5.0
# 717 The Notebook
        (The Notebook, #1)  - Predicted Rating: 5.0
# 718 The Golem's Eye
      

# 977 Timeline  - Predicted Rating: 5.0
# 978 The Magicianâ€™s Nephew
        (Chronicles of Narnia, #6)  - Predicted Rating: 5.0
# 979 Consider Phlebas
        (Culture, #1)  - Predicted Rating: 5.0
# 980 The Diviners
        (The Diviners, #1)  - Predicted Rating: 5.0
# 981 Somebody's Daughter  - Predicted Rating: 5.0
# 982 White Noise  - Predicted Rating: 5.0
# 983 The Tale of Peter Rabbit
        (World of Beatrix Potter, #1)  - Predicted Rating: 5.0
# 984 Watchers  - Predicted Rating: 5.0
# 985 The Love Songs of W.E.B. Du Bois  - Predicted Rating: 5.0
# 986 The Mysterious Affair at Styles
        (Hercule Poirot, #1)  - Predicted Rating: 5.0
# 987 We Should All Be Feminists  - Predicted Rating: 5.0
# 988 Stone Cold Fox  - Predicted Rating: 5.0
# 989 The Reader  - Predicted Rating: 5.0
# 990 Passing  - Predicted Rating: 5.0
# 991 Twisted Lies
        (Twisted, #4)  - Predicted Rating: 5.0
# 992 The Other Mrs.  - Predicted Rating: 5.0
# 993 Kitchen  - Predicted Rating: 5.0
# 994 I L

# 1269 The Sandcastle Girls  - Predicted Rating: 5.0
# 1270 Out of the Easy  - Predicted Rating: 5.0
# 1271 These Precious Days: Essays  - Predicted Rating: 5.0
# 1272 The Wedding Date
        (The Wedding Date, #1)  - Predicted Rating: 5.0
# 1273 Faust, First Part  - Predicted Rating: 5.0
# 1274 Curious George  - Predicted Rating: 5.0
# 1275 A Midsummer Nightâ€™s Dream  - Predicted Rating: 5.0
# 1276 The Book Woman of Troublesome Creek  - Predicted Rating: 5.0
# 1277 In My Dreams I Hold a Knife  - Predicted Rating: 5.0
# 1278 All the King's Men  - Predicted Rating: 5.0
# 1279 The Demon King
        (Seven Realms, #1)  - Predicted Rating: 5.0
# 1280 The Frozen River  - Predicted Rating: 5.0
# 1281 A Room of Oneâ€™s Own  - Predicted Rating: 5.0
# 1282 The Paris Wife  - Predicted Rating: 5.0
# 1283 On Chesil Beach  - Predicted Rating: 5.0
# 1284 Ubik  - Predicted Rating: 5.0
# 1285 Between the Lines
        (Between the Lines, #1)  - Predicted Rating: 5.0
# 1286 The Winter of Our Discont

# 1515 The Sense of an Ending  - Predicted Rating: 5.0
# 1516 The Boleyn Inheritance
        (The Plantagenet and Tudor Novels, #10)  - Predicted Rating: 5.0
# 1517 The Prophet  - Predicted Rating: 5.0
# 1518 Finlay Donovan Jumps the Gun
        (Finlay Donovan, #3)  - Predicted Rating: 5.0
# 1519 Tricky Twenty-Two
        (Stephanie Plum, #22)  - Predicted Rating: 5.0
# 1520 Ramona Quimby, Age 8
        (Ramona, #6)  - Predicted Rating: 5.0
# 1521 Sweet Tooth  - Predicted Rating: 5.0
# 1522 The German Midwife  - Predicted Rating: 5.0
# 1523 Red Rising
        (Red Rising Saga, #1)  - Predicted Rating: 5.0
# 1524 Before I Die  - Predicted Rating: 5.0
# 1525 Scary Stories to Tell in the Dark  - Predicted Rating: 5.0
# 1526 The Most Wonderful Crime of the Year  - Predicted Rating: 5.0
# 1527 Room  - Predicted Rating: 5.0
# 1528 The Good Earth
        (House of Earth, #1)  - Predicted Rating: 5.0
# 1529 Uncle Tom's Cabin  - Predicted Rating: 5.0
# 1530 Mrs. Frisby and the Rats of NIMH
   

# 1818 The Complete Poems  - Predicted Rating: 5.0
# 1819 Gabriel's Inferno
        (Gabriel's Inferno, #1)  - Predicted Rating: 5.0
# 1820 Fool's Errand
        (Tawny Man, #1)  - Predicted Rating: 5.0
# 1821 Invisible Man  - Predicted Rating: 5.0
# 1822 None of This Is True  - Predicted Rating: 5.0
# 1823 Skeleton Crew  - Predicted Rating: 5.0
# 1824 The Vanishing Act of Esme Lennox  - Predicted Rating: 5.0
# 1825 The Runaway Jury  - Predicted Rating: 5.0
# 1826 The Comedy of Errors  - Predicted Rating: 5.0
# 1827 Watchmen  - Predicted Rating: 5.0
# 1828 Motherless Brooklyn  - Predicted Rating: 5.0
# 1829 MaddAddam
        (MaddAddam, #3)  - Predicted Rating: 5.0
# 1830 The Beekeeper's Apprentice
        (Mary Russell and Sherlock Holmes, #1)  - Predicted Rating: 5.0
# 1831 The Wasp Factory  - Predicted Rating: 5.0
# 1832 Many Waters
        (Time Quintet, #4)  - Predicted Rating: 5.0
# 1833 The Overdue Life of Amy Byler  - Predicted Rating: 5.0
# 1834 Dumplin'
        (Dumplin', #1)

# 2131 Kushiel's Chosen
        (PhÃ¨dre's Trilogy, #2)  - Predicted Rating: 4.9
# 2132 The Zombie Survival Guide: Complete Protection from the Living Dead  - Predicted Rating: 4.9
# 2133 On Tyranny: Twenty Lessons from the Twentieth Century  - Predicted Rating: 4.9
# 2134 Let's Explore Diabetes with Owls: Essays, Etc.  - Predicted Rating: 4.9
# 2135 Gone
        (FBI Profiler, #5)  - Predicted Rating: 4.9
# 2136 Memories of My Melancholy Whores  - Predicted Rating: 4.9
# 2137 The Yellow Bird Sings  - Predicted Rating: 4.9
# 2138 Snow Country  - Predicted Rating: 4.9
# 2139 Fool's Fate
        (Tawny Man, #3)  - Predicted Rating: 4.9
# 2140 Mayflower: A Story of Courage, Community, and War  - Predicted Rating: 4.9
# 2141 Spells for Forgetting  - Predicted Rating: 4.9
# 2142 The Big Sleep
        (Philip Marlowe, #1)  - Predicted Rating: 4.9
# 2143 Detransition, Baby  - Predicted Rating: 4.9
# 2144 Mrs. Piggle-Wiggle
        (Mrs. Piggle Wiggle, #1)  - Predicted Rating: 4.9
# 2145 Ident

# 2415 The Eye of the World
        (The Wheel of Time, #1)  - Predicted Rating: 4.9
# 2416 The Way the Crow Flies  - Predicted Rating: 4.8
# 2417 12th of Never
        (Women's Murder Club, #12)  - Predicted Rating: 4.8
# 2418 House Rules  - Predicted Rating: 4.8
# 2419 The Villa  - Predicted Rating: 4.8
# 2420 Jingo
        (Discworld, #21; City Watch, #4)  - Predicted Rating: 4.8
# 2421 His Majesty's Dragon
        (Temeraire, #1)  - Predicted Rating: 4.8
# 2422 The Lion, the Witch and the Wardrobe
        (Chronicles of Narnia, #1)  - Predicted Rating: 4.8
# 2423 Pride and Prejudice  - Predicted Rating: 4.8
# 2424 UnWholly
        (Unwind, #2)  - Predicted Rating: 4.8
# 2425 Gideon the Ninth
        (The Locked Tomb, #1)  - Predicted Rating: 4.8
# 2426 The Invisible Man  - Predicted Rating: 4.8
# 2427 Cross Fire
        (Alex Cross, #17)  - Predicted Rating: 4.8
# 2428 Rules of Civility  - Predicted Rating: 4.8
# 2429 Rendezvous with Rama
        (Rama, #1)  - Predicted Rating: 4.8

# 2702 Maybe Someday
        (Maybe, #1)  - Predicted Rating: 4.6
# 2703 Second First Impressions  - Predicted Rating: 4.6
# 2704 Count Zero
        (Sprawl, #2)  - Predicted Rating: 4.6
# 2705 Whose Body?
        (Lord Peter Wimsey, #1)  - Predicted Rating: 4.6
# 2706 Sideways Stories from Wayside School
        (Wayside School, #1)  - Predicted Rating: 4.6
# 2707 The Closers
        (Harry Bosch, #11; Harry Bosch Universe, #15)  - Predicted Rating: 4.6
# 2708 The Shipping News  - Predicted Rating: 4.6
# 2709 On the Savage Side  - Predicted Rating: 4.6
# 2710 Rich People Problems
        (Crazy Rich Asians, #3)  - Predicted Rating: 4.6
# 2711 The Knife of Never Letting Go
        (Chaos Walking, #1)  - Predicted Rating: 4.6
# 2712 Love You More
        (Tessa Leoni, #1; Detective D.D. Warren, #5)  - Predicted Rating: 4.6
# 2713 Walk Two Moons  - Predicted Rating: 4.6
# 2714 Sex, Drugs, and Cocoa Puffs: A Low Culture Manifesto  - Predicted Rating: 4.6
# 2715 The Dictionary of Lost Word

# 2979 The Rooster Bar  - Predicted Rating: 4.1
# 2980 Red Dragon
        (Hannibal Lecter, #1)  - Predicted Rating: 4.1
# 2981 The Sun Down Motel  - Predicted Rating: 4.1
# 2982 Me and Earl and the Dying Girl  - Predicted Rating: 4.1
# 2983 Predator
        (Kay Scarpetta, #14)  - Predicted Rating: 4.1
# 2984 Beyond the Shadows
        (Night Angel, #3)  - Predicted Rating: 4.1
# 2985 The Gift of the Magi  - Predicted Rating: 4.1
# 2986 The Sandman, Vol. 4: Season of Mists  - Predicted Rating: 4.1
# 2987 Neuromancer
        (Sprawl, #1)  - Predicted Rating: 4.1
# 2988 Circling the Sun  - Predicted Rating: 4.1
# 2989 The Woman in Cabin 10  - Predicted Rating: 4.1
# 2990 The Hunchback of Notre-Dame  - Predicted Rating: 4.1
# 2991 Cat & Mouse
        (Alex Cross, #4)  - Predicted Rating: 4.1
# 2992 Lula Dean's Little Library of Banned Books  - Predicted Rating: 4.1
# 2993 Last Night in Twisted River  - Predicted Rating: 4.1
# 2994 Leonardo da Vinci  - Predicted Rating: 4.0
# 2995 Tribula

        (Psy-Changeling, #1)  - Predicted Rating: 3.1
# 3277 Live from New York: An Uncensored History of Saturday Night Live  - Predicted Rating: 3.0
# 3278 The Wrong Family  - Predicted Rating: 3.0
# 3279 Carmilla  - Predicted Rating: 3.0
# 3280 The Storyteller  - Predicted Rating: 3.0
# 3281 The Burgess Boys  - Predicted Rating: 3.0
# 3282 Klara and the Sun  - Predicted Rating: 3.0
# 3283 Fairy Tale  - Predicted Rating: 3.0
# 3284 The End of Her  - Predicted Rating: 3.0
# 3285 Lord of Chaos
        (The Wheel of Time, #6)  - Predicted Rating: 3.0
# 3286 A Scanner Darkly  - Predicted Rating: 3.0
# 3287 The Girl in the Spider's Web
        (Millennium, #4)  - Predicted Rating: 3.0
# 3288 Red Prophet
        (Tales of Alvin Maker, #2)  - Predicted Rating: 3.0
# 3289 Mountains Beyond Mountains: The Quest of Dr. Paul Farmer, a Man Who Would Cure the World  - Predicted Rating: 3.0
# 3290 The Celebrants  - Predicted Rating: 3.0
# 3291 Battle Hymn of the Tiger Mother  - Predicted Rating: 3.

# 3580 A Touch of Dead  - Predicted Rating: 2.0
# 3581 The Tenth Circle  - Predicted Rating: 2.0
# 3582 The Accidental Tourist  - Predicted Rating: 2.0
# 3583 The Ex Vows  - Predicted Rating: 2.0
# 3584 The Power of Habit: Why We Do What We Do in Life and Business  - Predicted Rating: 2.0
# 3585 Next of Kin  - Predicted Rating: 2.0
# 3586 Fifty Shades Trilogy
        (Fifty Shades, #1-3)  - Predicted Rating: 2.0
# 3587 Wizard and Glass
        (The Dark Tower, #4)  - Predicted Rating: 2.0
# 3588 The Lion Women of Tehran  - Predicted Rating: 2.0
# 3589 Kiss an Angel  - Predicted Rating: 2.0
# 3590 The Elegant Universe: Superstrings, Hidden Dimensions, and the Quest for the Ultimate Theory  - Predicted Rating: 1.9
# 3591 Ulysses  - Predicted Rating: 1.9
# 3592 Bossypants  - Predicted Rating: 1.9
# 3593 Heir of Fire
        (Throne of Glass, #3)  - Predicted Rating: 1.9
# 3594 Five Survive  - Predicted Rating: 1.9
# 3595 Academ's Fury
        (Codex Alera, #2)  - Predicted Rating: 1.9
# 3

# 3894 Get Shorty
        (Chili Palmer, #1)  - Predicted Rating: 1.3
# 3895 The Slippery Slope
        (A Series of Unfortunate Events, #10)  - Predicted Rating: 1.3
# 3896 Razorblade Tears  - Predicted Rating: 1.3
# 3897 Dragonsdawn
        (Pern, #9)  - Predicted Rating: 1.3
# 3898 Heat Wave
        (Nikki Heat, #1)  - Predicted Rating: 1.3
# 3899 The Mouse and the Motorcycle
        (Ralph S. Mouse, #1)  - Predicted Rating: 1.3
# 3900 The Beautiful Mystery
        (Chief Inspector Armand Gamache, #8)  - Predicted Rating: 1.3
# 3901 In a Holidaze  - Predicted Rating: 1.3
# 3902 Lost in a Good Book
        (Thursday Next, #2)  - Predicted Rating: 1.3
# 3903 The Book of Speculation  - Predicted Rating: 1.3
# 3904 Pandora
        (New Tales of the Vampires, #1)  - Predicted Rating: 1.3
# 3905 The Outlaw Demon Wails
        (The Hollows, #6)  - Predicted Rating: 1.3
# 3906 Great Expectations  - Predicted Rating: 1.3
# 3907 Throne of Glass
        (Throne of Glass, #1)  - Predicted Ratin

# 4188 Seating Arrangements  - Predicted Rating: 1.0
# 4189 Pretty Little Liars
        (Pretty Little Liars, #1)  - Predicted Rating: 1.0
# 4190 A History of Wild Places  - Predicted Rating: 1.0
# 4191 Moonflower Murders
        (Susan Ryeland, #2)  - Predicted Rating: 1.0
# 4192 You Are Not Alone  - Predicted Rating: 1.0
# 4193 Ruin and Rising
        (The Shadow and Bone Trilogy, #3)  - Predicted Rating: 1.0
# 4194 How the Irish Saved Civilization: The Untold Story of Ireland's Heroic Role from the Fall of Rome to the Rise of Medieval Europe  - Predicted Rating: 1.0
# 4195 Revival  - Predicted Rating: 1.0
# 4196 Neither Here nor There: Travels in Europe  - Predicted Rating: 1.0
# 4197 Our Missing Hearts  - Predicted Rating: 1.0
# 4198 Confess  - Predicted Rating: 1.0
# 4199 In a Sunburned Country  - Predicted Rating: 1.0
# 4200 Twisted Love
        (Twisted, #1)  - Predicted Rating: 1.0
# 4201 Then We Came to the End  - Predicted Rating: 1.0
# 4202 I Was Here  - Predicted Rating: 1.

# 4473 Catâ€™s Eye  - Predicted Rating: 1.0
# 4474 State of Terror  - Predicted Rating: 1.0
# 4475 Inside Out  - Predicted Rating: 1.0
# 4476 Sisterhood Everlasting
        (Sisterhood, #5)  - Predicted Rating: 1.0
# 4477 A Confederacy of Dunces  - Predicted Rating: 1.0
# 4478 A Wrinkle in Time
        (A Wrinkle in Time Quintet, #1)  - Predicted Rating: 1.0
# 4479 Safe Haven  - Predicted Rating: 1.0
# 4480 Do Androids Dream of Electric Sheep?  - Predicted Rating: 1.0
# 4481 Sex and Vanity  - Predicted Rating: 1.0
# 4482 Desperation  - Predicted Rating: 1.0
# 4483 The Fine Print
        (Dreamland Billionaires, #1)  - Predicted Rating: 1.0
# 4484 The Left Hand of Darkness  - Predicted Rating: 1.0
# 4485 The Locked Door  - Predicted Rating: 1.0
# 4486 Dewey: The Small-Town Library Cat Who Touched the World  - Predicted Rating: 1.0
# 4487 Fever 1793  - Predicted Rating: 1.0
# 4488 Gilead
        (Gilead, #1)  - Predicted Rating: 1.0
# 4489 The Best of Friends  - Predicted Rating: 1.0
# 4

In [23]:
#making weighted loss matrix
percents = np.array([ 2.0839861,   6.38564535, 22.8939068,  37.94135873, 30.69510302])
each_weights = 100/percents
print(each_weights)
print(each_weights.sum())

print(each_weights * percents)

weights_array = np.zeros(ratings_torch.shape)
for i in tqdm(range(len(ratings_torch))):
    for j in range(len(ratings_torch[0])):
        for num in [1, 2, 3, 4, 5]:
            if ratings_torch[i,j] == num:
                weights_array[i,j] = each_weights[num-1]
weights_tensor = torch.tensor(weights_array)

[47.98496497 15.66012431  4.36797445  2.63564625  3.25784865]
73.90655863743766
[100. 100. 100. 100. 100.]


100%|███████████████████████████████████████████████████████████████████████████████| 1188/1188 [04:51<00:00,  4.08it/s]


In [None]:
weights[0]

In [None]:
#doing masked autoencoder with weighted loss
latent_dim = 100 # Number of latent features

model = SparseAutoencoder(num_items, latent_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Use MSE loss with weights but only consider observed values
def masked_mse_loss_diff(reconstructed, original, mask, weights):
    loss = (((reconstructed - original) ** 2) * mask)
    weighted_loss = loss * weights
    return weighted_loss.sum() / mask.sum()/100

#break up data into train and val
print("ratings_torch shape = ", ratings_torch.shape)
print("mask_tensor shape = ", mask_tensor.shape)
print("weights shape = ", weights_tensor.shape)

dataset = TensorDataset(ratings_torch, mask_tensor, weights_tensor) #keeping the mask
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


#train the model
epochs = 5000
best_loss = 10e10
counter = 0
for epoch in range(epochs):
    train_loss = 0.0
    for inputs, mask, this_weight in train_loader:
        model.train()
        optimizer.zero_grad()

        # Forward pass
        reconstructed = model(inputs)
        loss = masked_mse_loss_diff(reconstructed, inputs, mask, this_weight)
#         loss_not_weighted = masked_mse_loss(reconstructed, inputs, mask)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()

    train_loss /= len(train_loader)
    
    if (epoch + 1) % 10 == 0:
#         print(f"Epoch {epoch + 1}/{epochs}, Loss: {train_loss}")
        #check validation
        model.eval()
        val_loss = 0.0
        val_loss_not_weighted = 0.0
        with torch.no_grad():
            for inputs, mask, this_weight in val_loader:
                outputs = model(inputs)
                loss = masked_mse_loss_diff(outputs, inputs, mask, this_weight)
                loss_not_weighted = masked_mse_loss(outputs, inputs, mask)
                val_loss += loss.item()
                val_loss_not_weighted += loss_not_weighted.item()
                

        val_loss /= len(val_loader)
        

        print(f"Epoch {epoch + 1} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Val not weighted: {val_loss_not_weighted:.4f}" )
        if val_loss < best_loss: #if improve then save
            #save model 
            torch.save(model.state_dict(), "model_weighted{}.pkl".format(latent_dim))
            best_loss = val_loss
            print("Model saved to model_weighted{}.pkl.".format(latent_dim))
            counter = 0
        else:
            counter += 10
            
    if counter > 200:
        print("Done training because of no improvement.")
        break
        
            


In [None]:
dfghj
import torch
from sklearn.model_selection import KFold

# Mask for observed values (1 for observed, 0 for missing)
ratings_torch = torch.tensor(ratings).float()
mask = (ratings_torch != 0).float()
print(mask)


#Define autoencoder
class SparseAutoencoder(nn.Module):
    def __init__(self, num_items, latent_dim):
        super(SparseAutoencoder, self).__init__()
        self.encoder = nn.Linear(num_items, latent_dim)
        self.decoder = nn.Linear(latent_dim, num_items)
        
    def forward(self, x):
        encoded = torch.relu(self.encoder(x))
        decoded = self.decoder(encoded)
        # Scale sigmoid output to [1, 5]
        return 1 + 4 * torch.sigmoid(decoded)
        return decoded

    
#initialize the model
num_users, num_items = ratings_torch.shape
np.save("num_users.npy", np.array(num_users))
np.save("num_items.npy", np.array(num_items))

for latent_dim in [2, 5, 10, 20, 40, 50, 75, 100]:
    print("latent_dim = ", latent_dim)
# latent_dim = 20  # Number of latent features

    model = SparseAutoencoder(num_items, latent_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Define your model, loss function, and optimizer
    # Assuming model, ratings_torch, mask, and optimizer are already defined

    epochs = 1000
    k_folds = 5  # Number of folds for cross-validation
    kf = KFold(n_splits=k_folds, shuffle=True)

    # Store the losses for each fold
    fold_losses = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(ratings_torch)):
#         print(f"\nFold {fold + 1}/{k_folds}")

        # Split the data into training and validation sets
        train_ratings = ratings_torch[train_idx]
        val_ratings = ratings_torch[val_idx]
        train_mask = mask[train_idx]
        val_mask = mask[val_idx]

        # Initialize a new model for each fold
        model = SparseAutoencoder(num_items, latent_dim)
    #     optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Re-initialize optimizer for each fold
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        # Train the model
        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()

            # Forward pass for training
            reconstructed = model(train_ratings)
            loss = masked_mse_loss(reconstructed, train_ratings, train_mask)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

#             if (epoch + 1) % 1000 == 0:
#                 print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

        # Evaluate the model on the validation set
        model.eval()
        with torch.no_grad():
            reconstructed_val = model(val_ratings)
            val_loss = masked_mse_loss(reconstructed_val, val_ratings, val_mask)

        print(f"Validation Loss for Fold {fold + 1}: {val_loss.item():.4f}")

        # Store the validation loss for this fold
        fold_losses.append(val_loss.item())

    # Print the average validation loss after all folds
    print(f"\nAverage Validation Loss across all folds: {sum(fold_losses)/k_folds:.4f}")


In [None]:
fgh

In [None]:
ratings_matrix[user_id]

In [None]:
# Convert the filled ratings data into a numpy array
ratings_matrix = ratings_df.values

# Initialize KNN (using user-based KNN)
import math
knn = NearestNeighbors(n_neighbors=math.ceil(num_users/10), metric='cosine')  # Using cosine similarity
knn.fit(ratings_matrix)

# Example: Predict rating for User 0 on Item 2
user_id = 0  # Index of user in the matrix
item_id = 9  # Index of item to predict rating for

# Get the nearest neighbors for user 0 (excluding the user itself)
distances, indices = knn.kneighbors([ratings_matrix[user_id]])

pred_ratings_list = np.array([])
rankings_list = np.array([])
for item_id in range(num_titles):
    # Get the ratings for the neighbors on item 2
    neighbor_ratings = np.array([ratings_matrix[i, item_id] for i in indices[0] if not np.isnan(ratings_matrix[i, item_id])])
    print(ratings_matrix[:, item_id])
    print(neighbor_ratings)
    ghjk
    
    predicted_rating = np.mean(neighbor_ratings[np.nonzero(neighbor_ratings)])
    rankings = np.sum(neighbor_ratings[np.nonzero(neighbor_ratings)])

#     print(predicted_rating)

    pred_ratings_list = np.append(pred_ratings_list, predicted_rating)
    rankings_list = np.append(rankings_list, rankings)
#     print(f"Predicted rating for User {user_id} on Item {item_id}: {predicted_rating}")
    
best_book_rating = np.max(pred_ratings_list)
best_book_idx = np.argmax(pred_ratings_list)

sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
for i, idx in enumerate(sorted_indices): 
    print("#", (i+1) , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))

In [None]:
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds

# Create a sparse matrix (CSR format)
A = np.array([[1.0, 0, 0], [0, 2, 3], [4, 0, 6], [0, 0, 0]])
# print(A)
sparse_matrix = sp.csr_matrix(ratings_matrix)

# Perform SVD on the sparse matrix
# k is the number of singular values to compute (you can choose a value smaller than min(m, n))
U, S, VT = svds(sparse_matrix, k=500)

# Output the matrices
print("U (Left Singular Vectors):\n", U)
print("\nS (Singular Values):\n", S)
print("\nVT (Right Singular Vectors - Transposed):\n", VT)

# Reconstruct the matrix from U, S, VT
S_full = np.diag(S)  # Convert singular values to a diagonal matrix
A_reconstructed = np.dot(U, np.dot(S_full, VT))

print("\nReconstructed Matrix A:\n", A_reconstructed)


In [None]:
my_diff = (ratings_matrix[0]- A_reconstructed[0])
print(ratings_matrix.shape)
plt.plot(my_diff, '.')

# for i in range(len(ratings_matrix[0])):
#     if ratings_matrix[0, i] > 0:
#         print(ratings_matrix[0, i], A_reconstructed[0, i], titles[i])
        
for i in range(len(ratings_matrix[0])):
    if ratings_matrix[0, i] == 0 and A_reconstructed[0, i] > 0:
        print(ratings_matrix[0, i], A_reconstructed[0, i], titles[i])


In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Create a custom colormap with white for 0 and red for non-zero
cmap = mcolors.ListedColormap(['white', 'red'])
bounds = [0, 0.1, 1]  # Set bounds for 0 (white) and non-zero (red)
norm = mcolors.BoundaryNorm(bounds, cmap.N)

# plt.imshow(ratings_matrix - A_reconstructed, cmap=cmap, norm=norm)
plt.plot(ratings_matrix - A_reconstructed)
plt.show()

In [None]:
ratings_matrix - A_reconstructed

In [None]:
from sklearn.cluster import DBSCAN
dbscan = DBSCAN(eps=0.75, min_samples=2, metric='cosine')
labels = dbscan.fit_predict(ratings_matrix)


In [None]:
print(list(set(labels)))
print(labels.shape)

In [None]:
idx_in_group = np.arange(len(labels))
filtered_users = [idx_in_group for idx_in_group, label in zip(idx_in_group, labels) if label == 0]
print(filtered_users)

In [None]:
from sklearn.cluster import SpectralClustering
from scipy.sparse import csr_matrix
import numpy as np

# Example sparse data
# X = np.random.rand(100, 2)
X_sparse = csr_matrix(ratings_matrix)

n_clusters = 50
# Apply Spectral Clustering
spectral = SpectralClustering(n_clusters=n_clusters, affinity='nearest_neighbors')
labels = spectral.fit_predict(X_sparse)

print(labels)
print(list(set(labels)))
print(labels.shape)

In [None]:
# idx_in_group = np.arange(len(labels))
# filtered_users = [idx_in_group for idx_in_group, label in zip(idx_in_group, labels) if label == 0]
# print(filtered_users)

group_averages = []

for group in range(n_clusters):
    # Find indices of users in the current group
    group_users = np.where(labels == group)[0]
    
    # Extract the rows for users in this group
    group_data = ratings_matrix[group_users]
    
    print("Number of perople in group = ", group_data.shape[0])
    
    pred_ratings_list = np.array([])
    for item_id in range(num_titles):
        data = group_data[:,item_id]
        predicted_rating = np.mean(data[np.nonzero(data)])
#                 rankings = np.sum(neighbor_ratings[np.nonzero(neighbor_ratings)])

        pred_ratings_list = np.append(pred_ratings_list, predicted_rating)
    # Compute the average for each column (item) for this group
#     group_avg = np.mean(group_data[np.nonzero(group_data)], axis=0)
    
    # Append the average for this group
    group_averages.append(pred_ratings_list)

# Convert the list of group averages to a numpy array for easy viewing
group_averages = np.array(group_averages)

# Display the average for each item in each group
print("Average preferences for each item by group:")
print(group_averages.shape)

In [None]:
group = labels[0]
print("my group = ", group)
sorted_indices = np.argsort(group_averages[group])[::-1]
print(sorted_indices)
for i in sorted_indices:
    if (ratings_matrix[0, i] > 0) or (np.isnan(group_averages[group, i])):
        pass
    else:
        print(titles[i], round(group_averages[group,i], 1))