In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from tqdm import tqdm
from sklearn.neighbors import NearestNeighbors
import numpy as np
import random
import pickle 

def scrape_goodreads_ratings(user_id, max_pages=10):
    """
    Scrape a user's star ratings from Goodreads.
    
    Args:
    - user_id (str): Goodreads user ID or profile suffix.
    - max_pages (int): Maximum number of pages to scrape (each page contains ~30 books).
    
    Returns:
    - pd.DataFrame: A DataFrame containing book titles and ratings.
    """
    base_url = f"https://www.goodreads.com/review/list/{user_id}?shelf=read"
    headers = {"User-Agent": "Mozilla/5.0"}
    books = []

    for page in range(1, max_pages + 1):
        url = f"{base_url}&page={page}"
        response = requests.get(url, headers=headers)
        
        if response.status_code != 200:
            print(f"Failed to fetch page {page}. Status code: {response.status_code}")
            break

        soup = BeautifulSoup(response.content, "html.parser")
        
        # Find all book entries in the table
        rows = soup.find_all("tr", class_="bookalike review")
        if not rows:
            print("No more data found.")
            break

        for row in rows:
            try:
                title = row.find("td", class_="field title").a.text.strip()
                rating_element = row.find("td", class_="field rating")
                rating = rating_element.find("span", class_="staticStars").get("title", "No rating")
                stars = map_rating(rating)
                books.append({"Title": title, "Rating": stars, "User_id": user_id})
#                 print(title, rating, stars)
            except AttributeError:
                # Handle rows with missing data
                continue

        print(f"Page {page} scraped successfully.")
        time.sleep(random.uniform(1, 5))  # Be kind to the server and avoid being blocked

    # Return data as a pandas DataFrame
    return pd.DataFrame(books)



In [2]:
def map_rating(phrase):
    rating_map = {
        "liked it": 3,
        "really liked it": 4,
        "it was ok": 2, 
        "it was amazing": 5, 
        "did not like it": 1,
    }
    
    return rating_map.get(phrase, "Invalid rating")  # Default to "Invalid rating" if the phrase isn't in the dictionary


In [3]:
# if __name__ == "__main__":
# #     user_id = "6688207"  # Replace with the Goodreads user ID or profile suffix
# #     for user_id in tqdm(['30181442', '75009563', '11345366', '110912303', '113964939', '11215896', '53701594', '4622890', '93628736', '176180116']):
# #     for user_id in tqdm(['2974095', '4622890', '28953843', '16174645', '4159922', '4125660', '54886546', '16912659', '260116', '4685500', '21865425']):
# #     for user_id in tqdm(['53701594', '27709782', '7566229', '16652861', '30817744', '56259255', '4125660', '60964126', 
# #                          '176167767', '28510930', '1029975', '131020767', '28862120', '88713906', '160141433', '41097916', 
# #                          '20809863', '69519261', '24017481', '7376365', '75941333', '13571407', '106618742', '17792052',
# #                          '3534528', '130656897', '7474475', '4125412', '6336365', '6026811', '3438047']):
#     for user_id in ['169695556']:
#         print("User_id = ", user_id)
#         max_pages = 30  # Adjust based on expected data
#         ratings_data = scrape_goodreads_ratings(user_id, max_pages)

#         if not ratings_data.empty:
# #             print(ratings_data.head())
# #             ratings_data.to_csv("goodreads_ratings.csv", index=False)
#             ratings_data.to_csv('goodreads_ratings.csv', mode='a', header=False, index=False)
#             print("Data saved to goodreads_ratings.csv.")
#         else:
#             print("No data retrieved.")


In [4]:
df = pd.read_csv('goodreads_ratings_series.csv')
print(df.shape)
df = df.drop_duplicates()
# Print the entire DataFrame
print(df)

duplicate_count = df['Title'].duplicated().sum()
print("Number of books with at least two people rating it:", duplicate_count)
duplicate_counts_per_value = df['Title'].value_counts()
print(duplicate_counts_per_value)
print("Number of unique books: ", df['Title'].nunique())
num_users = df['User_id'].nunique()
user_ids = list(df['User_id'].unique())
print("number of users is: ", num_users)
print("user_ids = ", user_ids)

(209600, 6)
                                                    Title Rating    User_id  \
0                                       I Am Watching You      3  169695558   
1       Three to Get Deadly\n        (Stephanie Plum, #3)      3  169695558   
2       Before the Coffee Gets Cold\n        (Before t...      4  169695558   
3       Dark Sacred Night\n        (Renée Ballard, #2;...      4  169695558   
4         Two for the Dough\n        (Stephanie Plum, #2)      4  169695558   
...                                                   ...    ...        ...   
209595                            The Old Man and the Sea      5    9497971   
209596  The Hitchhikerâ€™s Guide to the Galaxy\n      ...      5    9497971   
209597                                Slaughterhouse-Five      5    9497971   
209598                              To Kill a Mockingbird      5    9497971   
209599  Ben BuradayÄ±m: OÄŸuz Atay'Ä±n Biyografik ve K...      4    9497972   

        Series  First  Suggest  
0     

In [5]:
# # Get a list of top titles in order
# top_titles = duplicate_counts_per_value.index.tolist()
# top_100 = top_titles[:100]

# for title in top_100:
#     print(title)
    
# with open("top_100.pkl", "wb") as file:
#     pickle.dump(top_100, file)

In [6]:
# threshold = 5#num_users * 0.1
# pop_titles = list(duplicate_counts_per_value[duplicate_counts_per_value > threshold].index)
# my_titles = df.loc[df["User_id"] == 169695558, "Title"].tolist()
# # print(my_titles)

# print("pop titles len = ", len(pop_titles))
# print(pop_titles)
# print("my titles len = ", len(my_titles))
# titles = list(set(pop_titles))# + my_titles))

# # #remove Harry Potter titles:
# # titles = [s for s in titles if "Harry Potter" not in s]

# num_titles = len(titles)


# print(titles)
# # print(titles)
# print("num_titles =", num_titles)

# # ratings = np.full((num_users, num_titles), None)
# ratings = np.zeros((num_users, num_titles))

# for index, row in df.iterrows():
#     if row['Title'] in titles:
#         try:
#             ratings[user_ids.index(row['User_id']), titles.index(row["Title"])] = int(row["Rating"])
# #             print("found ", row["Title"])
#         except:
#             pass
        
# print("ratings size = ", ratings.shape)
# ratings = ratings[~np.all(ratings == 0, axis=1)]
# print("ratings size = ", ratings.shape)
# # Save the list to a file
# with open("titles.pkl", "wb") as file:
#     pickle.dump(titles, file)

In [7]:
# # print(df['Suggest'])

# suggest = list(df['Suggest'])
# print(suggest)

# with open("suggest.pkl", "wb") as file:
#     suggest.dump(suggest, file)

In [8]:
# ratings_df = pd.DataFrame(ratings)
# print(ratings_df.shape)
# #delete users that don't have any of these ratings
# # ratings_df = ratings_df.loc[~(ratings_df == 0).all(axis=1)]
# ratings_df = ratings_df.loc[(ratings_df != 0).sum(axis=1) >= 4] #need at least 4 entries to stay
# print(ratings_df.shape)

# # Calculate percentage of non-zero elements
# percentage_nonzero = (np.count_nonzero(ratings_df) / ratings_df.size) * 100
# print("percentage_nonzero =", round(percentage_nonzero, 1), '%')

# # Save the list to a file
# with open("ratings_df.pkl", "wb") as file:
#     pickle.dump(ratings_df, file)


In [9]:
with open("titles.pkl", "rb") as file:
    titles = pickle.load(file)
    
with open("top_100.pkl", "rb") as file:
    top_100 = pickle.load(file)
    
with open("suggest.pkl", "rb") as file:
    suggest = pickle.load(file)
    
num_titles = len(titles)

# Load the list from the file
with open("ratings_df.pkl", "rb") as file:
    ratings_df = pickle.load(file)
    ratings = ratings_df.to_numpy()


In [10]:
# Convert the filled ratings data into a numpy array
ratings_matrix = ratings_df.values

# Initialize KNN (using user-based KNN)
import math
knn = NearestNeighbors(n_neighbors=50, metric='cosine')  # Using cosine similarity  math.ceil(num_users/10)
knn.fit(ratings_matrix)

with open("knn_model.pkl", "wb") as file:
    pickle.dump(knn, file)

# Example: Predict rating for User 0 on Item 2
user_id = 0  # Index of user in the matrix
item_id = 2  # Index of item to predict rating for

# Get the nearest neighbors for user 0 (excluding the user itself)
distances, indices = knn.kneighbors([ratings_matrix[user_id]])

pred_ratings_list = np.array([])
rankings_list = np.array([])
for item_id in range(num_titles):
    # Get the ratings for the neighbors on item 2
    neighbor_ratings = np.array([ratings_matrix[i, item_id] for i in indices[0] if not np.isnan(ratings_matrix[i, item_id])])

    predicted_rating = np.mean(neighbor_ratings[np.nonzero(neighbor_ratings)])
    rankings = np.sum(neighbor_ratings[np.nonzero(neighbor_ratings)])

#     print(predicted_rating)

    pred_ratings_list = np.append(pred_ratings_list, predicted_rating)
    rankings_list = np.append(rankings_list, rankings)
#     print(f"Predicted rating for User {user_id} on Item {item_id}: {predicted_rating}")
    
best_book_rating = np.max(pred_ratings_list)
best_book_idx = np.argmax(pred_ratings_list)

sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
n = 1
for i, idx in enumerate(sorted_indices): 
    if (ratings[0, idx] > 0) or (np.isnan(pred_ratings_list[idx])):
        pass
    else:
        print("#", (n) , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))
        n+=1

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Top books are:
# 1 L.A. Weather Rating: 5.0
# 2 The Mark of Athena
        (The Heroes of Olympus, #3) Rating: 5.0
# 3 Harry Potter Series Box Set
        (Harry Potter, #1-7) Rating: 5.0
# 4 Fullmetal Alchemist, Vol. 1 Rating: 5.0
# 5 The Merchant of Death
        (Pendragon, #1) Rating: 5.0
# 6 Gregor the Overlander
        (Underland Chronicles, #1) Rating: 5.0
# 7 The Akhenaten Adventure
        (Children of the Lamp, #1) Rating: 5.0
# 8 Flyte
        (Septimus Heap, #2) Rating: 5.0
# 9 Shadow Puppets
        (The Shadow Series, #3) Rating: 5.0
# 10 Dragons of Winter Night
        (Dragonlance: Chronicles, #2) Rating: 5.0
# 11 Ruin and Rising
        (The Shadow and Bone Trilogy, #3) Rating: 5.0
# 12 Fantastic Beasts and Where to Find Them Rating: 5.0
# 13 A Monster Calls Rating: 5.0
# 14 When You Reach Me Rating: 5.0
# 15 Blueberries for Sal Rating: 5.0
# 16 Inkspell
        (Inkworld, #2) Rating: 5.0
# 17 Maus I: A Survivor's Tale: My Father Bleeds History
        (Maus, #1) Rati

# 385 Smokin' Seventeen
        (Stephanie Plum, #17) Rating: 4.0
# 386 The Curious Incident of the Dog in the Night-Time Rating: 4.0
# 387 The Things They Carried Rating: 4.0
# 388 A Reliable Wife Rating: 4.0
# 389 I'll Be Gone in the Dark: One Woman's Obsessive Search for the Golden State Killer Rating: 4.0
# 390 High Fidelity Rating: 4.0
# 391 Dear John Rating: 4.0
# 392 Three Cups of Tea: One Man's Mission to Promote Peace ... One School at a Time Rating: 4.0
# 393 Frindle Rating: 4.0
# 394 A Darker Shade of Magic
        (Shades of Magic, #1) Rating: 4.0
# 395 Explosive Eighteen
        (Stephanie Plum, #18) Rating: 4.0
# 396 The Disreputable History of Frankie Landau-Banks Rating: 4.0
# 397 Handle with Care Rating: 4.0
# 398 The Maze Runner
        (The Maze Runner, #1) Rating: 4.0
# 399 Howards End Rating: 4.0
# 400 Fifty Shades Freed
        (Fifty Shades, #3) Rating: 4.0
# 401 Surely You're Joking, Mr. Feynman!: Adventures of a Curious Character Rating: 4.0
# 402 City of Falle

# 733 Much Ado About Nothing Rating: 3.5
# 734 Christy Rating: 3.5
# 735 Charlotteâ€™s Web Rating: 3.4
# 736 Matched
        (Matched, #1) Rating: 3.4
# 737 Green Eggs and Ham Rating: 3.3
# 738 Beloved
        (Beloved Trilogy, #1) Rating: 3.3
# 739 Speak Rating: 3.3
# 740 Atlas Shrugged Rating: 3.3
# 741 The Sisterhood of the Traveling Pants
        (Sisterhood, #1) Rating: 3.3
# 742 The Cat in the Hat
        (Cat in the Hat, #1) Rating: 3.3
# 743 Something Blue
        (Darcy & Rachel, #2) Rating: 3.3
# 744 A Walk to Remember Rating: 3.3
# 745 Can You Keep a Secret? Rating: 3.3
# 746 Pretties
        (Uglies, #2) Rating: 3.3
# 747 Into the Wild Rating: 3.3
# 748 Little Women Rating: 3.3
# 749 Sense and Sensibility Rating: 3.2
# 750 The Alchemist Rating: 3.2
# 751 Something Borrowed
        (Darcy & Rachel, #1) Rating: 3.2
# 752 Cocktails for Three Rating: 3.0
# 753 Ghosts Rating: 3.0
# 754 Old Yeller Rating: 3.0
# 755 Dearly Devoted Dexter
        (Dexter, #2) Rating: 3.0
# 756 Barr

In [11]:
#make knn for the similar user part
# Initialize KNN (using user-based KNN)
# import math
knn_30 = NearestNeighbors(n_neighbors=50, metric='cosine')  # Using cosine similarity
knn_30.fit(ratings_matrix)

with open("knn_model_30.pkl", "wb") as file:
    pickle.dump(knn_30, file)

In [12]:
rankings_list

best_book_rating = np.max(rankings_list)
best_book_idx = np.argmax(rankings_list)

sorted_indices = np.argsort(rankings_list)[::-1]
print("Top books are:")
for i, idx in enumerate(sorted_indices): 
    print("#", (i+1) , titles[idx], "Rating:", round(pred_ratings_list[idx], 1), ". Ranking:", rankings_list[idx])

Top books are:
# 1 Harry Potter and the Prisoner of Azkaban
        (Harry Potter, #3) Rating: 4.7 . Ranking: 222.0
# 2 Harry Potter and the Goblet of Fire
        (Harry Potter, #4) Rating: 4.8 . Ranking: 216.0
# 3 Harry Potter and the Deathly Hallows
        (Harry Potter, #7) Rating: 4.8 . Ranking: 214.0
# 4 Harry Potter and the Order of the Phoenix
        (Harry Potter, #5) Rating: 4.5 . Ranking: 209.0
# 5 Harry Potter and the Chamber of Secrets
        (Harry Potter, #2) Rating: 4.5 . Ranking: 206.0
# 6 Harry Potter and the Half-Blood Prince
        (Harry Potter, #6) Rating: 4.6 . Ranking: 205.0
# 7 Harry Potter and the Sorcerer's Stone
        (Harry Potter, #1) Rating: 4.7 . Ranking: 200.0
# 8 The Hunger Games
        (The Hunger Games, #1) Rating: 4.7 . Ranking: 156.0
# 9 Catching Fire
        (The Hunger Games, #2) Rating: 4.6 . Ranking: 128.0
# 10 Twilight
        (The Twilight Saga, #1) Rating: 3.9 . Ranking: 124.0
# 11 Mockingjay
        (The Hunger Games, #3) Rating: 4.3

# 284 Because of Winn-Dixie Rating: 4.0 . Ranking: 8.0
# 285 Hamlet Rating: 2.7 . Ranking: 8.0
# 286 Into the Wilderness
        (Wilderness, #1) Rating: 4.0 . Ranking: 8.0
# 287 Atonement Rating: 4.0 . Ranking: 8.0
# 288 I'm Glad My Mom Died Rating: 4.0 . Ranking: 8.0
# 289 Bridget Jones: The Edge of Reason
        (Bridget Jones, #2) Rating: 4.0 . Ranking: 8.0
# 290 Charlie and the Chocolate Factory
        (Charlie Bucket, #1) Rating: 4.0 . Ranking: 8.0
# 291 Betrayed
        (House of Night, #2) Rating: 4.0 . Ranking: 8.0
# 292 It Ends with Us
        (It Ends with Us, #1) Rating: 4.0 . Ranking: 8.0
# 293 Fast Food Nation: The Dark Side of the All-American Meal Rating: 4.0 . Ranking: 8.0
# 294 Fight Club Rating: 2.7 . Ranking: 8.0
# 295 If I Stay
        (If I Stay, #1) Rating: 4.0 . Ranking: 8.0
# 296 The Absolutely True Diary of a Part-Time Indian Rating: 4.0 . Ranking: 8.0
# 297 City of Lost Souls
        (The Mortal Instruments, #5) Rating: 4.0 . Ranking: 8.0
# 298 Anne of Gree

        (Fablehaven, #2) Rating: 5.0 . Ranking: 5.0
# 540 The Hero and the Crown
        (Damar, #2) Rating: 5.0 . Ranking: 5.0
# 541 Lock and Key Rating: 5.0 . Ranking: 5.0
# 542 A Discovery of Witches
        (All Souls, #1) Rating: 5.0 . Ranking: 5.0
# 543 Safe Haven Rating: 5.0 . Ranking: 5.0
# 544 Corduroy Rating: 5.0 . Ranking: 5.0
# 545 The Undomestic Goddess Rating: 5.0 . Ranking: 5.0
# 546 The Two Princesses of Bamarre
        (The Two Princesses of Bamarre, #1) Rating: 5.0 . Ranking: 5.0
# 547 The Mists of Avalon
        (Avalon, #1) Rating: 5.0 . Ranking: 5.0
# 548 Blood of the Fold
        (Sword of Truth, #3) Rating: 5.0 . Ranking: 5.0
# 549 Swan Song Rating: 5.0 . Ranking: 5.0
# 550 Love on the Brain Rating: 5.0 . Ranking: 5.0
# 551 Dracula Rating: 5.0 . Ranking: 5.0
# 552 A Bend in the Road Rating: 5.0 . Ranking: 5.0
# 553 Ms. Marvel, Vol. 1: No Normal Rating: 5.0 . Ranking: 5.0
# 554 These Happy Golden Years
        (Little House, #8) Rating: 5.0 . Ranking: 5.0
# 555 Th

        (Vampire Academy, #5) Rating: 4.0 . Ranking: 4.0
# 804 Before the Coffee Gets Cold
        (Before the Coffee Gets Cold, #1) Rating: 4.0 . Ranking: 4.0
# 805 Mike Mulligan and His Steam Shovel Rating: 4.0 . Ranking: 4.0
# 806 The Things They Carried Rating: 4.0 . Ranking: 4.0
# 807 Plain Truth Rating: 4.0 . Ranking: 4.0
# 808 Bleak House Rating: 4.0 . Ranking: 4.0
# 809 Black Beauty Rating: 4.0 . Ranking: 4.0
# 810 High Fidelity Rating: 4.0 . Ranking: 4.0
# 811 The Innocent Rating: 4.0 . Ranking: 4.0
# 812 Dragons of Autumn Twilight
        (Dragonlance: Chronicles, #1) Rating: 4.0 . Ranking: 4.0
# 813 Tell No One Rating: 4.0 . Ranking: 4.0
# 814 11/22/63 Rating: 4.0 . Ranking: 4.0
# 815 Rodrick Rules
        (Diary of a Wimpy Kid, #2) Rating: 4.0 . Ranking: 4.0
# 816 Lullaby Rating: 4.0 . Ranking: 4.0
# 817 A Darker Shade of Magic
        (Shades of Magic, #1) Rating: 4.0 . Ranking: 4.0
# 818 Scarlett Rating: 4.0 . Ranking: 4.0
# 819 Tara Road Rating: 4.0 . Ranking: 4.0
# 820 

        (The Watch Hill Trilogy, #1) Rating: nan . Ranking: 0.0
# 1069 Who Do You Love Rating: nan . Ranking: 0.0
# 1070 One Summer: America, 1927 Rating: nan . Ranking: 0.0
# 1071 The Woman in Cabin 10 Rating: nan . Ranking: 0.0
# 1072 Daughter of No Worlds
        (The War of Lost Hearts, #1) Rating: nan . Ranking: 0.0
# 1073 Run for Your Life
        (Michael Bennett, #2) Rating: nan . Ranking: 0.0
# 1074 The Match
        (It Happened in Charleston, #1) Rating: nan . Ranking: 0.0
# 1075 Good Luck with That Rating: nan . Ranking: 0.0
# 1076 These Is My Words: The Diary of Sarah Agnes Prine, 1881-1901, Arizona Territories
        (Sarah Agnes Prine, #1) Rating: nan . Ranking: 0.0
# 1077 Memories of My Melancholy Whores Rating: nan . Ranking: 0.0
# 1078 The Secret Keeper Rating: nan . Ranking: 0.0
# 1079 Shiver
        (The Wolves of Mercy Falls, #1) Rating: nan . Ranking: 0.0
# 1080 Broken Harbor
        (Dublin Murder Squad, #4) Rating: nan . Ranking: 0.0
# 1081 Everything's Eventua

# 1351 Hearts in Atlantis Rating: nan . Ranking: 0.0
# 1352 Light in August Rating: nan . Ranking: 0.0
# 1353 The Overdue Life of Amy Byler Rating: nan . Ranking: 0.0
# 1354 The Year of the Flood
        (MaddAddam, #2) Rating: nan . Ranking: 0.0
# 1355 The Quiet American Rating: nan . Ranking: 0.0
# 1356 The Argonauts Rating: nan . Ranking: 0.0
# 1357 Simple Genius
        (Sean King & Michelle Maxwell, #3) Rating: nan . Ranking: 0.0
# 1358 Just After Sunset Rating: nan . Ranking: 0.0
# 1359 The Mistake
        (Off-Campus, #2) Rating: nan . Ranking: 0.0
# 1360 Lover at Last
        (Black Dagger Brotherhood, #11) Rating: nan . Ranking: 0.0
# 1361 P.S. I Still Love You
        (To All the Boys I've Loved Before, #2) Rating: nan . Ranking: 0.0
# 1362 Open Book Rating: nan . Ranking: 0.0
# 1363 Pattern Recognition
        (Blue Ant, #1) Rating: nan . Ranking: 0.0
# 1364 The Time Travelerâ€™s Wife Rating: nan . Ranking: 0.0
# 1365 Ø§Ù„Ù‚Ø±Ø¢Ù† Ø§Ù„ÙƒØ±ÙŠÙ… Rating: nan . Ranking: 0.0
# 13

        (Need, #1) Rating: nan . Ranking: 0.0
# 1584 The Body Keeps the Score: Brain, Mind, and Body in the Healing of Trauma Rating: nan . Ranking: 0.0
# 1585 The Secret Keeper of Jaipur
        (The Jaipur Trilogy, #2) Rating: nan . Ranking: 0.0
# 1586 Better than the Movies
        (Better than the Movies, #1) Rating: nan . Ranking: 0.0
# 1587 Ramona Quimby, Age 8
        (Ramona, #6) Rating: nan . Ranking: 0.0
# 1588 Worst Wingman Ever
        (The Improbable Meet-Cute, #2) Rating: nan . Ranking: 0.0
# 1589 Us Rating: nan . Ranking: 0.0
# 1590 Manhattan Beach Rating: nan . Ranking: 0.0
# 1591 Plum Island
        (John Corey, #1) Rating: nan . Ranking: 0.0
# 1592 The Kingmaker's Daughter
        (The Plantagenet and Tudor Novels, #4; Cousins War, #4) Rating: nan . Ranking: 0.0
# 1593 A Curse for True Love
        (Once Upon a Broken Heart, #3) Rating: nan . Ranking: 0.0
# 1594 Dust Child Rating: nan . Ranking: 0.0
# 1595 Such a Fun Age Rating: nan . Ranking: 0.0
# 1596 Ø«Ù„Ø§Ø«ÙŠØ© 

# 1839 The Family Experiment Rating: nan . Ranking: 0.0
# 1840 Runaway: Stories Rating: nan . Ranking: 0.0
# 1841 Perfect Chemistry
        (Perfect Chemistry, #1) Rating: nan . Ranking: 0.0
# 1842 Lillian Boxfish Takes a Walk Rating: nan . Ranking: 0.0
# 1843 Before I Let Go
        (Skyland, #1) Rating: nan . Ranking: 0.0
# 1844 Lock In
        (Lock In, #1) Rating: nan . Ranking: 0.0
# 1845 Six of Crows
        (Six of Crows, #1) Rating: nan . Ranking: 0.0
# 1846 Sharp Objects Rating: nan . Ranking: 0.0
# 1847 Watermelon
        (Walsh Family, #1) Rating: nan . Ranking: 0.0
# 1848 Once Burned
        (Night Prince, #1) Rating: nan . Ranking: 0.0
# 1849 Wide Sargasso Sea Rating: nan . Ranking: 0.0
# 1850 The Care and Feeding of Ravenously Hungry Girls Rating: nan . Ranking: 0.0
# 1851 The Twisted Ones Rating: nan . Ranking: 0.0
# 1852 Redhead by the Side of the Road Rating: nan . Ranking: 0.0
# 1853 The Sandman, Vol. 5: A Game of You Rating: nan . Ranking: 0.0
# 1854 Pale Demon
     

        (Mercy Thompson, #3) Rating: nan . Ranking: 0.0
# 2105 Love Unwritten
        (Lakefront Billionaires, #2) Rating: nan . Ranking: 0.0
# 2106 Effortless
        (Thoughtless, #2) Rating: nan . Ranking: 0.0
# 2107 Sybil: The Classic True Story of a Woman Possessed by Sixteen Personalities Rating: nan . Ranking: 0.0
# 2108 Walking Disaster
        (Beautiful, #2) Rating: nan . Ranking: 0.0
# 2109 Yours Truly
        (Part of Your World, #2) Rating: nan . Ranking: 0.0
# 2110 Clear Rating: nan . Ranking: 0.0
# 2111 The Coincidence of Callie & Kayden
        (The Coincidence, #1) Rating: nan . Ranking: 0.0
# 2112 A Lovely Lie Rating: nan . Ranking: 0.0
# 2113 The Dead-Tossed Waves
        (The Forest of Hands and Teeth, #2) Rating: nan . Ranking: 0.0
# 2114 Beautiful Ruins Rating: nan . Ranking: 0.0
# 2115 The Warm Hands of Ghosts Rating: nan . Ranking: 0.0
# 2116 Everything I Never Told You Rating: nan . Ranking: 0.0
# 2117 Wolves of the Calla
        (The Dark Tower, #5) Rating: na

# 2367 Do Not Disturb Rating: nan . Ranking: 0.0
# 2368 The Story of a New Name
        (Neapolitan Novels, #2) Rating: nan . Ranking: 0.0
# 2369 Out of the Silent Planet
        (The Space Trilogy, #1) Rating: nan . Ranking: 0.0
# 2370 Worst Case Scenario Rating: nan . Ranking: 0.0
# 2371 Without Fail
        (Jack Reacher, #6) Rating: nan . Ranking: 0.0
# 2372 The Almost Moon Rating: nan . Ranking: 0.0
# 2373 Transfer of Power
        (Mitch Rapp, #3) Rating: nan . Ranking: 0.0
# 2374 Love Story Rating: nan . Ranking: 0.0
# 2375 A Girl Called Samson Rating: nan . Ranking: 0.0
# 2376 Upgrade Rating: nan . Ranking: 0.0
# 2377 Not That Kind of Girl: A Young Woman Tells You What She's "Learned" Rating: nan . Ranking: 0.0
# 2378 A Midsummer Night's Dream Rating: nan . Ranking: 0.0
# 2379 Worth Dying For
        (Jack Reacher, #15) Rating: nan . Ranking: 0.0
# 2380 When We Believed in Mermaids Rating: nan . Ranking: 0.0
# 2381 Magician's Gambit
        (The Belgariad #3) Rating: nan . Rank

# 2637 The True Love Experiment Rating: nan . Ranking: 0.0
# 2638 The Three-Body Problem
        (Remembrance of Earthâ€™s Past, #1) Rating: nan . Ranking: 0.0
# 2639 Once Upon a River Rating: nan . Ranking: 0.0
# 2640 The Unbearable Lightness of Being Rating: nan . Ranking: 0.0
# 2641 Everything is Illuminated Rating: nan . Ranking: 0.0
# 2642 The Light Pirate Rating: nan . Ranking: 0.0
# 2643 The Bedwetter: Stories of Courage, Redemption, and Pee Rating: nan . Ranking: 0.0
# 2644 The Secret Book of Flora Lea Rating: nan . Ranking: 0.0
# 2645 Who Is Maud Dixon? Rating: nan . Ranking: 0.0
# 2646 Saving CeeCee Honeycutt Rating: nan . Ranking: 0.0
# 2647 Death du Jour
        (Temperance Brennan, #2) Rating: nan . Ranking: 0.0
# 2648 Flawless
        (Chestnut Springs, #1) Rating: nan . Ranking: 0.0
# 2649 Utopia Rating: nan . Ranking: 0.0
# 2650 In an Instant Rating: nan . Ranking: 0.0
# 2651 The Sugar Queen Rating: nan . Ranking: 0.0
# 2652 The Tattooist of Auschwitz Rating: nan . Rank

        (Villains, #1) Rating: nan . Ranking: 0.0
# 2905 Maybe You Should Talk to Someone Rating: nan . Ranking: 0.0
# 2906 Binti
        (Binti, #1) Rating: nan . Ranking: 0.0
# 2907 The Ballad of Songbirds and Snakes
        (The Hunger Games, #0) Rating: nan . Ranking: 0.0
# 2908 The Physick Book of Deliverance Dane
        (The Physick Book, #1) Rating: nan . Ranking: 0.0
# 2909 God Emperor of Dune
        (Dune #4) Rating: nan . Ranking: 0.0
# 2910 The Stone Sky
        (The Broken Earth, #3) Rating: nan . Ranking: 0.0
# 2911 The Other Einstein Rating: nan . Ranking: 0.0
# 2912 Black and Blue Rating: nan . Ranking: 0.0
# 2913 A Single Thread Rating: nan . Ranking: 0.0
# 2914 Maybe Not
        (Maybe, #1.5) Rating: nan . Ranking: 0.0
# 2915 The Ride of Her Life: The True Story of a Woman, Her Horse, and Their Last-Chance Journey Across America Rating: nan . Ranking: 0.0
# 2916 The Silmarillion Rating: nan . Ranking: 0.0
# 2917 A Fine Balance Rating: nan . Ranking: 0.0
# 2918 Rivers

# 3167 Creation Lake Rating: nan . Ranking: 0.0
# 3168 Wrong Place Wrong Time Rating: nan . Ranking: 0.0
# 3169 The Angel Maker Rating: nan . Ranking: 0.0
# 3170 The Silent Sister
        (Riley MacPherson, #1) Rating: nan . Ranking: 0.0
# 3171 Remarkable Creatures Rating: nan . Ranking: 0.0
# 3172 Defending Jacob Rating: nan . Ranking: 0.0
# 3173 Migrations Rating: nan . Ranking: 0.0
# 3174 Fool Rating: nan . Ranking: 0.0
# 3175 Counting by 7s Rating: nan . Ranking: 0.0
# 3176 A Storm of Swords 2: Blood and Gold
        (A Song of Ice and Fire, #3, Part 2 of 2) Rating: nan . Ranking: 0.0
# 3177 The Dreamers Rating: nan . Ranking: 0.0
# 3178 Worst Case
        (Michael Bennett, #3) Rating: nan . Ranking: 0.0
# 3179 Good in Bed
        (Cannie Shapiro, #1) Rating: nan . Ranking: 0.0
# 3180 The Guest Rating: nan . Ranking: 0.0
# 3181 The Day of the Triffids Rating: nan . Ranking: 0.0
# 3182 The Dream Daughter Rating: nan . Ranking: 0.0
# 3183 Morality for Beautiful Girls
        (No. 1 L

        (Stillhouse Lake, #2) Rating: nan . Ranking: 0.0
# 3431 A Gate at the Stairs Rating: nan . Ranking: 0.0
# 3432 'Tis Rating: nan . Ranking: 0.0
# 3433 The Rainbow Comes and Goes: A Mother and Son on Life, Love, and Loss Rating: nan . Ranking: 0.0
# 3434 Bloodsucking Fiends
        (A Love Story, #1) Rating: nan . Ranking: 0.0
# 3435 The Right Move
        (Windy City, #2) Rating: nan . Ranking: 0.0
# 3436 The Fall Rating: nan . Ranking: 0.0
# 3437 The Cardinal of the Kremlin
        (Jack Ryan, #4) Rating: nan . Ranking: 0.0
# 3438 The Art of War Rating: nan . Ranking: 0.0
# 3439 Absalom, Absalom! Rating: nan . Ranking: 0.0
# 3440 The Reluctant Fundamentalist Rating: nan . Ranking: 0.0
# 3441 The Devotion of Suspect X
        (Detective Galileo, #1) Rating: nan . Ranking: 0.0
# 3442 A Court of Silver Flames
        (A Court of Thorns and Roses, #4) Rating: nan . Ranking: 0.0
# 3443 The Answer Is No Rating: nan . Ranking: 0.0
# 3444 Today Will Be Different Rating: nan . Ranking: 

# 3695 Little Bee Rating: nan . Ranking: 0.0
# 3696 The Metamorphosis and Other Stories Rating: nan . Ranking: 0.0
# 3697 Unravel Me
        (Shatter Me, #2) Rating: nan . Ranking: 0.0
# 3698 Light on Snow Rating: nan . Ranking: 0.0
# 3699 Honor Rating: nan . Ranking: 0.0
# 3700 Half Magic
        (Tales of Magic, #1) Rating: nan . Ranking: 0.0
# 3701 White Fragility: Why It's So Hard for White People to Talk About Racism Rating: nan . Ranking: 0.0
# 3702 When They Call You a Terrorist: A Black Lives Matter Memoir Rating: nan . Ranking: 0.0
# 3703 The Zombie Survival Guide: Complete Protection from the Living Dead Rating: nan . Ranking: 0.0
# 3704 Private
        (Private, #1) Rating: nan . Ranking: 0.0
# 3705 Executive Orders
        (Jack Ryan, #8) Rating: nan . Ranking: 0.0
# 3706 Dial A for Aunties
        (Aunties, #1) Rating: nan . Ranking: 0.0
# 3707 The Lies I Tell Rating: nan . Ranking: 0.0
# 3708 I Am America Rating: nan . Ranking: 0.0
# 3709 Uncle Tomâ€™s Cabin Rating: nan .

# 3962 An Officer and a Spy Rating: nan . Ranking: 0.0
# 3963 Beach Read Rating: nan . Ranking: 0.0
# 3964 This Is Why We Lied
        (Will Trent, #12) Rating: nan . Ranking: 0.0
# 3965 The Candy House Rating: nan . Ranking: 0.0
# 3966 Alice's Adventures in Wonderland
        (Alice's Adventures in Wonderland, #1) Rating: nan . Ranking: 0.0
# 3967 Death in Venice Rating: nan . Ranking: 0.0
# 3968 Only Time Will Tell
        (The Clifton Chronicles, #1) Rating: nan . Ranking: 0.0
# 3969 Half Asleep in Frog Pajamas Rating: nan . Ranking: 0.0
# 3970 Skin Game
        (The Dresden Files, #15) Rating: nan . Ranking: 0.0
# 3971 Cross
        (Alex Cross, #12) Rating: nan . Ranking: 0.0
# 3972 For One More Day Rating: nan . Ranking: 0.0
# 3973 Fables, Vol. 7: Arabian Nights Rating: nan . Ranking: 0.0
# 3974 Norwegian Wood Rating: nan . Ranking: 0.0
# 3975 Midnightâ€™s Children Rating: nan . Ranking: 0.0
# 3976 Absolution Rating: nan . Ranking: 0.0
# 3977 The Running Man Rating: nan . Ranking

        (The Space Trilogy, #2) Rating: nan . Ranking: 0.0
# 4227 Turtles All the Way Down Rating: nan . Ranking: 0.0
# 4228 The Poetry of Robert Frost Rating: nan . Ranking: 0.0
# 4229 Amsterdam Rating: nan . Ranking: 0.0
# 4230 A Line to Kill
        (Hawthorne & Horowitz #3) Rating: nan . Ranking: 0.0
# 4231 The Chosen Rating: nan . Ranking: 0.0
# 4232 Sphere Rating: nan . Ranking: 0.0
# 4233 The Killing Dance
        (Anita Blake, Vampire Hunter, #6) Rating: nan . Ranking: 0.0
# 4234 The Claiming of Sleeping Beauty
        (Sleeping Beauty, #1) Rating: nan . Ranking: 0.0
# 4235 The Lake House
        (When the Wind Blows, #2) Rating: nan . Ranking: 0.0
# 4236 Johnny Got His Gun Rating: nan . Ranking: 0.0
# 4237 The Submission Rating: nan . Ranking: 0.0
# 4238 Way of the Peaceful Warrior: A Book That Changes Lives Rating: nan . Ranking: 0.0
# 4239 The Vegetarian Rating: nan . Ranking: 0.0
# 4240 The Undead Pool
        (The Hollows, #12) Rating: nan . Ranking: 0.0
# 4241 The Dead Zo

        (Discworld, #32; Tiffany Aching, #2) Rating: nan . Ranking: 0.0
# 4495 The Silver Linings Playbook Rating: nan . Ranking: 0.0
# 4496 Wildfire
        (Maple Hills, #2) Rating: nan . Ranking: 0.0
# 4497 The Ersatz Elevator
        (A Series of Unfortunate Events, #6) Rating: nan . Ranking: 0.0
# 4498 The Only One Left Rating: nan . Ranking: 0.0
# 4499 The Dog Stars Rating: nan . Ranking: 0.0
# 4500 The Pilgrim's Progress Rating: nan . Ranking: 0.0
# 4501 Legends & Lattes
        (Legends & Lattes, #1) Rating: nan . Ranking: 0.0
# 4502 Artificial Condition
        (The Murderbot Diaries, #2) Rating: nan . Ranking: 0.0
# 4503 The Street Lawyer Rating: nan . Ranking: 0.0
# 4504 At Home: A Short History of Private Life Rating: nan . Ranking: 0.0
# 4505 Miracle Creek Rating: nan . Ranking: 0.0
# 4506 Notes from Underground, White Nights, The Dream of a Ridiculous Man, and Selections from The House of the Dead Rating: nan . Ranking: 0.0
# 4507 Casino Royale
        (James Bond, #1) Ra

# 4757 Cyrano de Bergerac Rating: nan . Ranking: 0.0
# 4758 After the Flood Rating: nan . Ranking: 0.0
# 4759 Remember Me? Rating: nan . Ranking: 0.0
# 4760 River Marked
        (Mercy Thompson, #6) Rating: nan . Ranking: 0.0
# 4761 Northern Lights Rating: nan . Ranking: 0.0
# 4762 Quicksilver
        (The Baroque Cycle, #1) Rating: nan . Ranking: 0.0
# 4763 Love and Respect: The Love She Most Desires; The Respect He Desperately Needs Rating: nan . Ranking: 0.0
# 4764 The Painted Veil Rating: nan . Ranking: 0.0
# 4765 The Double Bind Rating: nan . Ranking: 0.0
# 4766 Dear Martin
        (Dear Martin, #1) Rating: nan . Ranking: 0.0
# 4767 The Girl Who Kicked the Hornet's Nest
        (Millennium, #3) Rating: nan . Ranking: 0.0
# 4768 The Battle of Corrin
        (Legends of Dune, #3) Rating: nan . Ranking: 0.0
# 4769 Blackwood Farm
        (The Vampire Chronicles, #9) Rating: nan . Ranking: 0.0
# 4770 The Sweetness of Forgetting Rating: nan . Ranking: 0.0
# 4771 Guardians of the West
  

        (Bartimaeus, #0.5) Rating: nan . Ranking: 0.0
# 5019 A Morbid Taste for Bones
        (Chronicles of Brother Cadfael, #1) Rating: nan . Ranking: 0.0
# 5020 Dark Roads Rating: nan . Ranking: 0.0
# 5021 Behind Closed Doors Rating: nan . Ranking: 0.0
# 5022 Glint
        (The Plated Prisoner, #2) Rating: nan . Ranking: 0.0
# 5023 Desperation Rating: nan . Ranking: 0.0
# 5024 Dragon Bound
        (Elder Races, #1) Rating: nan . Ranking: 0.0
# 5025 The Girl Who Circumnavigated Fairyland in a Ship of Her Own Making
        (Fairyland, #1) Rating: nan . Ranking: 0.0
# 5026 Jitterbug Perfume Rating: nan . Ranking: 0.0
# 5027 The Eye of the World
        (The Wheel of Time, #1) Rating: nan . Ranking: 0.0
# 5028 Wanted
        (Pretty Little Liars, #8) Rating: nan . Ranking: 0.0
# 5029 Adam Rating: nan . Ranking: 0.0
# 5030 The Diamond Throne
        (The Elenium, #1) Rating: nan . Ranking: 0.0
# 5031 Batman: Year One Rating: nan . Ranking: 0.0
# 5032 Heartless
        (Pretty Little Lia

        (Three Sisters Island, #2) Rating: nan . Ranking: 0.0
# 5282 Finale
        (Caraval, #3) Rating: nan . Ranking: 0.0
# 5283 Red Mars
        (Mars Trilogy, #1) Rating: nan . Ranking: 0.0
# 5284 Colorless Tsukuru Tazaki and His Years of Pilgrimage Rating: nan . Ranking: 0.0
# 5285 Childhoodâ€™s End Rating: nan . Ranking: 0.0
# 5286 The Hitchhiker’s Guide to the Galaxy
        (Hitchhiker's Guide to the Galaxy, #1) Rating: nan . Ranking: 0.0
# 5287 Midnight Rating: nan . Ranking: 0.0
# 5288 Blue Nights Rating: nan . Ranking: 0.0
# 5289 The Lost Boy
        (Dave Pelzer #2) Rating: nan . Ranking: 0.0
# 5290 Ben-Hur: A Tale of the Christ Rating: nan . Ranking: 0.0
# 5291 Changeless
        (Parasol Protectorate, #2) Rating: nan . Ranking: 0.0
# 5292 To Rise Again at a Decent Hour Rating: nan . Ranking: 0.0
# 5293 If You Tell: A True Story of Murder, Family Secrets, and the Unbreakable Bond of Sisterhood Rating: nan . Ranking: 0.0
# 5294 Purgatory Ridge
        (Cork O'Connor, #3) R

In [13]:
#give a list sorted out with books you've already read:
# sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
list_num = 1
for idx in sorted_indices: 
#     print("ratings_matrix[user_id, idx]= ", ratings_matrix[user_id, idx])
    if (ratings_matrix[user_id, idx] > 0) or (np.isnan(pred_ratings_list[idx])):
        continue
    print("#", list_num , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))
    list_num += 1

Top books are:
# 1 Harry Potter and the Sorcerer's Stone
        (Harry Potter, #1) Rating: 4.7
# 2 The da Vinci Code
        (Robert Langdon, #2) Rating: 4.0
# 3 Eclipse
        (The Twilight Saga, #3) Rating: 4.0
# 4 Breaking Dawn
        (The Twilight Saga, #4) Rating: 3.8
# 5 Pride and Prejudice Rating: 4.3
# 6 The Hobbit, or There and Back Again
        (The Lord of the Rings, #0) Rating: 4.5
# 7 The Kite Runner Rating: 4.4
# 8 The Great Gatsby Rating: 3.5
# 9 The Sea of Monsters
        (Percy Jackson and the Olympians, #2) Rating: 4.1
# 10 The Lovely Bones Rating: 3.7
# 11 The Battle of the Labyrinth
        (Percy Jackson and the Olympians, #4) Rating: 4.0
# 12 The Help Rating: 4.8
# 13 The Last Olympian
        (Percy Jackson and the Olympians, #5) Rating: 4.3
# 14 Of Mice and Men Rating: 3.5
# 15 Lord of the Flies Rating: 3.0
# 16 The Lion, the Witch and the Wardrobe
        (Chronicles of Narnia, #1) Rating: 4.1
# 17 Where the Sidewalk Ends Rating: 3.6
# 18 The Diary of a Yo

        (The Mortal Instruments, #4) Rating: 4.0
# 218 Running with Scissors Rating: 2.7
# 219 Sizzling Sixteen
        (Stephanie Plum, #16) Rating: 4.0
# 220 Fearless Fourteen
        (Stephanie Plum, #14) Rating: 4.0
# 221 Prince Caspian
        (Chronicles of Narnia, #2) Rating: 4.0
# 222 Deadlocked
        (Sookie Stackhouse, #12) Rating: 4.0
# 223 The Pact Rating: 4.0
# 224 In Cold Blood Rating: 4.0
# 225 The Lorax Rating: 4.0
# 226 The Truth About Forever Rating: 4.0
# 227 A Good Girl's Guide to Murder
        (A Good Girl's Guide to Murder, #1) Rating: 4.0
# 228 Because of Winn-Dixie Rating: 4.0
# 229 Into the Wilderness
        (Wilderness, #1) Rating: 4.0
# 230 Atonement Rating: 4.0
# 231 I'm Glad My Mom Died Rating: 4.0
# 232 Bridget Jones: The Edge of Reason
        (Bridget Jones, #2) Rating: 4.0
# 233 Charlie and the Chocolate Factory
        (Charlie Bucket, #1) Rating: 4.0
# 234 Betrayed
        (House of Night, #2) Rating: 4.0
# 235 It Ends with Us
        (It Ends wit

        (Winnie-the-Pooh, #2) Rating: 4.0
# 561 Oliver Twist Rating: 4.0
# 562 The Iron King
        (The Iron Fey, #1) Rating: 4.0
# 563 4th of July
        (Women's Murder Club, #4) Rating: 4.0
# 564 Before We Were Innocent Rating: 4.0
# 565 Lit Rating: 4.0
# 566 Dexter by Design
        (Dexter, #4) Rating: 4.0
# 567 The Choice Rating: 4.0
# 568 The Complete Tales Rating: 4.0
# 569 Chicka Chicka Boom Boom Rating: 4.0
# 570 The Iron Daughter
        (The Iron Fey, #2) Rating: 4.0
# 571 A Passage to India Rating: 4.0
# 572 Moonwalking with Einstein: The Art and Science of Remembering Everything Rating: 4.0
# 573 The Road Rating: 4.0
# 574 Howards End Rating: 4.0
# 575 The Blood of Olympus
        (The Heroes of Olympus, #5) Rating: 4.0
# 576 The Kitchen House Rating: 4.0
# 577 The Night Circus Rating: 4.0
# 578 The Shell Seekers Rating: 4.0
# 579 Untamed Rating: 4.0
# 580 Tess of the D’Urbervilles Rating: 4.0
# 581 The Shining
        (The Shining, #1) Rating: 4.0
# 582 The Hound of t

        (Roald Dahl's Autobiography, #1) Rating: 3.0
# 925 Where the Heart Is Rating: 3.0
# 926 2nd Chance
        (Women's Murder Club, #2) Rating: 3.0
# 927 13 Little Blue Envelopes
        (Little Blue Envelope, #1) Rating: 3.0
# 928 Snow Falling on Cedars Rating: 3.0
# 929 Julie and Julia: 365 Days, 524 Recipes, 1 Tiny Apartment Kitchen Rating: 2.0
# 930 The Bridges of Madison County Rating: 2.0
# 931 Ashes to Ashes
        (Kovac and Liska, #1) Rating: 2.0
# 932 The Winter Sea
        (Slains, #1) Rating: 2.0
# 933 Vampire Kisses
        (Vampire Kisses, #1) Rating: 2.0
# 934 Extras
        (Uglies, #4) Rating: 2.0
# 935 Inkdeath
        (Inkworld, #3) Rating: 2.0
# 936 Lucky Rating: 2.0
# 937 A Farewell to Arms Rating: 2.0
# 938 The Toll
        (Arc of a Scythe, #3) Rating: 2.0
# 939 Out of the Dust Rating: 2.0
# 940 1776 Rating: 2.0
# 941 Beautiful Creatures
        (Caster Chronicles, #1) Rating: 2.0
# 942 A Spool of Blue Thread Rating: 2.0
# 943 Twilight: The Complete Illustr

In [14]:
idx = 8
this_ratings = ratings_matrix[indices[0,idx]]
print(this_ratings)

my_ratings = ratings_matrix[indices[0,0]]
print(my_ratings)

for i, rating in enumerate(this_ratings):
    if rating > 0 and my_ratings[i]>0:
        print("-", titles[i], ", their Rating:", rating, " My Rating:", my_ratings[i])

# print(distances, indices[-1,-1])

[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
- The Hunger Games
        (The Hunger Games, #1) , their Rating: 5.0  My Rating: 5.0
- A Tale of Two Cities , their Rating: 5.0  My Rating: 2.0
- Eragon
        (The Inheritance Cycle, #1) , their Rating: 4.0  My Rating: 5.0
- Harry Potter and the Half-Blood Prince
        (Harry Potter, #6) , their Rating: 4.0  My Rating: 5.0
- Romeo and Juliet , their Rating: 3.0  My Rating: 2.0
- The Catcher in the Rye , their Rating: 4.0  My Rating: 4.0
- To Kill a Mockingbird , their Rating: 5.0  My Rating: 5.0
- The Scarlet Letter , their Rating: 5.0  My Rating: 2.0
- New Moon
        (The Twilight Saga, #2) , their Rating: 5.0  My Rating: 3.0
- Mockingjay
        (The Hunger Games, #3) , their Rating: 5.0  My Rating: 5.0
- Harry Potter and the Order of the Phoenix
        (Harry Potter, #5) , their Rating: 4.0  My Rating: 5.0
- Harry Potter and the Goblet of Fire
        (Harry Potter, #4) , their Rating: 4.0  My Rating: 5.0
- Twilight
        (Th

In [15]:
#find most similar books using cosine similarity
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

similarity_matrix = cosine_similarity(ratings_matrix.T)

similarity_df = pd.DataFrame(similarity_matrix, index=titles, columns=titles)

# Function to get k nearest neighbors for a movie
def get_similar_book(book_name, k=3):
    similar_book = similarity_df[book_name].sort_values(ascending=False)[1:k+1]
    return similar_book

book_name = 'First Lie Wins'
print("\nTop 5 similar book to", book_name, ":")
print(get_similar_book(book_name, k=5))


Top 5 similar book to First Lie Wins :
She's Not Sorry                0.602133
Listen for the Lie             0.591416
Darling Girls                  0.578486
The Last One at the Wedding    0.568676
The Fury                       0.563608
Name: First Lie Wins, dtype: float64


In [16]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.impute import SimpleImputer

# Example user rating data (rows = users, columns = items)
ratings_df = pd.DataFrame(ratings)

# Step 1: Handle missing values using imputation (we will fill missing values with the mean rating)
imputer = SimpleImputer(strategy='mean')
ratings_filled = imputer.fit_transform(ratings_df)

# Step 2: Apply KMeans clustering
# We will use 2 clusters as an example
kmeans = KMeans(n_clusters=int(num_users/10), random_state=42)
clusters = kmeans.fit_predict(ratings_filled)

# Step 3: Add the cluster labels to the original DataFrame
ratings_df['Cluster'] = clusters

# # Print the user ratings with cluster assignments
# print("\nUser Ratings with Clusters:")
# print(ratings_df)

# # Step 4: Print the cluster centers (the centroid of each cluster)
# print("\nCluster Centers (Centroids):")
# print(kmeans.cluster_centers_)


user_id = 0
# print(clusters)

cluster_this_user = clusters[user_id]
# print(cluster_this_user)

pred_ratings_list = kmeans.cluster_centers_[user_id]
# print(pred_ratings_list)
# for i in range(len())

#give a list sorted out with books you've already read:
sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
list_num = 1
for idx in sorted_indices: 
#     print("ratings_matrix[user_id, idx]= ", ratings_matrix[user_id, idx])
    if (ratings_matrix[user_id, idx] > 0) or (np.isnan(pred_ratings_list[idx])) :
        continue
    print("#", list_num , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))
    list_num += 1



Top books are:
# 1 Sharp Objects Rating: 5.0
# 2 The Girl on the Train Rating: 5.0
# 3 Water for Elephants Rating: 4.7
# 4 The Girl with the Dragon Tattoo
        (Millennium, #1) Rating: 4.3
# 5 Big Little Lies Rating: 3.3
# 6 Mr. Mercedes
        (Bill Hodges Trilogy, #1) Rating: 3.3
# 7 Small Great Things Rating: 3.3
# 8 11/22/63 Rating: 3.3
# 9 The Book Thief Rating: 3.3
# 10 Gone Girl Rating: 3.3
# 11 The Lovely Bones Rating: 3.0
# 12 The Girl Who Played with Fire
        (Millennium, #2) Rating: 3.0
# 13 The Help Rating: 3.0
# 14 The Host
        (The Host, #1) Rating: 3.0
# 15 Dark Places Rating: 3.0
# 16 Memoirs of a Geisha Rating: 2.7
# 17 Never Let Me Go Rating: 2.7
# 18 Atonement Rating: 2.7
# 19 The Goldfinch Rating: 2.7
# 20 Leaving Time Rating: 2.7
# 21 Fifty Shades Freed
        (Fifty Shades, #3) Rating: 2.3
# 22 Under the Dome Rating: 2.3
# 23 Drowning Ruth Rating: 2.3
# 24 Fifty Shades of Grey
        (Fifty Shades, #1) Rating: 2.3
# 25 Fifty Shades Darker
        (Fi

# 269 And Then There Were None Rating: 0.0
# 270 Verity Rating: 0.0
# 271 Fourth Wing
        (The Empyrean, #1) Rating: 0.0
# 272 Coraline Rating: 0.0
# 273 Under the Tuscan Sun: At Home in Italy Rating: 0.0
# 274 Angelaâ€™s Ashes
        (Frank McCourt, #1) Rating: 0.0
# 275 Bossypants Rating: 0.0
# 276 The Invisible Life of Addie LaRue Rating: 0.0
# 277 The Metamorphosis Rating: 0.0
# 278 The Voyage of the Dawn Treader
        (Chronicles of Narnia, #3) Rating: 0.0
# 279 The Stranger Rating: 0.0
# 280 A Feast for Crows
        (A Song of Ice and Fire, #4) Rating: 0.0
# 281 Girl with a Pearl Earring Rating: 0.0
# 282 Daisy Jones & The Six Rating: 0.0
# 283 Moby-Dick or, The Whale Rating: 0.0
# 284 Artemis Fowl
        (Artemis Fowl, #1) Rating: 0.0
# 285 The House in the Cerulean Sea
        (Cerulean Chronicles, #1) Rating: 0.0
# 286 It Ends with Us
        (It Ends with Us, #1) Rating: 0.0
# 287 Into Thin Air: A Personal Account of the Mt. Everest Disaster Rating: 0.0
# 288 A Storm

        (Anne of Green Gables, #1-8) Rating: 0.0
# 543 The Tea Girl of Hummingbird Lane Rating: 0.0
# 544 Valentine Rating: 0.0
# 545 The Red Pyramid
        (The Kane Chronicles, #1) Rating: 0.0
# 546 Summer Sisters Rating: 0.0
# 547 Animal, Vegetable, Miracle: A Year of Food Life Rating: 0.0
# 548 The Long Walk Rating: 0.0
# 549 What Happened to Nina? Rating: 0.0
# 550 Alias Grace Rating: 0.0
# 551 Mexican Gothic Rating: 0.0
# 552 Anna and the French Kiss
        (Anna and the French Kiss, #1) Rating: 0.0
# 553 The Phantom of the Opera Rating: 0.0
# 554 Disgrace Rating: 0.0
# 555 Starship Troopers Rating: 0.0
# 556 Darkly Dreaming Dexter
        (Dexter, #1) Rating: 0.0
# 557 Dreamfever
        (Fever, #4) Rating: 0.0
# 558 The Guncle
        (The Guncle, #1) Rating: 0.0
# 559 Shopaholic Ties the Knot
        (Shopaholic, #3) Rating: 0.0
# 560 The Immortalists Rating: 0.0
# 561 Elantris
        (Elantris, #1) Rating: 0.0
# 562 A Woman Is No Man Rating: 0.0
# 563 The Queen of the Damn

# 827 The Broken Girls Rating: 0.0
# 828 Angels' Blood
        (Guild Hunter, #1) Rating: 0.0
# 829 The Anomaly Rating: 0.0
# 830 Silence Fallen
        (Mercy Thompson, #10) Rating: 0.0
# 831 The End of the Affair Rating: 0.0
# 832 Nine Lives Rating: 0.0
# 833 Changeless
        (Parasol Protectorate, #2) Rating: 0.0
# 834 The Complete Poems Rating: 0.0
# 835 Force of Nature
        (Aaron Falk, #2) Rating: 0.0
# 836 Dearly Devoted Dexter
        (Dexter, #2) Rating: 0.0
# 837 Forever... Rating: 0.0
# 838 On the Beach Rating: 0.0
# 839 Bonk: The Curious Coupling of Science and Sex Rating: 0.0
# 840 Blue Nights Rating: 0.0
# 841 Bury My Heart at Wounded Knee: An Indian History of the American West Rating: 0.0
# 842 Doomsday Book
        (Oxford Time Travel, #1) Rating: 0.0
# 843 Don't Look for Me Rating: 0.0
# 844 O Pioneers! Rating: 0.0
# 845 The Passenger Rating: 0.0
# 846 One Plus One Rating: 0.0
# 847 If Beale Street Could Talk Rating: 0.0
# 848 Scott Pilgrim's Precious Little Life

        (Sarah, Plain and Tall, #1) Rating: 0.0
# 1116 The Perfect Son Rating: 0.0
# 1117 Revelations
        (Blue Bloods, #3) Rating: 0.0
# 1118 At First Sight
        (Jeremy Marsh & Lexie Darnell, #2) Rating: 0.0
# 1119 Forever Rating: 0.0
# 1120 A Is for Alibi
        (Kinsey Millhone #1) Rating: 0.0
# 1121 Girl, Woman, Other Rating: 0.0
# 1122 The Crossing
        (Harry Bosch, #18; Harry Bosch Universe, #28) Rating: 0.0
# 1123 The Time Keeper Rating: 0.0
# 1124 Worst Wingman Ever
        (The Improbable Meet-Cute, #2) Rating: 0.0
# 1125 Peter and the Starcatchers
        (Peter and the Starcatchers, #1) Rating: 0.0
# 1126 The Artist's Way: A Spiritual Path to Higher Creativity Rating: 0.0
# 1127 Bridge of Sighs Rating: 0.0
# 1128 Notorious Nineteen
        (Stephanie Plum, #19) Rating: 0.0
# 1129 The Concrete Blonde
        (Harry Bosch, #3; Harry Bosch Universe, #3) Rating: 0.0
# 1130 Getting Things Done: The Art of Stress-Free Productivity Rating: 0.0
# 1131 Black-Eyed Susans 

# 1401 The Rise and Fall of the Third Reich: A History of Nazi Germany Rating: 0.0
# 1402 His Majesty's Dragon
        (Temeraire, #1) Rating: 0.0
# 1403 Fool Me Once Rating: 0.0
# 1404 Fables, Vol. 9: Sons of Empire Rating: 0.0
# 1405 The Boy in the Field Rating: 0.0
# 1406 Fullmetal Alchemist, Vol. 1 Rating: 0.0
# 1407 Wild at Heart: Discovering the Secret of a Man's Soul Rating: 0.0
# 1408 The Twilight Saga Rating: 0.0
# 1409 Bee Season Rating: 0.0
# 1410 One Grave at a Time
        (Night Huntress, #6) Rating: 0.0
# 1411 Angus, Thongs and Full-Frontal Snogging
        (Confessions of Georgia Nicolson, #1) Rating: 0.0
# 1412 Streams of Silver
        (Forgotten Realms: Icewind Dale, #2; Legend of Drizzt, #5) Rating: 0.0
# 1413 The Trap Rating: 0.0
# 1414 The Museum of Extraordinary Things Rating: 0.0
# 1415 A Happier Life Rating: 0.0
# 1416 The Mad Ship
        (Liveship Traders, #2) Rating: 0.0
# 1417 The Broken Circle: A Memoir of Escaping Afghanistan Rating: 0.0
# 1418 The Incred

# 1703 Snow Rating: 0.0
# 1704 My Utmost for His Highest Rating: 0.0
# 1705 The Death Cure
        (The Maze Runner, #3) Rating: 0.0
# 1706 The Day of the Jackal Rating: 0.0
# 1707 The Golem's Eye
        (Bartimaeus, #2) Rating: 0.0
# 1708 The Graham Effect
        (Campus Diaries, #1) Rating: 0.0
# 1709 The Strange Case of Dr. Jekyll and Mr. Hyde and Other Tales of Terror Rating: 0.0
# 1710 Nausea Rating: 0.0
# 1711 Where the Forest Meets the Stars Rating: 0.0
# 1712 Spark of the Everflame
        (Kindred's Curse, #1) Rating: 0.0
# 1713 The Return Rating: 0.0
# 1714 The Guide Rating: 0.0
# 1715 Jamaica Inn Rating: 0.0
# 1716 The Boy from the Woods
        (Wilde, #1) Rating: 0.0
# 1717 The Liar Rating: 0.0
# 1718 A Town Like Alice Rating: 0.0
# 1719 The Bookstore Sisters Rating: 0.0
# 1720 The Turn of the Screw Rating: 0.0
# 1721 Don't Sweat the Small Stuff ... and It's All Small Stuff: Simple Ways to Keep the Little Things From Taking Over Your Life Rating: 0.0
# 1722 The Jane Aust

# 2001 After the Flood Rating: 0.0
# 2002 Peace Talks
        (The Dresden Files, #16) Rating: 0.0
# 2003 Smoke Gets in Your Eyes & Other Lessons from the Crematory Rating: 0.0
# 2004 The Coldest Girl in Coldtown Rating: 0.0
# 2005 All the Weyrs of Pern
        (Pern, #11) Rating: 0.0
# 2006 Twilight Eyes Rating: 0.0
# 2007 Tweak: Growing Up On Methamphetamines Rating: 0.0
# 2008 Catherine the Great: Portrait of a Woman Rating: 0.0
# 2009 The House at Watch Hill
        (The Watch Hill Trilogy, #1) Rating: 0.0
# 2010 The Third Wife Rating: 0.0
# 2011 When All Is Said Rating: 0.0
# 2012 The Hundred Thousand Kingdoms
        (Inheritance, #1) Rating: 0.0
# 2013 The Mystery of the Blue Train
        (Hercule Poirot, #6) Rating: 0.0
# 2014 The Lake House
        (When the Wind Blows, #2) Rating: 0.0
# 2015 The Hollow
        (Sign of Seven, #2) Rating: 0.0
# 2016 A Midsummer Night's Dream Rating: 0.0
# 2017 Kiss an Angel Rating: 0.0
# 2018 Tricky Twenty-Two
        (Stephanie Plum, #22) Ra

# 2251 The Once and Future Witches Rating: 0.0
# 2252 Sundays at Tiffany's Rating: 0.0
# 2253 The Toll
        (Arc of a Scythe, #3) Rating: 0.0
# 2254 Even Cowgirls Get the Blues Rating: 0.0
# 2255 Heart of a Dog Rating: 0.0
# 2256 Forever, Interrupted Rating: 0.0
# 2257 Ham on Rye Rating: 0.0
# 2258 Tuesdays with Morrie Rating: 0.0
# 2259 North and South Rating: 0.0
# 2260 Sleeping Giants
        (Themis Files, #1) Rating: 0.0
# 2261 Iona Iverson's Rules for Commuting Rating: 0.0
# 2262 The Palace of Illusions Rating: 0.0
# 2263 Possible Side Effects Rating: 0.0
# 2264 Something in the Water Rating: 0.0
# 2265 What You Wish For Rating: 0.0
# 2266 Betty Rating: 0.0
# 2267 The Hero of Ages
        (Mistborn, #3) Rating: 0.0
# 2268 Parable of the Sower
        (Earthseed, #1) Rating: 0.0
# 2269 North and South
        (North and South, #1) Rating: 0.0
# 2270 Key of Knowledge
        (Key Trilogy, #2) Rating: 0.0
# 2271 We the Animals Rating: 0.0
# 2272 The Handmaid’s Tale
        (The H

# 2414 Second Glance Rating: 0.0
# 2415 Messenger
        (The Giver, #3) Rating: 0.0
# 2416 Alex Cross's Trial
        (Alex Cross, #15) Rating: 0.0
# 2417 The Girl with All the Gifts
        (The Girl With All the Gifts, #1) Rating: 0.0
# 2418 A Promised Land Rating: 0.0
# 2419 Horton Hatches the Egg Rating: 0.0
# 2420 Lamb: The Gospel According to Biff, Christ’s Childhood Pal Rating: 0.0
# 2421 Beautiful Darkness
        (Caster Chronicles, #2) Rating: 0.0
# 2422 The Ten Thousand Doors of January Rating: 0.0
# 2423 Silas Marner Rating: 0.0
# 2424 Farmer Boy
        (Little House, #2) Rating: 0.0
# 2425 My Murder Rating: 0.0
# 2426 The Problem of Pain Rating: 0.0
# 2427 The Lies of Locke Lamora
        (Gentleman Bastard, #1) Rating: 0.0
# 2428 Rosemary and Rue
        (October Daye, #1) Rating: 0.0
# 2429 Unsheltered Rating: 0.0
# 2430 Half a King
        (Shattered Sea, #1) Rating: 0.0
# 2431 The Truth About Melody Browne Rating: 0.0
# 2432 Along Came a Spider
        (Alex Cross, 

# 2715 Everything I Know About Love Rating: 0.0
# 2716 Nightmares and Dreamscapes Rating: 0.0
# 2717 The Castaways
        (Nantucket, #2) Rating: 0.0
# 2718 Red, White & Royal Blue Rating: 0.0
# 2719 The Bridges of Madison County Rating: 0.0
# 2720 If You Would Have Told Me Rating: 0.0
# 2721 King of Greed
        (Kings of Sin, #3) Rating: 0.0
# 2722 Eric
        (Discworld, #9; Rincewind, #4) Rating: 0.0
# 2723 The Prince of Tides Rating: 0.0
# 2724 Blood Work
        (Harry Bosch Universe, #7) Rating: 0.0
# 2725 A Million Junes Rating: 0.0
# 2726 A Game of Thrones: The Graphic Novel, Volume One Rating: 0.0
# 2727 Tehanu
        (Earthsea Cycle, #4) Rating: 0.0
# 2728 Ordinary People Rating: 0.0
# 2729 The Couple Next Door Rating: 0.0
# 2730 Before I Let You Go Rating: 0.0
# 2731 Plainsong
        (Plainsong, #1) Rating: 0.0
# 2732 We Are All Completely Beside Ourselves Rating: 0.0
# 2733 Moonraker
        (James Bond, #3) Rating: 0.0
# 2734 The Circle
        (The Circle, #1) Ratin

# 3036 Don't Let Go Rating: 0.0
# 3037 Go the Fuck to Sleep Rating: 0.0
# 3038 Blow Fly
        (Kay Scarpetta, #12) Rating: 0.0
# 3039 Antigone
        (The Theban Plays, #3) Rating: 0.0
# 3040 Under Currents Rating: 0.0
# 3041 White Noise Rating: 0.0
# 3042 Marley and Me: Life and Love With the World’s Worst Dog Rating: 0.0
# 3043 The Sparrow
        (The Sparrow, #1) Rating: 0.0
# 3044 Save Me the Plums: My Gourmet Memoir Rating: 0.0
# 3045 The Year of the Flood
        (MaddAddam, #2) Rating: 0.0
# 3046 The Tale of the Body Thief
        (The Vampire Chronicles, #4) Rating: 0.0
# 3047 Pyramids
        (Discworld, #7) Rating: 0.0
# 3048 People We Meet on Vacation Rating: 0.0
# 3049 Battle Hymn of the Tiger Mother Rating: 0.0
# 3050 A Great Deliverance
        (Inspector Lynley, #1) Rating: 0.0
# 3051 Clap When You Land Rating: 0.0
# 3052 Origin
        (Robert Langdon, #5) Rating: 0.0
# 3053 One Good Turn
        (Jackson Brodie, #2) Rating: 0.0
# 3054 The Bride Test
        (The Ki

# 3279 Anything Is Possible
        (Amgash, #2) Rating: 0.0
# 3280 The Guilt Trip Rating: 0.0
# 3281 Emergency Contact Rating: 0.0
# 3282 Julius Caesar Rating: 0.0
# 3283 Portrait of a Killer: Jack the Ripper - Case Closed Rating: 0.0
# 3284 Under the Never Sky
        (Under the Never Sky, #1) Rating: 0.0
# 3285 Sanctuary Rating: 0.0
# 3286 On Basilisk Station
        (Honor Harrington, #1) Rating: 0.0
# 3287 Fallen
        (Fallen, #1) Rating: 0.0
# 3288 You Shouldn't Have Come Here Rating: 0.0
# 3289 Christine Rating: 0.0
# 3290 Royal Assassin
        (Farseer Trilogy, #2) Rating: 0.0
# 3291 Inside the O'Briens Rating: 0.0
# 3292 The Chain Rating: 0.0
# 3293 Whispers Rating: 0.0
# 3294 Legacy Rating: 0.0
# 3295 Girl Wash your Face Rating: 0.0
# 3296 Winter Street
        (Winter, #1) Rating: 0.0
# 3297 Before We Were Strangers Rating: 0.0
# 3298 Days at the Morisaki Bookshop
        (Days at the Morisaki Bookshop, #1) Rating: 0.0
# 3299 The Good Part Rating: 0.0
# 3300 The Christma

# 3537 The God of the Woods Rating: 0.0
# 3538 Dragonsong
        (Harper Hall, #1) Rating: 0.0
# 3539 Thinking, Fast and Slow Rating: 0.0
# 3540 The Illustrated Man Rating: 0.0
# 3541 The World Is Flat: A Brief History of the Twenty-first Century Rating: 0.0
# 3542 The Diviners
        (The Diviners, #1) Rating: 0.0
# 3543 Twisted Hate
        (Twisted, #3) Rating: 0.0
# 3544 Blindsighted
        (Grant County, #1) Rating: 0.0
# 3545 The Prime of Miss Jean Brodie Rating: 0.0
# 3546 The Beach House Rating: 0.0
# 3547 Love You Forever Rating: 0.0
# 3548 Waiting for Godot Rating: 0.0
# 3549 A Little Life Rating: 0.0
# 3550 The Younger Wife Rating: 0.0
# 3551 Relic
        (Pendergast, #1) Rating: 0.0
# 3552 The Waste Land Rating: 0.0
# 3553 Girl in Translation Rating: 0.0
# 3554 The Light We Lost Rating: 0.0
# 3555 John Dies at the End
        (John Dies at the End, #1) Rating: 0.0
# 3556 Make Me
        (Jack Reacher, #20) Rating: 0.0
# 3557 Disappearing Earth Rating: 0.0
# 3558 The Las

# 3827 Illusions: The Adventures of a Reluctant Messiah Rating: 0.0
# 3828 Regretting You Rating: 0.0
# 3829 Tess of the Dâ€™Urbervilles Rating: 0.0
# 3830 Something Wicked This Way Comes Rating: 0.0
# 3831 Heart of Darkness Rating: 0.0
# 3832 Wintersmith
        (Discworld, #35; Tiffany Aching, #3) Rating: 0.0
# 3833 Lock Every Door Rating: 0.0
# 3834 False Witness Rating: 0.0
# 3835 Piranesi Rating: 0.0
# 3836 Things Fall Apart
        (The African Trilogy, #1) Rating: 0.0
# 3837 Cross Country
        (Alex Cross, #14) Rating: 0.0
# 3838 Family of Liars Rating: 0.0
# 3839 Strega Nona Rating: 0.0
# 3840 In the Dark Rating: 0.0
# 3841 Circle of Friends Rating: 0.0
# 3842 Sourcery
        (Discworld, #5; Rincewind, #3) Rating: 0.0
# 3843 The Weekenders Rating: 0.0
# 3844 Cry, the Beloved Country Rating: 0.0
# 3845 The Right Stuff Rating: 0.0
# 3846 Magic Bleeds
        (Kate Daniels, #4) Rating: 0.0
# 3847 Still Life
        (Chief Inspector Armand Gamache, #1) Rating: 0.0
# 3848 Uncle 

# 4127 A Streetcar Named Desire Rating: 0.0
# 4128 A Fatal Grace
        (Chief Inspector Armand Gamache, #2) Rating: 0.0
# 4129 A Caribbean Mystery
        (Miss Marple, #9) Rating: 0.0
# 4130 The Emperor's Soul Rating: 0.0
# 4131 Belladonna
        (Belladonna, #1) Rating: 0.0
# 4132 The Man in the Iron Mask Rating: 0.0
# 4133 Melmoth Rating: 0.0
# 4134 Tara Road Rating: 0.0
# 4135 The Summons Rating: 0.0
# 4136 One True Loves Rating: 0.0
# 4137 The House of the Seven Gables Rating: 0.0
# 4138 Batman: The Long Halloween Rating: 0.0
# 4139 Let's Pretend This Never Happened: A Mostly True Memoir Rating: 0.0
# 4140 The Patient's Secret Rating: 0.0
# 4141 The Regulators Rating: 0.0
# 4142 The Whistler
        (The Whistler, #1) Rating: 0.0
# 4143 The Known World Rating: 0.0
# 4144 The Bourne Supremacy
        (Jason Bourne, #2) Rating: 0.0
# 4145 Shatter Me
        (Shatter Me, #1) Rating: 0.0
# 4146 Dumplin'
        (Dumplin', #1) Rating: 0.0
# 4147 The Searcher Rating: 0.0
# 4148 Lucki

# 4428 I Don't Forgive You Rating: 0.0
# 4429 The School of Essential Ingredients Rating: 0.0
# 4430 True Grit Rating: 0.0
# 4431 Warbreaker Rating: 0.0
# 4432 The Replacement Rating: 0.0
# 4433 Cold Mountain Rating: 0.0
# 4434 Call Me By Your Name
        (Call Me By Your Name, #1) Rating: 0.0
# 4435 Princess: A True Story of Life Behind the Veil in Saudi Arabia Rating: 0.0
# 4436 Oathbringer
        (The Stormlight Archive, #3) Rating: 0.0
# 4437 Heir of Fire
        (Throne of Glass, #3) Rating: 0.0
# 4438 Definitely Dead
        (Sookie Stackhouse, #6) Rating: 0.0
# 4439 House of Earth and Blood
        (Crescent City, #1) Rating: 0.0
# 4440 The Devil and Miss Prym Rating: 0.0
# 4441 A Column of Fire
        (Kingsbridge, #3) Rating: 0.0
# 4442 The Complete Stories Rating: 0.0
# 4443 Fox in Socks Rating: 0.0
# 4444 Winter's Heart
        (The Wheel of Time, #9) Rating: 0.0
# 4445 The List Rating: 0.0
# 4446 The Quickie Rating: 0.0
# 4447 Scarpetta
        (Kay Scarpetta, #16) Ratin

# 4732 The Woman in Cabin 10 Rating: 0.0
# 4733 Breakfast of Champions Rating: 0.0
# 4734 The Hotel Nantucket Rating: 0.0
# 4735 Takedown Twenty
        (Stephanie Plum, #20) Rating: 0.0
# 4736 The Cheat Sheet
        (The Cheat Sheet, #1) Rating: 0.0
# 4737 8th Confession
        (Women's Murder Club, #8) Rating: 0.0
# 4738 The Girl He Used to Know Rating: 0.0
# 4739 Journey to the Center of the Earth Rating: 0.0
# 4740 Those Who Leave and Those Who Stay
        (Neapolitan Novels, #3) Rating: 0.0
# 4741 The Dictionary of Lost Words Rating: 0.0
# 4742 After the End Rating: 0.0
# 4743 Heaven is for Real: A Little Boy's Astounding Story of His Trip to Heaven and Back Rating: 0.0
# 4744 Purgatory Ridge
        (Cork O'Connor, #3) Rating: 0.0
# 4745 To Rise Again at a Decent Hour Rating: 0.0
# 4746 Ben-Hur: A Tale of the Christ Rating: 0.0
# 4747 The Lost Boy
        (Dave Pelzer #2) Rating: 0.0
# 4748 Midnight Rating: 0.0
# 4749 The Hitchhiker’s Guide to the Galaxy
        (Hitchhiker's 

# 5037 Cheaper by the Dozen Rating: 0.0
# 5038 Anya's Ghost Rating: 0.0
# 5039 The Remains of the Day Rating: 0.0
# 5040 Evermore
        (The Immortals, #1) Rating: 0.0
# 5041 The Story of Ferdinand Rating: 0.0
# 5042 The Bell Jar Rating: 0.0
# 5043 12th of Never
        (Women's Murder Club, #12) Rating: 0.0
# 5044 Legend
        (The Drenai Saga, #1) Rating: 0.0
# 5045 Valley of the Dolls Rating: 0.0
# 5046 Glory Road Rating: 0.0
# 5047 The Humans Rating: 0.0
# 5048 King of Wrath
        (Kings of Sin, #1) Rating: 0.0
# 5049 Carmilla Rating: 0.0
# 5050 What Alice Forgot Rating: 0.0
# 5051 Grip of the Shadow Plague
        (Fablehaven, #3) Rating: 0.0
# 5052 All Her Little Secrets Rating: 0.0
# 5053 Then She Was Gone Rating: 0.0
# 5054 The Widow
        (Kate Waters, #1) Rating: 0.0
# 5055 Her Final Breath
        (Tracy Crosswhite, #2) Rating: 0.0
# 5056 How the Irish Saved Civilization: The Untold Story of Ireland's Heroic Role from the Fall of Rome to the Rise of Medieval Europe R

In [None]:
#doing masked autoencoder
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, TensorDataset, random_split

# Mask for observed values (1 for observed, 0 for missing)
ratings_torch = torch.tensor(ratings).float()
mask = (ratings_torch != 0).float()
mask_tensor = torch.tensor(mask)


#Define autoencoder
class SparseAutoencoder(nn.Module):
    def __init__(self, num_items, latent_dim):
        super(SparseAutoencoder, self).__init__()
        self.encoder = nn.Linear(num_items, latent_dim)
        self.decoder = nn.Linear(latent_dim, num_items)
        
    def forward(self, x):
        encoded = torch.relu(self.encoder(x))
        decoded = self.decoder(encoded)
        # Scale sigmoid output to [1, 5]
        return 1 + 4 * torch.sigmoid(decoded)
        return decoded

    
#initialize the model
num_users, num_items = ratings_torch.shape
np.save("num_users.npy", np.array(num_users))
np.save("num_items.npy", np.array(num_items))
latent_dim = 100#int(num_items/4) # Number of latent features

model = SparseAutoencoder(num_items, latent_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Use MSE loss but only consider observed values
def masked_mse_loss(reconstructed, original, mask):
    loss = ((reconstructed - original) ** 2) * mask
    return loss.sum() / mask.sum()

#break up data into train and val
dataset = TensorDataset(ratings_torch, mask_tensor) #keeping the mask
print("ratings_torch shape =", ratings_torch.shape)
print(len(dataset))
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print("len(train_loader) = ", len(train_loader))
print("len(val_loader) = ", len(val_loader))

#train the model
epochs = 5000
best_loss = 10e10
counter = 0
for epoch in range(epochs):
    train_loss = 0.0
    for inputs, mask in train_loader:
        model.train()
        optimizer.zero_grad()

        # Forward pass
        reconstructed = model(inputs)
        loss = masked_mse_loss(reconstructed, inputs, mask)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()

    train_loss /= len(train_loader)
    
    if (epoch + 1) % 10 == 0:
#         print(f"Epoch {epoch + 1}/{epochs}, Loss: {train_loss}")
        #check validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, mask in val_loader:
                outputs = model(inputs)
                loss = masked_mse_loss(outputs, inputs, mask)
                val_loss += loss.item()
                

        val_loss /= len(val_loader)
        

        print(f"Epoch {epoch + 1} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}")
        if val_loss < best_loss: #if improve then save
            #save model 
            torch.save(model.state_dict(), "model{}.pkl".format(latent_dim))
            best_loss = val_loss
            print("Model saved to model{}.pkl.".format(latent_dim))
            counter = 0
        else:
            counter += 10
            
    if counter > 200:
        print("Done training because of no improvement.")
        break
        
            


  mask_tensor = torch.tensor(mask)


ratings_torch shape = torch.Size([1307, 5316])
1307
len(train_loader) =  33
len(val_loader) =  9
Epoch 10 - Train Loss: 0.7248 - Val Loss: 1.2032
Model saved to model100.pkl.
Epoch 20 - Train Loss: 0.6405 - Val Loss: 1.1211
Model saved to model100.pkl.
Epoch 30 - Train Loss: 0.6129 - Val Loss: 1.1309
Epoch 40 - Train Loss: 0.6347 - Val Loss: 1.1183
Model saved to model100.pkl.
Epoch 50 - Train Loss: 0.6372 - Val Loss: 1.1155
Model saved to model100.pkl.
Epoch 60 - Train Loss: 0.6495 - Val Loss: 1.0946
Model saved to model100.pkl.
Epoch 70 - Train Loss: 0.6627 - Val Loss: 1.1277
Epoch 80 - Train Loss: 0.6843 - Val Loss: 1.1199
Epoch 90 - Train Loss: 0.6796 - Val Loss: 1.0967
Epoch 100 - Train Loss: 0.6824 - Val Loss: 1.1144
Epoch 110 - Train Loss: 0.6754 - Val Loss: 1.1119
Epoch 120 - Train Loss: 0.6963 - Val Loss: 1.0924
Model saved to model100.pkl.
Epoch 130 - Train Loss: 0.7159 - Val Loss: 1.0883
Model saved to model100.pkl.
Epoch 140 - Train Loss: 0.7130 - Val Loss: 1.0942
Epoch 150

In [None]:
num_users, num_items

In [None]:
print(sum(sum(mask)))
print(mask.shape)
print(mask.shape[0] * mask.shape[1])
print(sum(sum(mask)) / (mask.shape[0] * mask.shape[1]))



In [None]:
#Evaulating the model
model.eval()
with torch.no_grad():
    reconstructed = model(ratings_torch)

# Fill missing values in the original matrix
filled_data = ratings_torch.clone()
filled_data[mask_tensor == 0] = reconstructed[mask_tensor == 0]

print("Original Data:\n", ratings_torch)
print("Reconstructed Data:\n", reconstructed)
print("Filled Data:\n", filled_data)


In [None]:
print(reconstructed[0].numpy())
print(ratings_torch[0].numpy())

print((reconstructed[0].numpy()-ratings_torch[0].numpy())/ratings_torch[0].numpy())

In [None]:
pred_ratings_list = reconstructed[0].detach().numpy()

#give a list sorted out with books you've already read:
sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
list_num = 1
for idx in sorted_indices: 
#     print("ratings_matrix[user_id, idx]= ", ratings_matrix[user_id, idx])
    if  (ratings_matrix[user_id, idx] > 0) or(np.isnan(pred_ratings_list[idx])) :
        continue
    print("#", list_num , titles[idx], " - Predicted Rating:", round(pred_ratings_list[idx], 1))
    list_num += 1

In [None]:
#making weighted loss matrix
percents = np.array([ 2.0839861,   6.38564535, 22.8939068,  37.94135873, 30.69510302])
each_weights = 100/percents
print(each_weights)
print(each_weights.sum())

print(each_weights * percents)

weights_array = np.zeros(ratings_torch.shape)
for i in tqdm(range(len(ratings_torch))):
    for j in range(len(ratings_torch[0])):
        for num in [1, 2, 3, 4, 5]:
            if ratings_torch[i,j] == num:
                weights_array[i,j] = each_weights[num-1]
weights_tensor = torch.tensor(weights_array)

In [None]:
weights[0]

In [None]:
#doing masked autoencoder with weighted loss
latent_dim = 100 # Number of latent features

model = SparseAutoencoder(num_items, latent_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Use MSE loss with weights but only consider observed values
def masked_mse_loss_diff(reconstructed, original, mask, weights):
    loss = (((reconstructed - original) ** 2) * mask)
    weighted_loss = loss * weights
    return weighted_loss.sum() / mask.sum()/100

#break up data into train and val
print("ratings_torch shape = ", ratings_torch.shape)
print("mask_tensor shape = ", mask_tensor.shape)
print("weights shape = ", weights_tensor.shape)

dataset = TensorDataset(ratings_torch, mask_tensor, weights_tensor) #keeping the mask
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


#train the model
epochs = 5000
best_loss = 10e10
counter = 0
for epoch in range(epochs):
    train_loss = 0.0
    for inputs, mask, this_weight in train_loader:
        model.train()
        optimizer.zero_grad()

        # Forward pass
        reconstructed = model(inputs)
        loss = masked_mse_loss_diff(reconstructed, inputs, mask, this_weight)
#         loss_not_weighted = masked_mse_loss(reconstructed, inputs, mask)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()

    train_loss /= len(train_loader)
    
    if (epoch + 1) % 10 == 0:
#         print(f"Epoch {epoch + 1}/{epochs}, Loss: {train_loss}")
        #check validation
        model.eval()
        val_loss = 0.0
        val_loss_not_weighted = 0.0
        with torch.no_grad():
            for inputs, mask, this_weight in val_loader:
                outputs = model(inputs)
                loss = masked_mse_loss_diff(outputs, inputs, mask, this_weight)
                loss_not_weighted = masked_mse_loss(outputs, inputs, mask)
                val_loss += loss.item()
                val_loss_not_weighted += loss_not_weighted.item()
                

        val_loss /= len(val_loader)
        

        print(f"Epoch {epoch + 1} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Val not weighted: {val_loss_not_weighted:.4f}" )
        if val_loss < best_loss: #if improve then save
            #save model 
            torch.save(model.state_dict(), "model_weighted{}.pkl".format(latent_dim))
            best_loss = val_loss
            print("Model saved to model_weighted{}.pkl.".format(latent_dim))
            counter = 0
        else:
            counter += 10
            
    if counter > 200:
        print("Done training because of no improvement.")
        break
        
            


In [None]:
dfghj
import torch
from sklearn.model_selection import KFold

# Mask for observed values (1 for observed, 0 for missing)
ratings_torch = torch.tensor(ratings).float()
mask = (ratings_torch != 0).float()
print(mask)


#Define autoencoder
class SparseAutoencoder(nn.Module):
    def __init__(self, num_items, latent_dim):
        super(SparseAutoencoder, self).__init__()
        self.encoder = nn.Linear(num_items, latent_dim)
        self.decoder = nn.Linear(latent_dim, num_items)
        
    def forward(self, x):
        encoded = torch.relu(self.encoder(x))
        decoded = self.decoder(encoded)
        # Scale sigmoid output to [1, 5]
        return 1 + 4 * torch.sigmoid(decoded)
        return decoded

    
#initialize the model
num_users, num_items = ratings_torch.shape
np.save("num_users.npy", np.array(num_users))
np.save("num_items.npy", np.array(num_items))

for latent_dim in [2, 5, 10, 20, 40, 50, 75, 100]:
    print("latent_dim = ", latent_dim)
# latent_dim = 20  # Number of latent features

    model = SparseAutoencoder(num_items, latent_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Define your model, loss function, and optimizer
    # Assuming model, ratings_torch, mask, and optimizer are already defined

    epochs = 1000
    k_folds = 5  # Number of folds for cross-validation
    kf = KFold(n_splits=k_folds, shuffle=True)

    # Store the losses for each fold
    fold_losses = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(ratings_torch)):
#         print(f"\nFold {fold + 1}/{k_folds}")

        # Split the data into training and validation sets
        train_ratings = ratings_torch[train_idx]
        val_ratings = ratings_torch[val_idx]
        train_mask = mask[train_idx]
        val_mask = mask[val_idx]

        # Initialize a new model for each fold
        model = SparseAutoencoder(num_items, latent_dim)
    #     optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Re-initialize optimizer for each fold
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        # Train the model
        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()

            # Forward pass for training
            reconstructed = model(train_ratings)
            loss = masked_mse_loss(reconstructed, train_ratings, train_mask)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

#             if (epoch + 1) % 1000 == 0:
#                 print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

        # Evaluate the model on the validation set
        model.eval()
        with torch.no_grad():
            reconstructed_val = model(val_ratings)
            val_loss = masked_mse_loss(reconstructed_val, val_ratings, val_mask)

        print(f"Validation Loss for Fold {fold + 1}: {val_loss.item():.4f}")

        # Store the validation loss for this fold
        fold_losses.append(val_loss.item())

    # Print the average validation loss after all folds
    print(f"\nAverage Validation Loss across all folds: {sum(fold_losses)/k_folds:.4f}")


In [None]:
fgh

In [None]:
ratings_matrix[user_id]

In [None]:
# Convert the filled ratings data into a numpy array
ratings_matrix = ratings_df.values

# Initialize KNN (using user-based KNN)
import math
knn = NearestNeighbors(n_neighbors=math.ceil(num_users/10), metric='cosine')  # Using cosine similarity
knn.fit(ratings_matrix)

# Example: Predict rating for User 0 on Item 2
user_id = 0  # Index of user in the matrix
item_id = 9  # Index of item to predict rating for

# Get the nearest neighbors for user 0 (excluding the user itself)
distances, indices = knn.kneighbors([ratings_matrix[user_id]])

pred_ratings_list = np.array([])
rankings_list = np.array([])
for item_id in range(num_titles):
    # Get the ratings for the neighbors on item 2
    neighbor_ratings = np.array([ratings_matrix[i, item_id] for i in indices[0] if not np.isnan(ratings_matrix[i, item_id])])
    print(ratings_matrix[:, item_id])
    print(neighbor_ratings)
    ghjk
    
    predicted_rating = np.mean(neighbor_ratings[np.nonzero(neighbor_ratings)])
    rankings = np.sum(neighbor_ratings[np.nonzero(neighbor_ratings)])

#     print(predicted_rating)

    pred_ratings_list = np.append(pred_ratings_list, predicted_rating)
    rankings_list = np.append(rankings_list, rankings)
#     print(f"Predicted rating for User {user_id} on Item {item_id}: {predicted_rating}")
    
best_book_rating = np.max(pred_ratings_list)
best_book_idx = np.argmax(pred_ratings_list)

sorted_indices = np.argsort(pred_ratings_list)[::-1]
print("Top books are:")
for i, idx in enumerate(sorted_indices): 
    print("#", (i+1) , titles[idx], "Rating:", round(pred_ratings_list[idx], 1))

In [None]:
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds

# Create a sparse matrix (CSR format)
A = np.array([[1.0, 0, 0], [0, 2, 3], [4, 0, 6], [0, 0, 0]])
# print(A)
sparse_matrix = sp.csr_matrix(ratings_matrix)

# Perform SVD on the sparse matrix
# k is the number of singular values to compute (you can choose a value smaller than min(m, n))
U, S, VT = svds(sparse_matrix, k=500)

# Output the matrices
print("U (Left Singular Vectors):\n", U)
print("\nS (Singular Values):\n", S)
print("\nVT (Right Singular Vectors - Transposed):\n", VT)

# Reconstruct the matrix from U, S, VT
S_full = np.diag(S)  # Convert singular values to a diagonal matrix
A_reconstructed = np.dot(U, np.dot(S_full, VT))

print("\nReconstructed Matrix A:\n", A_reconstructed)


In [None]:
my_diff = (ratings_matrix[0]- A_reconstructed[0])
print(ratings_matrix.shape)
plt.plot(my_diff, '.')

# for i in range(len(ratings_matrix[0])):
#     if ratings_matrix[0, i] > 0:
#         print(ratings_matrix[0, i], A_reconstructed[0, i], titles[i])
        
for i in range(len(ratings_matrix[0])):
    if ratings_matrix[0, i] == 0 and A_reconstructed[0, i] > 0:
        print(ratings_matrix[0, i], A_reconstructed[0, i], titles[i])


In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Create a custom colormap with white for 0 and red for non-zero
cmap = mcolors.ListedColormap(['white', 'red'])
bounds = [0, 0.1, 1]  # Set bounds for 0 (white) and non-zero (red)
norm = mcolors.BoundaryNorm(bounds, cmap.N)

# plt.imshow(ratings_matrix - A_reconstructed, cmap=cmap, norm=norm)
plt.plot(ratings_matrix - A_reconstructed)
plt.show()

In [None]:
ratings_matrix - A_reconstructed

In [None]:
from sklearn.cluster import DBSCAN
dbscan = DBSCAN(eps=0.75, min_samples=2, metric='cosine')
labels = dbscan.fit_predict(ratings_matrix)


In [None]:
print(list(set(labels)))
print(labels.shape)

In [None]:
idx_in_group = np.arange(len(labels))
filtered_users = [idx_in_group for idx_in_group, label in zip(idx_in_group, labels) if label == 0]
print(filtered_users)

In [None]:
from sklearn.cluster import SpectralClustering
from scipy.sparse import csr_matrix
import numpy as np

# Example sparse data
# X = np.random.rand(100, 2)
X_sparse = csr_matrix(ratings_matrix)

n_clusters = 50
# Apply Spectral Clustering
spectral = SpectralClustering(n_clusters=n_clusters, affinity='nearest_neighbors')
labels = spectral.fit_predict(X_sparse)

print(labels)
print(list(set(labels)))
print(labels.shape)

In [None]:
# idx_in_group = np.arange(len(labels))
# filtered_users = [idx_in_group for idx_in_group, label in zip(idx_in_group, labels) if label == 0]
# print(filtered_users)

group_averages = []

for group in range(n_clusters):
    # Find indices of users in the current group
    group_users = np.where(labels == group)[0]
    
    # Extract the rows for users in this group
    group_data = ratings_matrix[group_users]
    
    print("Number of perople in group = ", group_data.shape[0])
    
    pred_ratings_list = np.array([])
    for item_id in range(num_titles):
        data = group_data[:,item_id]
        predicted_rating = np.mean(data[np.nonzero(data)])
#                 rankings = np.sum(neighbor_ratings[np.nonzero(neighbor_ratings)])

        pred_ratings_list = np.append(pred_ratings_list, predicted_rating)
    # Compute the average for each column (item) for this group
#     group_avg = np.mean(group_data[np.nonzero(group_data)], axis=0)
    
    # Append the average for this group
    group_averages.append(pred_ratings_list)

# Convert the list of group averages to a numpy array for easy viewing
group_averages = np.array(group_averages)

# Display the average for each item in each group
print("Average preferences for each item by group:")
print(group_averages.shape)

In [None]:
group = labels[0]
print("my group = ", group)
sorted_indices = np.argsort(group_averages[group])[::-1]
print(sorted_indices)
for i in sorted_indices:
    if (ratings_matrix[0, i] > 0) or (np.isnan(group_averages[group, i])):
        pass
    else:
        print(titles[i], round(group_averages[group,i], 1))