In [26]:
liked_books = ["581526", "20562717", "944652"]

In [27]:
!head book_id_map.csv

book_id_csv,book_id
0,34684622
1,34536488
2,34017076
3,71730
4,30422361
5,33503613
6,33517540
7,34467031
8,6383669


In [28]:
# Create a dictionary, splitting the file at the comma into csv_id and book_id
csv_book_mapping = {}

with open("book_id_map.csv", "r") as f:
    while True:
        line =f.readline()
        if not line:
            break
        csv_id, book_id = line.strip().split(",") # .strip() removes any new line characters
        csv_book_mapping[csv_id] = book_id

In [29]:
len(csv_book_mapping)

2360651

In [30]:
# First, find all of the users who like the same books
# Populate a set of users using the streaming fashion to reduce memory usage
overlap_users = set()

with open("goodreads_interactions.csv", "r") as f:
    while True:
        line = f.readline()
        if not line:
            break
        user_id, csv_id, _, rating, _ = line.split(",") # underscore means we don't care about the field, this will give us user_id, csv_id, and rating
        
        if user_id in overlap_users:
            continue
        
        try:
            rating = int(rating)
        except ValueError:
            continue
            
        book_id = csv_book_mapping[csv_id] # turn the csv_id into a book_id
        
        if book_id in liked_books and rating >= 4: # if the book in the row is one we like and the rating is 4 or 5, add that user to overlap users
            overlap_users.add(user_id) # this set will contain any user who read the same book and rated it highly

In [31]:
# Finding the books that those users read
# Create a list with recommended lines
rec_lines = []

with open("goodreads_interactions.csv", "r") as f:
    while True:
        line = f.readline()
        if not line:
            break
        user_id, csv_id, _, rating, _ = line.split(",")
        
        if user_id in overlap_users:
            book_id = csv_book_mapping[csv_id]
            rec_lines.append([user_id, book_id, rating]) # Contains books that users who like the same books as us have read

In [32]:
import pandas as pd

# Create a DataFrame with the potential recommendations and pass in column names
recs = pd.DataFrame(rec_lines, columns=["user_id", "book_id", "rating"])

# Ensure book_id is a string
recs["book_id"] = recs["book_id"].astype(str)

In [37]:
recs

Unnamed: 0,user_id,book_id,rating
0,286,48855,5
1,286,9418327,0
2,286,47281,3
3,286,114345,2
4,286,375013,5
...,...,...,...
374096,873461,20562717,4
374097,873461,9531737,4
374098,873461,20150777,5
374099,873461,8167044,5


In [38]:
# See which book_id occurs most in the df
top_recs = recs["book_id"].value_counts().head(10)

# Get the values of the index rather than the keys as book_ids is a dictionary with keys and values
top_recs = top_recs.index.values

In [39]:
# Look at the books_titles file we created in the search.ipynb file
books_titles = pd.read_json("books_titles.json")

# Ensure book_id is a string
books_titles["book_id"] = books_titles["book_id"].astype(str)

In [40]:
books_titles.head()

Unnamed: 0,book_id,title,ratings,url,cover_image,mod_title
0,1333909,Good Harbor,10,https://www.goodreads.com/book/show/1333909.Go...,https://s.gr-assets.com/assets/nophoto/book/11...,good harbor
1,7327624,"The Unschooled Wizard (Sun Wolf and Starhawk, ...",140,https://www.goodreads.com/book/show/7327624-th...,https://images.gr-assets.com/books/1304100136m...,the unschooled wizard sun wolf and starhawk 12
2,6066819,Best Friends Forever,51184,https://www.goodreads.com/book/show/6066819-be...,https://s.gr-assets.com/assets/nophoto/book/11...,best friends forever
3,287140,Runic Astrology: Starcraft and Timekeeping in ...,15,https://www.goodreads.com/book/show/287140.Run...,https://images.gr-assets.com/books/1413219371m...,runic astrology starcraft and timekeeping in t...
4,287141,The Aeneid for Boys and Girls,46,https://www.goodreads.com/book/show/287141.The...,https://s.gr-assets.com/assets/nophoto/book/11...,the aeneid for boys and girls


In [41]:
# Use the isin method to find the book titles where the book_id is in the top recommendations
books_titles[books_titles["book_id"].isin(top_recs)]

Unnamed: 0,book_id,title,ratings,url,cover_image,mod_title
181196,20562717,The Name of the Wind (The Kingkiller Chronicle...,15824,https://www.goodreads.com/book/show/20562717-t...,https://images.gr-assets.com/books/1470701050m...,the name of the wind the kingkiller chronicle 1
203480,9531737,"The Wise Man's Fear (The Kingkiller Chronicle,...",17766,https://www.goodreads.com/book/show/9531737-th...,https://images.gr-assets.com/books/1327957823m...,the wise mans fear the kingkiller chronicle 2
386663,2767052,"The Hunger Games (The Hunger Games, #1)",4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,the hunger games the hunger games 1
608482,5907,The Hobbit,2099680,https://www.goodreads.com/book/show/5907.The_H...,https://images.gr-assets.com/books/1372847500m...,the hobbit
838525,5470,1984,2023937,https://www.goodreads.com/book/show/5470.1984,https://images.gr-assets.com/books/1348990566m...,1984
1077226,2657,To Kill a Mockingbird,3255518,https://www.goodreads.com/book/show/2657.To_Ki...,https://images.gr-assets.com/books/1361975680m...,to kill a mockingbird
1196415,3,Harry Potter and the Sorcerer's Stone (Harry P...,4765497,https://www.goodreads.com/book/show/3.Harry_Po...,https://images.gr-assets.com/books/1474154022m...,harry potter and the sorcerers stone harry pot...
1229158,136251,Harry Potter and the Deathly Hallows (Harry Po...,1784684,https://www.goodreads.com/book/show/136251.Har...,https://images.gr-assets.com/books/1474171184m...,harry potter and the deathly hallows harry pot...
1316662,13496,"A Game of Thrones (A Song of Ice and Fire, #1)",1359501,https://www.goodreads.com/book/show/13496.A_Ga...,https://images.gr-assets.com/books/1436732693m...,a game of thrones a song of ice and fire 1
1354988,15881,Harry Potter and the Chamber of Secrets (Harry...,1821802,https://www.goodreads.com/book/show/15881.Harr...,https://images.gr-assets.com/books/1474169725m...,harry potter and the chamber of secrets harry ...


In [42]:
# Get value_counts() with how many times each book_id appears in the set
all_recs = recs["book_id"].value_counts()

In [43]:
# Convert to a df from a series
all_recs = all_recs.to_frame().reset_index()

# Rename columns
all_recs.columns = ["book_id", "book_count"]

In [44]:
all_recs.head()

Unnamed: 0,book_id,book_count
0,20562717,787
1,3,460
2,9531737,442
3,2767052,401
4,5907,373


In [46]:
# Merge all_recs with books_titles using inner meaning if the data doesn't exist in both, get rid of the row
all_recs = all_recs.merge(books_titles, how="inner", on="book_id")

In [None]:
# Create a score such that if a book is popular in our set but less popular in general on goodreads, it'll be more highly scored
all_recs["score"] = all_recs["book_count"] * (all_recs["book_count"] / all_recs["ratings"])

In [48]:
# Sort the top ten recommendations based upon score
all_recs.sort_values("score", ascending=False).head(10)

Unnamed: 0,book_id,book_count,title,ratings,url,cover_image,mod_title,score
0,20562717,787,The Name of the Wind (The Kingkiller Chronicle...,15824,https://www.goodreads.com/book/show/20562717-t...,https://images.gr-assets.com/books/1470701050m...,the name of the wind the kingkiller chronicle 1,39.141115
69,21032488,154,"Doors of Stone (The Kingkiller Chronicle, #3)",2059,https://www.goodreads.com/book/show/21032488-d...,https://s.gr-assets.com/assets/nophoto/book/11...,doors of stone the kingkiller chronicle 3,11.518213
2,9531737,442,"The Wise Man's Fear (The Kingkiller Chronicle,...",17766,https://www.goodreads.com/book/show/9531737-th...,https://images.gr-assets.com/books/1327957823m...,the wise mans fear the kingkiller chronicle 2,10.99651
2506,18243345,18,"Nightblood (Warbreaker, #2)",66,https://www.goodreads.com/book/show/18243345-n...,https://s.gr-assets.com/assets/nophoto/book/11...,nightblood warbreaker 2,4.909091
1041,29217027,34,"Iron Gold (Red Rising Saga, #4)",256,https://www.goodreads.com/book/show/29217027-i...,https://images.gr-assets.com/books/1482474951m...,iron gold red rising saga 4,4.515625
4294,15999003,12,"Inherit the Night (Gentleman Bastard, #7)",36,https://www.goodreads.com/book/show/15999003-i...,https://s.gr-assets.com/assets/nophoto/book/11...,inherit the night gentleman bastard 7,4.0
5222,26860699,10,"The Olympian Affair (The Cinder Spires, #2)",27,https://www.goodreads.com/book/show/26860699-t...,https://s.gr-assets.com/assets/nophoto/book/11...,the olympian affair the cinder spires 2,3.703704
1191,23947089,31,"The Lost Metal (Mistborn, #7)",305,https://www.goodreads.com/book/show/23947089-t...,https://s.gr-assets.com/assets/nophoto/book/11...,the lost metal mistborn 7,3.15082
477,17250961,59,"Oathbringer (The Stormlight Archive, #3)",1121,https://www.goodreads.com/book/show/17250961-o...,https://images.gr-assets.com/books/1500062685m...,oathbringer the stormlight archive 3,3.105263
861,581526,39,In My Own Way: An Autobiography,508,https://www.goodreads.com/book/show/581526.In_...,https://s.gr-assets.com/assets/nophoto/book/11...,in my own way an autobiography,2.994094


In [49]:
# Filter all recommendations for a book count greater than 200
all_recs[all_recs["book_count"] > 200].sort_values("score", ascending=False).head()

Unnamed: 0,book_id,book_count,title,ratings,url,cover_image,mod_title,score
0,20562717,787,The Name of the Wind (The Kingkiller Chronicle...,15824,https://www.goodreads.com/book/show/20562717-t...,https://images.gr-assets.com/books/1470701050m...,the name of the wind the kingkiller chronicle 1,39.141115
2,9531737,442,"The Wise Man's Fear (The Kingkiller Chronicle,...",17766,https://www.goodreads.com/book/show/9531737-th...,https://images.gr-assets.com/books/1327957823m...,the wise mans fear the kingkiller chronicle 2,10.99651
35,68428,213,"The Final Empire (Mistborn, #1)",216149,https://www.goodreads.com/book/show/68428.The_...,https://images.gr-assets.com/books/1480717416m...,the final empire mistborn 1,0.209897
27,9969571,233,Ready Player One,376328,https://www.goodreads.com/book/show/9969571-re...,https://images.gr-assets.com/books/1500930947m...,ready player one,0.14426
31,4407,223,"American Gods (American Gods, #1)",379668,https://www.goodreads.com/book/show/4407.Ameri...,https://images.gr-assets.com/books/1258417001m...,american gods american gods 1,0.13098


In [52]:
# Only take the recommendations where book_count is greater than 200
popular_recs = all_recs[all_recs["book_count"] > 200].sort_values("score", ascending=False)

In [59]:
def make_clickable(val):
    return '<a target="_blank" href="{}">Goodreads</a>'.format(val, val)

def show_image(val):
    return '<a href="{}"><img src="{}" width=50></img></a>.'.format(val, val)

# Filter out popular books where book_id isin liked_books
popular_recs[~popular_recs["book_id"].isin(liked_books)].head(10).style.format({'url': make_clickable, 'cover_image': show_image})

Unnamed: 0,book_id,book_count,title,ratings,url,cover_image,mod_title,score
2,9531737,442,"The Wise Man's Fear (The Kingkiller Chronicle, #2)",17766,Goodreads,.,the wise mans fear the kingkiller chronicle 2,10.99651
35,68428,213,"The Final Empire (Mistborn, #1)",216149,Goodreads,.,the final empire mistborn 1,0.209897
27,9969571,233,Ready Player One,376328,Goodreads,.,ready player one,0.14426
31,4407,223,"American Gods (American Gods, #1)",379668,Goodreads,.,american gods american gods 1,0.13098
36,10664113,210,"A Dance with Dragons (A Song of Ice and Fire, #5)",374315,Goodreads,.,a dance with dragons a song of ice and fire 5,0.117815
14,375802,311,"Ender's Game (Ender's Saga, #1)",829380,Goodreads,.,enders game enders saga 1,0.116618
23,234225,238,Dune (Dune Chronicles #1),494553,Goodreads,.,dune dune chronicles 1,0.114536
34,13497,213,"A Feast for Crows (A Song of Ice and Fire, #4)",437398,Goodreads,.,a feast for crows a song of ice and fire 4,0.103725
32,62291,219,"A Storm of Swords (A Song of Ice and Fire, #3)",477834,Goodreads,.,a storm of swords a song of ice and fire 3,0.100372
29,10572,230,"A Clash of Kings (A Song of Ice and Fire, #2)",534960,Goodreads,.,a clash of kings a song of ice and fire 2,0.098886
