In [15]:
liked_books = ["25545994", "8835305", "83880", "29495703", "32737635"]

In [16]:
!head book_id_map.csv

book_id_csv,book_id
0,34684622
1,34536488
2,34017076
3,71730
4,30422361
5,33503613
6,33517540
7,34467031
8,6383669


In [27]:
# Making a dictionary with all the csv_id's, which are the key id's of our interaction file 
# and the values will be the id's of the file we just worked with 

csv_book_mapping = {}  # dictionary 
with open("book_id_map.csv", "r") as f:
    while True:
        line = f.readline()
        if not line:
            break
        csv_id, book_id = line.strip().split(",") # reads every line of the file, then splits the line at the comma en assigns the first part to csv_id and the second part(book_id) to book_id
        csv_book_mapping[csv_id] = book_id

In [28]:
len(csv_book_mapping)

2360651

In [29]:
!head goodreads_interactions.csv

user_id,book_id,is_read,rating,is_reviewed
0,948,1,5,0
0,947,1,5,1
0,946,1,5,0
0,945,1,5,0
0,944,1,5,0
0,943,1,5,0
0,942,1,5,0
0,941,1,5,0
0,940,1,5,0


# Finding users who liked the same books 

In [30]:
# Making a set with all the users that have read the same books as us and rated the book 4 or higher 

overlap_users = set() # list that can only contain unique values, no duplicates 

with open("goodreads_interactions.csv", 'r') as f:
    while True:
        line = f.readline()
        if not line:
            break
        user_id, csv_id, _, rating, _ = line.split(",")
        
        if user_id in overlap_users:
            continue

        try:
            rating = int(rating)
        except ValueError:
            continue
        
        book_id = csv_book_mapping[csv_id]
        
        if book_id in liked_books and rating >= 4:
                overlap_users.add(user_id)

# Finding what books those users liked 

In [31]:
rec_lines = []

with open("goodreads_interactions.csv", 'r') as f:
    while True:
        line = f.readline()
        if not line:
            break
        user_id, csv_id, _, rating, _ = line.split(",")
        
        if user_id in overlap_users:
            book_id = csv_book_mapping[csv_id]
            rec_lines.append([user_id, book_id, rating])

In [32]:
len(overlap_users)

355

In [33]:
len(rec_lines)

333106

In [34]:
# Turning our recommendations into a pd dataframe

import pandas as pd

recs = pd.DataFrame(rec_lines, columns=["user_id", "book_id", "rating"])
recs["book_id"] = recs["book_id"].astype(str)

In [35]:
recs

Unnamed: 0,user_id,book_id,rating
0,321,48855,5
1,321,2203,5
2,321,9418327,4
3,321,11230081,5
4,321,252577,4
...,...,...,...
333101,863653,27824826,5
333102,863653,23280208,4
333103,863653,29095428,4
333104,863653,15753740,5


# Finding the Top Recommendations

In [48]:
# Counting up how many tyimes each book occurs and showing the most commen ones (finding top recommendations)

top_recs = recs["book_id"].value_counts().head(10)

# Taking/showing the index 

top_recs = top_recs.index.values

In [49]:
books_titles = pd.read_json("books_titles.json")
books_titles["book_id"] = books_titles["book_id"].astype(str) # converting our titles into text rather then numerical values

In [50]:
books_titles.head()

Unnamed: 0,book_id,title,ratings,url,cover_image,mod_title
0,7327624,"The Unschooled Wizard (Sun Wolf and Starhawk, ...",140,https://www.goodreads.com/book/show/7327624-th...,https://images.gr-assets.com/books/1304100136m...,the unschooled wizard sun wolf and starhawk 12
1,6066819,Best Friends Forever,51184,https://www.goodreads.com/book/show/6066819-be...,https://s.gr-assets.com/assets/nophoto/book/11...,best friends forever
2,287141,The Aeneid for Boys and Girls,46,https://www.goodreads.com/book/show/287141.The...,https://s.gr-assets.com/assets/nophoto/book/11...,the aeneid for boys and girls
3,6066812,All's Fairy in Love and War (Avalon: Web of Ma...,98,https://www.goodreads.com/book/show/6066812-al...,https://images.gr-assets.com/books/1316637798m...,alls fairy in love and war avalon web of magic 8
4,287149,The Devil's Notebook,986,https://www.goodreads.com/book/show/287149.The...,https://images.gr-assets.com/books/1328768789m...,the devils notebook


In [51]:
# checking which books are in our top recommendations

books_titles[books_titles["book_id"].isin(top_recs)] 

Unnamed: 0,book_id,title,ratings,url,cover_image,mod_title
284473,2767052,"The Hunger Games (The Hunger Games, #1)",4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,the hunger games the hunger games 1
436303,32737635,The Most Dangerous Place on Earth,4063,https://www.goodreads.com/book/show/32737635-t...,https://images.gr-assets.com/books/1477347638m...,the most dangerous place on earth
463463,4671,The Great Gatsby,2758812,https://www.goodreads.com/book/show/4671.The_G...,https://images.gr-assets.com/books/1490528560m...,the great gatsby
482663,26893819,The Girls,84357,https://www.goodreads.com/book/show/26893819-t...,https://images.gr-assets.com/books/1492065338m...,the girls
790927,2657,To Kill a Mockingbird,3255518,https://www.goodreads.com/book/show/2657.To_Ki...,https://images.gr-assets.com/books/1361975680m...,to kill a mockingbird
878151,18143977,All the Light We Cannot See,498685,https://www.goodreads.com/book/show/18143977-a...,https://images.gr-assets.com/books/1451445646m...,all the light we cannot see
878545,3,Harry Potter and the Sorcerer's Stone (Harry P...,4765497,https://www.goodreads.com/book/show/3.Harry_Po...,https://images.gr-assets.com/books/1474154022m...,harry potter and the sorcerers stone harry pot...
973220,19486412,Big Little Lies,335079,https://www.goodreads.com/book/show/19486412-b...,https://images.gr-assets.com/books/1492239430m...,big little lies
1091642,22557272,The Girl on the Train,1076144,https://www.goodreads.com/book/show/22557272-t...,https://images.gr-assets.com/books/1490903702m...,the girl on the train
1095301,11870085,The Fault in Our Stars,2429317,https://www.goodreads.com/book/show/11870085-t...,https://images.gr-assets.com/books/1360206420m...,the fault in our stars


In [70]:
top_recs

array(['32737635', '22557272', '11870085', '2767052', '26893819',
       '19486412', '18143977', '4671', '3', '2657'], dtype=object)

In [53]:
# Checking how many times a specific book is recommended 

all_recs = recs["book_id"].value_counts()  

In [54]:
all_recs

book_id
32737635    332
22557272    248
11870085    218
2767052     211
26893819    206
           ... 
22859574      1
444734        1
23507708      1
25005110      1
27037166      1
Name: count, Length: 124934, dtype: int64

In [55]:
# Adding a frame with index to our output 

all_recs = all_recs.to_frame().reset_index()

In [56]:
all_recs

Unnamed: 0,book_id,count
0,32737635,332
1,22557272,248
2,11870085,218
3,2767052,211
4,26893819,206
...,...,...
124929,22859574,1
124930,444734,1
124931,23507708,1
124932,25005110,1


In [58]:
# Merging our recommendations and their counting values together with our dataframe containing the books information 

all_recs = all_recs.merge(books_titles, how="inner", on="book_id")

In [59]:
all_recs

Unnamed: 0,book_id,count,title,ratings,url,cover_image,mod_title
0,32737635,332,The Most Dangerous Place on Earth,4063,https://www.goodreads.com/book/show/32737635-t...,https://images.gr-assets.com/books/1477347638m...,the most dangerous place on earth
1,22557272,248,The Girl on the Train,1076144,https://www.goodreads.com/book/show/22557272-t...,https://images.gr-assets.com/books/1490903702m...,the girl on the train
2,11870085,218,The Fault in Our Stars,2429317,https://www.goodreads.com/book/show/11870085-t...,https://images.gr-assets.com/books/1360206420m...,the fault in our stars
3,2767052,211,"The Hunger Games (The Hunger Games, #1)",4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,the hunger games the hunger games 1
4,26893819,206,The Girls,84357,https://www.goodreads.com/book/show/26893819-t...,https://images.gr-assets.com/books/1492065338m...,the girls
...,...,...,...,...,...,...,...
116927,22859574,1,The Year My Mother Came Back,281,https://www.goodreads.com/book/show/22859574-t...,https://images.gr-assets.com/books/1423486499m...,the year my mother came back
116928,444734,1,"Later, at the Bar",869,https://www.goodreads.com/book/show/444734.Lat...,https://s.gr-assets.com/assets/nophoto/book/11...,later at the bar
116929,23507708,1,All Dogs Go to Kevin: Everything Three Dogs Ta...,578,https://www.goodreads.com/book/show/23507708-a...,https://images.gr-assets.com/books/1423683984m...,all dogs go to kevin everything three dogs tau...
116930,25005110,1,The Theory of Death (Peter Decker/Rina Lazarus...,59,https://www.goodreads.com/book/show/25005110-t...,https://images.gr-assets.com/books/1432580315m...,the theory of death peter deckerrina lazarus 23


In [61]:
# Creating a 'score' column 
# All the users who liked books that we also liked, how many of them also liked this book 

all_recs["score"] = all_recs["count"] * (all_recs["count"] / all_recs["ratings"])  # the more ratings a book has, the more populair that book is

In [63]:
# Sorting the scores of our recommendations

all_recs.sort_values("score", ascending=False).head(10)

Unnamed: 0,book_id,count,title,ratings,url,cover_image,mod_title,score
0,32737635,332,The Most Dangerous Place on Earth,4063,https://www.goodreads.com/book/show/32737635-t...,https://images.gr-assets.com/books/1477347638m...,the most dangerous place on earth,27.128723
6763,34051963,8,Here We Lie,18,https://www.goodreads.com/book/show/34051963-h...,https://images.gr-assets.com/books/1499097971m...,here we lie,3.555556
6549,29495703,8,"Public Secrets (Artificial Intelligence, #1)",21,https://www.goodreads.com/book/show/29495703-p...,https://s.gr-assets.com/assets/nophoto/book/11...,public secrets artificial intelligence 1,3.047619
7889,32333338,7,Save the Date,19,https://www.goodreads.com/book/show/32333338-s...,https://images.gr-assets.com/books/1510611507m...,save the date,2.578947
9345,35297420,6,She Regrets Nothing,17,https://www.goodreads.com/book/show/35297420-s...,https://images.gr-assets.com/books/1510482237m...,she regrets nothing,2.117647
3692,24909347,13,"Obsidio (The Illuminae Files, #3)",82,https://www.goodreads.com/book/show/24909347-o...,https://images.gr-assets.com/books/1501704611m...,obsidio the illuminae files 3,2.060976
326,32025142,62,White Fur,1973,https://www.goodreads.com/book/show/32025142-w...,https://images.gr-assets.com/books/1481903804m...,white fur,1.948302
124,30199414,100,Marlena,5324,https://www.goodreads.com/book/show/30199414-m...,https://images.gr-assets.com/books/1491343350m...,marlena,1.878287
6200,26856502,8,"Vengeful (Villains, #2)",35,https://www.goodreads.com/book/show/26856502-v...,https://s.gr-assets.com/assets/nophoto/book/11...,vengeful villains 2,1.828571
4398,35099035,11,Red Clocks,67,https://www.goodreads.com/book/show/35099035-r...,https://images.gr-assets.com/books/1494345016m...,red clocks,1.80597


In [67]:
all_recs[all_recs["count"] > 200].sort_values("score", ascending=False).head(10)

Unnamed: 0,book_id,count,title,ratings,url,cover_image,mod_title,score
0,32737635,332,The Most Dangerous Place on Earth,4063,https://www.goodreads.com/book/show/32737635-t...,https://images.gr-assets.com/books/1477347638m...,the most dangerous place on earth,27.128723
4,26893819,206,The Girls,84357,https://www.goodreads.com/book/show/26893819-t...,https://images.gr-assets.com/books/1492065338m...,the girls,0.503053
1,22557272,248,The Girl on the Train,1076144,https://www.goodreads.com/book/show/22557272-t...,https://images.gr-assets.com/books/1490903702m...,the girl on the train,0.057152
2,11870085,218,The Fault in Our Stars,2429317,https://www.goodreads.com/book/show/11870085-t...,https://images.gr-assets.com/books/1360206420m...,the fault in our stars,0.019563
3,2767052,211,"The Hunger Games (The Hunger Games, #1)",4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,the hunger games the hunger games 1,0.009086


In [68]:
# removing the books that have low recommendations 

popular_recs = all_recs[all_recs["count"] > 50].sort_values("score", ascending=False) # only books that are at least recommended 50 times

In [71]:
def make_clickable(val):
    return '<a target="_blank" href="{}">Goodreads</a>'.format(val, val)

def show_image(val):
    return '<a href="{}"><img src="{}" width=50></img></a>'.format(val, val)

# filtering out the books where the book_id is already in our book set - prevents us from seeing books that we have already read
popular_recs[~popular_recs["book_id"].isin(liked_books)].head(10).style.format({'url': make_clickable, 'cover_image': show_image})

Unnamed: 0,book_id,count,title,ratings,url,cover_image,mod_title,score
326,32025142,62,White Fur,1973,Goodreads,,white fur,1.948302
124,30199414,100,Marlena,5324,Goodreads,,marlena,1.878287
335,30256250,61,The Possessions,2482,Goodreads,,the possessions,1.499194
254,29229956,71,Perfect Little World,4113,Goodreads,,perfect little world,1.225626
125,29276588,98,Everything You Want Me to Be,7954,Goodreads,,everything you want me to be,1.207443
162,30971707,87,All Grown Up,6505,Goodreads,,all grown up,1.163566
204,30753987,78,The Leavers,5602,Goodreads,,the leavers,1.086041
285,30090925,67,The Animators,4691,Goodreads,,the animators,0.956939
386,32740062,57,Watch Me Disappear,3460,Goodreads,,watch me disappear,0.939017
267,33574211,70,Emma in the Night,5303,Goodreads,,emma in the night,0.924005
