In [1]:
import html

from google.colab import drive
# Mount Google Drive at /content/drive; every data file read below lives under it.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Check that the book-id mapping file exists on the mounted drive.
!ls /content/drive/MyDrive/ProjectFiles/book_id_map.csv

/content/drive/MyDrive/ProjectFiles/book_id_map.csv


In [3]:
# Ids of the books I liked, kept as strings because they are compared against
# the string `book_id` values read from book_id_map.csv.
liked_books = [
    "1392526",
    "8036918",
    "586666",
    "43326",
    "126381",
    "3016125",
]



In [4]:
# Peek at the mapping file: a header row followed by `book_id_csv,book_id` pairs.
!head /content/drive/MyDrive/ProjectFiles/book_id_map.csv

book_id_csv,book_id
0,34684622
1,34536488
2,34017076
3,71730
4,30422361
5,33503613
6,33517540
7,34467031
8,6383669


In [5]:
# Map the dataset-internal csv ids (`book_id_csv` column) to `book_id` values.
# Keys and values are kept as the strings found in the file.
csv_book_mapping = {}
with open("/content/drive/MyDrive/ProjectFiles/book_id_map.csv", "r") as f:
    # Skip the `book_id_csv,book_id` header so it is not stored as a bogus entry.
    next(f, None)
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines instead of failing on the unpack below
        csv_id, book_id = line.split(",")
        csv_book_mapping[csv_id] = book_id

In [6]:
# Number of entries loaded into the id mapping.
len(csv_book_mapping)

2360651

In [7]:
# Count the lines of the interactions file to gauge how large it is.
!wc -l /content/drive/MyDrive/ProjectFiles/goodreads_interactions.csv

228648343 /content/drive/MyDrive/ProjectFiles/goodreads_interactions.csv


In [8]:
# Show the on-disk size of the interactions file. The path is passed to `ls`
# directly: piping a bare `ls -lh` into grep only searches the listing of the
# current directory, which never contains this absolute path.
!ls -lh /content/drive/MyDrive/ProjectFiles/goodreads_interactions.csv

In [9]:
# Peek at the interactions file: columns are user_id, book_id, is_read, rating, is_reviewed.
!head /content/drive/MyDrive/ProjectFiles/goodreads_interactions.csv

user_id,book_id,is_read,rating,is_reviewed
0,948,1,5,0
0,947,1,5,1
0,946,1,5,0
0,945,1,5,0
0,944,1,5,0
0,943,1,5,0
0,942,1,5,0
0,941,1,5,0
0,940,1,5,0


In [10]:
# Collect the users who rated at least one of my liked books 4 or higher.
# The interactions file has hundreds of millions of rows, so the loop body is
# kept as cheap as possible.
liked_book_ids = set(liked_books)  # hoisted: constant-time membership in the hot loop
overlap_users = set()

with open("/content/drive/MyDrive/ProjectFiles/goodreads_interactions.csv", "r") as f:
    # Skip the `user_id,book_id,is_read,rating,is_reviewed` header explicitly
    # rather than relying on int("rating") failing.
    next(f, None)
    for line in f:
        user_id, csv_id, _, rating, _ = line.split(",")

        # Already known to overlap; no need to inspect the rest of their rows.
        if user_id in overlap_users:
            continue

        try:
            rating = int(rating)
        except ValueError:
            continue  # non-numeric rating field: ignore the row

        if rating < 4:
            continue  # cheap check first, avoids the dict lookup for low ratings

        # `.get` so a csv id missing from the mapping skips the row instead of
        # aborting the whole pass with a KeyError.
        book_id = csv_book_mapping.get(csv_id)
        if book_id in liked_book_ids:
            overlap_users.add(user_id)

In [11]:
# Second pass over the interactions: keep every row that belongs to an
# overlapping user, with the csv id translated through `csv_book_mapping`.
# The rating is stored as the raw string read from the file.
rec_lines = []
with open("/content/drive/MyDrive/ProjectFiles/goodreads_interactions.csv", "r") as f:
    for row in f:
        user_id, csv_id, _, rating, _ = row.split(",")
        if user_id not in overlap_users:
            continue
        rec_lines.append([user_id, csv_book_mapping[csv_id], rating])

In [12]:
# How many users share at least one highly rated book with me.
len(overlap_users)

6494

In [13]:
# Total number of interaction rows collected for those users.
len(rec_lines)

3445473

In [14]:
import pandas as pd

# Tabulate the collected rows. Book ids are cast to strings so they compare
# equal to the string ids in `liked_books` and `books_titles`.
recs = pd.DataFrame(rec_lines, columns=["user_id", "book_id", "rating"]).assign(
    book_id=lambda frame: frame["book_id"].astype(str)
)

In [15]:
# Ids of the ten books that occur most often in the collected rows.
book_counts = recs["book_id"].value_counts()
top_recs = book_counts.head(10).index.values

In [16]:
# Load the book metadata and cast ids to strings so they can be matched
# against the string ids in `recs`.
titles_path = "/content/drive/MyDrive/ProjectFiles/book_titles.json"
books_titles = pd.read_json(titles_path).astype({"book_id": str})

In [17]:
# Preview the metadata columns (book_id, title, ratings, url, image_url, mod_title).
books_titles.head()

Unnamed: 0,book_id,title,ratings,url,image_url,mod_title
0,7327624,"The Unschooled Wizard (Sun Wolf and Starhawk, ...",140,https://www.goodreads.com/book/show/7327624-th...,https://images.gr-assets.com/books/1304100136m...,the unschooled wizard sun wolf and starhawk 12
1,6066819,Best Friends Forever,51184,https://www.goodreads.com/book/show/6066819-be...,https://s.gr-assets.com/assets/nophoto/book/11...,best friends forever
2,287141,The Aeneid for Boys and Girls,46,https://www.goodreads.com/book/show/287141.The...,https://s.gr-assets.com/assets/nophoto/book/11...,the aeneid for boys and girls
3,6066812,All's Fairy in Love and War (Avalon: Web of Ma...,98,https://www.goodreads.com/book/show/6066812-al...,https://images.gr-assets.com/books/1316637798m...,alls fairy in love and war avalon web of magic 8
4,287149,The Devil's Notebook,986,https://www.goodreads.com/book/show/287149.The...,https://images.gr-assets.com/books/1328768789m...,the devils notebook


In [18]:
# Show the metadata rows for the ten most frequent books.
in_top_recs = books_titles["book_id"].isin(top_recs)
books_titles[in_top_recs]

Unnamed: 0,book_id,title,ratings,url,image_url,mod_title
53027,77203,The Kite Runner,1848782,https://www.goodreads.com/book/show/77203.The_...,https://images.gr-assets.com/books/1484565687m...,the kite runner
159867,126381,Purple Hibiscus,31133,https://www.goodreads.com/book/show/126381.Pur...,https://images.gr-assets.com/books/1329431038m...,purple hibiscus
284473,2767052,"The Hunger Games (The Hunger Games, #1)",4899965,https://www.goodreads.com/book/show/2767052-th...,https://images.gr-assets.com/books/1447303603m...,the hunger games the hunger games 1
463463,4671,The Great Gatsby,2758812,https://www.goodreads.com/book/show/4671.The_G...,https://images.gr-assets.com/books/1490528560m...,the great gatsby
569831,5,Harry Potter and the Prisoner of Azkaban (Harr...,1876252,https://www.goodreads.com/book/show/5.Harry_Po...,https://images.gr-assets.com/books/1499277281m...,harry potter and the prisoner of azkaban harry...
790927,2657,To Kill a Mockingbird,3255518,https://www.goodreads.com/book/show/2657.To_Ki...,https://images.gr-assets.com/books/1361975680m...,to kill a mockingbird
876816,865,The Alchemist,1342863,https://www.goodreads.com/book/show/865.The_Al...,https://images.gr-assets.com/books/1483412266m...,the alchemist
878545,3,Harry Potter and the Sorcerer's Stone (Harry P...,4765497,https://www.goodreads.com/book/show/3.Harry_Po...,https://images.gr-assets.com/books/1474154022m...,harry potter and the sorcerers stone harry pot...
929665,43326,Tell Me Your Dreams,35022,https://www.goodreads.com/book/show/43326.Tell...,https://s.gr-assets.com/assets/nophoto/book/11...,tell me your dreams
1223254,968,"The Da Vinci Code (Robert Langdon, #2)",1465770,https://www.goodreads.com/book/show/968.The_Da...,https://images.gr-assets.com/books/1303252999m...,the da vinci code robert langdon 2


In [19]:
# Count how often each book id occurs in the collected rows.
all_recs = recs["book_id"].value_counts()

In [20]:
# Turn the per-book counts into a two-column frame (`book_id`, `book_count`).
# It is built straight from `recs` rather than by reassigning `all_recs` from
# itself, so re-running this cell gives the same result. With the self-
# referential form, a second reset_index adds a third column and the
# two-name column rename raises.
all_recs = (
    recs["book_id"]
    .value_counts()
    .rename_axis("book_id")
    .reset_index(name="book_count")
)

In [21]:
# Inspect the counts frame (one row per book id).
all_recs

Unnamed: 0,book_id,book_count
0,2657,3375
1,126381,3307
2,3,3251
3,77203,3155
4,43326,2887
...,...,...
610391,111084,1
610392,22725371,1
610393,3694757,1
610394,2154943,1


In [22]:
# Attach the metadata columns; the inner join keeps only book ids present in
# both frames. NOTE: not idempotent -- re-running merges the metadata a second
# time, so re-create `all_recs` from the counts first.
all_recs = pd.merge(all_recs, books_titles, on="book_id", how="inner")

In [23]:
# Inspect the counts joined with the book metadata.
all_recs

Unnamed: 0,book_id,book_count,title,ratings,url,image_url,mod_title
0,2657,3375,To Kill a Mockingbird,3255518,https://www.goodreads.com/book/show/2657.To_Ki...,https://images.gr-assets.com/books/1361975680m...,to kill a mockingbird
1,126381,3307,Purple Hibiscus,31133,https://www.goodreads.com/book/show/126381.Pur...,https://images.gr-assets.com/books/1329431038m...,purple hibiscus
2,3,3251,Harry Potter and the Sorcerer's Stone (Harry P...,4765497,https://www.goodreads.com/book/show/3.Harry_Po...,https://images.gr-assets.com/books/1474154022m...,harry potter and the sorcerers stone harry pot...
3,77203,3155,The Kite Runner,1848782,https://www.goodreads.com/book/show/77203.The_...,https://images.gr-assets.com/books/1484565687m...,the kite runner
4,43326,2887,Tell Me Your Dreams,35022,https://www.goodreads.com/book/show/43326.Tell...,https://s.gr-assets.com/assets/nophoto/book/11...,tell me your dreams
...,...,...,...,...,...,...,...
530314,832173,1,"Red Mars (Mars Trilogy, #1)",92,https://www.goodreads.com/book/show/832173.Red...,https://images.gr-assets.com/books/1405542875m...,red mars mars trilogy 1
530315,111084,1,Golden States,92,https://www.goodreads.com/book/show/111084.Gol...,https://images.gr-assets.com/books/1263075550m...,golden states
530316,22725371,1,"A Grand Design (Quilts of Love, #20)",69,https://www.goodreads.com/book/show/22725371-a...,https://images.gr-assets.com/books/1405378144m...,a grand design quilts of love 20
530317,3694757,1,The Park is Mine,26,https://www.goodreads.com/book/show/3694757-th...,https://s.gr-assets.com/assets/nophoto/book/11...,the park is mine


In [24]:
# score = book_count * (book_count / ratings). `ratings` is presumably the
# book's overall rating count (TODO confirm), so the ratio favours books that
# are disproportionately common among the overlapping users.
count_share = all_recs["book_count"].div(all_recs["ratings"])
all_recs["score"] = all_recs["book_count"].mul(count_share)

In [25]:
# Ten highest-scoring books; books from `liked_books` are not filtered out here.
all_recs.sort_values("score", ascending=False).head(10)

Unnamed: 0,book_id,book_count,title,ratings,url,image_url,mod_title,score
1,126381,3307,Purple Hibiscus,31133,https://www.goodreads.com/book/show/126381.Pur...,https://images.gr-assets.com/books/1329431038m...,purple hibiscus,351.275142
4,43326,2887,Tell Me Your Dreams,35022,https://www.goodreads.com/book/show/43326.Tell...,https://s.gr-assets.com/assets/nophoto/book/11...,tell me your dreams,237.986666
123,5587960,1072,The Thing Around Your Neck,12501,https://www.goodreads.com/book/show/5587960-th...,https://images.gr-assets.com/books/1320413162m...,the thing around your neck,91.927366
71,43325,1351,Nothing Lasts Forever,23681,https://www.goodreads.com/book/show/43325.Noth...,https://images.gr-assets.com/books/1344265620m...,nothing lasts forever,77.07449
89,43324,1272,Are You Afraid of the Dark?,21780,https://www.goodreads.com/book/show/43324.Are_...,https://s.gr-assets.com/assets/nophoto/book/11...,are you afraid of the dark,74.287603
128,43327,1043,The Sky is Falling,15564,https://www.goodreads.com/book/show/43327.The_...,https://images.gr-assets.com/books/1353302273m...,the sky is falling,69.895207
83,300225,1297,Bloodline,24080,https://www.goodreads.com/book/show/300225.Blo...,https://images.gr-assets.com/books/1298570618m...,bloodline,69.859178
117,99610,1098,The Best Laid Plans,17434,https://www.goodreads.com/book/show/99610.The_...,https://images.gr-assets.com/books/1353374848m...,the best laid plans,69.152461
37,18749,1828,Half of a Yellow Sun,48752,https://www.goodreads.com/book/show/18749.Half...,https://images.gr-assets.com/books/1327934717m...,half of a yellow sun,68.542501
115,119382,1110,The Sands of Time,18728,https://www.goodreads.com/book/show/119382.The...,https://images.gr-assets.com/books/1356453253m...,the sands of time,65.789193


In [26]:
# Keep only books that occur in more than 75 of the collected rows, best score first.
is_popular = all_recs["book_count"].gt(75)
popular_recs = all_recs[is_popular].sort_values("score", ascending=False)

In [27]:
def make_clickable(val):
    """Render a URL as an HTML link labelled "Goodreads" that opens in a new tab.

    Args:
        val: The link target. Non-string values are converted with ``str``.

    Returns:
        str: An ``<a>`` element whose ``href`` is the HTML-escaped ``val``.
    """
    # Escape quotes, angle brackets and ampersands so a value from the dataset
    # cannot close the attribute and inject markup into the rendered table.
    href = html.escape(str(val), quote=True)
    return '<a target="_blank" href="{}">Goodreads</a>'.format(href)

def show_image(val):
    """Render an image URL as a 50-pixel-wide HTML ``<img>`` element.

    Args:
        val: The image source URL. Non-string values are converted with ``str``.

    Returns:
        str: An ``<img>`` element whose ``src`` is the HTML-escaped ``val``.
    """
    # Escape the value so it cannot close the attribute and inject markup.
    src = html.escape(str(val), quote=True)
    return '<img src="{}" width=50></img>'.format(src)

# Top ten popular recommendations that are not already in `liked_books`,
# with the URL columns rendered as a link and a thumbnail.
not_yet_liked = ~popular_recs["book_id"].isin(liked_books)
column_renderers = {"url": make_clickable, "image_url": show_image}
popular_recs[not_yet_liked].head(10).style.format(column_renderers)

Unnamed: 0,book_id,book_count,title,ratings,url,image_url,mod_title,score
123,5587960,1072,The Thing Around Your Neck,12501,Goodreads,,the thing around your neck,91.927366
71,43325,1351,Nothing Lasts Forever,23681,Goodreads,,nothing lasts forever,77.07449
89,43324,1272,Are You Afraid of the Dark?,21780,Goodreads,,are you afraid of the dark,74.287603
128,43327,1043,The Sky is Falling,15564,Goodreads,,the sky is falling,69.895207
83,300225,1297,Bloodline,24080,Goodreads,,bloodline,69.859178
117,99610,1098,The Best Laid Plans,17434,Goodreads,,the best laid plans,69.152461
37,18749,1828,Half of a Yellow Sun,48752,Goodreads,,half of a yellow sun,68.542501
115,119382,1110,The Sands of Time,18728,Goodreads,,the sands of time,65.789193
43,14554,1700,"If Tomorrow Comes (Tracy Whitney Series, #1)",45465,Goodreads,,if tomorrow comes tracy whitney series 1,63.56538
79,43328,1322,Rage of Angels,27603,Goodreads,,rage of angels,63.315002




*   Created a list of books I like.
*   Used the list to find users who liked the same books as I did, and to see which books they liked.


*   Ranked those books and generated recommendations from that ranking.
*   Next steps: improve the quality of the recommendations by using collaborative filtering.



