# Load Books and Model Data

In [1]:
import pandas as pd

In [2]:
books = pd.read_csv('books.csv')

In [4]:
books.head()

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [6]:
import numpy as np

data = np.load('CF_data.npz')
book_index = data['book_index']
user_index = data['user_index'] 
item_similarity = data['item_similarity']

# Build Dictionary for Book

In [24]:
books_valid = books.iloc[book_index]

In [29]:
id_to_title = {}
for index, row in books_valid.iterrows():
    id_to_title[row['book_id']] = row['original_title']

In [43]:
title_to_id = {v: k for k, v in id_to_title.items()}

# Build Dictionary for ID to Numpy ID

In [27]:
id_to_numpy = {}
count = 0
for index, row in books_valid.iterrows():
    id_to_numpy[row['book_id']] = count
    count += 1

In [42]:
numpy_to_id = {v: k for k, v in id_to_numpy.items()}

# Find Top K

In [44]:
def title_to_numpy(title):
    return id_to_numpy[title_to_id[title]]

def numpy_to_title(numpy):
    return id_to_title[numpy_to_id[numpy]]

In [54]:
def top_k(title, k=10):
    indice = np.argpartition(item_similarity[title_to_numpy(title)], -k)[-k:]
    result = [numpy_to_title(b) for b in indice]
    return result

In the following sections I test some books that I have read before.

## Popular Series

In [55]:
top_k("Harry Potter and the Philosopher's Stone")

['Pride and Prejudice',
 'The Da Vinci Code',
 'The Hunger Games',
 'Harry Potter and the Deathly Hallows',
 "Harry Potter and the Philosopher's Stone",
 'Harry Potter and the Goblet of Fire',
 'Harry Potter and the Half-Blood Prince',
 'Harry Potter and the Chamber of Secrets',
 'Harry Potter and the Order of the Phoenix',
 'Harry Potter and the Prisoner of Azkaban']

It is great that the all sequels of `Harry Potter and the Philosopher's Stone` are shown here.

In [72]:
top_k("The Hunger Games")

['Harry Potter and the Half-Blood Prince',
 'Angels & Demons ',
 'The Help',
 'Twilight',
 "Harry Potter and the Philosopher's Stone",
 'Harry Potter and the Prisoner of Azkaban',
 'Divergent',
 'Mockingjay',
 'Catching Fire',
 'The Hunger Games']

The sequels of `The Hunger Games` are all shown. `Divergent` is very similar.

In [73]:
top_k("The Hobbit")

["One Flew Over the Cuckoo's Nest",
 'The Hobbit and The Lord of the Rings',
 'The Lion, the Witch and the Wardrobe',
 'Green Eggs and Ham',
 'The Two Towers',
 'The Return of the King',
 ' The Fellowship of the Ring',
 'The Hobbit or There and Back Again',
 'The Hobbit',
 'The Lord of the Rings']

## Classics

In [71]:
top_k("Wuthering Heights")

['Of Mice and Men ',
 'Little Women',
 'Pride and Prejudice',
 'Emma',
 'Great Expectations',
 'Sense and Sensibility',
 'Jane Eyre',
 'An Excellent conceited Tragedie of Romeo and Juliet',
 'Wuthering Heights',
 'The Scarlet Letter']

`Wuthering Heights` is very different from `Pride and Prejudice` and `Jane Eyre` in my opinion.

In [57]:
top_k("The Great Gatsby")

['The Adventures of Huckleberry Finn',
 'The Sun Also Rises',
 'The Catcher in the Rye',
 'Animal Farm: A Fairy Story',
 'Lord of the Flies ',
 'An Excellent conceited Tragedie of Romeo and Juliet',
 'The Great Gatsby',
 'To Kill a Mockingbird',
 'Of Mice and Men ',
 'Nineteen Eighty-Four']

## Children's Novel

In [76]:
top_k("The Secret Garden")

['Pride and Prejudice',
 'The Adventures of Tom Sawyer',
 'Het Achterhuis: Dagboekbrieven 14 juni 1942 - 1 augustus 1944',
 'The Lion, the Witch and the Wardrobe',
 'A Little Princess',
 'A Christmas Carol',
 'Little Women',
 'Anne of Green Gables',
 'The Secret Garden',
 "Charlotte's Web"]

Most of them are similar to `The Secret Garden`. It is strange for `Pride and Prejudice` to be here lol 

`Het Achterhuis: Dagboekbrieven 14 juni 1942 - 1 augustus 1944` is a sad one. I don't think it is appropriate for children.

## Animal Novel

In [61]:
top_k("Black Beauty")

['The Call of the Wild',
 'Aesopica',
 'Notre-Dame de Paris',
 "Charlotte's Web",
 'Treasure Island',
 'Der Schweizerische Robinson',
 'The Secret Garden',
 'Heidi',
 'Black Beauty',
 'White Fang']

## Transdisciplinary Non-fiction

In [62]:
top_k("Guns, Germs, and Steel: The Fates of Human Societies")

['Outliers: The Story of Success',
 'The Tragedy of Macbeth',
 'Stiff: The Curious Lives of Human Cadavers',
 'The Tipping Point: How Little Things Can Make a Big Difference',
 'The 7 Habits of Highly Effective People',
 'Freakonomics: A Rogue Economist Explores the Hidden Side of Everything',
 "A People's History of the United States: 1492 to Present ",
 'Guns, Germs, and Steel: The Fates of Human Societies',
 'A Short History of Nearly Everything',
 'Blink: The Power of Thinking Without Thinking']

## Science Fiction

In [69]:
top_k("Ender's Game")

['The Tipping Point: How Little Things Can Make a Big Difference',
 'Into Thin Air: A Personal Account of the Mt. Everest Disaster',
 "Ender's Game",
 'Foundation',
 'Neuromancer',
 "The Hitchhiker's Guide to the Galaxy",
 'Brave New World',
 'Snow Crash',
 'Stranger in a Strange Land',
 'Dune']