# Load Books and Model Data

In [1]:
import pandas as pd

In [2]:
books = pd.read_csv('../books.csv')

In [3]:
books.head()

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
0,1,2767052,2767052,2792775,272,439023483,9780439000000.0,Suzanne Collins,2008.0,The Hunger Games,...,4780653,4942365,155254,66715,127936,560092,1481305,2706317,https://images.gr-assets.com/books/1447303603m...,https://images.gr-assets.com/books/1447303603s...
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4602479,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...
2,3,41865,41865,3212258,226,316015849,9780316000000.0,Stephenie Meyer,2005.0,Twilight,...,3866839,3916824,95009,456191,436802,793319,875073,1355439,https://images.gr-assets.com/books/1361039443m...,https://images.gr-assets.com/books/1361039443s...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3198671,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2683664,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...


In [4]:
import numpy as np

data = np.load('CF_data.npz')
book_index = data['book_index']
user_index = data['user_index'] 
item_similarity = data['item_similarity']

# Build Dictionary for Book

In [5]:
books_valid = books.iloc[book_index]

In [6]:
id_to_title = {}
for index, row in books_valid.iterrows():
    id_to_title[row['book_id']] = row['original_title']

In [7]:
title_to_id = {v: k for k, v in id_to_title.items()}

# Build Dictionary for ID to Numpy ID

In [8]:
id_to_numpy = {}
count = 0
for index, row in books_valid.iterrows():
    id_to_numpy[row['book_id']] = count
    count += 1

In [9]:
numpy_to_id = {v: k for k, v in id_to_numpy.items()}

# Evaluate Similarity

In [10]:
def title_to_numpy(title):
    return id_to_numpy[title_to_id[title]]

def numpy_to_title(numpy):
    return id_to_title[numpy_to_id[numpy]]

In [11]:
def top_k(title, k=10):
    indice = np.argpartition(item_similarity[title_to_numpy(title)], -k)[-k:]
    result = [numpy_to_title(b) for b in indice]
    return result

In the following sections I test some books that I have read before.

## Popular Series

In [12]:
top_k("Harry Potter and the Philosopher's Stone")

['Complete Harry Potter Boxed Set',
 'The Hunger Games',
 'Harry Potter and the Goblet of Fire',
 'Harry Potter and the Prisoner of Azkaban',
 "Harry Potter and the Philosopher's Stone",
 'Harry Potter and the Half-Blood Prince',
 'Harry Potter and the Order of the Phoenix',
 'The Da Vinci Code',
 'Harry Potter and the Chamber of Secrets',
 'Harry Potter and the Deathly Hallows']

It is great that the all sequels of `Harry Potter and the Philosopher's Stone` are shown here.

In [13]:
top_k("The Hunger Games")

['Harry Potter and the Half-Blood Prince',
 'Harry Potter and the Prisoner of Azkaban',
 'The Help',
 'Angels & Demons ',
 'Divergent',
 "Harry Potter and the Philosopher's Stone",
 'Mockingjay',
 'Twilight',
 'Catching Fire',
 'The Hunger Games']

The sequels of `The Hunger Games` are all shown. `Divergent` is very similar.

In [14]:
top_k("The Hobbit")

["One Flew Over the Cuckoo's Nest",
 'The Hobbit and The Lord of the Rings',
 'The Lion, the Witch and the Wardrobe',
 'Green Eggs and Ham',
 'The Two Towers',
 'The Return of the King',
 'The Lord of the Rings',
 'The Hobbit',
 'The Hobbit or There and Back Again',
 ' The Fellowship of the Ring']

`The Lord of the Rings` is shown.

## Classics

In [15]:
top_k("Wuthering Heights")

['Of Mice and Men ',
 'Emma',
 'Little Women',
 'Great Expectations',
 'Pride and Prejudice',
 'Sense and Sensibility',
 'An Excellent conceited Tragedie of Romeo and Juliet',
 'Jane Eyre',
 'The Scarlet Letter',
 'Wuthering Heights']

`Wuthering Heights` is very different from `Pride and Prejudice` and `Jane Eyre` in my opinion.

I really wish `One Hundred Years of Solitude` is included in this dataset. It reminds me of `Wuthering Heights`, which I read about ten years ago.

In [16]:
top_k("The Great Gatsby")

['The Adventures of Huckleberry Finn',
 'The Sun Also Rises',
 'Nineteen Eighty-Four',
 'Animal Farm: A Fairy Story',
 'The Catcher in the Rye',
 'An Excellent conceited Tragedie of Romeo and Juliet',
 'The Great Gatsby',
 'To Kill a Mockingbird',
 'Of Mice and Men ',
 'Lord of the Flies ']

## Dystopia Novel

In [17]:
top_k("Nineteen Eighty-Four")

['The Great Gatsby',
 'Of Mice and Men ',
 'Catch-22',
 'The Catcher in the Rye',
 "Slaughterhouse-Five, or The Children's Crusade: A Duty-Dance with Death ",
 'Lord of the Flies ',
 'Fahrenheit 451',
 'Animal Farm: A Fairy Story',
 'Nineteen Eighty-Four',
 'Brave New World']

## Children's Novel

In [18]:
top_k("The Secret Garden")

['A Wrinkle in Time',
 'Het Achterhuis: Dagboekbrieven 14 juni 1942 - 1 augustus 1944',
 'The Adventures of Tom Sawyer',
 'The Lion, the Witch and the Wardrobe',
 'A Christmas Carol',
 'A Little Princess',
 'The Secret Garden',
 'Little Women',
 "Charlotte's Web",
 'Anne of Green Gables']

Most of them are similar to `The Secret Garden`. It is strange for `Pride and Prejudice` to be here.

`Het Achterhuis: Dagboekbrieven 14 juni 1942 - 1 augustus 1944` is a sad one. I don't think it is appropriate for children.

## Animal Novel

In [19]:
top_k("Black Beauty")

['The Tale of Peter Rabbit',
 'White Fang',
 'The Secret Garden',
 'Der Schweizerische Robinson',
 'Notre-Dame de Paris',
 'The Call of the Wild',
 'Black Beauty',
 'Heidi',
 "Charlotte's Web",
 'Aesopica']

`War Horse` reminds me of this novel. But the dataset is too small to include it.

## Transdisciplinary Non-fiction

In [20]:
top_k("Guns, Germs, and Steel: The Fates of Human Societies")

['Stiff: The Curious Lives of Human Cadavers',
 'The Tipping Point: How Little Things Can Make a Big Difference',
 'Outliers: The Story of Success',
 'Blink: The Power of Thinking Without Thinking',
 'A Short History of Nearly Everything',
 'Il Principe',
 "A People's History of the United States: 1492 to Present ",
 'Freakonomics: A Rogue Economist Explores the Hidden Side of Everything',
 'Guns, Germs, and Steel: The Fates of Human Societies',
 'An Excellent conceited Tragedie of Romeo and Juliet']

I expect to see `Sapiens: A Brief History of Humankind` but it is not included in the dataset.

## Science Fiction

In [21]:
top_k("Ender's Game")

['The Tipping Point: How Little Things Can Make a Big Difference',
 'Brave New World',
 'Stranger in a Strange Land',
 'Neuromancer',
 'Into Thin Air: A Personal Account of the Mt. Everest Disaster',
 "The Hitchhiker's Guide to the Galaxy",
 'Dune',
 'Snow Crash',
 "Ender's Game",
 'Foundation']

# Recommend Books based on user's ratings

In [22]:
my_rating = {
    'The Hunger Games': 4.5,
    'Twilight': 3.5,
    'The Da Vinci Code': 4.5,
    'The Great Gatsby': 5,
    "Ender's Game": 4,
    'Nineteen Eighty-Four': 4.5,
    'The Secret Garden': 4.5,
    'Le Comte de Monte-Cristo': 5,
    'Les Misérables': 5,
    'Wuthering Heights': 5,
    'Pride and Prejudice': 4.5,
    'Jane Eyre': 4,
    'Black Beauty': 5,
    'Fall of Giants': 3.5,
    'Guns, Germs, and Steel: The Fates of Human Societies': 5, 
    'Cloud Atlas': 5,
    'The Hobbit': 4,
    "Harry Potter and the Philosopher's Stone": 5
}

In [23]:
def get_recommondation(user_rating, similarity, K=15):
    user_numpy = np.array([title_to_numpy(x) for x in user_rating.keys()])
    user_rating = np.array([y for x, y in user_rating.items()])
    pred_rating = np.zeros(similarity.shape[0])
        
    for i in range(similarity.shape[0]):
        top_k_similar = np.argpartition(similarity[i][user_numpy], -K)[-K:]
        pred_rating[i] = np.dot(user_rating[top_k_similar], similarity[i][user_numpy[top_k_similar]]) / similarity[i][user_numpy[top_k_similar]].sum()
    
    top_k_rating = np.argpartition(pred_rating, -30)[-30:]
    dic = {numpy_to_title(x): pred_rating[x] for x in top_k_rating}
    
    return dic

In [24]:
recommend = get_recommondation(my_rating, item_similarity)
recommend

{'Candide': 4.738780317060948,
 'The One Plus One': 4.796180717673848,
 'The Story of Edgar Sawtelle': 4.809392671058634,
 'Le Comte de Monte-Cristo': 4.78645304155522,
 "L'élégance du hérisson": 4.763744226180255,
 '2001: A Space Odyssey': 4.742712746251362,
 'Les Misérables': 4.775234330848559,
 'No Country for Old Men': 4.787372666885708,
 'The Corrections': 4.794923051585936,
 'ねじまき鳥クロニクル [Nejimakidori kuronikuru]': 4.746867893539367,
 'Freedom': 4.739999123069169,
 'The Sense of an Ending': 4.761703072965084,
 'The Amazing Adventures of Kavalier & Clay': 4.771760354406468,
 'The Autobiography of Malcolm X': 4.747426661710104,
 'Hearts in Atlantis': 4.749151768170858,
 'The Glass Menagerie': 4.795128720764772,
 'The Interestings': 4.75321959715013,
 'Beowulf': 4.752684346851267,
 'The Hours': 4.758093917764653,
 'Black Beauty': 4.824912466068879,
 'Let the Great World Spin': 5.125484153643984,
 'Leaving Time': 4.888565806646468,
 'Easy': 8.219043621363733,
 "Tell the Wolves I'm Hom

I have no idea what those books are (?) Some of them seem interesting.