In [1]:
import html
import re

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the raw books table from a CSV file addressed by a relative path.
df_books = pd.read_csv("../data/raw/books_data.csv")

In [3]:
# Pull the "Title" column out as a one-column DataFrame for inspection.
titles = df_books["Title"].to_frame()

In [4]:
# Display the title-only frame.
titles

Unnamed: 0,Title
0,Its Only Art If Its Well Hung!
1,Dr. Seuss: American Icon
2,Wonderful Worship in Smaller Churches
3,Whispers of the Wicked Saints
4,"Nation Dance: Religion, Identity and Cultural ..."
...,...
212399,The Orphan Of Ellis Island (Time Travel Advent...
212400,Red Boots for Christmas
212401,Mamaw
212402,The Autograph Man


In [5]:
# Report the row count, then the number of missing values in each column.
print(f"Len: {len(df_books)}")
df_books.isna().sum()

Len: 212404


Title                 1
description       68442
authors           31413
image             52075
previewLink       23836
publisher         75886
publishedDate     25305
infoLink          23836
categories        41199
ratingsCount     162652
dtype: int64

In [6]:
# Remove, in place, every row that has a missing value in any column,
# then report how many rows are left.
df_books.dropna(axis=0, how="any", inplace=True)
print(f"Len: {len(df_books)}")

Len: 40635


In [7]:
# Preview the first rows of df_books.
df_books.head()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount
5,The Church of Christ: A Biblical Ecclesiology ...,In The Church of Christ: A Biblical Ecclesiolo...,['Everett Ferguson'],http://books.google.com/books/content?id=kVqRa...,http://books.google.nl/books?id=kVqRaiPlx88C&p...,Wm. B. Eerdmans Publishing,1996,http://books.google.nl/books?id=kVqRaiPlx88C&d...,['Religion'],5.0
31,Voices from the Farm: Adventures in Community ...,"Twenty-five years ago, at the height of the co...",['Rupert Fike'],http://books.google.com/books/content?id=IjTAB...,http://books.google.nl/books?id=IjTABgAAQBAJ&p...,Book Publishing Company,2012-08-21,https://play.google.com/store/books/details?id...,['Biography & Autobiography'],1.0
33,The Battleship Bismarck,The Bismarck is perhaps the most famous – and ...,['Stefan Draminski'],http://books.google.com/books/content?id=nxttD...,http://books.google.nl/books?id=nxttDwAAQBAJ&p...,Bloomsbury Publishing,2018-09-20,https://play.google.com/store/books/details?id...,['History'],1.0
42,Tess and the Highlander,"In 1543, on a windswept isle off of Scotland, ...",['May Mcgoldrick'],http://books.google.com/books/content?id=VmCRS...,http://books.google.nl/books?id=VmCRSPmY3WkC&d...,Harper Collins,2002-11,http://books.google.nl/books?id=VmCRSPmY3WkC&d...,['Juvenile Fiction'],2.0
43,Beginner's Yoruba (Hippocrene Beginner's Series),"""Beginner's Yoruba"" is now available with two ...",['Kayode J. Fakinlede'],http://books.google.com/books/content?id=xLe4n...,http://books.google.nl/books?id=xLe4nWzeSw0C&p...,Hippocrene Books,2005,http://books.google.nl/books?id=xLe4nWzeSw0C&d...,['Foreign Language Study'],1.0


In [8]:
# Coerce ratingsCount to a numeric dtype; search() sorts its results on this column.
df_books["ratingsCount"] = pd.to_numeric(df_books["ratingsCount"])

In [9]:
# Display df_books.
df_books

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount
5,The Church of Christ: A Biblical Ecclesiology ...,In The Church of Christ: A Biblical Ecclesiolo...,['Everett Ferguson'],http://books.google.com/books/content?id=kVqRa...,http://books.google.nl/books?id=kVqRaiPlx88C&p...,Wm. B. Eerdmans Publishing,1996,http://books.google.nl/books?id=kVqRaiPlx88C&d...,['Religion'],5.0
31,Voices from the Farm: Adventures in Community ...,"Twenty-five years ago, at the height of the co...",['Rupert Fike'],http://books.google.com/books/content?id=IjTAB...,http://books.google.nl/books?id=IjTABgAAQBAJ&p...,Book Publishing Company,2012-08-21,https://play.google.com/store/books/details?id...,['Biography & Autobiography'],1.0
33,The Battleship Bismarck,The Bismarck is perhaps the most famous – and ...,['Stefan Draminski'],http://books.google.com/books/content?id=nxttD...,http://books.google.nl/books?id=nxttDwAAQBAJ&p...,Bloomsbury Publishing,2018-09-20,https://play.google.com/store/books/details?id...,['History'],1.0
42,Tess and the Highlander,"In 1543, on a windswept isle off of Scotland, ...",['May Mcgoldrick'],http://books.google.com/books/content?id=VmCRS...,http://books.google.nl/books?id=VmCRSPmY3WkC&d...,Harper Collins,2002-11,http://books.google.nl/books?id=VmCRSPmY3WkC&d...,['Juvenile Fiction'],2.0
43,Beginner's Yoruba (Hippocrene Beginner's Series),"""Beginner's Yoruba"" is now available with two ...",['Kayode J. Fakinlede'],http://books.google.com/books/content?id=xLe4n...,http://books.google.nl/books?id=xLe4nWzeSw0C&p...,Hippocrene Books,2005,http://books.google.nl/books?id=xLe4nWzeSw0C&d...,['Foreign Language Study'],1.0
...,...,...,...,...,...,...,...,...,...,...
212374,Thin Within,"I want to lose weight, but dieting just doesn'...","['Judy Halliday', 'Arthur Halliday']",http://books.google.com/books/content?id=L_YV_...,http://books.google.com/books?id=L_YV_kuQwk8C&...,Thomas Nelson,2005-04-17,https://play.google.com/store/books/details?id...,['Health & Fitness'],9.0
212392,The Awakening and Selected Stories (Modern Lib...,"WHEN IT FIRST APPEARED IN 1899, THE AWAKENING ...",['Kate Chopin'],http://books.google.com/books/content?id=TDK4u...,http://books.google.com/books?id=TDK4u5Fl2D8C&...,Library of Alexandria,2003,https://play.google.com/store/books/details?id...,['Adultery'],2.0
212394,Final things,Grace's father believes in science and builds ...,['Jenny Offill'],http://books.google.com/books/content?id=UbSFB...,http://books.google.com/books?id=UbSFBAAAQBAJ&...,Vintage,2015-03-17,https://play.google.com/store/books/details?id...,['Fiction'],4.0
212399,The Orphan Of Ellis Island (Time Travel Advent...,"During a school trip to Ellis Island, Dominick...",['Elvira Woodruff'],http://books.google.com/books/content?id=J7M-N...,http://books.google.com/books?id=J7M-NwAACAAJ&...,Scholastic Paperbacks,2000-06-01,http://books.google.com/books?id=J7M-NwAACAAJ&...,['Juvenile Fiction'],2.0


In [10]:
def rename_title(df, title_column_name):
    """Add a normalised copy of a title column to ``df`` as ``"mod_title"``.

    The normalisation lower-cases each title, deletes every character that is
    not ``a-z``, ``0-9`` or a space, and then collapses runs of whitespace
    into a single space.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the titles. It is modified in place: a ``"mod_title"``
        column is added (or overwritten).
    title_column_name : str
        Name of the column in ``df`` that contains the raw title strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, for call chaining.
    """
    # Raw strings keep the regex escapes (``\s``) out of Python's own
    # string-escape processing, which otherwise warns on "\s".
    normalized = (
        df[title_column_name]
        .str.lower()
        .str.replace(r"[^a-z0-9 ]", "", regex=True)
        .str.replace(r"\s+", " ", regex=True)
    )
    df["mod_title"] = normalized
    return df

In [11]:
# Add the normalised "mod_title" column derived from "Title", then preview the result.
df_books = rename_title(df_books, "Title")
df_books.head()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,mod_title
5,The Church of Christ: A Biblical Ecclesiology ...,In The Church of Christ: A Biblical Ecclesiolo...,['Everett Ferguson'],http://books.google.com/books/content?id=kVqRa...,http://books.google.nl/books?id=kVqRaiPlx88C&p...,Wm. B. Eerdmans Publishing,1996,http://books.google.nl/books?id=kVqRaiPlx88C&d...,['Religion'],5.0,the church of christ a biblical ecclesiology f...
31,Voices from the Farm: Adventures in Community ...,"Twenty-five years ago, at the height of the co...",['Rupert Fike'],http://books.google.com/books/content?id=IjTAB...,http://books.google.nl/books?id=IjTABgAAQBAJ&p...,Book Publishing Company,2012-08-21,https://play.google.com/store/books/details?id...,['Biography & Autobiography'],1.0,voices from the farm adventures in community l...
33,The Battleship Bismarck,The Bismarck is perhaps the most famous – and ...,['Stefan Draminski'],http://books.google.com/books/content?id=nxttD...,http://books.google.nl/books?id=nxttDwAAQBAJ&p...,Bloomsbury Publishing,2018-09-20,https://play.google.com/store/books/details?id...,['History'],1.0,the battleship bismarck
42,Tess and the Highlander,"In 1543, on a windswept isle off of Scotland, ...",['May Mcgoldrick'],http://books.google.com/books/content?id=VmCRS...,http://books.google.nl/books?id=VmCRSPmY3WkC&d...,Harper Collins,2002-11,http://books.google.nl/books?id=VmCRSPmY3WkC&d...,['Juvenile Fiction'],2.0,tess and the highlander
43,Beginner's Yoruba (Hippocrene Beginner's Series),"""Beginner's Yoruba"" is now available with two ...",['Kayode J. Fakinlede'],http://books.google.com/books/content?id=xLe4n...,http://books.google.nl/books?id=xLe4nWzeSw0C&p...,Hippocrene Books,2005,http://books.google.nl/books?id=xLe4nWzeSw0C&d...,['Foreign Language Study'],1.0,beginners yoruba hippocrene beginners series


In [12]:
# TF-IDF model over the normalised titles, using the vectorizer's default settings.
vectorizer = TfidfVectorizer()

# Fit on mod_title and keep the resulting matrix; search() compares queries against
# it and maps matrix row positions back to df_books with .iloc, so the row order of
# tfidf must stay aligned with df_books.
tfidf = vectorizer.fit_transform(df_books["mod_title"])

In [21]:
def make_clickable(val):
    """Render a URL as an HTML link labelled "Google" that opens in a new tab.

    Parameters
    ----------
    val : object
        The link target; it is converted with ``str`` and HTML-escaped before
        being placed in the ``href`` attribute.

    Returns
    -------
    str
        An ``<a>`` element as an HTML string.
    """
    # Escape quotes, angle brackets and ampersands so that a value coming from
    # the data file cannot break out of the attribute.
    href = html.escape(str(val), quote=True)
    # "_blank" is the reserved target name for a new browsing context;
    # rel="noopener noreferrer" keeps the opened page from reaching back.
    return '<a target="_blank" rel="noopener noreferrer" href="{}">Google</a>'.format(href)

def show_image(val):
    """Render an image URL as a 50-pixel-wide HTML ``<img>`` element.

    Parameters
    ----------
    val : object
        The image location; it is converted with ``str`` and HTML-escaped
        before being placed in the ``src`` attribute.

    Returns
    -------
    str
        An ``<img>`` element as an HTML string.
    """
    # Escape the value so data-file content cannot break out of the attribute.
    src = html.escape(str(val), quote=True)
    # <img> is a void element, so it takes no closing tag.
    return '<img src="{}" width="50" alt="cover">'.format(src)

def search(query, vectorizer, top_n=5):
    """Find the books whose normalised titles are most similar to ``query``.

    The query is lower-cased and stripped of every character other than
    ``a-z``, ``0-9`` and space, vectorised with ``vectorizer``, and compared by
    cosine similarity against the module-level ``tfidf`` matrix. The best
    ``top_n`` rows are looked up positionally in the module-level ``df_books``
    and returned ordered by ``ratingsCount`` (highest first).

    Parameters
    ----------
    query : str
        Free-text title search.
    vectorizer : fitted vectorizer
        Object whose ``transform`` maps a list of strings into the same feature
        space as ``tfidf``.
    top_n : int, default 5
        Maximum number of books to return.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styled frame in which ``infoLink`` is rendered by ``make_clickable``
        and ``image`` by ``show_image``. It is empty when nothing in the
        corpus shares a term with the query.
    """
    processed = re.sub(r"[^a-z0-9 ]", "", query.lower())
    query_vec = vectorizer.transform([processed])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # argpartition raises if asked for more elements than exist, so cap the
    # request at the corpus size.
    k = min(top_n, similarity.size)
    if k > 0:
        indices = np.argpartition(similarity, -k)[-k:]
        # A score of zero means no shared term; without this filter such a
        # query would return arbitrary books.
        indices = indices[similarity[indices] > 0]
    else:
        indices = np.array([], dtype=int)

    results = df_books.iloc[indices].sort_values("ratingsCount", ascending=False)
    return results.style.format({'infoLink': make_clickable, 'image': show_image})
    

In [31]:
# Example query against the fitted title index.
search("Harry Potter", vectorizer)

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,mod_title
62999,Harry Potter and the Chamber of Secrets,"Witchcraft, wizardry - fiction.",['J. K. Rowling'],,http://books.google.com/books?id=nmXTy4FPfcwC&q=Harry+Potter+and+the+Chamber+of+Secrets&dq=Harry+Potter+and+the+Chamber+of+Secrets&hl=&cd=1&source=gbs_api,Raincoast Books,1999,Google,['Juvenile Fiction'],14.0,harry potter and the chamber of secrets
31810,The Irresistible Rise of Harry Potter,"Blake's examination of the Potter phenomenon raises serious questions about the condition of the publishing industry, filmmaking, and the ways in which the Potter consumer campaign has changed ideas about literature and reading.",['Andrew Blake'],,http://books.google.com/books?id=Aaug_RnI-xQC&printsec=frontcover&dq=The+Irresistible+Rise+of+Harry+Potter&hl=&cd=1&source=gbs_api,Verso,2002,Google,['Literary Criticism'],4.0,the irresistible rise of harry potter
170495,Harry Potter Manuscript Book,"'As someone who respects comprehensive research, I am in awe of the level of detail and amount of time Philip Errington has dedicated to this slavishly thorough and somewhat mind-boggling bibliography.' J. K. Rowling This is the definitive bibliography of the writings of J. K. Rowling. In addition to complete bibliographic details of each edition of all her books, pamphlets and original contributions to published works, there is detailed information on the publishing history of her work, including fascinating extracts from correspondence, and information on Rowling at auction. This will be the first source on Rowling consulted by textual scholars, book dealers and collectors, auction houses, critics and researchers. The aim of the book is to record fact and dispel rumour on the fascinating publishing history of the Harry Potter series.",['Philip W. Errington'],,http://books.google.com/books?id=H8buBQAAQBAJ&printsec=frontcover&dq=Harry+Potter+Manuscript+Book&hl=&cd=1&source=gbs_api,Bloomsbury Publishing,2015-02-26,Google,['Literary Criticism'],2.0,harry potter manuscript book
36776,Harry Potter and The Sorcerer's Stone,"Celebrate 20 years of Harry Potter magic! Harry Potter has never even heard of Hogwarts when the letters start dropping on the doormat at number four, Privet Drive. Addressed in green ink on yellowish parchment with a purple seal, they are swiftly confiscated by his grisly aunt and uncle. Then, on Harry's eleventh birthday, a great beetle-eyed giant of a man called Rubeus Hagrid bursts in with some astonishing news: Harry Potter is a wizard, and he has a place at Hogwarts School of Witchcraft and Wizardry. An incredible adventure is about to begin!These new editions of the classic and internationally bestselling, multi-award-winning series feature instantly pick-up-able new jackets by Jonny Duddle, with huge child appeal, to bring Harry Potter to the next generation of readers. It's time to PASS THE MAGIC ON ...",['J. K. Rowling'],,http://books.google.com/books?id=HksgDQAAQBAJ&dq=Harry+Potter+and+The+Sorcerer%27s+Stone&hl=&cd=1&source=gbs_api,Bloomsbury Publishing,2014-01-09,Google,['Juvenile Fiction'],1.0,harry potter and the sorcerers stone
53172,Critical Perspectives on Harry Potter,"This thoroughly revised edition includes updated essays on cultural themes and literary analysis, and its new essays analyze the full scope of the seven-book series as both pop cultural phenomenon and as a set of literary texts. Critical Perspectives on Harry Potter, Second Edition draws on a wider range of intellectual traditions to explore the texts, including moral-theological analysis, psychoanalytic perspectives, and philosophy of technology. The Harry Potter novels engage the social, cultural, and psychological preoccupations of our times, and Critical Perspectives on Harry Potter, Second Edition examines these worlds of consciousness and culture, ultimately revealing how modern anxieties and fixations are reflected in these powerful texts. (""DISCLAIMER: This book is not authorized, approved, licensed, or endorsed by J.K. Rowling, Warner Bros. Entertainment Inc., or anyone associated with the Harry Potter books or movies."")",['Elizabeth E. Heilman'],,http://books.google.com/books?id=yfOTAgAAQBAJ&printsec=frontcover&dq=Critical+Perspectives+on+Harry+Potter&hl=&cd=1&source=gbs_api,Routledge,2008-09-01,Google,['Education'],1.0,critical perspectives on harry potter


In [None]:
# Identifiers of books marked as liked.
# NOTE(review): 62999 and 36776 match df_books index labels shown in the search
# output, but they are stored here as strings while that index appears to be
# integer — confirm the type before using these for lookups.
liked_books = ["81605", "62999", "36776", "60730", "33908"]