In [1]:
#Get data from Google BOOKS API
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup
import re 
from goodreads import client
import datetime
import time
import nltk
#nltk.download('stopwords')
import pickle
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel

In [2]:
#Read Google Books API Developer Key from key.txt file
DEVELOPER_KEY = ""
with open('googleapi_key.txt') as file:
    DEVELOPER_KEY = str(file.readline())

In [3]:
keywords=['current+trending+books','current+bestseller+books']
print(keywords)

['current+trending+books', 'current+bestseller+books']


In [4]:
def get_volumeinfo(item):
    iteminfo = {}
    
    keys = ['title','subtitle','authors','publisher','publishedDate','description','industryIdentifiers','readingModes','pageCount','printType','categories','averageRating','ratingsCount','maturityRating','language','infoLink']
    
    for key in keys :
        try :
            if(item['volumeInfo'][key]) :
                iteminfo[key] = item['volumeInfo'][key]
        except:
            iteminfo[key] = None
    
    return iteminfo

In [5]:
def get_isbn(info):
    isbn = None
    try :
        identifiers = info['industryIdentifiers']
        for x in range(len(identifiers)):
            if(str(identifiers[x]['type']) == 'ISBN_13'):
                isbn = identifiers[x]['identifier']
    except:
        pass
    
    return isbn

In [6]:
pages_to_fetch = 20
trending_books = []

def fetch_data(url):
    for i in range(pages_to_fetch):
        start_index = i *40
        print("page num : ", start_index)
        params = {'maxResults':40,'startIndex':start_index}
        url_data = requests.get(url,params=params).json()
        try:
            for j in range(len(url_data['items'])):
                print("Processing for item : ", j)
                item_info = get_volumeinfo(url_data['items'][j])
                print("item info : ", item_info)
                isbn = get_isbn(item_info)
                if isbn:
                    item_info['isbn'] = isbn
                else:
                    item_info['isbn'] = None
                trending_books.append(item_info)
        except:
            pass

In [7]:
print("Start time : ", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
for item in range(len(keywords)):
    fetch_item = keywords[item]
    print("Fetch : ", fetch_item )
    #Get single book
    url = 'https://www.googleapis.com/books/v1/volumes?q=' + str(fetch_item) + ' &key=' + str(DEVELOPER_KEY)
    print("url : ",url)
    fetch_data(url)
print("End time : ", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

Start time :  2019-08-09 02:04:36
Fetch :  current+trending+books
url :  https://www.googleapis.com/books/v1/volumes?q=current+trending+books &key=AIzaSyAn2_08aOmG_dXnOPy_Tumz_lt_PLg7u98
page num :  0
Processing for item :  0
item info :  {'title': 'Big Data', 'subtitle': 'A Revolution That Will Transform How We Live, Work, and Think', 'authors': ['Viktor Mayer-Schönberger', 'Kenneth Cukier'], 'publisher': 'Houghton Mifflin Harcourt', 'publishedDate': '2013-03-05', 'description': 'A revelatory exploration of the hottest trend in technology and the dramatic impact it will have on the economy, science, and society at large. Which paint color is most likely to tell you that a used car is in good shape? How can officials identify the most dangerous New York City manholes before they explode? And how did Google searches predict the spread of the H1N1 flu outbreak? The key to answering these questions, and many more, is big data. “Big data” refers to our burgeoning ability to crunch vast col

Processing for item :  0
item info :  {'title': 'Introductory Text-book of Physical Geography', 'subtitle': None, 'authors': ['David Page'], 'publisher': None, 'publishedDate': '1863', 'description': None, 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'OXFORD:590748337'}], 'readingModes': {'text': False, 'image': True}, 'pageCount': 193, 'printType': 'BOOK', 'categories': ['Physical geography'], 'averageRating': None, 'ratingsCount': None, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'https://play.google.com/store/books/details?id=bXYDAAAAQAAJ&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'Connected Vehicles', 'subtitle': 'Intelligent Transportation Systems', 'authors': ['Radovan Miucic'], 'publisher': 'Springer', 'publishedDate': '2019', 'description': 'This book introduces concepts and technologies of Intelligent Transportation Systems (ITS). It describes state of the art safety communication protocol called Dedicated Short Range Communic

Processing for item :  0
item info :  {'title': 'Queen Bey', 'subtitle': 'A Celebration of the Power and Creativity of Beyoncé Knowles-Carter', 'authors': ['Veronica Chambers'], 'publisher': "St. Martin's Press", 'publishedDate': '2019-03-05', 'description': 'FEATURED IN: Essence • People • Bustle • PopSugar • Refinery 29 • HelloGiggles\' • PureWow • Newsday The Ultimate Beyoncé Collectible "Beyoncé fans will eat it up." —People "You don\'t need to be in the Beyhive to appreciate Queen Bey...Voices including culture critic Luvvie Ajayi and actress and producer Lena Waithe give us a fresh take on Beyoncé, who\'s arguably the biggest pop star of our time." —Essence Beyoncé. Her name conjures more than music, it has come to be synonymous with beauty, glamour, power, creativity, love, and romance. Her performances are legendary, her album releases events. She is not even forty but she has already rewritten the Beyoncé playbook more than half a dozen times. She is consistently provocative, 

Processing for item :  0
item info :  {'title': 'Socrates in the City: Conversations on Life, God and Other Small Topics', 'subtitle': None, 'authors': ['Eric Metaxas'], 'publisher': 'HarperCollins UK', 'publishedDate': '2011-10-13', 'description': "Following the extraordinary success of the New York Times bestseller Bonhoeffer, Eric Metaxas's latest book offers inspirational and intellectually rigorous thoughts on the big questions surrounding us all today.", 'industryIdentifiers': [{'type': 'ISBN_13', 'identifier': '9780007461066'}, {'type': 'ISBN_10', 'identifier': '0007461062'}], 'readingModes': {'text': True, 'image': False}, 'pageCount': 400, 'printType': 'BOOK', 'categories': ['Religion'], 'averageRating': 4.0, 'ratingsCount': 2, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'http://books.google.com/books?id=x0xYvVg0-MIC&dq=current+trending+books&hl=&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'Zaitoun: Recipes from the Palestinian Kitchen', 

Processing for item :  0
item info :  {'title': "Avery's Diseases of the Newborn E-Book", 'subtitle': None, 'authors': ['Christine A. Gleason', 'Sherin Devaskar'], 'publisher': 'Elsevier Health Sciences', 'publishedDate': '2011-08-12', 'description': 'Avery’s Diseases of the Newborn, edited by Christine A. Gleason and Sherin U. Devaskar, is a practical, clinical reference for diagnosing and managing of all the important diseases affecting newborns. Thoroughly revised by a team of new editors, this edition provides new perspectives and updated coverage of genetics, nutrition, respiratory conditions, MRSA, neonatal pain, cardiovascular fetal interventions, care of the late preterm infant, and more. This authoritative reference is ideal as a clinical resource or subspecialty review tool. Treat newborns effectively with focused coverage of diagnosis and management, including pertinent developmental physiology and the pathogenesis of neonatal problems. Meet every challenge you face in neona

Processing for item :  0
item info :  {'title': 'Knife', 'subtitle': 'A New Harry Hole Novel', 'authors': ['Jo Nesbo'], 'publisher': 'Knopf', 'publishedDate': '2019-07-09', 'description': "Brilliant, audaciously rogue police officer, Harry Hole from The Snowman and The Thirst, is back and in the throes of a new, unanticipated rage--once again hunting the murderer who has haunted his entire career. Harry Hole is not in a good place. Rakel--the only woman he's ever loved--has ended it with him, permanently. He's been given a chance for a new start with the Oslo Police but it's in the cold case office, when what he really wants is to be investigating cases he suspects have ties to Svein Finne, the serial rapist and murderer who Harry helped put behind bars. And now, Finne is free after a decade-plus in prison--free, and Harry is certain, unreformed and ready to take up where he left off. But things will get worse. When Harry wakes up the morning after a blackout, drunken night with blood 

Processing for item :  0
item info :  {'title': 'General Knowledge January 2018 eBook', 'authors': ['Jagran Josh'], 'publisher': 'Jagran Josh', 'publishedDate': '2018-01-02', 'description': 'General knowledge has immense importance in various competitive exams like UPSC, State Services, SSC, Banking, Railway, NDA, MBA entrance exams and various service exams in private as well as public sectors. This edition contains detailed discussion and analysis of the current GK topics and MCQs for further practice about the latest and most important happenings in political, economic, social, sports and entertainment fields all over the world. All the topics are presented with facts and a brief description, so that the reader gets full knowledge and understanding in all the key areas of exams. Some maps, tables, etc. are also included for further clarity. • e-book covers National, International, Economy, Environment, Ecology, News from States,Sports, Discussion and Analysis. • e-book has comprehen

Processing for item :  0
item info :  {'title': 'Million Dollar Web Presence: Leverage the Web to Build Your Brand and Transform Your Business', 'subtitle': None, 'authors': ['Chad Barr', 'Alan Weiss'], 'publisher': 'Entrepreneur Press', 'publishedDate': '2012-02-28', 'description': 'Building upon the success of his Million Dollar series, which includes bestseller Million Dollar Consulting, Alan Weiss, teamed with globally renowned internet expert Chad Barr, shows you how to cash in on the unmatched reach of the web. Empowered to tactically leverage technology, from your website to mobile marketing, uncover the secrets to dramatically elevating your brand—and ultimately, driving more revenue and growing your business. Alan Weiss, Ph.D., (East Greenwich, RI) is a consultant, speaker, and author of 45 books, including his bestseller, Million Dollar Consulting. He is the founder of consulting firm, Summit Consulting Group, Inc. co-creator of Million Dollar Websites: www.themilliondollarwe

Processing for item :  0
item info :  {'title': 'Rotating Machinery Vibration', 'subtitle': 'From Analysis to Troubleshooting, Second Edition', 'authors': ['Maurice L. Adams'], 'publisher': 'CRC Press', 'publishedDate': '2010-08-09', 'description': 'Diagnosis and correction are critical tasks for the vibrations engineer. Many causes of rotor vibration are so subtle and pervasive that excessive vibration continues to occur despite the use of usually effective design practices and methods of avoidance. Rotating Machinery Vibration: From Analysis to Troubleshooting provides a comprehensive, consolidated overview of the fundamentals of rotating machinery vibration and addresses computer model building, sources and types of vibration, and machine vibration signal analysis. This reference is a powerful tool to strengthen vital in-house competency on the subject for professionals in a variety of fields. After presenting governing fundamental principles and background on modern measurement, co

Processing for item :  0
item info :  {'title': 'Nursing Today - Binder Ready', 'subtitle': 'Transition and Trends', 'authors': ['JoAnn Zerwekh', 'Ashley Zerwekh Garneau'], 'publisher': 'Elsevier Health Sciences', 'publishedDate': '2017-02-01', 'description': 'Loved for its humor, readability, and inviting cartoons, Nursing Today: Transitions and Trends, 9th Edition helps you prepare for the NCLEX-RN® Examination — while giving you valuable information to succeed in your professional career. It reflects current issues and trending topics that nurses will face, ensuring that you graduate not only with patient care skills, but also with career development skills such as resume writing, finding a job, and effective interviewing. This edition features test-taking tips for the NCLEX-RN® Examination and updated Evolve resources for students, including review questions and case studies. Thorough coverage of all the most important issues faced by the new nurse, preparing you for a professional

Processing for item :  0
item info :  {'title': 'Cutting Loose', 'subtitle': 'Why Women Who End Their Marriages Do So Well', 'authors': ['Ashton Applewhite'], 'publisher': 'HarperCollins', 'publishedDate': '2017-06-20', 'description': 'For women contemplating divorce or for those who have already divorced, Ashton Applewhite’s insightful book sheds light on what to consider before making the decision to end your marriage, how to protect yourself—both financially and emotionally—and how much your life will change. One out of every two modern marriages ends in divorce, and 75 percent of those divorces are initiated by wives. Author Ashton Applewhite is one of these women, having sued for divorce after enduring an unfulfilling ten-year marriage. Cutting Loose is an essential resource for women who want to leave their marriage but fear the consequences. Shattering the media-generated image of the lonely, deprived and financially strapped divorcee, Applewhite provides a much needed reality c

Processing for item :  0
item info :  {'title': 'Proceedings [of The] Annual Convention', 'subtitle': None, 'authors': ['National Association of Regulatory Utility Commissioners'], 'publisher': None, 'publishedDate': '1955', 'description': 'Vols. for 1893-1912 contain also "List of state railroad commissions, showing official titles and addresses, and names and addresses of members and secretaries."', 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'UOM:39015013768653'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': None, 'printType': 'BOOK', 'categories': ['Railroads'], 'averageRating': None, 'ratingsCount': None, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'http://books.google.com/books?id=mtxKAAAAMAAJ&dq=current+trending+books&hl=&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'Decisions of Pennsylvania Public Utility Commission', 'subtitle': None, 'authors': None, 'publisher': None, 'publishedDate': '1977', 'description':

Processing for item :  0
item info :  {'title': 'The American Church Almanac and Year Book', 'subtitle': None, 'authors': None, 'publisher': None, 'publishedDate': '1894', 'description': None, 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'HARVARD:HN1PZF'}], 'readingModes': {'text': False, 'image': True}, 'pageCount': None, 'printType': 'BOOK', 'categories': None, 'averageRating': None, 'ratingsCount': None, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'https://play.google.com/store/books/details?id=TD8ZAAAAYAAJ&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'The Year-book of Facts in Science and the Arts for 1874', 'subtitle': None, 'authors': ['Vincent (W. Charles)'], 'publisher': None, 'publishedDate': '1875', 'description': None, 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'RMS:RMS45DOP000000742$$$K'}], 'readingModes': {'text': False, 'image': True}, 'pageCount': 248, 'printType': 'BOOK', 'categories': None, 'averageRating': 

Processing for item :  0
item info :  {'title': 'Continent', 'subtitle': None, 'authors': None, 'publisher': None, 'publishedDate': '1914', 'description': None, 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'IOWA:31858020583005'}], 'readingModes': {'text': False, 'image': True}, 'pageCount': None, 'printType': 'BOOK', 'categories': ['Christianity'], 'averageRating': None, 'ratingsCount': None, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'https://play.google.com/store/books/details?id=2DdKAQAAMAAJ&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'English Mechanics and the World of Science', 'subtitle': None, 'authors': None, 'publisher': None, 'publishedDate': '1879', 'description': None, 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'UOM:39015080399275'}], 'readingModes': {'text': False, 'image': True}, 'pageCount': None, 'printType': 'BOOK', 'categories': ['Technology'], 'averageRating': None, 'ratingsCount': None, 'maturityRating'

Processing for item :  0
item info :  {'title': 'The British Australasian and New Zealand Mail', 'subtitle': None, 'authors': None, 'publisher': None, 'publishedDate': '1901', 'description': None, 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'WISC:89053274809'}], 'readingModes': {'text': False, 'image': True}, 'pageCount': None, 'printType': 'BOOK', 'categories': ['Australia'], 'averageRating': None, 'ratingsCount': None, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'https://play.google.com/store/books/details?id=h7tBJ2FHoWEC&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'Summary: Mark R. Levin Ameritopia', 'subtitle': 'The Unmaking of America', 'authors': ['Quick Read Summary Books'], 'publisher': None, 'publishedDate': '2014-02-15', 'description': "In this summary of Mark R. Levin's book, Ameritopia: The Unmaking of America, you will discover how America's government has been slowly descending into tyranny using utopian tactics that serv

Processing for item :  0
item info :  {'title': "The Basilisk's Creed: Volume Five (The Basilisk's Creed #1)", 'authors': ['Eme Strife'], 'publisher': '(Eme)nded Publishing', 'publishedDate': '2016-11-24', 'description': "Dear diary...and anyone else who will listen: Tonight, I met the man of my dreams. Literally. *** Whoever said living in California was glamorous and easy-going lied through their fucking teeth. I guess I can blame reality TV for that pervasive B.S. Maybe it was good thing my old man banned television at home for all the years he was alive, after all. I probably would’ve been even more deluded than I was when I’d moved up here from Arkansas three months ago. Looking back, I’m not even sure why I did it; why, of all the places I could’ve chosen to start a new life, I picked San Francisco: one of the most expensive places in the world to live. I don’t even know anyone one here. Heck, I’d never even visited the city before my move. Still, I’m determined to make it here, 

Processing for item :  0
item info :  {'title': "The End of Alzheimer's", 'subtitle': 'The First Program to Prevent and Reverse Cognitive Decline', 'authors': ['Dale Bredesen'], 'publisher': 'Penguin', 'publishedDate': '2017', 'description': 'Everyone knows someone who has survived cancer, but no one knows anyone who has survived Alzheimer\'s Disease. Dale Bredesen, MD, offers hope to anyone looking to prevent and even reverse Alzheimer\'s Disease and cognitive decline. Arguing that AD is not one condition, as it is currently treated, but three, Bredesen outlines 36 metabolic factors (micronutrients, hormone levels, sleep) that can trigger "downsizing" in the brain. He then shows us how to rebalance these factors using lifestyle modifications like taking B12, eliminating gluten, or improving oral hygiene.', 'industryIdentifiers': [{'type': 'ISBN_13', 'identifier': '9780735216204'}, {'type': 'ISBN_10', 'identifier': '0735216207'}], 'readingModes': {'text': False, 'image': False}, 'pageC

Processing for item :  0
item info :  {'title': 'Your First 1000 Copies', 'subtitle': 'The Step-By-Step Guide to Marketing Your Book', 'authors': ['Tim Grahl'], 'publisher': 'Out: Think', 'publishedDate': '2013-06-27', 'description': '"If I could give an aspiring writer one piece of advice, it would be to read this book." - Hugh Howey, New York Times best selling author of Wool "Your First 1000 Copies is a must-read for authors trying to build a connection with their readers." - Dan Heath & Chip Heath, co-authors of Made to Stick, Switch, and Decisive "I watched in awe this year as Tim Grahl had 5 clients on the New York Times bestseller list in the same week. There is no one I trust more to learn about book marketing." - Pamela Slim, Author, Escape from Cubicle Nation "Tim was an early pioneer in teaching book authors how Internet marketing ACTUALLY works." - Hugh MacLeod, author of Ignore Everybody and Evil Plans Imagine if you had a direct connection with thousands of readers who lo

Processing for item :  0
item info :  {'title': 'Nine Perfect Strangers', 'subtitle': None, 'authors': ['Liane Moriarty'], 'publisher': 'Flatiron Books', 'publishedDate': '2018-11-06', 'description': 'NEW YORK TIMES BESTSELLER “If three characters were good in Big Little Lies, nine are even better in Nine Perfect Strangers.” —Lisa Scottoline, The New York Times Book Review From the #1 New York Times bestselling author of Big Little Lies Could ten days at a health resort really change you forever? In Liane Moriarty’s latest page-turner, nine perfect strangers are about to find out... Nine people gather at a remote health resort. Some are here to lose weight, some are here to get a reboot on life, some are here for reasons they can’t even admit to themselves. Amidst all of the luxury and pampering, the mindfulness and meditation, they know these ten days might involve some real work. But none of them could imagine just how challenging the next ten days are going to be. Frances Welty, the

Processing for item :  0
item info :  {'title': 'The Fight', 'subtitle': "A Secret Service Agent's Inside Account of Security Failings and the Political Machine", 'authors': ['Dan Bongino'], 'publisher': "St. Martin's Press", 'publishedDate': '2016-01-12', 'description': 'The New York Times bestseller! "The Fight shines a much needed light on the troubling games DC politicians and insiders play with the American people." -Sean Hannity "The Fight is a lesson plan for fighting back against the Washington DC political machine." - Mark Levin In The Fight, Dan Bongino picks up the story where his New York Times bestselling book Life Inside the Bubble ends, tackling current political and security issues and offering new solutions. From Hillary\'s emails to the security failings at the White House (including the drone crash and the fence jumper); from Charlie Hebdo to Bowe Bergdahl--the author examines how our current administration has allowed our security efforts to lapse both at home and a

Processing for item :  0
item info :  {'title': 'Four Views of Youth Ministry and the Church', 'subtitle': None, 'authors': ['Wesley Black', 'Chap Clark', 'Malan Nel'], 'publisher': 'Zondervan', 'publishedDate': '2010-01-05', 'description': 'Join the conversation as experts propose, defend, and explore Four Views of Youth Ministry and the Church.In a dialog that often gets downright feisty, four youth ministry academicians delineate their distinct philosophical and ecclesiological views regarding how youth ministry relates to the church at large--and leave a taste of what’s profound and what’s not in these four typologies:Inclusive congregational (Malan Nel). What happens when a church thoroughly integrates its adolescents, making them full partners in every aspect of congregational life?Preparatory (Wesley Black). Why and how should a church consider its teenagers as disciples-in-training and its youth ministry a school of preparation for future participation in church life?Missional 

Processing for item :  0
item info :  {'title': 'A Flag Worth Dying For', 'subtitle': 'The Power and Politics of National Symbols', 'authors': ['Tim Marshall'], 'publisher': 'Simon and Schuster', 'publishedDate': '2017-07-04', 'description': 'First published in Great Britain in 2016 by Elliott and Thompson Limited as: Worth dying for: the power and politics of flags.', 'industryIdentifiers': [{'type': 'ISBN_13', 'identifier': '9781501168338'}, {'type': 'ISBN_10', 'identifier': '1501168339'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': 304, 'printType': 'BOOK', 'categories': ['History'], 'averageRating': 3.0, 'ratingsCount': 1, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'http://books.google.com/books?id=ysYpDwAAQBAJ&dq=current+bestseller+books&hl=&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'Shadow', 'subtitle': None, 'authors': ['Michael Morpurgo'], 'publisher': 'Feiwel & Friends', 'publishedDate': '2014-12-23', 'description': 

Processing for item :  0
item info :  {'title': 'Seize the Night', 'subtitle': 'A Dark-Hunter Novel', 'authors': ['Sherrilyn Kenyon'], 'publisher': "St. Martin's Paperbacks", 'publishedDate': '2010-04-01', 'description': "Valerius isn't a popular Dark-Hunter-he's a Roman, which means that the largely Greek Hunters have a major grudge against him and his civilization for superceding them. To make things worse, he's very conscious of his aristocratic background and breeding. So it serves him right when he runs into Tabitha Devereaux. She's sassy, sexy, and completely unwilling to take him seriously. (Not to mention that she's the twin sister of the wife of former Dark-Hunter Kyrian-Val's mortal enemy.) What Tabitha does take seriously is hunting and killing vampires-and soon she and Val have to grapple with the deadliest of all Daimons-one who's managed to come back from the dead, and one who holds a serious grudge against both of them. To win against evil, Val will have to loosen up, le

Processing for item :  0
item info :  {'title': 'Until Death We Do Part', 'subtitle': None, 'authors': ['Sherrilyn Kenyon'], 'publisher': "St. Martin's Press", 'publishedDate': '2016-07-05', 'description': "Previously published in anthologies, Until Death We Do Part is now available as a standalone e-novella from #1 New York Times bestselling author Sherrilyn Kenyon! The only thing worse than being Dracula's only daughter is betraying him by marrying his bitterest enemy-- a dark sorcerer warlord whose powers make a mockery of the legendary monster known for impalement and massacre. Forced to flee her father's wrath over her secret marriage, Esperetta is willing to sacrifice anything for Velkan Danesti, until she learns that he's tied their immortal souls together without her permission-- something she doesn't discover until after her father kills them and her husband sells both their souls to a Greek goddess for vengeance against him and makes them eternal Dark-Hunters sworn to protect

Processing for item :  0
item info :  {'title': 'Do it Afraid!', 'subtitle': 'Obeying God in the Face of Fear', 'authors': ['Joyce Meyer'], 'publisher': 'FaithWords', 'publishedDate': '2008-11-16', 'description': "Has Fear Got the Best of You? Everyone who has ever lived has known the torment of fear. Like all of us, you experience fear almost every time you move toward a closer relationship with God. Fear will try to push you back and is always ready to attack you through your thoughts. But you can live free from it! God's promise is that you will be able to overcome this powerful emotion. In this book, Joyce Meyer shows that even though fear will surely challenge you, the Holy Spirit can still help you walk in faith. Find out: -How to keep fear from controlling your life -How to move forward in spite of your fears -How God stays faithful regardless of what you're feeling -The one basic fear underneath all fears. You can act on God's Word and defeat this enemy. Now is the time to seiz

Processing for item :  0
item info :  {'title': "Jeff Herman's Guide to Book Publishers, Editors and Literary Agents 2004", 'subtitle': None, 'authors': ['Jeff Herman'], 'publisher': 'Writer', 'publishedDate': '2003', 'description': 'A guide to the names and specialities of American and Canadian publishers, editors, and literary agents includes information on the acquisition process and on choosing literary agents.', 'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': '0871162016'}, {'type': 'ISBN_13', 'identifier': '9780871162014'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': 934, 'printType': 'BOOK', 'categories': ['Authors and publishers'], 'averageRating': 5.0, 'ratingsCount': 1, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'http://books.google.com/books?id=4QPoGX45em4C&dq=current+bestseller+books&hl=&source=gbs_api'}
Processing for item :  1
item info :  {'title': "Jeff Herman's Guide to Book Publishers, Editors & Literary Agents 2007", 's

Processing for item :  0
item info :  {'title': 'After Strange Fruit', 'subtitle': 'Changing Literary Taste in Post-World War II Boston', 'authors': ['Pierre Albert Duhamel'], 'publisher': 'Trustees of Public Library of City of Boston', 'publishedDate': '1980-01-01', 'description': None, 'industryIdentifiers': [{'type': 'OTHER', 'identifier': 'UOM:39015004973528'}], 'readingModes': {'text': False, 'image': False}, 'pageCount': 118, 'printType': 'BOOK', 'categories': ['American literature'], 'averageRating': None, 'ratingsCount': None, 'maturityRating': 'NOT_MATURE', 'language': 'en', 'infoLink': 'http://books.google.com/books?id=aw3hAAAAMAAJ&dq=current+bestseller+books&hl=&source=gbs_api'}
Processing for item :  1
item info :  {'title': 'Restructuring the Professional Organization', 'subtitle': 'Accounting, Health Care and Law', 'authors': ['David Brock', 'C. R. Hinings', 'Michael Powell'], 'publisher': 'Routledge', 'publishedDate': '2012-09-10', 'description': 'In recent years the pro

Processing for item :  0
item info :  {'title': 'Summary & Analysis of Skin in the Game', 'subtitle': 'Hidden Asymmetries in Daily Life | A Guide to the Book by Nassim Nicholas Taleb', 'authors': ['ZIP Reads'], 'publisher': 'ZIP Reads', 'publishedDate': '101-01-01', 'description': "PLEASE NOTE: This is a summary and analysis of the book and not the original book. If you'd like to purchase the original book, please paste this link in your browser: https://amzn.to/2FXsP9l Nassim Nicholas Taleb challenges conventions about success, change, and equanimity in his bestselling book, Skin in the Game: The Hidden Asymmetries in Daily Life. What does this ZIP Reads Summary Include? Synopsis of the original bookDetailed Chapter-by-chapter summariesKey Takeaways & Analysis of each chapterIn-depth Editorial ReviewBackground on the author About the Original Book: In Skin in the Game Taleb tackles issues including risk, inequality, and fairness in society today. He argues that our current system is w

Processing for item :  0
item info :  {'title': 'Love Runs in', 'subtitle': None, 'authors': ['Barbara Cartland'], 'publisher': 'Createspace Independent Publishing Platform', 'publishedDate': '2015-05-08', 'description': 'When a dashing young gentleman runs into Wentmore Hall bleeding from his shoulder and crying, "Save me! Save me! If you cannot hide me, they will kill me!" the quick-witted beautiful young Novella Wentworth hides him in the secret passage that was used centuries ago by Catholic Priests escaping from the wrath of Queen Elizabeth and later the Royalists and the Cromwellians. The young man, Vale Chester, has been shot by Novella\'s neighbour, Lord Grimstone, who demands to search the house for his quarry. Novella\'s father, a General fighting in the Duke of Wellington\'s Army in Spain, never liked Lord Grimstone, so she is naturally suspicious of him and his motives. And sure enough she finds that Vale Chester is a Government agent who suspects Lord Grimstone to be the l

Processing for item :  0
item info :  {'title': "Gun Trader's Guide to Shotguns", 'subtitle': 'A Comprehensive, Fully Illustrated Reference for Modern Shotguns with Current Market Values', 'authors': ['Robert A. Sadowski'], 'publisher': 'Skyhorse', 'publishedDate': '2015-10-27', 'description': "Everything you need to know to buy, sell, and collect shotguns. Gun Trader’s Guide is the bestselling collectible firearms reference, having sold over two million copies in thirty-six editions. The guide includes prices for all types of firearms, but what if you are only interested in buying, selling, and collecting shotguns? Then Gun Trader’s Guide to Shotguns is the book for you! Featuring all your favorite shotguns from the original Gun Trader’s Guide, plus hundreds more, this is the only reference you'll ever need. Veteran editor and firearms enthusiast Robert A. Sadowski has compiled and cataloged discontinued and collectible shotguns from your favorite manufacturers. Complete with specs an

Processing for item :  0
item info :  {'title': 'Balloons Can Be Murder', 'subtitle': 'The Ninth Charlie Parker Mystery', 'authors': ['Connie Shelton'], 'publisher': 'CreateSpace', 'publishedDate': '2010-07-19', 'description': 'Rachael Fairfield is a balloon pilot out to set a world altitude record during the upcoming Albuquerque International Balloon Fiesta. She\'s received several threats, though, and wants to hire Charlie and Ron to keep the stalker away from her. It shouldn\'t be a problem, says Rachael, because she knows who is sending the notes-her own father. Her testimony sent him to prison years ago and now he\'s out. They agree to take the case, but Charlie begins to suspect that it\'s not such a simple assignment. Praise for the Charlie Parker mysteries: "Fans of Southwestern mysteries will find that Shelton\'s engaging story, likable heroine, and comfortable prose make this a good choice." - Library Journal "Charlie is a good detective and a pleasant companion to unravel a 

In [8]:
trending_books_data = pd.DataFrame.from_dict(trending_books,orient='columns')

In [9]:
trending_books_data.shape

(1197, 17)

In [10]:
trending_books_data.head(5)

Unnamed: 0,authors,averageRating,categories,description,industryIdentifiers,infoLink,isbn,language,maturityRating,pageCount,printType,publishedDate,publisher,ratingsCount,readingModes,subtitle,title
0,"[Viktor Mayer-Schönberger, Kenneth Cukier]",3.0,[Business & Economics],A revelatory exploration of the hottest trend ...,"[{'type': 'ISBN_13', 'identifier': '9780544002...",https://play.google.com/store/books/details?id...,9780544002937,en,NOT_MATURE,240.0,BOOK,2013-03-05,Houghton Mifflin Harcourt,8.0,"{'text': True, 'image': True}","A Revolution That Will Transform How We Live, ...",Big Data
1,[Delia Owens],4.0,[Fiction],#1 New York Times Bestseller A Reese Witherspo...,"[{'type': 'ISBN_13', 'identifier': '9780735219...",https://play.google.com/store/books/details?id...,9780735219113,en,NOT_MATURE,384.0,BOOK,2018-08-14,Penguin,83.0,"{'text': True, 'image': False}",,Where the Crawdads Sing
2,[David D. Burns],4.0,[Cognitive therapy.],"A guide to a drug-free cure for anxiety, guilt...","[{'type': 'ISBN_10', 'identifier': '0380718030...",http://books.google.com/books?id=SXI4unwowIEC&...,9780380718030,en,NOT_MATURE,466.0,BOOK,1992,,6.0,"{'text': False, 'image': False}",The New Mood Therapy,Feeling Good
3,[Steven Pinker],4.0,[Psychology],Presents a controversial history of violence w...,"[{'type': 'ISBN_13', 'identifier': '9780143122...",http://books.google.com/books?id=8-vYCwAAQBAJ&...,9780143122012,en,NOT_MATURE,802.0,BOOK,2012-09,Penguin Group USA,16.0,"{'text': False, 'image': False}",Why Violence Has Declined,The Better Angels of Our Nature
4,"[Perle Slavik Cowen, Sue Moorhead]",,[Medical],Current Issues in Nursing provides a forum for...,"[{'type': 'ISBN_13', 'identifier': '9780323293...",https://play.google.com/store/books/details?id...,9780323293198,en,NOT_MATURE,832.0,BOOK,2014-04-14,Elsevier Health Sciences,,"{'text': False, 'image': True}",,Current Issues In Nursing - E-Book


In [11]:
#Keep only selected columns
columns_to_drop = ['averageRating','industryIdentifiers','infoLink','maturityRating','pageCount','printType','publishedDate','ratingsCount','readingModes','subtitle']
trending_books_data.drop(columns=columns_to_drop,inplace=True)

In [12]:
trending_books_data.shape

(1197, 7)

In [13]:
trending_books_data.head(10)

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,"[Viktor Mayer-Schönberger, Kenneth Cukier]",[Business & Economics],A revelatory exploration of the hottest trend ...,9780544002937,en,Houghton Mifflin Harcourt,Big Data
1,[Delia Owens],[Fiction],#1 New York Times Bestseller A Reese Witherspo...,9780735219113,en,Penguin,Where the Crawdads Sing
2,[David D. Burns],[Cognitive therapy.],"A guide to a drug-free cure for anxiety, guilt...",9780380718030,en,,Feeling Good
3,[Steven Pinker],[Psychology],Presents a controversial history of violence w...,9780143122012,en,Penguin Group USA,The Better Angels of Our Nature
4,"[Perle Slavik Cowen, Sue Moorhead]",[Medical],Current Issues in Nursing provides a forum for...,9780323293198,en,Elsevier Health Sciences,Current Issues In Nursing - E-Book
5,[Robert H. Lane],[Medical],Neonatal-perinatal medicine has a strong histo...,9780323326735,en,Elsevier Health Sciences,"Current Controversies in Perinatology, An Issu..."
6,[Elisabeth Busch],,The Real Top 1000 Baby Names provides an in-de...,9781793035837,en,Independently Published,The Real Top 1000 Baby Names
7,"[David Camacho, Sang-Wook Kim, Bogdan Trawiński]",[Computers],This book consists of 20 chapters in which the...,9783319107745,en,Springer,New Trends in Computational Collective Intelli...
8,"[Sheryl Sandberg, Adam M. Grant]",[Biography & Autobiography],"""The author's experience with grief after the ...",9781524732684,en,Knopf,Option B
9,[Elizabeth Gilbert],[Fiction],From the # 1 New York Times bestselling author...,9780698408326,en,Penguin,City of Girls


In [14]:
trending_books_data.isnull().sum()

authors        215
categories     136
description    277
isbn           337
language         0
publisher      324
title            0
dtype: int64

In [15]:
trending_books_data.dropna(inplace=True)

In [16]:
trending_books_data.head(10)

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,"[Viktor Mayer-Schönberger, Kenneth Cukier]",[Business & Economics],A revelatory exploration of the hottest trend ...,9780544002937,en,Houghton Mifflin Harcourt,Big Data
1,[Delia Owens],[Fiction],#1 New York Times Bestseller A Reese Witherspo...,9780735219113,en,Penguin,Where the Crawdads Sing
3,[Steven Pinker],[Psychology],Presents a controversial history of violence w...,9780143122012,en,Penguin Group USA,The Better Angels of Our Nature
4,"[Perle Slavik Cowen, Sue Moorhead]",[Medical],Current Issues in Nursing provides a forum for...,9780323293198,en,Elsevier Health Sciences,Current Issues In Nursing - E-Book
5,[Robert H. Lane],[Medical],Neonatal-perinatal medicine has a strong histo...,9780323326735,en,Elsevier Health Sciences,"Current Controversies in Perinatology, An Issu..."
7,"[David Camacho, Sang-Wook Kim, Bogdan Trawiński]",[Computers],This book consists of 20 chapters in which the...,9783319107745,en,Springer,New Trends in Computational Collective Intelli...
8,"[Sheryl Sandberg, Adam M. Grant]",[Biography & Autobiography],"""The author's experience with grief after the ...",9781524732684,en,Knopf,Option B
9,[Elizabeth Gilbert],[Fiction],From the # 1 New York Times bestselling author...,9780698408326,en,Penguin,City of Girls
10,"[Chelsea Rousso, Nancy Kaplan Ostroff]",[Business & Economics],Fashion Forward demystifies the exciting caree...,9781501328275,en,Bloomsbury Publishing USA,Fashion Forward
11,[Alan Northcott],[Business & Economics],Money managers have traditionally used hedge f...,9781601380005,en,Atlantic Publishing Company,The Hedge Funds Book


In [17]:
trending_books_data['authors'] = trending_books_data['authors'].apply(', '.join)

In [18]:
trending_books_data.head(10)

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,"Viktor Mayer-Schönberger, Kenneth Cukier",[Business & Economics],A revelatory exploration of the hottest trend ...,9780544002937,en,Houghton Mifflin Harcourt,Big Data
1,Delia Owens,[Fiction],#1 New York Times Bestseller A Reese Witherspo...,9780735219113,en,Penguin,Where the Crawdads Sing
3,Steven Pinker,[Psychology],Presents a controversial history of violence w...,9780143122012,en,Penguin Group USA,The Better Angels of Our Nature
4,"Perle Slavik Cowen, Sue Moorhead",[Medical],Current Issues in Nursing provides a forum for...,9780323293198,en,Elsevier Health Sciences,Current Issues In Nursing - E-Book
5,Robert H. Lane,[Medical],Neonatal-perinatal medicine has a strong histo...,9780323326735,en,Elsevier Health Sciences,"Current Controversies in Perinatology, An Issu..."
7,"David Camacho, Sang-Wook Kim, Bogdan Trawiński",[Computers],This book consists of 20 chapters in which the...,9783319107745,en,Springer,New Trends in Computational Collective Intelli...
8,"Sheryl Sandberg, Adam M. Grant",[Biography & Autobiography],"""The author's experience with grief after the ...",9781524732684,en,Knopf,Option B
9,Elizabeth Gilbert,[Fiction],From the # 1 New York Times bestselling author...,9780698408326,en,Penguin,City of Girls
10,"Chelsea Rousso, Nancy Kaplan Ostroff",[Business & Economics],Fashion Forward demystifies the exciting caree...,9781501328275,en,Bloomsbury Publishing USA,Fashion Forward
11,Alan Northcott,[Business & Economics],Money managers have traditionally used hedge f...,9781601380005,en,Atlantic Publishing Company,The Hedge Funds Book


In [19]:
trending_books_data['categories'] = trending_books_data['categories'].apply(', '.join)

In [20]:
trending_books_data.head(10)

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,"Viktor Mayer-Schönberger, Kenneth Cukier",Business & Economics,A revelatory exploration of the hottest trend ...,9780544002937,en,Houghton Mifflin Harcourt,Big Data
1,Delia Owens,Fiction,#1 New York Times Bestseller A Reese Witherspo...,9780735219113,en,Penguin,Where the Crawdads Sing
3,Steven Pinker,Psychology,Presents a controversial history of violence w...,9780143122012,en,Penguin Group USA,The Better Angels of Our Nature
4,"Perle Slavik Cowen, Sue Moorhead",Medical,Current Issues in Nursing provides a forum for...,9780323293198,en,Elsevier Health Sciences,Current Issues In Nursing - E-Book
5,Robert H. Lane,Medical,Neonatal-perinatal medicine has a strong histo...,9780323326735,en,Elsevier Health Sciences,"Current Controversies in Perinatology, An Issu..."
7,"David Camacho, Sang-Wook Kim, Bogdan Trawiński",Computers,This book consists of 20 chapters in which the...,9783319107745,en,Springer,New Trends in Computational Collective Intelli...
8,"Sheryl Sandberg, Adam M. Grant",Biography & Autobiography,"""The author's experience with grief after the ...",9781524732684,en,Knopf,Option B
9,Elizabeth Gilbert,Fiction,From the # 1 New York Times bestselling author...,9780698408326,en,Penguin,City of Girls
10,"Chelsea Rousso, Nancy Kaplan Ostroff",Business & Economics,Fashion Forward demystifies the exciting caree...,9781501328275,en,Bloomsbury Publishing USA,Fashion Forward
11,Alan Northcott,Business & Economics,Money managers have traditionally used hedge f...,9781601380005,en,Atlantic Publishing Company,The Hedge Funds Book


In [21]:
trending_books_data.shape

(743, 7)

In [22]:
trending_books_data.to_pickle('trending_books_data.pkl')

In [23]:
trending_books_data.dtypes

authors        object
categories     object
description    object
isbn           object
language       object
publisher      object
title          object
dtype: object

In [24]:
fulldata = pd.read_pickle('full_metadata.pkl')

In [25]:
fulldata.shape

(51487, 15)

In [26]:
fulldata.head(10)

Unnamed: 0,isbn,user_id,user_name,user_ratings,user_review_text,authors,categories,description,language,pageCount,printType,publishedYear,publisher,title,weighted_avg
0,9780321948540,779608,Erin,4.0,"There are a lot of great tips in this volume, ...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
1,9780321948540,753824,Rolf Häsänen,4.0,Not as many useful tips as volume1 unless you ...,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
2,9780321948540,35798331,Valery,4.0,"Not as helpful as the first, but does go into ...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
3,9780321948540,2100772,Bruce,1.0,"OMG. Surely the absolute worst, amature photo ...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
4,9780321948540,1794100,Icepick,5.0,"The format may not be for everybody, but I rea...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
5,9780321948540,3425034,Deb,4.0,Lots of great tips! This writer is engaging an...,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
6,9780321948540,29448721,Mark,4.0,For me it would have been a 3 because of secti...,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
7,9780321948540,11916175,Angela Benthin,4.0,Very informational,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
8,9780321948540,8176218,Cedric Jean-marie,0.0,I originally bought this trilogy for my wife w...,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
9,9780321948540,289983,Candice,4.0,I'd actually give this 4.5 out of 5 stars for ...,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036


In [27]:
fulldata.dtypes

isbn                 object
user_id              object
user_name            object
user_ratings        float64
user_review_text     object
authors              object
categories           object
description          object
language             object
pageCount           float64
printType            object
publishedYear       float64
publisher            object
title                object
weighted_avg        float64
dtype: object

In [28]:
booksdata = fulldata[['authors','categories','description','isbn','language','publisher','title']]

In [29]:
booksdata.shape

(51487, 7)

In [30]:
booksdata.head()

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
1,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
2,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
3,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
4,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book


In [31]:
booksdata.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [32]:
booksdata.shape

(2279, 7)

In [33]:
booksdata.head(10)

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
30,Scott Kelby,Photography,"Scott Kelby, author of the top-selling digital...",9780133856934,en,Peachpit Press,"The Digital Photography Book, Part 5"
60,Scott Kelby,Computers,Offers advice for shooting different types of ...,9780133856880,en,Pearson Education,The Digital Photography Book
90,Susan Sontag,Photography,Winner of the National Book Critics' Circle Aw...,9781429957113,en,"Farrar, Straus and Giroux",On Photography
169,Scott Kelby,Photography,What could top the #1 best-selling photography...,9780134385273,en,Peachpit Press,The Best of The Digital Photography Book Series
246,Scott Kelby,Photography,"Scott Kelby, author of The Digital Photography...",9780133510720,en,Peachpit Press,The Digital Photography Book
276,Liz Wells,Photography,This seminal text for photography students ide...,9780415307031,en,Psychology Press,Photography
306,Michael Freeman,Photography,Completely updated and revised to reflect tech...,9781579907594,en,Lark Books,The Complete Guide to Digital Photography
343,Edward M. Robinson,Law,Crime Scene Photography is a book wrought from...,9780080476926,en,Elsevier,Crime Scene Photography
373,Scott Kelby,Photography,Includes: The Digital Photography Book The Dig...,9780321686008,en,Pearson Education,"Scott Kelby's Digital Photography Books, Volum..."


In [34]:
booksdata_trendingdata_combined = pd.concat([booksdata,trending_books_data])

In [35]:
booksdata_trendingdata_combined.shape

(3022, 7)

In [36]:
booksdata_trendingdata_combined.head()

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
30,Scott Kelby,Photography,"Scott Kelby, author of the top-selling digital...",9780133856934,en,Peachpit Press,"The Digital Photography Book, Part 5"
60,Scott Kelby,Computers,Offers advice for shooting different types of ...,9780133856880,en,Pearson Education,The Digital Photography Book
90,Susan Sontag,Photography,Winner of the National Book Critics' Circle Aw...,9781429957113,en,"Farrar, Straus and Giroux",On Photography
169,Scott Kelby,Photography,What could top the #1 best-selling photography...,9780134385273,en,Peachpit Press,The Best of The Digital Photography Book Series


In [37]:
booksdata_trendingdata_combined.drop_duplicates(inplace=True)

In [38]:
booksdata_trendingdata_combined.shape

(2905, 7)

In [39]:
#Get list of all isbns
fullisbns = booksdata_trendingdata_combined[['isbn']]

In [40]:
fullisbns.drop_duplicates(inplace=True)
fullisbns.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,isbn
0,9780321948540
30,9780133856934
60,9780133856880
90,9781429957113
169,9780134385273


In [41]:
fullisbns_list = fullisbns['isbn'].tolist()

In [42]:
fullisbnsfile = open("all_isbn_list.pkl","wb")
pickle.dump(fullisbns_list,fullisbnsfile)
fullisbnsfile.close()

In [43]:
#Get list of all isbns
trendingisbns = trending_books_data[['isbn']]

In [44]:
trendingisbns.drop_duplicates(inplace=True)
trendingisbns_list = trendingisbns['isbn'].tolist()
trendingisbnsfile = open("all_isbn_list.pkl","wb")
pickle.dump(trendingisbns_list,trendingisbnsfile)
trendingisbnsfile.close()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [45]:
#Get list of all isbns and book titles
combined_isbn_title_df = booksdata_trendingdata_combined[['isbn','title']]

In [46]:
combined_isbn_title_df.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [47]:
combined_isbn_title_df.head()

Unnamed: 0,isbn,title
0,9780321948540,The Digital Photography Book
30,9780133856934,"The Digital Photography Book, Part 5"
60,9780133856880,The Digital Photography Book
90,9781429957113,On Photography
169,9780134385273,The Best of The Digital Photography Book Series


In [48]:
combined_isbn_title_dict = pd.Series(combined_isbn_title_df.title.values,index=combined_isbn_title_df.isbn).to_dict()

In [49]:
combinedisbntitlefile = open("combined_isbn_title_dict.pkl","wb")
pickle.dump(combined_isbn_title_dict,combinedisbntitlefile)
combinedisbntitlefile.close()

In [50]:
booksdata_trendingdata_combined.to_pickle('booksdata_trendingdata_combined.pkl')

In [51]:
combineddata = pd.read_pickle('booksdata_trendingdata_combined.pkl')

In [52]:
combineddata.shape

(2905, 7)

In [53]:
combineddata.head()

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
30,Scott Kelby,Photography,"Scott Kelby, author of the top-selling digital...",9780133856934,en,Peachpit Press,"The Digital Photography Book, Part 5"
60,Scott Kelby,Computers,Offers advice for shooting different types of ...,9780133856880,en,Pearson Education,The Digital Photography Book
90,Susan Sontag,Photography,Winner of the National Book Critics' Circle Aw...,9781429957113,en,"Farrar, Straus and Giroux",On Photography
169,Scott Kelby,Photography,What could top the #1 best-selling photography...,9780134385273,en,Peachpit Press,The Best of The Digital Photography Book Series


In [54]:
combineddata.reset_index(drop=True,inplace=True)

In [55]:
combineddata.head()

Unnamed: 0,authors,categories,description,isbn,language,publisher,title
0,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",9780321948540,en,Pearson Education,The Digital Photography Book
1,Scott Kelby,Photography,"Scott Kelby, author of the top-selling digital...",9780133856934,en,Peachpit Press,"The Digital Photography Book, Part 5"
2,Scott Kelby,Computers,Offers advice for shooting different types of ...,9780133856880,en,Pearson Education,The Digital Photography Book
3,Susan Sontag,Photography,Winner of the National Book Critics' Circle Aw...,9781429957113,en,"Farrar, Straus and Giroux",On Photography
4,Scott Kelby,Photography,What could top the #1 best-selling photography...,9780134385273,en,Peachpit Press,The Best of The Digital Photography Book Series


In [56]:
fulldata = pd.read_pickle('full_metadata.pkl')

In [57]:
fulldata.shape

(51487, 15)

In [58]:
fulldata.head()

Unnamed: 0,isbn,user_id,user_name,user_ratings,user_review_text,authors,categories,description,language,pageCount,printType,publishedYear,publisher,title,weighted_avg
0,9780321948540,779608,Erin,4.0,"There are a lot of great tips in this volume, ...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
1,9780321948540,753824,Rolf Häsänen,4.0,Not as many useful tips as volume1 unless you ...,Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
2,9780321948540,35798331,Valery,4.0,"Not as helpful as the first, but does go into ...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
3,9780321948540,2100772,Bruce,1.0,"OMG. Surely the absolute worst, amature photo ...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036
4,9780321948540,1794100,Icepick,5.0,"The format may not be for everybody, but I rea...",Scott Kelby,Computers,"Furnishes an overview of digital photography, ...",en,236.0,BOOK,2013.0,Pearson Education,The Digital Photography Book,4.128036


In [59]:
tfidfv = TfidfVectorizer(analyzer='word',stop_words=set(stopwords.words('english')))

In [60]:
vocab = tfidfv.fit_transform(combineddata['description'])
similarity = linear_kernel(vocab, vocab)
isbns = pd.Series(combineddata['isbn']).to_dict()
isbns = dict((v,k) for k,v in isbns.items())

In [61]:
def trending_similar(isbn):
    bookid = isbns[isbn]
    bookslist = list(enumerate(similarity[bookid]))
    bookslist.sort(key=lambda x: x[1], reverse=True)
    top10TrendingBooks = bookslist[1:11]
    return top10TrendingBooks

In [62]:
isbn_userid = fulldata[['isbn','user_id']]

In [63]:
isbn_userid.shape

(51487, 2)

In [64]:
isbn_userid.head()

Unnamed: 0,isbn,user_id
0,9780321948540,779608
1,9780321948540,753824
2,9780321948540,35798331
3,9780321948540,2100772
4,9780321948540,1794100


In [65]:
user_books_read_mapping = {}
for index, row in isbn_userid.iterrows():
    isbn = row['isbn']
    userid = row['user_id']
    if userid in user_books_read_mapping.keys():
        user_books_read_mapping[userid].append(isbn)
    else:
        bookslist = []
        user_books_read_mapping[userid] = bookslist
        user_books_read_mapping[userid].append(isbn)

In [66]:
user_books_sorted = sorted(user_books_read_mapping.items(), key=lambda x : len(x[1]), reverse= True)

In [67]:
user_books_sorted_json = {}
for i in user_books_sorted:
    user_books_sorted_json[str(i[0])] = i[1]

ubfile = open("user_books_sorted.pkl","wb")
pickle.dump(user_books_sorted_json,ubfile)
ubfile.close()

In [68]:
usersbooks = pickle.load(open( "user_books_sorted.pkl", "rb" ) )

In [69]:
isbnlookup = dict((v,k) for k,v in isbns.items())

In [70]:
def trendingTop10(user):
    booklist = usersbooks[user]
    ratingsbooks = []
    recommendedbooks = []
    for book in booklist:
        newbooks = trending_similar(book)
        ratingsbooks = ratingsbooks + newbooks
    ratingsbooks = list(set(ratingsbooks))
    for item in ratingsbooks:
        try :
            bookisbn = isbnlookup[item[0]]
            if bookisbn in trendingisbns_list:
                recommendedbooks.append([item[1],bookisbn])
        except:
            continue
    recommendedbooks.sort(key=lambda x: x[1], reverse=True)
    return recommendedbooks[:10]

In [71]:
#get isbn from recommended ratings
def top10isbn(recommendations):
    top10_isbn = []
    for item in recommendations:
        item_isbn = item[1]
        top10_isbn.append(item_isbn)
    return top10_isbn

In [72]:
trendingrecommended = trendingTop10('269235')
isbn_recommendations = top10isbn(trendingrecommended)
print("Recommendations based on trending books : ")
for isbnitem in isbn_recommendations :
    print("ISBN :", str(isbnitem), "Title :", str(combined_isbn_title_dict[isbnitem]))
    #print("ISBN :", str(isbnitem))

Recommendations based on trending books : 
ISBN : 9788186775097 Title : The Five Love Languages
ISBN : 9788175110625 Title : Answer is Blowing in the Wind
ISBN : 9788175110625 Title : Answer is Blowing in the Wind
ISBN : 9783319326931 Title : Human Dignity of the Vulnerable in the Age of Rights
ISBN : 9783319326931 Title : Human Dignity of the Vulnerable in the Age of Rights
ISBN : 9783319012223 Title : Most-Cited Scholars in Criminology and Criminal Justice, 1986-2010
ISBN : 9781948584081 Title : The Catalain Book of Secrets
ISBN : 9781943562183 Title : Rescuing Wendy
ISBN : 9781943562183 Title : Rescuing Wendy
ISBN : 9781895431162 Title : New World Order & Third World
