In [58]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import re

class Books:
    def __init__(self, global_path):
        self.objects = pd.read_csv(global_path+ "/books.csv")
    
    def get_book_id_by_title(self, title):
        return self.objects[self.objects.title.str.contains(title, flags=re.IGNORECASE)]
        
    def get_books(self):
        return list(self.objects.T.to_dict().values())

    def get_book_by_id(self, book_id):
        return self.objects.iloc[book_id-1]
        
class Tags:
    def __init__(self, global_path):
        self.objects = pd.read_csv(global_path+ "/tags.csv")

class BookTags:
    def __init__(self, global_path):
        self.objects = pd.read_csv(global_path+"/book_tags.csv")
        self._books = Books(global_path)
        self._tags = Tags(global_path)
        self.objects = pd.merge(self.objects, self._books.objects[["goodreads_book_id","book_id"]], on="goodreads_book_id", how="right")

        book_ids = self.objects.book_id.unique().shape[0]
        tag_ids = self.objects.tag_id.unique().shape[0]
        
        self.matrix = np.zeros((book_ids, tag_ids))
        for row in self.objects.itertuples():
            self.matrix[row.book_id-1, row.tag_id-1] = 1
    
    def get_book_similarities(self, book):
        book_id = book.book_id
        book_row = self.matrix[book_id-1]
        similarities_to_book = cosine_similarity([book_row],self.matrix)[0]
        
        return similarities_to_book



In [59]:
goodbooks_dataset_path = "dataset/"
books = Books(goodbooks_dataset_path)
tags = Tags(goodbooks_dataset_path)
book_tags = BookTags(goodbooks_dataset_path)

In [24]:
while True:
    title = input("Title: ")
    print("")
    matches = books.get_book_id_by_title(title)
    [print(match.title,match.book_id) for match in matches.itertuples()]
    print("")
    if input("Encontraste tu libro? (y/n)") == "y":
        print("")
        break


Demian. Die Geschichte von Emil Sinclairs Jugend 2096




In [25]:
book_id = int(input("Ingresa el id del libro:"))
print("")
book = books.get_book_by_id(book_id)
book_similarities = book_tags.get_book_similarities(book)
print("Nuestro libro es", book["title"],"...")
print("Los mas parecidos son:\n")
[print(books.objects.iloc[i]["title"], "-", books.objects.iloc[i]["authors"], "("+str(int(book_similarities[i]*100))+"%)") for i in book_similarities.argsort()[::-1][1:10]]


Nuestro libro es Demian. Die Geschichte von Emil Sinclairs Jugend ...
Los mas parecidos son:

Steppenwolf - Hermann Hesse, Basil Creighton (72%)
Narcissus and Goldmund - Hermann Hesse, Ursule Molinaro (68%)
Siddhartha - Hermann Hesse, Hilda Rosner (64%)
The Metamorphosis - Franz Kafka, Stanley Corngold (62%)
The Stranger - Albert Camus, Matthew    Ward (62%)
Perfume: The Story of a Murderer - Patrick Süskind, John E. Woods (59%)
Death in Venice - Thomas Mann, Michael Henry Heim, Michael Cunningham (58%)
All Quiet on the Western Front - Erich Maria Remarque, A.W. Wheen (56%)
The Glass Bead Game - Hermann Hesse, Richard Winston, Clara Winston, Theodore Ziolkowski (56%)


[None, None, None, None, None, None, None, None, None]

In [28]:
import json
books_as_json = books.get_books()
json.dumps(books_as_json)

In [60]:
books.get_book_id_by_title("sidd")

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url
179,180,52036,52036,4840290,972,553208845,9780553000000.0,"Hermann Hesse, Hilda Rosner",1922.0,Siddhartha,...,372099,418653,11518,10229,25529,83698,138837,160360,https://images.gr-assets.com/books/1428715580m...,https://images.gr-assets.com/books/1428715580s...


In [63]:
books.get_book_by_id(180)

book_id                                                                    180
goodreads_book_id                                                        52036
best_book_id                                                             52036
work_id                                                                4840290
books_count                                                                972
isbn                                                                 553208845
isbn13                                                         9780553208850.0
authors                                            Hermann Hesse, Hilda Rosner
original_publication_year                                               1922.0
original_title                                                      Siddhartha
title                                                               Siddhartha
language_code                                                              eng
average_rating                                      