In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import math
import sklearn
from plotly import graph_objects as go
from plotly import express as px
import seaborn as sns
from jupyterthemes import jtplot
jtplot.style()

datapath = ""
books = pd.read_csv(datapath + "books.csv")
toRead = pd.read_csv(datapath + "to_read.csv")
tags = pd.read_csv(datapath + "tags.csv")
bookTags = pd.read_csv(datapath + "book_tags.csv").rename(columns = {'goodreads_book_id': 'book_id'}, inplace = False)
ratings = pd.read_csv(datapath + "ratings.csv")


In [358]:
from PIL import Image
from IPython.display import HTML
import requests

def Display_Recommendation(rec, bookId, n=10):
    rec = np.array(rec)
    ind = np.argsort(rec[:,2])
    srec = np.array(rec)[ind][::-1]
    srec = srec[:n]
    html = ''
    print("Based on ", books[books["book_id"] ==bookId]["original_title"].values[0], " we recommend : ")
    for i in srec:
        book = books[books["book_id"] == int(i[0])]
        url = book.image_url.values
        buf = '<div style="float: left; margin-right:30px; width:250px; height:350px"><img  src="#URL#" width=120/><figcaption>#TITLE#</br>#AUTHOR#</br>#SCORE#</br>ID : #ID#</figcaption></div>'
        if url.size > 0:
            buf = buf.replace("#URL#", url[0], 1)
            buf = buf.replace("#TITLE#", i[1], 1)
            buf = buf.replace("#AUTHOR#", book.authors.values[0], 1)
            buf = buf.replace("#SCORE#", i[2][:5], 1)
            buf = buf.replace("#ID#", i[0], 1)
            html += buf
            im = Image.open(requests.get(url[0], stream=True).raw)
        
    display(HTML(html))

In [205]:
BookSimMatrix = np.zeros((len(books.values), len(books.values), 3))
BookSimMatrix.fill(-2)

# Tag based recommendation

In [163]:
booksTagLists = {}
for i in bookTags.values:
    if i[0] in booksTagLists: booksTagLists[i[0]].append([i[1], i[2]])
    else:                     booksTagLists[i[0]] = [[i[1], i[2]]]
        
bookList = {}
for i in books.values:
    bookList[i[1]] = i[0]

In [210]:
from sklearn import preprocessing
def Tags2Vec(tagList):
    ret = np.zeros(len(tags))
    for i in tagList:
        ret[i[0]] = 1
    return ret

def Tags_similarity(bookId1, bookId2):
    i = bookList[bookId1]-1
    j = bookList[bookId2]-1
    ret = BookSimMatrix[i, j, 0]
    if ret == -2:
        a = booksTagLists[bookId1]
        b = booksTagLists[bookId2]
        ret = cosine_similarity([Tags2Vec(a)], [Tags2Vec(b)])[0][0]
        BookSimMatrix[i, j, 0] = ret
        BookSimMatrix[j, i, 0] = ret
    return ret

In [295]:
from sklearn.metrics.pairwise import cosine_similarity
selected = 88077
rec = []
for b in booksTagLists:
    sim = Tags_similarity(selected, b)
    if sim > 0.65:
        rec.append([b, books[books["book_id"] == b]["original_title"].values[0], sim])

In [345]:
Display_Recommendation(rec, selected)

(9947, 3)
Based on  Der Zauberberg  we recommend : 


# Author based recommendation

In [102]:
AuthorList = {}
for i in books["authors"]:
    authors = i.split(", ")
    for a in authors:
        AuthorList[a] = len(AuthorList)

In [386]:
def Auth2Vec(authors):
    ret = np.zeros(len(AuthorList))
    for i in authors:
        ret[AuthorList[i]] = 1
    return ret

def Authors_similarity(bookId1, bookId2, authList1, authList2):
    i = bookList[bookId1]-1
    j = bookList[bookId2]-1
    ret = BookSimMatrix[i, j, 1]
    if ret == -2:
        a = authList1
        b = authList2
        ret = cosine_similarity([Auth2Vec(a)], [Auth2Vec(b)])[0][0]
        BookSimMatrix[i, j, 1] = ret
        BookSimMatrix[j, i, 1] = ret
    return ret

def Authors_similarity2(authVec, authList):
    a = authList
    ret = cosine_similarity([authVec], [Auth2Vec(a)])[0][0]
    return ret

In [439]:
from sklearn.metrics.pairwise import cosine_similarity
selected = 88077
rec = []
sb = books.loc[bookList[selected]-1].authors.split(", ")
for index, b in books.iterrows():
    i = b.authors.split(", ")
    sim = Authors_similarity(selected, b.book_id, sb, i)
    if sim > 0.45:
        rec.append([b.book_id, b.original_title, sim])

In [440]:
Display_Recommendation(rec, selected, 6)

Based on  Der Zauberberg  we recommend : 


In [589]:
from sklearn.metrics.pairwise import cosine_similarity
selected = 1
rec = []
sb = ["Thomas Mann"]
for index, b in books.iterrows():
    i = b.authors.split(", ")
    sim = Authors_similarity(sb, i)
    
    if sim > 0.65:
        rec.append([b.book_id, b.original_title, sim])

rec = np.array(rec)
ind=np.argsort(rec[:,1])
rec2 = np.array(rec)[ind][::-1]
for i in rec2:
    print(i[0],"  ", i[1], " : ", i[2])

88077    Der Zauberberg  :  1.0
53064    Der Tod in Venedig  :  0.7071067811865475
343    Das Parfum. Die Geschichte eines Mörders  :  0.7071067811865475
80890    Buddenbrooks: Verfall einer Familie  :  1.0


# Ratings based recommendation

In [115]:
RatingMatrix = {}
for index, r in ratings.iterrows():
    if r.book_id not in RatingMatrix: RatingMatrix[r.book_id] = [[r.user_id, r.rating]]
    else: RatingMatrix[r.book_id].append([r.user_id, r.rating])
    

In [116]:
def Rating2Vec(UserRatings):
    ret = np.zeros(len(ratings.user_id.values))
    for i in UserRatings:
        ret[i[0]] = i[1]
    return ret

def Ratings_similarity(ratings1, ratings2):
    return cosine_similarity([Rating2Vec(ratings1)], [Rating2Vec(ratings2)])[0][0]

# Mixed recommendation

In [279]:
def Recommend_with_book(bookId, w=[0.5, 0.5]):
    rec = []
    sa = books[books["book_id"] == bookId].authors.values[0].split(", ")
    for index, b in books.iterrows():
        a = b.authors.split(", ")
        
        simT = Tags_similarity(bookId, b.book_id)
        simA = Authors_similarity(bookId, b.book_id, sa, a)
        
        sim = (w[0] * simT + w[1] * simA)
        
        if sim > 0.05:
            rec.append([b.book_id, b.original_title, sim])
            
    return rec

In [432]:
bookId = 1
rec = Recommend_with_book(bookId)

In [433]:
Display_Recommendation(rec, bookId, 10)

Based on  Harry Potter and the Half-Blood Prince  we recommend : 


# Recommend to user

In [441]:
class User:
    Authors = np.zeros(len(AuthorList))
    Books = []
    
    def have_read(self, bookId):
        book = books.loc[bookList[bookId]-1]
        self.Authors += Auth2Vec(book.authors.split(', '))
        self.Books.append(bookId)
        
    def recommend_by_authors(self):
        rec = []
        for index, b in books.iterrows():
            i = b.authors.split(", ")
            sim = Authors_similarity(self.Authors, i)
            if sim > 0.05:
                rec.append([b.book_id, b.original_title, sim])
        return rec
    
    def recommend_by_books(self, w = [0.5, 0.5]):
        rec = []
        booksBuf = []
        for b in self.Books:
            booksBuf.append([b, books.loc[bookList[b]-1].authors.split(", ")])
            
        for index, b in books.iterrows():
            if b.book_id not in self.Books:
                a = b.authors.split(", ")

                simT = 0
                simA = 0
                for i in booksBuf:
                    simT += Tags_similarity(i[0], b.book_id)
                    simA += Authors_similarity(i[0], b.book_id, i[1], a)
                simT /= len(booksBuf)
                simA /= len(booksBuf)
                sim = (w[0] * simT + w[1] * simA)

                if sim > 0.05:
                    rec.append([b.book_id, b.original_title, sim])

        return rec


In [444]:
U = User()

In [445]:
U.have_read(22911)
U.have_read(80890)
U.have_read(333538)
U.have_read(17690)

In [448]:
Display_Recommendation(U.recommend_by_books([0.8, 0.2]), 1, 9)

Based on  Harry Potter and the Half-Blood Prince  we recommend : 
