In [1]:
import pandas as pd

BOOKS_FILE = 'books.jsonlines'

books = pd.read_json(BOOKS_FILE, lines=True, nrows=100)

In [2]:
import re

def clean_description(description):
    return re.sub("[^\w ]", "", description)

filtered_books = books.fillna('')
clean_description_col = filtered_books['description'].apply(clean_description)
filtered_books = filtered_books.assign(clean_description = clean_description_col)

In [3]:
# Utilities

def get_recomendations(recomendations_df, books, book_id):
    similarity_indexes = recomendations_df[book_id].sort_values(ascending=False)
    book_ids_recomended = similarity_indexes.index[:10]

    return books[(books['book_id'].isin(book_ids_recomended))] \
        .reindex(book_ids_recomended)[['book_id', 'title', 'titleEnglish']] \
        .assign(similarity_index = similarity_indexes)

def get_book_title_by_id(books, book_id):
    return books[(books['book_id'] == book_id)]['title'].iloc[0]

CORPUS = filtered_books['clean_description']
BOOKS = filtered_books
BOOK_ID = 1

In [4]:
# Recomendation based on the count words

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def create_count_word_recomendations(corpus):
    cv = CountVectorizer()
    word_count_matrix = cv.fit_transform(corpus)
    word_count_documents_similarity = cosine_similarity(word_count_matrix, dense_output = True)
    word_count_recomendations_df = pd.DataFrame(word_count_documents_similarity)
    return word_count_recomendations_df

def count_words_recomendation_demo():
    book_title = get_book_title_by_id(BOOKS, BOOK_ID)
    print(f"Book title: '{book_title}'")

    count_word_recomendations_df = create_count_word_recomendations(CORPUS)
    recomendations = get_recomendations(count_word_recomendations_df, BOOKS, BOOK_ID)
    display(recomendations)

count_words_recomendation_demo()

Book title: 'Серія книжок про Джуді Муді (комплект із 10 книг)'


Unnamed: 0,book_id,title,titleEnglish,similarity_index
1,1,Серія книжок про Джуді Муді (комплект із 10 книг),,1.0
45,45,Кольори наших споминів,Les couleurs de nos souvenirs,0.213998
2,2,Три новеллы (комплект из 2 книг),Och Varje Morgon Blir Vagen. Hem Langre och La...,0.203489
39,39,Во имя Гуччи. Мемуары дочери,In the Name of Gucci. A Memoir,0.193756
0,0,"Їсти, молитися, кохати","Eat, Pray, Love: One Woman's Search for Everyt...",0.191892
18,18,"Невероятные женщины, которые изменили искусств...",Broad Strokes. 15 Women Who Made Art and Made ...,0.189679
15,15,Календар на 2020. Лаунж. Шупляк. Український к...,,0.187059
13,13,"Вискочки. Uber, Airbnb та битва за Кремнієву д...","The Upstarts.How Uber, Airbnb, and the Killer ...",0.185715
20,20,Література! Мандрівка світом книжок,Literatur!,0.177986
21,21,Школа литературного и сценарного мастерства. О...,,0.173252


In [5]:
# Recomendation based on the tfidf

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def create_tfidf_recomendations(corpus):
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(corpus)
    tfidf_similarity_matrix = cosine_similarity(tfidf_matrix)
    tfidf_recomendation_df = pd.DataFrame(tfidf_similarity_matrix)
    return tfidf_recomendation_df

def tfidf_recomendation_demo():
    book_title = get_book_title_by_id(BOOKS, BOOK_ID)
    print(f"Book title: '{book_title}'")
    
    tfidf_recomendations_df = create_tfidf_recomendations(CORPUS)
    recomendations = get_recomendations(tfidf_recomendations_df, BOOKS, BOOK_ID)
    display(recomendations)

tfidf_recomendation_demo()

Book title: 'Серія книжок про Джуді Муді (комплект із 10 книг)'


Unnamed: 0,book_id,title,titleEnglish,similarity_index
1,1,Серія книжок про Джуді Муді (комплект із 10 книг),,1.0
2,2,Три новеллы (комплект из 2 книг),Och Varje Morgon Blir Vagen. Hem Langre och La...,0.080349
45,45,Кольори наших споминів,Les couleurs de nos souvenirs,0.073309
15,15,Календар на 2020. Лаунж. Шупляк. Український к...,,0.071486
0,0,"Їсти, молитися, кохати","Eat, Pray, Love: One Woman's Search for Everyt...",0.067499
20,20,Література! Мандрівка світом книжок,Literatur!,0.06545
57,57,От идеи до злодея. Учимся создавать истории вм...,Pixar Storytelling. Rules for Effective Storyt...,0.065398
18,18,"Невероятные женщины, которые изменили искусств...",Broad Strokes. 15 Women Who Made Art and Made ...,0.06474
76,76,Світло у ліхтарі. Календар очікування Різдва в...,,0.064017
7,7,Развивай свой мозг. Как перенастроить разум и ...,Evolve Your Brain: The Science of Changing You...,0.061759
