In [1]:
import pandas as pd

BOOKS_FILE = 'books.jsonlines'
# Maximum number of catalogue rows to load. Later cells build a dense
# books x books cosine-similarity matrix, so memory grows quadratically
# with this value; raise it with care.
BOOKS_LIMIT = 10000

# lines=True: the file holds one JSON record per line; nrows caps how many are read.
books = pd.read_json(BOOKS_FILE, lines=True, nrows=BOOKS_LIMIT)

In [2]:
# Update rating

# JSON-lines file of reviews (one JSON record per line, hence lines=True below).
REVIEWS_FILE = 'reviews.jsonlines'
# No nrows cap is passed here, so the whole reviews file is read.
reviews = pd.read_json(REVIEWS_FILE, lines=True)

def update_rating(reviews_df, books_df):
    """Return the books frame with ratings refreshed from the reviews.

    The mean review ``rating`` per ``book_id`` (rounded to two decimals)
    overwrites the ``rating`` of every book that has matching reviews.
    Books with no matching review keep whatever rating they already had.

    Args:
        reviews_df: DataFrame with ``book_id`` and ``rating`` columns.
        books_df: DataFrame with a ``book_id`` column.

    Returns:
        A DataFrame built from ``books_df`` with ``book_id`` restored as a
        regular column after the update.
    """
    mean_rating_by_book = reviews_df.groupby('book_id')[['rating']].mean().round(2)

    refreshed = books_df.set_index('book_id')
    # DataFrame.update works in place on `refreshed`, aligning on the
    # book_id index and on column labels; only non-NA values are copied over.
    refreshed.update(mean_rating_by_book)

    return refreshed.reset_index()

# Rebind `books` to the frame returned by update_rating (ratings refreshed from `reviews`).
books = update_rating(reviews, books)

In [3]:
import re

def senitize_description(description):
    """Remove every character that is neither a word character nor a space.

    ``\\w`` is Unicode-aware for ``str`` patterns, so letters of any script,
    digits and underscores are kept; punctuation is dropped.

    NOTE: whitespace other than the plain space (newlines, tabs) is removed
    rather than replaced, so words separated only by a line break end up
    joined together.

    Args:
        description: The raw description text. Non-string values (for
            example ``None`` or ``NaN`` for a missing description) are
            accepted and treated as empty.

    Returns:
        The cleaned string, or ``""`` when ``description`` is not a string.
    """
    # Missing descriptions show up as None/NaN; re.sub would raise TypeError on them.
    if not isinstance(description, str):
        return ""
    # Raw string: "\w" inside a normal literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    return re.sub(r"[^\w ]", "", description)

# Clean each value of the 'description' column with senitize_description.
sanitized_description_col = books['description'].apply(senitize_description)
# assign() returns a new frame; `books` is rebound to it with the extra
# 'sanitized_description_col' column that the later cells use as the corpus.
books = books.assign(sanitized_description_col = sanitized_description_col)

In [4]:
# Utilities

def get_recomendations(recomendations_df, books, book_id, top_n=10):
    """Return the books most similar to ``book_id``, best match first.

    The similarity matrix is addressed by row position: row/column ``i`` of
    ``recomendations_df`` is taken to describe the ``i``-th row of ``books``.
    The position of the requested book is looked up through the ``book_id``
    column, so the result is correct even when ``book_id`` values differ from
    row positions or from the index labels of ``books``.

    Args:
        recomendations_df: Square DataFrame of pairwise similarity scores,
            with one row and one column per row of ``books``, in the same order.
        books: DataFrame with ``book_id``, ``title``, ``titleEnglish`` and
            ``rating`` columns.
        book_id: Identifier of the book to find neighbours for.
        top_n: How many rows to return. The requested book itself is normally
            among them, since it is as similar to itself as anything can be.

    Returns:
        DataFrame with columns ``book_id``, ``title``, ``titleEnglish``,
        ``rating`` and ``similarity_index`` (score rounded to three decimals),
        sorted by ``similarity_index`` and then ``rating``, both descending.

    Raises:
        KeyError: If no row of ``books`` has the given ``book_id``.
        ValueError: If ``recomendations_df`` and ``books`` differ in row count.
    """
    if recomendations_df.shape[0] != len(books):
        raise ValueError(
            "recomendations_df must have one row per row of books "
            f"({recomendations_df.shape[0]} != {len(books)})"
        )

    # Positions (not index labels) of the rows carrying the requested id.
    positions = (books['book_id'] == book_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise KeyError(book_id)
    position = positions[0]

    # Re-label the score column 0..n-1 so its index holds row positions.
    scores = recomendations_df.iloc[:, position].reset_index(drop=True)
    # Rank on the raw scores, then round only for presentation.
    top_scores = scores.sort_values(ascending=False).head(top_n).round(3)

    return books.iloc[top_scores.index.to_numpy()][['book_id', 'title', 'titleEnglish', 'rating']] \
        .assign(similarity_index = top_scores.to_numpy()) \
        .sort_values(['similarity_index', 'rating'], ascending=[False, False])

def get_book_title_by_id(books, book_id):
    """Look up the title of the first row of ``books`` with the given ``book_id``.

    Args:
        books: DataFrame with ``book_id`` and ``title`` columns.
        book_id: Identifier to search for.

    Returns:
        The ``title`` value of the first matching row.

    Raises:
        IndexError: If no row matches (``.iloc[0]`` on an empty selection).
    """
    is_requested_book = books['book_id'] == book_id
    matching_titles = books.loc[is_requested_book, 'title']
    return matching_titles.iloc[0]

# Cleaned description text, one entry per row of `books`; passed to the
# vectorizer-based builders in the demo cells.
CORPUS = books['sanitized_description_col']
# The books frame the demo functions read titles and recommendations from.
BOOKS = books
# book_id of the book used as the query in both demos.
BOOK_ID = 12

In [5]:
# Recommendations based on word counts

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def create_count_word_recomendations(corpus):
    """Build a document-to-document similarity table from raw word counts.

    Each document in ``corpus`` is turned into a word-count vector with a
    default ``CountVectorizer``; the cosine similarity of every pair of
    vectors is then computed as a dense matrix.

    Args:
        corpus: Iterable of text documents.

    Returns:
        A square DataFrame of cosine similarities with default integer
        row/column labels, i.e. labelled by document position in ``corpus``.
    """
    vectorizer = CountVectorizer()
    term_counts = vectorizer.fit_transform(corpus)
    pairwise_similarity = cosine_similarity(term_counts, dense_output = True)
    return pd.DataFrame(pairwise_similarity)

def count_words_recomendation_demo():
    """Print the title of the query book and display its word-count neighbours.

    Reads the module-level ``BOOKS``, ``BOOK_ID`` and ``CORPUS``; returns nothing.
    """
    print(f"Book title: '{get_book_title_by_id(BOOKS, BOOK_ID)}'")

    # The full similarity table is rebuilt from CORPUS on every call.
    similarity_df = create_count_word_recomendations(CORPUS)
    display(get_recomendations(similarity_df, BOOKS, BOOK_ID))

# Run the word-count demo; the printed title and displayed table are this cell's output.
count_words_recomendation_demo()

Book title: 'Большая книга пекаря. Хлеб, бриоши, выпечка. Учимся готовить шедевры'


Unnamed: 0,book_id,title,titleEnglish,rating,similarity_index
12,12,"Большая книга пекаря. Хлеб, бриоши, выпечка. У...",Le Grand Manuel du Boulanger,5.0,1.0
9647,9647,Телекомунікаційні системи та мережі. Принципи ...,,,0.236
4543,4543,Матеріалознавство для архітекторів та дизайнерів,,,0.223
416,416,Кишенькова книжка емоційного інтелекту. Невели...,Emotional Intelligence Pocketbook: Little Exer...,5.0,0.213
4244,4244,Бетмен. Цить,Batman. Hush,5.0,0.2
6470,6470,Інтернет речей,,,0.194
6007,6007,Чарiвна Україна / Magic Ukraine,,,0.191
6654,6654,Сервер на основі операційної системи FreeBSD 6.1,,,0.19
6460,6460,Дивовижні технології. Дизайн та інтернет речей,"Enchanted Objects: Design, Human Desire, and t...",,0.186
6899,6899,В Землю #1,,,0.186


In [6]:
# Recommendations based on TF-IDF

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def create_tfidf_recomendations(corpus):
    """Build a document-to-document similarity table from TF-IDF vectors.

    Documents are vectorized with a ``TfidfVectorizer`` over unigrams,
    bigrams and trigrams (``ngram_range=(1, 3)``); the cosine similarity of
    every pair of vectors is then computed.

    Args:
        corpus: Iterable of text documents.

    Returns:
        A square DataFrame of cosine similarities with default integer
        row/column labels, i.e. labelled by document position in ``corpus``.
    """
    vectorizer = TfidfVectorizer(ngram_range=(1, 3))
    weighted_terms = vectorizer.fit_transform(corpus)
    pairwise_similarity = cosine_similarity(weighted_terms)
    return pd.DataFrame(pairwise_similarity)

def tfidf_recomendation_demo():
    """Print the title of the query book and display its TF-IDF neighbours.

    Reads the module-level ``BOOKS``, ``BOOK_ID`` and ``CORPUS``; returns nothing.
    """
    print(f"Book title: '{get_book_title_by_id(BOOKS, BOOK_ID)}'")

    # The full similarity table is rebuilt from CORPUS on every call.
    similarity_df = create_tfidf_recomendations(CORPUS)
    display(get_recomendations(similarity_df, BOOKS, BOOK_ID))

# Run the TF-IDF demo; the printed title and displayed table are this cell's output.
tfidf_recomendation_demo()

Book title: 'Большая книга пекаря. Хлеб, бриоши, выпечка. Учимся готовить шедевры'


Unnamed: 0,book_id,title,titleEnglish,rating,similarity_index
12,12,"Большая книга пекаря. Хлеб, бриоши, выпечка. У...",Le Grand Manuel du Boulanger,5.0,1.0
199,199,Випічка. Десерти,,5.0,0.024
2634,2634,Выразительный человек. Сценическое воспитание ...,,,0.016
416,416,Кишенькова книжка емоційного інтелекту. Невели...,Emotional Intelligence Pocketbook: Little Exer...,5.0,0.015
329,329,Игры тестостерона и другие вопросы биологии по...,Trouble With Testosterone. And Other Essays on...,5.0,0.015
4981,4981,Классика ХХI века. Фортепианная игра. Ответы н...,,,0.015
224,224,Солодка неділя,,5.0,0.014
209,209,Готовим в стиле хюгге,,5.0,0.014
9647,9647,Телекомунікаційні системи та мережі. Принципи ...,,,0.014
658,658,Англійська з Бреді. Level 2 (комплект з 5 книг),,,0.013
