In [1]:
import pandas as pd

# Read data/books.csv into a DataFrame and print its column labels.
books = pd.read_csv(filepath_or_buffer="data/books.csv")
print(books.columns)

Index(['book_id', 'goodreads_book_id', 'best_book_id', 'work_id',
       'books_count', 'isbn', 'isbn13', 'authors', 'original_publication_year',
       'original_title', 'title', 'language_code', 'average_rating',
       'ratings_count', 'work_ratings_count', 'work_text_reviews_count',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'image_url', 'small_image_url'],
      dtype='object')


In [2]:
# Keep only the identifier, title, author and rating columns, then order the
# rows by average rating and, within equal ratings, by number of ratings --
# both from highest to lowest.
books2 = (
    books.loc[
        :,
        [
            "book_id",
            "goodreads_book_id",
            "title",
            "authors",
            "average_rating",
            "ratings_count",
        ],
    ]
    .sort_values(by=["average_rating", "ratings_count"], ascending=False)
)

# Calcul de la note pondérée (weighted rating)

$$ wr = ( \frac{v}{v+m} \cdot R ) + ( \frac{m}{v+m} \cdot C ) $$

- $v$ : le nombre de votes reçus par le livre
- $m$ : le nombre minimum de votes requis pour figurer dans la liste
- $R$ : la note moyenne du livre
- $C$ : la moyenne des notes de l'ensemble des livres

In [3]:
# C: mean of the per-book average ratings over every book in books2
C = books2["average_rating"].mean()

# m: minimum number of ratings required, taken as the 75th percentile
# (quantile 0.75) of ratings_count
m = books2["ratings_count"].quantile(q=0.75)

# Keep only the books that have at least m ratings; .copy() gives an
# independent frame so the new column below is not written into a slice
books3 = books2.loc[books2["ratings_count"].ge(m)].copy()

# Per-book inputs of the weighted-rating formula
v = books3["ratings_count"]   # number of ratings of the book
R = books3["average_rating"]  # average rating of the book

# wr = v/(v+m) * R + m/(v+m) * C
# The more ratings a book has, the more its own average R counts relative to
# the global mean C.
books3["weighted_rating"] = v.div(v + m).mul(R) + (m / (v + m)).mul(C)

In [4]:
# Display the 25 books with the highest weighted rating, best first.
books3.sort_values(by="weighted_rating", ascending=False).head(n=25)

Unnamed: 0,book_id,goodreads_book_id,title,authors,average_rating,ratings_count,weighted_rating
421,422,862041,"Harry Potter Boxset (Harry Potter, #1-7)",J.K. Rowling,4.74,190050,4.608935
24,25,136251,Harry Potter and the Deathly Hallows (Harry Po...,"J.K. Rowling, Mary GrandPré",4.61,1746574,4.596041
26,27,1,Harry Potter and the Half-Blood Prince (Harry ...,"J.K. Rowling, Mary GrandPré",4.54,1678823,4.527162
1307,1308,17927395,A Court of Mist and Fury (A Court of Thorns an...,Sarah J. Maas,4.72,108384,4.522803
17,18,5,Harry Potter and the Prisoner of Azkaban (Harr...,"J.K. Rowling, Mary GrandPré, Rufus Beck",4.53,1832823,4.518437
23,24,6,Harry Potter and the Goblet of Fire (Harry Pot...,"J.K. Rowling, Mary GrandPré",4.53,1753043,4.517922
561,562,7235533,"The Way of Kings (The Stormlight Archive, #1)",Brandon Sanderson,4.64,144822,4.49913
191,192,186074,The Name of the Wind (The Kingkiller Chronicle...,Patrick Rothfuss,4.55,400101,4.499021
134,135,62291,"A Storm of Swords (A Song of Ice and Fire, #3)",George R.R. Martin,4.54,469022,4.496714
861,862,17332218,"Words of Radiance (The Stormlight Archive, #2)",Brandon Sanderson,4.77,73572,4.495007
