In [20]:
import pandas as pd
import seaborn as sns

In [21]:
def read_file_csv(file_name, encoding="ISO-8859-1", **read_csv_kwargs):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_name : str or path-like
        Location of the CSV file; passed unchanged to ``pandas.read_csv``.
    encoding : str, default "ISO-8859-1"
        Text encoding used to decode the file. The default matches the
        encoding this notebook has always used for its data files.
    **read_csv_kwargs
        Any additional keyword arguments are forwarded to
        ``pandas.read_csv``. ``low_memory`` defaults to ``False`` unless the
        caller overrides it.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    # Keep the historical default (low_memory=False) while letting callers override it.
    read_csv_kwargs.setdefault("low_memory", False)
    return pd.read_csv(file_name, encoding=encoding, **read_csv_kwargs)

def weighted_average_rating(counts, m, mean_rating, rating_all_mean):
    """Blend an item's own mean rating with the overall mean rating.

    The result is ``counts/(counts+m) * mean_rating
    + m/(counts+m) * rating_all_mean``: the more votes an item has relative
    to ``m``, the closer the result is to its own ``mean_rating``.

    Parameters
    ----------
    counts
        Number of votes for the item(s). The notebook passes a pandas Series,
        in which case the arithmetic is applied element-wise.
    m
        Weight given to the overall mean (expressed as a number of votes).
    mean_rating
        Mean rating of the item(s); scalar or Series aligned with ``counts``.
    rating_all_mean
        Mean rating over all items.

    Returns
    -------
    Same kind of object as the arithmetic on the inputs produces
    (a scalar for scalar inputs, a Series for Series inputs).
    """
    total_weight = counts + m
    item_share = counts / total_weight
    prior_share = m / total_weight
    return item_share * mean_rating + prior_share * rating_all_mean

def get_simply_recommendation(data_frame, counts, k, final_rating, m):
    """Return the ``k`` best-scored rows among those with more than ``m`` votes.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Candidate rows.
    counts
        Per-row vote counts used for filtering (the notebook passes
        ``data_frame['counts']``).
    k : int
        Maximum number of rows to return.
    final_rating
        Column name (or list of column names) to rank by, largest first.
    m
        Vote threshold; only rows with ``counts`` strictly greater than ``m``
        are considered.

    Returns
    -------
    pandas.DataFrame
        Up to ``k`` rows of ``data_frame``, ordered by ``final_rating``
        descending, with their original index labels.
    """
    has_enough_votes = counts > m
    eligible = data_frame[has_enough_votes]
    return eligible.nlargest(n=k, columns=final_rating)

In [22]:
# Load the three raw tables used by the rest of the notebook.
# Paths are relative to the notebook's working directory.
books_df = read_file_csv(file_name="data/books.csv")
users_df = read_file_csv(file_name="data/users.csv")
ratings_df = read_file_csv(file_name="data/ratings.csv")

In [23]:
# Attach the book title to every rating, then the attributes of the user who
# gave it. Inner joins drop ratings whose book_id or user_id has no match.
ratings_data = (
    books_df[['book_id', 'title']]
    .merge(ratings_df, on='book_id', how='inner')
    .merge(users_df, on='user_id', how='inner')
)
ratings_data.head()

Unnamed: 0,book_id,title,user_id,rating,location,age
0,1,"The Hunger Games (The Hunger Games, #1)",3634,4,District of Columbia,92
1,4,To Kill a Mockingbird,3634,5,District of Columbia,92
2,5,The Great Gatsby,3634,3,District of Columbia,92
3,8,The Catcher in the Rye,3634,5,District of Columbia,92
4,17,"Catching Fire (The Hunger Games, #2)",3634,4,District of Columbia,92


In [24]:
# Lookup table with one row per rated book (first occurrence of each book_id).
title_df = (
    ratings_data
    .loc[:, ['book_id', 'title']]
    .drop_duplicates(subset=['book_id'])
)
title_df.head()

Unnamed: 0,book_id,title
0,1,"The Hunger Games (The Hunger Games, #1)"
1,4,To Kill a Mockingbird
2,5,The Great Gatsby
3,8,The Catcher in the Rye
4,17,"Catching Fire (The Hunger Games, #2)"


In [25]:
# Number of ratings each book received.
nb_voters_book = ratings_data['book_id'].value_counts()
# The same counts as a (book_id, counts) frame so they can be merged on book_id.
nb_voters_df = nb_voters_book.rename_axis('book_id').reset_index(name='counts')
nb_voters_df.head()

Unnamed: 0,book_id,counts
0,1,1764
1,4,1747
2,5,1655
3,2,1634
4,26,1597


In [26]:
# Mean rating per book, indexed by book_id.
rating_mean = ratings_data.groupby(['book_id'])['rating'].mean()
# The same means as a (book_id, mean_rating) frame so they can be merged on book_id.
rating_mean_df = rating_mean.rename('mean_rating').reset_index()
rating_mean_df.head()

Unnamed: 0,book_id,mean_rating
0,1,4.238662
1,2,4.05814
2,3,3.376528
3,4,4.364625
4,5,3.748036


In [27]:
# m: the vote threshold a book must exceed to enter the recommendation list,
# set to the 90th percentile of the per-book rating counts.
m = nb_voters_book.quantile(q=0.90)
print(m)

202.0


In [28]:
# C: mean of every individual rating across all books
# (a mean over rating rows, not a mean of the per-book means).
rating_all_mean = ratings_data.loc[:, 'rating'].mean()
print(rating_all_mean)

3.779077570255898


In [29]:
# Assemble one row per book: title, number of ratings, and mean rating.
title_voters_df = title_df.merge(nb_voters_df, on='book_id', how='inner')
df = title_voters_df.merge(rating_mean_df, on='book_id', how='inner')
df.head()

Unnamed: 0,book_id,title,counts,mean_rating
0,1,"The Hunger Games (The Hunger Games, #1)",1764,4.238662
1,4,To Kill a Mockingbird,1747,4.364625
2,5,The Great Gatsby,1655,3.748036
3,8,The Catcher in the Rye,1535,3.76873
4,17,"Catching Fire (The Hunger Games, #2)",1494,4.048193


In [11]:
# Score every book by blending its own mean rating with the global mean,
# weighted by how many ratings it has relative to m.
# NOTE(review): this cell's execution count is lower than the cells above it and
# its saved output is ordered differently from the merged frame shown earlier,
# so the stored output appears to come from an earlier run; re-run the
# notebook from a fresh kernel to refresh it.
df['weighted_average_rating'] = weighted_average_rating(
    counts=df['counts'],
    m=m,
    mean_rating=df['mean_rating'],
    rating_all_mean=rating_all_mean,
)
df.head()

Unnamed: 0,book_id,title,counts,mean_rating,weighted_average_rating
0,1,"The Hunger Games (The Hunger Games, #1)",1764,4.238662,4.191441
1,2,Harry Potter and the Sorcerer's Stone (Harry P...,1634,4.05814,4.027437
2,3,"Twilight (Twilight, #1)",1227,3.376528,3.433432
3,4,To Kill a Mockingbird,1747,4.364625,4.303937
4,5,The Great Gatsby,1655,3.748036,3.751413


In [12]:
# Top 10 books by weighted rating among those with more than m ratings.
get_simply_recommendation(
    data_frame=df,
    counts=df['counts'],
    k=10,
    final_rating=['weighted_average_rating'],
    m=m,
)

Unnamed: 0,book_id,title,counts,mean_rating,weighted_average_rating
24,25,Harry Potter and the Deathly Hallows (Harry Po...,1444,4.421745,4.342876
3,4,To Kill a Mockingbird,1747,4.364625,4.303937
101,102,Where the Wild Things Are,612,4.449346,4.283014
84,85,The Giving Tree,815,4.364417,4.248155
49,50,Where the Sidewalk Ends,976,4.343238,4.246497
30,31,The Help,1265,4.318577,4.24429
143,144,"Unbroken: A World War II Story of Survival, Re...",557,4.396768,4.232376
26,27,Harry Potter and the Half-Blood Prince (Harry ...,1394,4.28264,4.218906
132,133,"Anne of Green Gables (Anne of Green Gables, #1)",533,4.348968,4.192345
0,1,"The Hunger Games (The Hunger Games, #1)",1764,4.238662,4.191441


In [13]:
def get_simply_place_recommendation(place, k):
    """Placeholder: not implemented yet.

    Both ``place`` and ``k`` are currently ignored and the function always
    returns ``None``.

    NOTE(review): judging by the name, this is presumably meant to return the
    top ``k`` recommendations restricted to a given place -- confirm the
    intended behaviour before implementing.
    """
    # TODO: implement the place-specific recommendation.
    return None