In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the per-beer comparison table from CSV.
# NOTE(review): this is an absolute, machine-specific path, so the cell will
# fail on any other machine -- consider a configurable data directory.
Beer_comparison = pd.read_csv('/home/grimoire/Projects/BeerRatings/comparison_beer.csv')

In [3]:
# Sanity-check the loaded frame: column names, dtypes and non-null counts.
Beer_comparison.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56857 entries, 0 to 56856
Data columns (total 7 columns):
beer_name            56857 non-null object
review_overall       56857 non-null float64
review_taste         56857 non-null float64
review_appearance    56857 non-null float64
review_palate        56857 non-null float64
review_aroma         56857 non-null float64
total_reviews        56857 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 3.0+ MB


In [4]:
# Summary statistics of the review counts; useful for judging what a
# sensible minimum-review cut-off would be.
Beer_comparison['total_reviews'].describe()

count    56857.000000
mean        27.905341
std        122.198636
min          1.000000
25%          1.000000
50%          3.000000
75%          9.000000
max       3290.000000
Name: total_reviews, dtype: float64

To reduce the impact of any accidental entries in the overall rating, an average of all five review ratings (appearance, aroma, palate, taste and overall) is used instead of the overall rating alone. This approach also allows finer rating increments: instead of .5 increments, the `averaged` rating has a full decimal range.

In [5]:
# Composite score: the plain average of the five review dimensions.
# The columns are added left to right in the order listed below.
rating_columns = ['review_appearance', 'review_aroma', 'review_palate',
                  'review_taste', 'review_overall']
first_column, *other_columns = rating_columns
rating_total = sum((Beer_comparison[column] for column in other_columns),
                   Beer_comparison[first_column])
Beer_comparison['averaged'] = rating_total / len(rating_columns)

In [6]:
# Mean of the 'averaged' rating over every row of Beer_comparison.
beer_mean = Beer_comparison['averaged'].mean()

In [7]:
# print() separates its arguments with a space, so the output shows two
# spaces after the colon (the literal already ends with one).
print('The mean rating for beer is: ', beer_mean)

The mean rating for beer is:  3.6135706681114668


In [8]:
#Using new apply functionality, this should be factored into a separate part
#Keeping for prototyping

def weighted_rating(df, cut_off=0, mean=0, V_col='total_reviews', R_col='averaged'):
    """Return a weighted rating for a feature, based on the IMDB formula.

    The feature's own rating is blended with a reference mean:

        (v / (v + cut_off)) * R + (cut_off / (cut_off + v)) * mean

    Parameters
    ----------
    df : object indexable by column name (e.g. a DataFrame)
        Source of the vote counts and ratings.
    cut_off : number, default 0
        Minimum votes/ratings required to be listed ("m" in the formula).
    mean : number, default 0
        Mean rating/vote across the whole dataframe ("C" in the formula).
    V_col : str, default 'total_reviews'
        Key of the number of ratings/votes of the feature ("v").
    R_col : str, default 'averaged'
        Key of the average rating/votes of the feature ("R").

    Returns
    -------
    The weighted rating, with whatever type the arithmetic on
    ``df[V_col]`` and ``df[R_col]`` produces.

    Notes
    -----
    No guard is applied when ``v + cut_off`` equals zero; the division is
    performed as-is.
    """
    votes = df[V_col]
    rating = df[R_col]

    # Share of the result taken from the feature's own rating ...
    own_weight = votes / (votes + cut_off)
    # ... and the share taken from the reference mean.
    prior_weight = cut_off / (cut_off + votes)

    return (own_weight * rating) + (prior_weight * mean)

In [9]:
# Minimum number of votes/ratings a beer needs in order to be listed.
n = 100

# Filter first, then copy: only the qualifying rows are duplicated, instead of
# copying the whole frame and throwing most of it away. The explicit copy
# keeps q_ratings independent of Beer_comparison for later modifications.
q_ratings = Beer_comparison.loc[Beer_comparison['total_reviews'] >= n].copy()
q_ratings.head()

Unnamed: 0,beer_name,review_overall,review_taste,review_appearance,review_palate,review_aroma,total_reviews,averaged
35,"""Shabadoo"" Black & Tan Ale",3.991453,3.901709,3.893162,3.846154,3.717949,117,3.870085
48,# 100,3.961009,4.094037,4.15367,4.084862,4.013761,218,4.061468
59,#9,3.539528,3.396671,3.50104,3.398058,3.434466,1442,3.453953
165,10 Commandments,3.828814,3.986441,3.986441,3.883051,4.061017,295,3.949153
172,10 Squared Fish Tale Ale,3.924779,3.955752,3.964602,3.995575,3.946903,113,3.957522


In [10]:
# Score every qualifying beer, using the review cut-off n and the global mean
# rating as the reference values, then rank the beers best-first.
weighted_scores = weighted_rating(q_ratings, cut_off=n, mean=beer_mean)
q_ratings = (
    q_ratings
    .assign(weighted_average=weighted_scores)
    .sort_values(by='weighted_average', ascending=False)
)
q_ratings.head(25)

Unnamed: 0,beer_name,review_overall,review_taste,review_appearance,review_palate,review_aroma,total_reviews,averaged,weighted_average
51855,Trappist Westvleteren 12,4.617925,4.718553,4.454009,4.633255,4.583333,1272,4.601415,4.529415
39649,Pliny The Elder,4.590028,4.630985,4.388603,4.451326,4.612188,2527,4.534626,4.499565
39650,Pliny The Younger,4.6,4.72459,4.482787,4.612295,4.72377,610,4.628689,4.485714
50464,The Abyss,4.450779,4.588173,4.593484,4.513102,4.443343,1412,4.517776,4.457974
19392,Founders KBS (Kentucky Breakfast Stout),4.397516,4.626294,4.37707,4.479814,4.570393,1932,4.490217,4.447075
19339,Founders CBS Imperial Stout,4.591052,4.697017,4.457614,4.579278,4.558085,637,4.576609,4.445939
51860,Trappistes Rochefort 10,4.339401,4.543548,4.344931,4.430876,4.418664,2170,4.415484,4.380157
19338,Founders Breakfast Stout,4.354516,4.501199,4.364508,4.38789,4.41207,2502,4.404037,4.373658
51856,Trappist Westvleteren 8,4.514144,4.553041,4.415842,4.49505,4.42645,707,4.480905,4.373429
5447,Bell's Hopslam Ale,4.377609,4.51842,4.24253,4.314368,4.533156,2443,4.397217,4.366401
