In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the per-brewery comparison table from disk.
# NOTE(review): this is an absolute, machine-specific path — the notebook only
# runs where this exact file exists.
csv_path = '/home/grimoire/Projects/BeerRatings/comparison_brewery.csv'
comp_csv = pd.read_csv(csv_path)
# read_csv already yields a DataFrame; it is wrapped in a second DataFrame
# object that the rest of the notebook works on.
brewery_comp = pd.DataFrame(data=comp_csv)

In [3]:
# Show column names, dtypes and non-null counts of the loaded table.
brewery_comp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5742 entries, 0 to 5741
Data columns (total 7 columns):
brewery_name             5742 non-null object
review_overall           5742 non-null float64
review_taste             5742 non-null float64
review_appearance        5742 non-null float64
review_palate            5742 non-null float64
review_aroma             5742 non-null float64
brewery_rating_counts    5742 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 314.1+ KB


In [4]:
# Summary statistics for the number of ratings recorded per row
# (count, mean, std, min, quartiles, max).
brewery_comp['brewery_rating_counts'].describe()

count     5742.000000
mean       276.314699
std       1500.440673
min          1.000000
25%          3.000000
50%         14.000000
75%         73.000000
max      39444.000000
Name: brewery_rating_counts, dtype: float64

To mitigate the effect of any accidental entries in the overall rating, an average of all five review ratings is used instead. Averaging dampens the impact of a single accidental rating and also allows finer rating increments: instead of 0.5-point steps, the averaged rating spans the full decimal range.

In [5]:
# Collapse the five review dimensions into a single score per row: their plain
# arithmetic mean, stored in a new 'averaged' column.
brewery_comp['averaged'] = (
    brewery_comp['review_appearance']
    + brewery_comp['review_aroma']
    + brewery_comp['review_palate']
    + brewery_comp['review_taste']
    + brewery_comp['review_overall']
) / 5

In [6]:
# Mean of the 'averaged' score over every row of brewery_comp; each row counts
# once, regardless of its brewery_rating_counts value.
C = brewery_comp['averaged'].mean()

In [7]:
# Report the global mean C computed above.
print('The mean rating for brewery is: ', C)

The mean rating for brewery is:  3.4795528141487697


In [8]:
# Minimum number of ratings a brewery needs in order to be ranked.
# 14 equals the median (50%) of brewery_rating_counts reported by describe().
m = 14
# Keep only the rows that reach the cutoff, then take an independent copy so
# that columns added later do not touch brewery_comp.
has_enough_ratings = brewery_comp['brewery_rating_counts'] >= m
q_ratings = brewery_comp.loc[has_enough_ratings].copy()

In [9]:
def weighted_rating(df, m=m, C=C):
    """Return the weighted rating computed from `df`.

    The score blends the entry's own averaged rating with the global mean,
    following the IMDB formula:

        WR = v / (v + m) * R  +  m / (v + m) * C

    where
        v is the number of ratings (``df['brewery_rating_counts']``),
        R is the averaged rating (``df['averaged']``),
        m is the minimum number of ratings required to be listed,
        C is the mean rating across the whole dataframe.

    Parameters
    ----------
    df : object indexable by 'brewery_rating_counts' and 'averaged'
        For example a single row, as passed by ``DataFrame.apply(axis=1)``.
    m : number, optional
        Minimum-ratings cutoff. The default is the value of the module-level
        ``m`` at the moment this function is defined; re-run this cell after
        changing ``m`` to pick up the new value.
    C : number, optional
        Global mean rating. The default is bound at definition time in the
        same way as ``m``.

    Returns
    -------
    The weighted rating, in whatever form the arithmetic on the two looked-up
    values produces.
    """
    votes = df['brewery_rating_counts']
    rating = df['averaged']

    # Both weights share the same denominator and add up to 1.
    total = votes + m
    own_share = votes / total * rating
    prior_share = m / total * C
    return own_share + prior_share

In [10]:
# weighted_rating only performs element-wise arithmetic on the two columns it
# reads, so it can be evaluated on the whole frame in one vectorized call
# instead of row by row through apply(axis=1). The resulting values are the same.
q_ratings['weighted_averaged'] = weighted_rating(q_ratings)

In [11]:
# Order the qualifying rows from highest to lowest weighted rating.
q_ratings = q_ratings.sort_values('weighted_averaged', ascending=False)

In [12]:
# Show only the top of the ranking rather than dumping the whole frame.
q_ratings.head(10)

Unnamed: 0,brewery_name,review_overall,review_taste,review_appearance,review_palate,review_aroma,brewery_rating_counts,averaged,weighted_averaged
1342,Brouwerij Westvleteren (Sint-Sixtusabdij van W...,4.544996,4.588520,4.403280,4.526493,4.468881,2378,4.506434,4.500424
5085,The Alchemist,4.582543,4.572106,4.171727,4.388994,4.610057,527,4.465085,4.439582
4538,Russian River Brewing Company,4.373884,4.409999,4.234639,4.287464,4.349660,11311,4.331129,4.330076
916,Brasserie de Rochefort,4.267913,4.409546,4.260681,4.294504,4.300401,4494,4.306609,4.304040
3871,Närke Kulturbryggeri AB,4.363208,4.445755,4.242925,4.334906,4.384434,212,4.354245,4.300061
4088,Peg's Cantina & Brewpub / Cycle Brewing,4.506329,4.569620,4.177215,4.411392,4.487342,79,4.430380,4.287245
1292,Brouwerij Drie Fonteinen,4.291966,4.333034,4.187950,4.200240,4.308153,1668,4.264269,4.257737
2707,Hill Farmstead Brewery,4.297191,4.296865,4.207381,4.188766,4.298824,1531,4.257805,4.250753
1325,Brouwerij St. Bernardus NV,4.208276,4.287564,4.230544,4.199569,4.162198,6489,4.217630,4.216041
3033,Kern River Brewing Company,4.290635,4.237890,4.199139,4.124865,4.265877,929,4.223681,4.212634
