In [1]:
import pandas as pd
import numpy as np

In [2]:
comp_csv = pd.read_csv('/home/grimoire/Projects/BeerRatings/comparison_style.csv')
style_comp = pd.DataFrame(comp_csv)

In [3]:
style_comp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 7 columns):
beer_style           103 non-null object
review_overall       103 non-null float64
review_taste         103 non-null float64
review_appearance    103 non-null float64
review_palate        103 non-null float64
review_aroma         103 non-null float64
count                103 non-null int64
dtypes: float64(5), int64(1), object(1)
memory usage: 5.7+ KB


In [4]:
style_comp['count'].describe()

count       103.000000
mean      15404.019417
std       17918.355962
min         241.000000
25%        4000.000000
50%       10130.000000
75%       19757.500000
max      117586.000000
Name: count, dtype: float64

To ameliorate any possibly accidental entries in the overall rating. Using an average of all the ratings helped to solve any issues that could have possibly arisen as a result of accidental ratings. This methodology also allowed for more flexibility in rating increments. Instead of .5 increments the Averaged has a full decimal range.

In [5]:
style_comp['averaged'] = (style_comp.review_overall + \
                         style_comp.review_taste + \
                         style_comp.review_appearance + \
                         style_comp.review_aroma + \
                         style_comp.review_taste) / 5

In [6]:
C = style_comp['averaged'].mean()

In [7]:
print('The mean rating for style is: ', C)

The mean rating for style is:  3.7029655559499357


In [8]:
m = 241
q_ratings = style_comp.copy().loc[style_comp['count'] >= m]

In [9]:
def weighted_rating(df, m=m, C=C):
    
    #########################################
    # Calculates and returns a weighted rating for specific feature
    # m is minimum votes/ratings required to be listed
    # C is the mean rating/vote across the whole dataframe
    # R is average rating/votes of feature
    # v is number of ratings/votes of feature
    #########################################
    
    v = df['count']
    R = df['averaged']
    
    # Calculation based on the IMDB formula
    return (v/(v+m) * R) + (m/(m+v) * C)

In [10]:
q_ratings['weighted_averaged'] = q_ratings.apply(weighted_rating, axis=1)

In [11]:
q_ratings = q_ratings.sort_values('weighted_averaged', ascending=False)

In [12]:
q_ratings

Unnamed: 0,beer_style,review_overall,review_taste,review_appearance,review_palate,review_aroma,count,averaged,weighted_averaged
12,American Imperial Stout,4.029820,4.187230,4.163633,4.098669,4.160665,50705,4.145715,4.143621
32,Belgian Quadrupel (Quad),4.071630,4.210909,4.117964,4.124986,4.132533,18086,4.148789,4.142927
94,Russian Imperial Stout,4.023084,4.149569,4.210072,4.086922,4.076576,54129,4.121774,4.119918
22,American Wild Ale,4.093262,4.149938,4.005451,4.040632,4.126756,17794,4.105069,4.099696
28,Belgian Gueuze,4.086287,4.127143,4.034864,4.046680,4.117574,6009,4.098602,4.083346
72,German Eisbock,3.977094,4.211603,3.964514,4.113594,4.156778,2663,4.104318,4.071011
10,American Imperial IPA,3.998017,4.091280,4.078916,4.023128,4.097782,85977,4.071455,4.070425
84,German Weizenbock,4.007969,4.077348,4.009297,3.990703,4.044677,9412,4.043328,4.034830
64,Flanders Red Ale,3.992722,4.090636,4.001801,3.970888,4.044043,6664,4.043968,4.032066
3,American Barleywine,3.896756,4.042633,4.036535,3.996521,4.019343,26728,4.007580,4.004858


In [13]:
q_ratings[q_ratings.beer_style == 'Bohemian Pilsener']

Unnamed: 0,beer_style,review_overall,review_taste,review_appearance,review_palate,review_aroma,count,averaged,weighted_averaged
40,Bohemian Pilsener,3.794662,3.603414,3.641052,3.606162,3.44427,12740,3.617363,3.618952
