In [1]:
import pandas as pd

In [2]:
# %load get_data.py
def get_data(path="/home/grimoire/Projects/BeerRatings/rating_update.csv"):
    """Load the beer-ratings CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the ratings CSV. Defaults to the original hard-coded
        project path, so existing ``get_data()`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    # Imported locally so the function stays self-contained when it is
    # pulled into a notebook with ``%load get_data.py``.
    import pandas as pd

    # read_csv already returns a DataFrame; no pd.DataFrame() wrapper is needed.
    return pd.read_csv(path)


In [3]:
# Style descriptions feed the text-similarity model below; `ratings` is the
# table returned by get_data().
# read_csv already returns a DataFrame, so no pd.DataFrame() wrapper is needed.
descriptions = pd.read_csv('/home/grimoire/Projects/BeerRatings/beer_description.csv')
ratings = get_data()

# This section builds a system that recommends beer styles similar to a given beer style.
I will compute pairwise similarity scores for all beer styles based on their style descriptions and recommend beer styles based on those similarity scores.

In [4]:
# Import TfidfVectorizer from scikit-learn.
#
# TF-IDF (Term Frequency-Inverse Document Frequency) turns every style
# description into a weighted term vector. In the resulting matrix each row
# is one beer style description and each column is one term of the
# description vocabulary (every term that appears in at least one description).
from sklearn.feature_extraction.text import TfidfVectorizer

# English stop words are removed; ngram_range=(1, 3) means the vocabulary
# holds single words as well as two- and three-word phrases.
tfidf = TfidfVectorizer(
    ngram_range=(1, 3),
    stop_words='english',
)

tfidf_matrix = tfidf.fit_transform(descriptions['description'])

In [5]:
# Sanity check of the matrix dimensions: (number of descriptions, vocabulary size).
tfidf_matrix.shape

(103, 11003)

In [6]:
from sklearn.metrics.pairwise import linear_kernel

# Pairwise dot products between every pair of rows of tfidf_matrix.
# NOTE: this equals cosine similarity only because TfidfVectorizer L2-normalises
# each row by default (norm='l2' is not overridden above); revisit if that changes.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [7]:
# Reverse lookup: style name -> row label of that style in `descriptions`.
# Series.drop_duplicates() compares the *values* (the row labels), so it never
# removed a repeated style name. De-duplicate on the index labels instead, so
# indices[style] always yields a single scalar (first occurrence wins).
indices = pd.Series(descriptions.index, index=descriptions['style'])
indices = indices[~indices.index.duplicated(keep='first')]

In [8]:
# Function that takes in beer style as input and outputs most similar styles
def get_recommendations(style, cosine_sim=cosine_sim, top_n=5):
    """Return the beer styles whose descriptions are most similar to ``style``.

    Parameters
    ----------
    style : str
        Style name; must be a label of the module-level ``indices`` lookup.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix, indexed by the values stored in ``indices``.
    top_n : int, optional
        Number of similar styles to return (default 5, matching the
        previous hard-coded ``[1:6]`` slice).

    Returns
    -------
    pandas.Series
        Style names taken from ``descriptions['style']``, most similar first.

    Raises
    ------
    KeyError
        If ``style`` is not present in ``indices``.
    """
    # Row of the similarity matrix that belongs to the requested style.
    idx = indices[style]

    # (row, score) pairs ordered from most to least similar.
    sim_scores = sorted(enumerate(cosine_sim[idx]),
                        key=lambda pair: pair[1], reverse=True)

    # Drop the query style by identity rather than assuming it sorted to
    # position 0 (a tie with another style could otherwise leave it in the list).
    beer_indices = [row for row, _ in sim_scores if row != idx][:top_n]

    return descriptions['style'].iloc[beer_indices]

In [9]:
# List every style name available in `descriptions` (valid get_recommendations inputs).
descriptions['style'].values

array(['German Bock', 'German Doppelbock', 'German Eisbock',
       'German Maibock', 'German Weizenbock', 'American Brown Ale',
       'English Brown Ale', 'English Dark Mild Ale', 'German Altbier',
       'American Black Ale', 'Belgian Dark Ale', 'Belgian Dubbel',
       'German Roggenbier', 'Scottish Ale', 'Winter Warmer',
       'American Amber / Red Lager', 'European Dark Lager',
       'German Märzen / Oktoberfest', 'German Rauchbier',
       'German Schwarzbier', 'Munich Dunkel Lager', 'Vienna Lager',
       'American Cream Ale', 'Bière de Champagne / Bière Brut', 'Braggot',
       'California Common / Steam Beer', 'American Imperial IPA',
       'American IPA', 'Belgian IPA', 'English India Pale Ale (IPA)',
       'American Amber / Red Ale', 'American Blonde Ale',
       'American Pale Ale (APA)', 'Belgian Pale Ale', 'Belgian Saison',
       'English Bitter', 'English Extra Special / Strong Bitter (ESB)',
       'English Pale Ale', 'English Pale Mild Ale',
       'French Bière 

In [10]:
# Spot-check the recommender: styles most similar to 'British Barleywine'.
get_recommendations('British Barleywine')

77           American Barleywine
70                American Stout
15    American Amber / Red Lager
86            English Strong Ale
1              German Doppelbock
Name: style, dtype: object

In [11]:
# Spot-check the recommender: styles most similar to 'American Wild Ale'.
get_recommendations('American Wild Ale')

58        Finnish Sahti
99       Belgian Lambic
101    Flanders Red Ale
85      English Old Ale
34       Belgian Saison
Name: style, dtype: object

In [12]:
# Spot-check the recommender: styles most similar to 'Smoke Beer'.
get_recommendations('Smoke Beer')

18                German Rauchbier
48    European Export / Dortmunder
3                   German Maibock
4                German Weizenbock
1                German Doppelbock
Name: style, dtype: object

## This section combines a simple weighted rating model with the style recommendation model

In [13]:
# %load get_data.py
def get_data(path="/home/grimoire/Projects/BeerRatings/rating_update.csv"):
    """Load the beer-ratings CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the ratings CSV. Defaults to the original hard-coded
        project path, so existing ``get_data()`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    # Imported locally so the function stays self-contained when it is
    # pulled into a notebook with ``%load get_data.py``.
    import pandas as pd

    # read_csv already returns a DataFrame; no pd.DataFrame() wrapper is needed.
    return pd.read_csv(path)


In [14]:
# Re-read the ratings CSV; this rebinds `ratings`, which was already loaded
# earlier in the notebook.
ratings = get_data()

In [15]:
# Start the style/rating table from an independent copy of the raw ratings.
beer_style_map = ratings.copy()

In [16]:
# Inspect the available columns before deciding which ones to drop.
beer_style_map.keys()

Index(['brewery_id', 'brewery_name', 'review_time', 'review_overall',
       'review_aroma', 'review_appearance', 'review_profilename', 'beer_style',
       'review_palate', 'review_taste', 'beer_name', 'beer_abv',
       'beer_beerid'],
      dtype='object')

In [17]:
# Keep only the review scores plus beer_style / beer_name; the brewery,
# timestamp, reviewer, ABV and beer-id columns are discarded here.
beer_style_map = beer_style_map.drop(
    columns=[
        'brewery_id',
        'brewery_name',
        'review_time',
        'review_profilename',
        'beer_abv',
        'beer_beerid',
    ]
)

In [18]:
# Per-beer summary table; read_csv already returns a DataFrame, so the extra
# pd.DataFrame() wrapper was redundant.
comparison_beer = pd.read_csv('/home/grimoire/Projects/BeerRatings/comparison_beer.csv')

In [19]:
# Inspect the columns of the per-beer summary table.
comparison_beer.keys()

Index(['beer_name', 'review_overall', 'review_taste', 'review_appearance',
       'review_palate', 'review_aroma', 'total_reviews'],
      dtype='object')

In [20]:
# Only beer_name and total_reviews are kept. The review_* columns share their
# names with columns of beer_style_map and are dropped before the merge.
comparison_beer = comparison_beer.drop(
    columns=[
        'review_overall',
        'review_taste',
        'review_appearance',
        'review_palate',
        'review_aroma',
    ]
)

In [21]:
# Attach total_reviews to every review row. The default inner join drops
# reviews whose beer_name has no entry in comparison_beer.
# NOTE(review): the join key is the name alone - confirm beer_name is unique in
# comparison_beer, otherwise review rows get duplicated.
beer_style_map = pd.merge(beer_style_map, comparison_beer, on='beer_name')

In [22]:
# Confirm the merge result: the score columns plus the new total_reviews column.
beer_style_map.keys()

Index(['review_overall', 'review_aroma', 'review_appearance', 'beer_style',
       'review_palate', 'review_taste', 'beer_name', 'total_reviews'],
      dtype='object')

In [23]:
# Show floats with three decimals from here on (notebook-wide display option).
pd.set_option('display.float_format', '{:.3f}'.format)

# Distribution of per-beer review counts; used to choose the minimum-review cutoff.
beer_style_map['total_reviews'].describe()

count   1586613.000
mean        562.866
std         657.579
min           0.000
25%          80.000
50%         313.000
75%         801.000
max        3289.000
Name: total_reviews, dtype: float64

In [24]:
# Unweighted mean of the five review scores of each review row.
beer_style_map['combined_average'] = (
    beer_style_map['review_appearance'].to_numpy()
    + beer_style_map['review_aroma'].to_numpy()
    + beer_style_map['review_palate'].to_numpy()
    + beer_style_map['review_taste'].to_numpy()
    + beer_style_map['review_overall'].to_numpy()
) / 5

In [25]:
# C: mean combined score over all merged review rows (the value the weighted
# rating is pulled towards for beers with few reviews).
C = beer_style_map['combined_average'].mean()
# m: minimum number of reviews a beer needs to be kept in q_ratings.
m = 250
# Filter first, then copy: this avoids duplicating the whole frame only to
# discard rows, and still gives q_ratings its own data for later column adds.
q_ratings = beer_style_map.loc[beer_style_map['total_reviews'] >= m].copy()

In [26]:
def weighted_rating(df, m=m, C=C):
    """Return the weighted rating computed from ``df`` (IMDB-style formula).

    The result is ``v/(v+m) * R + m/(m+v) * C`` where

    * ``v`` is ``df['total_reviews']``   - number of reviews,
    * ``R`` is ``df['combined_average']`` - average rating,
    * ``m`` is the minimum number of reviews required to be listed,
    * ``C`` is the mean rating across the whole dataframe.
    """
    votes = df['total_reviews']
    rating = df['combined_average']

    # Share of the result taken from the item's own rating vs. the global mean.
    own_share = votes / (votes + m)
    prior_share = m / (m + votes)

    return (own_share * rating) + (prior_share * C)

In [27]:
# weighted_rating only does column arithmetic on 'total_reviews' and
# 'combined_average', so one call on the whole frame gives the same values as
# the row-by-row apply(axis=1) without a Python-level loop over every review.
q_ratings['weighted_average'] = weighted_rating(q_ratings)

In [28]:
# Collapse the per-review rows to one row per (beer_style, beer_name) by
# averaging every remaining column. NOTE: this rebinds q_ratings to a frame
# indexed by (beer_style, beer_name) instead of one row per review.
q_ratings = q_ratings.groupby(['beer_style', 'beer_name']).mean()

In [29]:
# beer_style is the first index level, so .loc[style, :] selects that style's
# beers; show up to four of them, highest weighted_average first.
q_ratings.loc['Japanese Rice Lager', :]['weighted_average'].sort_values(ascending=False).head(4)

beer_name
Sapporo Premium Beer   3.235
Asahi Super Dry        3.140
Pilsner                2.478
Name: weighted_average, dtype: float64

In [30]:
def style_recommendation(title):
    """Return the names of the styles most similar to ``title`` as a list.

    Parameters
    ----------
    title : str
        Style name passed straight to ``get_recommendations``.

    Returns
    -------
    list
        The first three entries of ``get_recommendations(title)``, in order.
        If fewer than three styles come back, the shorter list is returned
        (the previous ``range(3)`` loop raised IndexError in that case).
    """
    return get_recommendations(title).head(3).tolist()

In [31]:
# Example: the three styles closest to 'American Adjunct Lager'.
style_recommendation('American Adjunct Lager')

['American Light Lager', 'Japanese Rice Lager', 'European Strong Lager']

In [32]:
def beer_recommendation(df, style_list) :
    """Return the best-rated beers across the given styles.

    For every style in ``style_list`` the (up to) three rows of ``df`` with
    the highest ``'weighted_average'`` are taken; ``df`` is expected to have
    the style as its first index level. The per-style picks are concatenated
    and returned as one Series sorted from highest to lowest score.
    """
    top_per_style = [
        df.loc[style_name, :]['weighted_average']
          .sort_values(ascending=False)
          .head(3)
        for style_name in style_list
    ]
    combined = pd.concat(top_per_style)
    return combined.sort_values(ascending=False)

In [33]:
# End-to-end example: top beers from the styles closest to 'American Adjunct Lager'.
beer_recommendation(q_ratings, style_recommendation('American Adjunct Lager'))

beer_name
Sam Adams Light             3.519
Elephant Beer               3.298
Baltika #9 Extra (Strong)   3.247
Sapporo Premium Beer        3.235
Asahi Super Dry             3.140
Amstel Light                2.928
Bud Light Lime              2.896
Pilsner                     2.478
Name: weighted_average, dtype: float64

In [34]:
# End-to-end example: top beers from the styles closest to 'American Porter'.
beer_recommendation(q_ratings, style_recommendation('American Porter'))

beer_name
Mocha Porter                              4.553
Fuller's London Porter                    4.106
Samuel Smith's, The Famous Taddy Porter   4.073
Palo Santo Marron                         4.065
Christmas Ale                             4.052
Bender                                    4.046
Coffee Bender                             4.032
Coffee Stout                              4.020
Oatmeal Stout                             3.986
Name: weighted_average, dtype: float64

In [35]:
# End-to-end example: top beers from the styles closest to 'American Stout'.
beer_recommendation(q_ratings, style_recommendation('American Stout'))

beer_name
Founders KBS (Kentucky Breakfast Stout)   4.409
Coffee Stout                              4.408
The Abyss                                 4.408
Samuel Smith's Oatmeal Stout              4.150
Shakespeare Oatmeal Stout                 4.149
Young's Double Chocolate Stout            4.045
St-Ambroise Oatmeal Stout                 4.022
Mackeson Triple XXX Stout                 3.983
Moloko                                    3.966
Name: weighted_average, dtype: float64