In [1]:
import pandas as pd

In [2]:
# %load get_data.py
def get_data(path="/home/grimoire/Projects/BeerRatings/rating_update.csv"):
    """Load the beer ratings CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the ratings CSV. Defaults to the project's
        ``rating_update.csv`` so existing ``get_data()`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    # Local import keeps the function usable on its own, as in get_data.py
    import pandas as pd

    # read_csv already returns a DataFrame; no extra pd.DataFrame(...) wrapper needed
    return pd.read_csv(path)


In [3]:
# read_csv already returns a DataFrame, so it does not need to be wrapped in pd.DataFrame
descriptions = pd.read_csv('/home/grimoire/Projects/BeerRatings/beer_description.csv')
ratings = get_data()

## This section removes styles present in our descriptions data but not ratings

In [4]:
# Styles that were added after the ratings data set was released, so they have
# no entries in the rating data. They are removed from the descriptions so the
# model cannot recommend a style we have no rated beers for.
# The set of bad descriptions can be found in the Description_style_merge_mgmt notebook.

bad_styles = [
    'American Brett',
    'American Brut IPA',
    'American Imperial Porter',
    'American Imperial Red Ale',
    'Belgian Blonde Ale',
    'New England IPA',
    'Robust Porter',
    'Smoke Porter',
]

In [5]:
# Keep only rows whose style is not in bad_styles: one vectorized filter instead of
# one drop() call per style. The surviving rows keep their original index labels.
descriptions = descriptions[~descriptions['style'].isin(bad_styles)]

In [6]:
# Sanity check: each removed style should no longer appear, so every line prints False
for style in bad_styles:
    still_present = style in descriptions['style'].values
    print(still_present)

False
False
False
False
False
False
False
False


In [7]:
# Renumber the rows 0..n-1 after the drops so that index labels equal row positions;
# later cells use these labels to pick rows of the similarity matrix.
descriptions = descriptions.reset_index(drop=True)

### This section builds a system that recommends beer styles similar to a given beer style.
I will compute pairwise similarity scores for all beer styles based on their style descriptions and recommend beer styles based on that similarity score.

In [8]:
descriptions.keys()

Index(['style', 'description', 'abv_low', 'abv_high', 'ibu_low', 'ibu_high'], dtype='object')

In [9]:
# Build Term Frequency-Inverse Document Frequency (TF-IDF) vectors from the
# style descriptions. The resulting matrix has one row per beer style and one
# column per word of the description vocabulary (the words that remain after
# stop-word removal and appear in at least one description).
from sklearn.feature_extraction.text import TfidfVectorizer

# Remove all the English stop words in the descriptions
tfidf = TfidfVectorizer(stop_words='english')

# A missing description (NaN) would make the vectorizer fail, so treat it as empty text
tfidf_matrix = tfidf.fit_transform(descriptions['description'].fillna(''))

In [10]:
tfidf_matrix.shape

(103, 1559)

In [11]:
from sklearn.metrics.pairwise import linear_kernel

# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is already the cosine similarity.
# With a single argument the matrix is compared against itself.
cosine_sim = linear_kernel(tfidf_matrix)

In [12]:
# Map each style name to its row label in `descriptions`.
# De-duplicate on the style *name* (the Series index): drop_duplicates() would compare
# the row labels, which are already unique, and so would never remove a repeated style.
indices = pd.Series(descriptions.index, index=descriptions['style'])
indices = indices[~indices.index.duplicated(keep='first')]

In [13]:
# Function that takes a beer style name as input and outputs the most similar styles
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the beer styles whose descriptions are most similar to `title`.

    Parameters
    ----------
    title : str
        Name of a beer style; must be a key of the module-level `indices` Series.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix, indexed by the values stored in `indices`.
    top_n : int, optional
        Maximum number of similar styles to return (default 10).

    Returns
    -------
    pandas.Series
        Style names from `descriptions['style']`, most similar first.

    Raises
    ------
    KeyError
        If `title` is not present in `indices`.
    """
    # Get the row of the style that matches the title
    idx = indices[title]

    # Pair every other style with its similarity to the requested one.
    # The requested style is excluded by identity rather than by assuming it sorts first,
    # which would go wrong if another style tied with it.
    sim_scores = [(i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx]

    # Sort the styles by similarity, highest first (stable, so ties keep row order)
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Row positions of the top_n most similar styles
    beer_indices = [i for i, _ in sim_scores[:top_n]]

    # Return the names of the most similar styles
    return descriptions['style'].iloc[beer_indices]

In [14]:
descriptions['style'].values

array(['German Bock', 'German Doppelbock', 'German Eisbock',
       'German Maibock', 'German Weizenbock', 'American Brown Ale',
       'English Brown Ale', 'English Dark Mild Ale', 'German Altbier',
       'American Black Ale', 'Belgian Dark Ale', 'Belgian Dubbel',
       'German Roggenbier', 'Scottish Ale', 'Winter Warmer',
       'American Amber / Red Lager', 'European Dark Lager',
       'German Märzen / Oktoberfest', 'German Rauchbier',
       'German Schwarzbier', 'Munich Dunkel Lager', 'Vienna Lager',
       'American Cream Ale', 'Bière de Champagne / Bière Brut', 'Braggot',
       'California Common / Steam Beer', 'American Imperial IPA',
       'American IPA', 'Belgian IPA', 'English India Pale Ale (IPA)',
       'American Amber / Red Ale', 'American Blonde Ale',
       'American Pale Ale (APA)', 'Belgian Pale Ale', 'Belgian Saison',
       'English Bitter', 'English Extra Special / Strong Bitter (ESB)',
       'English Pale Ale', 'English Pale Mild Ale',
       'French Bière 

In [15]:
get_recommendations('American Brown Ale')

67                   Rye Beer
9          American Black Ale
44             American Lager
100        Flanders Oud Bruin
6           English Brown Ale
71      English Oatmeal Stout
53             German Pilsner
54            American Porter
47          Bohemian Pilsener
74     Foreign / Export Stout
Name: style, dtype: object

In [16]:
get_recommendations('British Barleywine')

77             American Barleywine
86              English Strong Ale
5               American Brown Ale
29    English India Pale Ale (IPA)
15      American Amber / Red Lager
1                German Doppelbock
48    European Export / Dortmunder
59            Fruit and Field Beer
69         American Imperial Stout
81         Belgian Strong Dark Ale
Name: style, dtype: object

In [17]:
get_recommendations('American Wild Ale')

101                Flanders Red Ale
34                   Belgian Saison
85                  English Old Ale
29     English India Pale Ale (IPA)
58                    Finnish Sahti
99                   Belgian Lambic
28                      Belgian IPA
97             Belgian Fruit Lambic
91                  Berliner Weisse
81          Belgian Strong Dark Ale
Name: style, dtype: object

In [18]:
get_recommendations('Smoke Beer')

18                German Rauchbier
48    European Export / Dortmunder
13                    Scottish Ale
4                German Weizenbock
3                   German Maibock
53                  German Pilsner
47               Bohemian Pilsener
1                German Doppelbock
21                    Vienna Lager
87          Scotch Ale / Wee Heavy
Name: style, dtype: object

## This section combines a simple weighted-rating model with the style recommendation model

In [19]:
# %load get_data.py
def get_data(path="/home/grimoire/Projects/BeerRatings/rating_update.csv"):
    """Load the beer ratings CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the ratings CSV. Defaults to the project's
        ``rating_update.csv`` so existing ``get_data()`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    # Local import keeps the function usable on its own, as in get_data.py
    import pandas as pd

    # read_csv already returns a DataFrame; no extra pd.DataFrame(...) wrapper needed
    return pd.read_csv(path)


In [20]:
ratings = get_data()

In [21]:
beer_style_map = ratings.copy()

In [22]:
beer_style_map.keys()

Index(['brewery_id', 'brewery_name', 'review_time', 'review_overall',
       'review_aroma', 'review_appearance', 'review_profilename', 'beer_style',
       'review_palate', 'review_taste', 'beer_name', 'beer_abv',
       'beer_beerid'],
      dtype='object')

In [23]:
# Discard the brewery, reviewer, timestamp, ABV and id columns; the style,
# the beer name and the review scores remain.
beer_style_map = beer_style_map.drop(
    columns=[
        'brewery_id',
        'brewery_name',
        'review_time',
        'review_profilename',
        'beer_abv',
        'beer_beerid',
    ]
)

In [24]:
# read_csv already returns a DataFrame, so it does not need to be wrapped in pd.DataFrame
comparison_beer = pd.read_csv('/home/grimoire/Projects/BeerRatings/comparison_beer.csv')

In [25]:
comparison_beer.keys()

Index(['beer_name', 'review_overall', 'review_taste', 'review_appearance',
       'review_palate', 'review_aroma', 'total_reviews'],
      dtype='object')

In [26]:
# Discard the review score columns so that only beer_name and total_reviews remain
comparison_beer = comparison_beer.drop(
    columns=[
        'review_overall',
        'review_taste',
        'review_appearance',
        'review_palate',
        'review_aroma',
    ]
)

In [27]:
# Attach the columns of comparison_beer to every review row, matching on the beer name.
# NOTE(review): the join key is the name alone; presumably beers from different
# breweries that share a name are matched together — confirm this is intended.
beer_style_map = beer_style_map.merge(comparison_beer, on='beer_name')

In [28]:
# C: mean overall rating across every row of beer_style_map
C = beer_style_map['review_overall'].mean()
# m: minimum number of reviews a beer needs in order to be kept.
# NOTE(review): the value is hard-coded and labelled as the 25th percentile; the
# computation is not shown in this notebook — confirm it against the data.
m = 1 # 25th % Quartile
# Filter first and copy afterwards: only the kept rows are copied, and q_ratings is an
# independent frame, so adding a column to it later does not touch beer_style_map.
q_ratings = beer_style_map.loc[beer_style_map['total_reviews'] >= m].copy()

In [29]:
def weighted_rating(df, m=m, C=C):
    """Calculate and return a weighted rating, based on the IMDB formula.

    The rating blends the feature's own rating with the overall mean:

        v / (v + m) * R  +  m / (m + v) * C

    Parameters
    ----------
    df : object indexable by column name
        Must provide 'total_reviews' (v, the number of ratings/votes of the
        feature) and 'review_overall' (R, the rating/votes of the feature).
    m : minimum votes/ratings required to be listed
    C : mean rating/vote across the whole dataframe
    """
    review_count = df['total_reviews']
    rating = df['review_overall']

    # Weight of the feature's own rating and weight of the overall mean
    own_weight = review_count / (review_count + m)
    mean_weight = m / (m + review_count)

    return (own_weight * rating) + (mean_weight * C)

In [30]:
# weighted_rating only does column arithmetic, so it is applied to the whole frame at
# once; this gives the same values as a row-by-row apply(axis=1) without the Python loop.
q_ratings['weighted_overall'] = weighted_rating(q_ratings)

In [31]:
# Average every remaining column per (beer_style, beer_name) pair. The result is indexed
# by a two-level (beer_style, beer_name) index, which later cells slice by style name.
q_ratings = q_ratings.groupby(['beer_style', 'beer_name']).mean()

In [32]:
q_ratings.loc['Japanese Rice Lager', :]['weighted_overall'].sort_values(ascending=False).head(4)

beer_name
Mike Duggan #05 Asian Lager    4.528689
Atari Rice Ale                 4.157790
Heiwa                          4.157790
Samurai Gazebo                 4.119598
Name: weighted_overall, dtype: float64

In [33]:
def style_recommendation(title, n=3):
    """Return the names of the styles most similar to `title` as a plain list.

    Parameters
    ----------
    title : str
        Beer style name, passed straight to `get_recommendations`.
    n : int, optional
        Maximum number of styles to return (default 3). Fewer are returned when
        `get_recommendations` yields fewer than `n` results.

    Returns
    -------
    list
        Style names, most similar first.
    """
    recommend = get_recommendations(title)
    # Positional slice: never raises when fewer than n recommendations are available
    return recommend.iloc[:n].tolist()

In [34]:
style_recommendation('American Adjunct Lager')

['Japanese Rice Lager', 'European Strong Lager', 'American Light Lager']

In [35]:
def beer_recommendation(df, style_list, top_n=4):
    """Return the best-rated beers of each style in `style_list`, merged and sorted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first index level is the beer style and which has a
        'weighted_overall' column.
    style_list : iterable of str
        Styles to pull beers from.
    top_n : int, optional
        Number of beers taken per style (default 4).

    Returns
    -------
    pandas.Series
        'weighted_overall' values of the selected beers, highest first. Empty when
        `style_list` is empty or none of its styles occur in `df`.
    """
    # Styles that have no rows in df are skipped instead of raising KeyError
    available_styles = set(df.index.get_level_values(0))

    beer_temp = []
    for style in style_list:
        if style not in available_styles:
            continue
        style_scores = df.loc[style, :]['weighted_overall']
        beer_temp.append(style_scores.sort_values(ascending=False).head(top_n))

    # pd.concat raises on an empty list, so return an empty result explicitly
    if not beer_temp:
        return pd.Series(name='weighted_overall', dtype='float64')

    return pd.concat(beer_temp).sort_values(ascending=False)

In [36]:
results = beer_recommendation(q_ratings, style_recommendation('American Adjunct Lager'))
results

beer_name
Mike Duggan #05 Asian Lager        4.528689
Layflatter Lager                   4.438527
Ohota Extra Light                  4.407790
Lemon Light                        4.407790
Willimantic Last Delivery Lager    4.407790
Kingfisher Super Strong            4.407790
Bohemian Regent Kvasnicove 16°     4.157790
Harrington's Gold Lager            4.157790
Heiwa                              4.157790
Atari Rice Ale                     4.157790
Samurai Gazebo                     4.119598
Gordon Biersch Imperial Pilsner    4.105194
Name: weighted_overall, dtype: float64

In [38]:
results = beer_recommendation(q_ratings, style_recommendation('American Porter'))
results

beer_name
Espresso Porter               4.892326
Roasted Cocoanut Brown Ale    4.605194
Pisgah Jason's Brown Ale      4.531558
X                             4.516395
Mocha Porter                  4.499476
Black Jack Porter             4.498434
Chocolate Porter              4.497172
Frambozen                     4.490751
Prairie Porter                4.490097
Sandy Paws (2008)             4.463116
Tea Bagged Bender             4.453895
22 Porter                     4.438527
Name: weighted_overall, dtype: float64

In [40]:
results = beer_recommendation(q_ratings, style_recommendation('American Stout'))
results

beer_name
Coffee Stout                                      4.997024
Bourbon Barrel Stout                              4.994222
Vanilla Stout                                     4.915399
Rare D.O.S.                                       4.818105
Alesmith Speedway Stout - Vanilla And Coconut     4.812842
The Inkwell 2005                                  4.726948
Kopi Con Leche Stout                              4.664448
Merlot Stout                                      4.635930
Carnie Fire                                       4.619598
Pinglehead                                        4.578895
Special Ale                                       4.488200
Flambeau Red                                      4.476948
Name: weighted_overall, dtype: float64