In [1]:
import pandas as pd

In [2]:
# %load get_data.py
def get_data(path="/home/grimoire/Projects/BeerRatings/rating_update.csv"):
    """Load the beer ratings CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the ratings CSV. Defaults to the original project path,
        so existing ``get_data()`` calls behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.
    """
    # Local import so the function still works when pasted in via `%load`
    # without relying on the notebook's own import cell.
    import pandas as pd

    # read_csv already returns a DataFrame; no extra pd.DataFrame(...) wrap needed.
    return pd.read_csv(path)


In [3]:
# Style descriptions (one row per beer style). pd.read_csv already returns a
# DataFrame, so it is used directly instead of being re-wrapped in pd.DataFrame(...).
descriptions = pd.read_csv('/home/grimoire/Projects/BeerRatings/beer_description.csv')

# Per-beer ratings, loaded through the helper defined in the previous cell.
ratings = get_data()

### This section builds a system that recommends beer styles similar to a particular beer style.
I will compute pairwise similarity scores for all beer styles based on their style descriptions, and then recommend the beer styles with the highest similarity scores.

In [4]:
# For a DataFrame, keys() returns the column labels, i.e. the fields stored per style.
descriptions.keys()

Index(['style', 'description', 'abv_low', 'abv_high', 'ibu_low', 'ibu_high'], dtype='object')

In [5]:
# Build Term Frequency-Inverse Document Frequency (TF-IDF) vectors for the style
# descriptions. The resulting matrix has one ROW per beer style (document) and one
# COLUMN per term of the description vocabulary (every term kept by the vectorizer
# that appears in at least one description).
from sklearn.feature_extraction.text import TfidfVectorizer

# Ignore common English stop words so they do not contribute to the vectors.
tfidf = TfidfVectorizer(stop_words='english')

# Treat a missing description as empty text: fit_transform rejects NaN documents.
# fillna returns a new Series, so `descriptions` itself is left untouched.
tfidf_matrix = tfidf.fit_transform(descriptions['description'].fillna(''))

In [6]:
# Shape is (number of description documents, number of vocabulary terms).
tfidf_matrix.shape

(111, 1610)

In [7]:
from sklearn.metrics.pairwise import linear_kernel

# linear_kernel computes the dot product between every pair of rows. The
# TfidfVectorizer above uses its default L2 row normalisation, so these dot
# products are the pairwise cosine similarities -- hence the variable name.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [8]:
# Reverse lookup: beer style name -> row label in `descriptions`.
# Series.drop_duplicates() compares the VALUES (the row labels), not the style
# names in the index, so it never removed a repeated style. De-duplicate on the
# index instead, keeping the first occurrence, so that indices[style] always
# yields a single scalar rather than a Series.
indices = pd.Series(descriptions.index, index=descriptions['style'])
indices = indices[~indices.index.duplicated(keep='first')]

In [9]:
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the beer styles whose descriptions are most similar to `title`.

    Parameters
    ----------
    title : str
        Beer style name; must be one of the labels in `indices`
        (i.e. a value of ``descriptions['style']``).
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix indexed by row position. Defaults to the
        module-level `cosine_sim` as it existed when this function was defined.
    top_n : int, optional
        Number of recommendations to return (default 10, the original behaviour).

    Returns
    -------
    tuple
        ``(styles, sim_scores)`` where `styles` is a Series of the recommended
        style names and `sim_scores` is a list of ``(row, score)`` pairs in
        descending score order.

    Raises
    ------
    KeyError
        If `title` is not a known beer style.
    """
    # Look up the row of the requested style.
    try:
        idx = indices[title]
    except KeyError:
        raise KeyError(f"Unknown beer style: {title!r}") from None

    # A repeated style name makes the lookup return a Series; use its first row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # NOTE: idx is used as a row POSITION below; this assumes `descriptions`
    # keeps its default 0..n-1 index -- TODO confirm if the index is ever changed.

    # Pair every other style's row with its similarity to the requested style.
    # The query row is excluded explicitly rather than by dropping the first
    # sorted entry, which would be wrong when another style ties with it or
    # when the query row has an all-zero vector.
    scored = [(row, score) for row, score in enumerate(cosine_sim[idx]) if row != idx]

    # Most similar styles first.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the top_n best matches.
    sim_scores = scored[:top_n]

    # Row positions of the recommended styles.
    beer_indices = [row for row, _ in sim_scores]

    # Style names together with their (row, score) pairs.
    return (descriptions['style'].iloc[beer_indices], sim_scores)

In [10]:
# List the style names; these are the labels that get_recommendations looks up.
descriptions['style'].values

array(['German Bock', 'German Doppelbock', 'German Eisbock',
       'German Maibock', 'German Weizenbock', 'American Brown Ale',
       'English Brown Ale', 'English Dark Mild Ale', 'German Altbier',
       'American Black Ale', 'Belgian Dark Ale', 'Belgian Dubbel',
       'German Roggenbier', 'Scottish Ale', 'Winter Warmer',
       'American Amber / Red Lager', 'European Dark Lager',
       'German Märzen / Oktoberfest', 'German Rauchbier',
       'German Schwarzbier', 'Munich Dunkel Lager', 'Vienna Lager',
       'American Cream Ale', 'Bière de Champagne / Bière Brut', 'Braggot',
       'California Common / Steam Beer', 'American Brut IPA',
       'American Imperial IPA', 'American IPA', 'Belgian IPA',
       'English India Pale Ale (IPA)', 'New England IPA',
       'American Amber / Red Ale', 'American Blonde Ale',
       'American Pale Ale (APA)', 'Belgian Blonde Ale', 'Belgian Pale Ale',
       'Belgian Saison', 'English Bitter',
       'English Extra Special / Strong Bitter (ESB)

In [11]:
# Example query: styles whose descriptions are most similar to 'American Brown Ale'.
# The result is a tuple of (style names, list of (row, similarity score) pairs).
get_recommendations('American Brown Ale')

(73                      Rye Beer
 9             American Black Ale
 108           Flanders Oud Bruin
 47                American Lager
 61                 Robust Porter
 6              English Brown Ale
 77         English Oatmeal Stout
 56                German Pilsner
 84     American Imperial Red Ale
 35            Belgian Blonde Ale
 Name: style, dtype: object,
 [(73, 0.31344174811212283),
  (9, 0.28589101263717653),
  (108, 0.24756912829268524),
  (47, 0.24651832928826289),
  (61, 0.2375379569324256),
  (6, 0.2317536245633485),
  (77, 0.22291150816383437),
  (56, 0.21910200787490658),
  (84, 0.21895100918694296),
  (35, 0.21792004382078195)])

In [12]:
# Example query: styles whose descriptions are most similar to 'Winter Warmer'.
get_recommendations('Winter Warmer')

(6             English Brown Ale
 75      American Imperial Stout
 5            American Brown Ale
 47               American Lager
 46    American Imperial Pilsner
 83          American Barleywine
 48         American Light Lager
 9            American Black Ale
 66          Herb and Spice Beer
 12            German Roggenbier
 Name: style, dtype: object,
 [(6, 0.15449931488540311),
  (75, 0.14958886525917078),
  (5, 0.14672253816052311),
  (47, 0.12644064141309522),
  (46, 0.12428693930044885),
  (83, 0.1240413347258571),
  (48, 0.11742830845513819),
  (9, 0.11374584535704388),
  (66, 0.11020752669297315),
  (12, 0.10999718448237814)])

In [13]:
# Example query: styles whose descriptions are most similar to 'American Wild Ale'.
get_recommendations('American Wild Ale')

(102                  American Brett
 109                Flanders Red Ale
 37                   Belgian Saison
 92                  English Old Ale
 30     English India Pale Ale (IPA)
 64                    Finnish Sahti
 107                  Belgian Lambic
 29                      Belgian IPA
 105            Belgian Fruit Lambic
 98                  Berliner Weisse
 Name: style, dtype: object,
 [(102, 0.14185855370204725),
  (109, 0.11963720727231932),
  (37, 0.11411546375443843),
  (92, 0.097585990593295785),
  (30, 0.095756324308394425),
  (64, 0.092310631132708348),
  (107, 0.091496325800926467),
  (29, 0.087294676287866746),
  (105, 0.077400736422219146),
  (98, 0.073541482481532111)])

In [14]:
# Example query: styles whose descriptions are most similar to 'Smoke Beer'.
get_recommendations('Smoke Beer')

(62                    Smoke Porter
 18                German Rauchbier
 51    European Export / Dortmunder
 13                    Scottish Ale
 4                German Weizenbock
 3                   German Maibock
 56                  German Pilsner
 50               Bohemian Pilsener
 1                German Doppelbock
 21                    Vienna Lager
 Name: style, dtype: object,
 [(62, 0.31352965868961907),
  (18, 0.27358363753385462),
  (51, 0.21046624253569035),
  (13, 0.15766139001133622),
  (4, 0.14883633675741054),
  (3, 0.14650939712450159),
  (56, 0.13590036763768243),
  (50, 0.1335841206822127),
  (1, 0.13009386906243459),
  (21, 0.1297855455040878)])