In [24]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors

In [25]:
#View Data
data = pd.read_csv("beer_profile_and_ratings.csv")
data.head()

Unnamed: 0,Name,Style,Brewery,Beer Name (Full),Description,ABV,Min IBU,Max IBU,Astringency,Body,...,Fruits,Hoppy,Spices,Malty,review_aroma,review_appearance,review_palate,review_taste,review_overall,number_of_reviews
0,Amber,Altbier,Alaskan Brewing Co.,Alaskan Brewing Co. Alaskan Amber,"Notes:Richly malty and long on the palate, wit...",5.3,25,50,13,32,...,33,57,8,111,3.498994,3.636821,3.556338,3.643863,3.847082,497
1,Double Bag,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Double Bag,"Notes:This malty, full-bodied double alt is al...",7.2,25,50,12,57,...,24,35,12,84,3.798337,3.846154,3.904366,4.024948,4.034304,481
2,Long Trail Ale,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Long Trail Ale,Notes:Long Trail Ale is a full-bodied amber al...,5.0,25,50,14,37,...,10,54,4,62,3.409814,3.667109,3.600796,3.6313,3.830239,377
3,Doppelsticke,Altbier,Uerige Obergärige Hausbrauerei GmbH / Zum Uerige,Uerige Obergärige Hausbrauerei GmbH / Zum Ueri...,Notes:,8.5,25,50,13,55,...,49,40,16,119,4.148098,4.033967,4.150815,4.205163,4.005435,368
4,Sleigh'r Dark Doüble Alt Ale,Altbier,Ninkasi Brewing Company,Ninkasi Brewing Company Sleigh'r Dark Doüble A...,Notes:Called 'Dark Double Alt' on the label.Se...,7.2,25,50,25,51,...,11,51,20,95,3.625,3.973958,3.734375,3.765625,3.817708,96


In [26]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3197 entries, 0 to 3196
Data columns (total 25 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               3197 non-null   object 
 1   Style              3197 non-null   object 
 2   Brewery            3197 non-null   object 
 3   Beer Name (Full)   3197 non-null   object 
 4   Description        3197 non-null   object 
 5   ABV                3197 non-null   float64
 6   Min IBU            3197 non-null   int64  
 7   Max IBU            3197 non-null   int64  
 8   Astringency        3197 non-null   int64  
 9   Body               3197 non-null   int64  
 10  Alcohol            3197 non-null   int64  
 11  Bitter             3197 non-null   int64  
 12  Sweet              3197 non-null   int64  
 13  Sour               3197 non-null   int64  
 14  Salty              3197 non-null   int64  
 15  Fruits             3197 non-null   int64  
 16  Hoppy              3197 

In [27]:
print(data.shape)

(3197, 25)


In [28]:
print(data['Style'].value_counts())

Lager - Adjunct                    45
Lager - European Pale              43
Wheat Beer - Hefeweizen            42
Stout - Irish Dry                  42
Lambic - Fruit                     42
                                   ..
Bière de Champagne / Bière Brut     8
Lager - India Pale Lager (IPL)      6
IPA - New England                   4
Brett Beer                          3
Sour - Gose                         3
Name: Style, Length: 111, dtype: int64


In [29]:
print(data['Brewery'].value_counts())

Boston Beer Company (Samuel Adams)    40
Dogfish Head Brewery                  31
Anheuser-Busch                        30
Three Floyds Brewing Co. & Brewpub    29
Victory Brewing Company               23
                                      ..
Asia Pacific Breweries Ltd.            1
Boon Rawd Brewery Co., Ltd             1
Birra Moretti (Heineken)               1
Stella Artois                          1
Natty Greene's Pub & Brewing Co.       1
Name: Brewery, Length: 934, dtype: int64


In [30]:
# List numeric features (columns) of different types
tasting_profile_cols = ['Astringency', 'Body', 'Alcohol', 'Bitter', 'Sweet', 'Sour', 'Salty', 'Fruits', 'Hoppy', 'Spices', 'Malty']
chem_cols = ['ABV', 'Min IBU', 'Max IBU']

In [31]:
def scale_col_by_row(df, cols):
    scaler = MinMaxScaler()
    # Scale values by row
    scaled_cols = pd.DataFrame(scaler.fit_transform(df[cols].T).T, columns=cols)
    df[cols] = scaled_cols
    return df

def scale_col_by_col(df, cols):
    scaler = MinMaxScaler()
    # Scale values by column
    scaled_cols = pd.DataFrame(scaler.fit_transform(df[cols]), columns=cols)
    df[cols] = scaled_cols
    return df

# Scale values in tasting profile features (across rows)
data = scale_col_by_row(data, tasting_profile_cols)

# Scale values in tasting profile features (across columns)
data = scale_col_by_col(data, tasting_profile_cols)

# Scale values in chemical features (across columns)
data = scale_col_by_col(data, chem_cols)

# Peak at re-scaled data
data.head()


Unnamed: 0,Name,Style,Brewery,Beer Name (Full),Description,ABV,Min IBU,Max IBU,Astringency,Body,...,Fruits,Hoppy,Spices,Malty,review_aroma,review_appearance,review_palate,review_taste,review_overall,number_of_reviews
0,Amber,Altbier,Alaskan Brewing Co.,Alaskan Brewing Co. Alaskan Amber,"Notes:Richly malty and long on the palate, wit...",0.092174,0.384615,0.5,0.117117,0.288288,...,0.297297,0.513514,0.072072,1.0,3.498994,3.636821,3.556338,3.643863,3.847082,497
1,Double Bag,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Double Bag,"Notes:This malty, full-bodied double alt is al...",0.125217,0.384615,0.5,0.142857,0.678571,...,0.285714,0.416667,0.142857,1.0,3.798337,3.846154,3.904366,4.024948,4.034304,481
2,Long Trail Ale,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Long Trail Ale,Notes:Long Trail Ale is a full-bodied amber al...,0.086957,0.384615,0.5,0.225806,0.596774,...,0.16129,0.870968,0.064516,1.0,3.409814,3.667109,3.600796,3.6313,3.830239,377
3,Doppelsticke,Altbier,Uerige Obergärige Hausbrauerei GmbH / Zum Uerige,Uerige Obergärige Hausbrauerei GmbH / Zum Ueri...,Notes:,0.147826,0.384615,0.5,0.101695,0.457627,...,0.40678,0.330508,0.127119,1.0,4.148098,4.033967,4.150815,4.205163,4.005435,368
4,Sleigh'r Dark Doüble Alt Ale,Altbier,Ninkasi Brewing Company,Ninkasi Brewing Company Sleigh'r Dark Doüble A...,Notes:Called 'Dark Double Alt' on the label.Se...,0.125217,0.384615,0.5,0.255319,0.531915,...,0.106383,0.531915,0.202128,1.0,3.625,3.973958,3.734375,3.765625,3.817708,96


In [32]:
# Select data for training (calculating similarity)
df = data.drop(['Name', 'Description', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'review_overall', 'number_of_reviews'],axis=1)

# Use only numeric features
df_num = df.select_dtypes(exclude=['object'])
df_num.head()

Unnamed: 0,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,0.092174,0.384615,0.5,0.117117,0.288288,0.081081,0.423423,0.666667,0.297297,0.0,0.297297,0.513514,0.072072,1.0
1,0.125217,0.384615,0.5,0.142857,0.678571,0.214286,0.392857,0.654762,0.190476,0.0,0.285714,0.416667,0.142857,1.0
2,0.086957,0.384615,0.5,0.225806,0.596774,0.096774,0.677419,0.693548,0.177419,0.0,0.16129,0.870968,0.064516,1.0
3,0.147826,0.384615,0.5,0.101695,0.457627,0.254237,0.389831,0.847458,0.144068,0.0,0.40678,0.330508,0.127119,1.0
4,0.125217,0.384615,0.5,0.255319,0.531915,0.265957,0.457447,0.468085,0.085106,0.0,0.106383,0.531915,0.202128,1.0


In [33]:

# Random beer selector
user_input = df['Beer Name (Full)'].sample().iloc[0]
print("Beer Name: " + user_input)

# Locate numeric features for selected beer
test_data = data[data["Beer Name (Full)"] == user_input]
num_input = df_num.loc[test_data.index].values

# Detect beer style
style_input = test_data['Style'].iloc[0]
print("Beer Style: " + style_input)

def get_neighbors(data, num_input, style_input, same_style=False):
    if same_style==True:
        # Locate beers of same style
        df_target = data[data["Style"] == style_input].reset_index(drop=True)
    else:
        # Locate beers of different styles
        df_target = data[data["Style"] != style_input].reset_index(drop=True)

    df_target_num = df_num.loc[df_target.index]
    # Calculate similarities (n_neighbors=11 for 10 recommendations)
    search = NearestNeighbors(n_neighbors=11, algorithm='ball_tree').fit(df_target_num)
    _ , queried_indices = search.kneighbors(num_input)
    # Top 10 recommendations
    target_rec_df = df_target.loc[queried_indices[0][1:]]
    target_rec_df = target_rec_df.sort_values(by=['review_overall'], ascending=False)
    target_rec_df = target_rec_df[['Name', 'Brewery', 'Style', 'review_overall', 'number_of_reviews']]
    target_rec_df.index = range(1, 11)
    #target_rec_df.drop('review_overall', axis=1, inplace=True)
    return target_rec_df

# List recommended beers with the same style
top_10_same_style_rec = get_neighbors(data, num_input, style_input, same_style=True)
top_10_same_style_rec

Beer Name: Dogfish Head Brewery 120 Minute IPA
Beer Style: IPA - Imperial




Unnamed: 0,Name,Brewery,Style,review_overall,number_of_reviews
1,Pliny The Elder,Russian River Brewing Company,IPA - Imperial,4.590028,2527
2,Double Jack,Firestone Walker Brewing Co.,IPA - Imperial,4.194842,698
3,Hop Stoopid,Lagunitas Brewing Company,IPA - Imperial,4.15974,1540
4,90 Minute IPA,Dogfish Head Brewery,IPA - Imperial,4.145745,3290
5,Dorado,Ballast Point Brewing Company,IPA - Imperial,4.142638,326
6,Hop JuJu,Fat Heads Brewery & Saloon,IPA - Imperial,4.027778,18
7,Unearthly (Imperial India Pale Ale),Southern Tier Brewing Company,IPA - Imperial,4.01975,1038
8,Lake Erie Monster,Great Lakes Brewing Company,IPA - Imperial,3.924897,486
9,The Waldos' Special Ale,Lagunitas Brewing Company,IPA - Imperial,3.7,5
10,120 Minute IPA,Dogfish Head Brewery,IPA - Imperial,3.549857,1745
