In [1]:
# Dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

In [3]:
# Load the beer profile and ratings CSV into a DataFrame
beer_df = pd.read_csv(
    "./beer_recommender/Resources/updated_beer_profile_and_ratings.csv"
)
# Echo the loaded frame as this cell's output
beer_df

Unnamed: 0,Name,Style,Brewery,Beer Name (Full),Description,ABV,Min IBU,Max IBU,Astringency,Body,...,Fruits,Hoppy,Spices,Malty,review_aroma,review_appearance,review_palate,review_taste,review_overall,number_of_reviews
0,Amber,Altbier,Alaskan Brewing Co.,Alaskan Brewing Co. Alaskan Amber,"Notes:Richly malty and long on the palate, wit...",5.3,25,50,13,32,...,33,57,8,111,3.498994,3.636821,3.556338,3.643863,3.847082,497
1,Double Bag,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Double Bag,"Notes:This malty, full-bodied double alt is al...",7.2,25,50,12,57,...,24,35,12,84,3.798337,3.846154,3.904366,4.024948,4.034304,481
2,Long Trail Ale,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Long Trail Ale,Notes:Long Trail Ale is a full-bodied amber al...,5.0,25,50,14,37,...,10,54,4,62,3.409814,3.667109,3.600796,3.631300,3.830239,377
3,Doppelsticke,Altbier,Uerige Obergärige Hausbrauerei GmbH / Zum Uerige,Uerige Obergärige Hausbrauerei GmbH / Zum Ueri...,Notes:,8.5,25,50,13,55,...,49,40,16,119,4.148098,4.033967,4.150815,4.205163,4.005435,368
4,Sleigh'r Dark Doüble Alt Ale,Altbier,Ninkasi Brewing Company,Ninkasi Brewing Company Sleigh'r Dark Doüble A...,Notes:Called 'Dark Double Alt' on the label.Se...,7.2,25,50,25,51,...,11,51,20,95,3.625000,3.973958,3.734375,3.765625,3.817708,96
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3192,Winter Shredder,Winter Warmer,Cisco Brewers Inc.,Cisco Brewers Inc. Winter Shredder,Notes:,8.8,35,50,15,37,...,23,45,67,74,4.125000,3.875000,3.875000,3.750000,3.731089,4
3193,The First Snow Ale,Winter Warmer,RJ Rockers Brewing Company,RJ Rockers Brewing Company First Snow Ale,Notes:This hearty American pale ale contains a...,6.0,35,50,15,31,...,54,14,140,58,3.950000,3.750000,3.760000,3.790000,3.730000,50
3194,Red Nose Winter Ale,Winter Warmer,Natty Greene's Pub & Brewing Co.,Natty Greene's Pub & Brewing Co. Red Nose Wint...,Notes:,6.8,35,50,8,44,...,26,21,96,77,3.576923,3.711538,3.596154,3.673077,3.711538,26
3195,Fish Tale Winterfish,Winter Warmer,Fish Brewing Company / Fishbowl Brewpub,Fish Brewing Company / Fishbowl Brewpub Fish T...,Notes:,7.5,35,50,11,36,...,81,110,18,73,3.902299,3.885057,3.862069,3.902299,3.879310,87


In [4]:
# Count the missing values in each column of beer_df
beer_df.isna().sum()

Name                 0
Style                0
Brewery              0
Beer Name (Full)     0
Description          0
ABV                  0
Min IBU              0
Max IBU              0
Astringency          0
Body                 0
Alcohol              0
Bitter               0
Sweet                0
Sour                 0
Salty                0
Fruits               0
Hoppy                0
Spices               0
Malty                0
review_aroma         0
review_appearance    0
review_palate        0
review_taste         0
review_overall       0
number_of_reviews    0
dtype: int64

In [5]:
# Show the dtype pandas assigned to each column of beer_df
beer_df.dtypes

Name                  object
Style                 object
Brewery               object
Beer Name (Full)      object
Description           object
ABV                  float64
Min IBU                int64
Max IBU                int64
Astringency            int64
Body                   int64
Alcohol                int64
Bitter                 int64
Sweet                  int64
Sour                   int64
Salty                  int64
Fruits                 int64
Hoppy                  int64
Spices                 int64
Malty                  int64
review_aroma         float64
review_appearance    float64
review_palate        float64
review_taste         float64
review_overall       float64
number_of_reviews      int64
dtype: object

In [6]:
# Count the distinct values in each object-dtype (categorical/text) column
(
    beer_df
    .select_dtypes(include=object)
    .nunique()
)

Name                3066
Style                 43
Brewery              934
Beer Name (Full)    3197
Description         1841
dtype: int64

In [9]:
# Count the rows for each distinct Style value; the result is indexed by style
style_counts = beer_df["Style"].value_counts()
# Display the per-style counts
style_counts

Lager                              567
Stout                              254
Wheat Beer                         206
Porter                             164
IPA                                163
Bock                               155
Strong Ale                         148
Lambic                             100
Pale Ale                            98
Brown Ale                           94
Red Ale                             90
Pilsner                             86
Bitter                              74
Farmhouse Ale                       72
Blonde Ale                          64
Barleywine                          64
Sour                                57
Mild Ale                            47
Dubbel                              41
Winter Warmer                       40
Altbier                             39
Tripel                              37
Rye Beer                            37
Fruit and Field Beer                37
Scotch Ale / Wee Heavy              35
Quadrupel (Quad)         

In [11]:
# Styles whose row count is below this cutoff are pooled into one "Other" bin.
STYLE_COUNT_CUTOFF = 23
replace_styles = list(style_counts[style_counts < STYLE_COUNT_CUTOFF].index)

# Replace in DataFrame: one vectorized assignment over the matching rows,
# instead of a full-column replace() call per rare style.
beer_df.loc[beer_df['Style'].isin(replace_styles), 'Style'] = "Other"

# Check to make sure binning was successful
beer_df['Style'].value_counts()

Lager                     567
Stout                     254
Wheat Beer                206
Porter                    164
IPA                       163
Bock                      155
Strong Ale                148
Lambic                    100
Pale Ale                   98
Brown Ale                  94
Red Ale                    90
Pilsner                    86
Other                      85
Bitter                     74
Farmhouse Ale              72
Barleywine                 64
Blonde Ale                 64
Sour                       57
Mild Ale                   47
Dubbel                     41
Winter Warmer              40
Altbier                    39
Rye Beer                   37
Tripel                     37
Fruit and Field Beer       37
Scotch Ale / Wee Heavy     35
Old Ale                    34
Quadrupel (Quad)           34
Scottish Ale               34
Low Alcohol Beer           32
Kölsch                     32
Pumpkin Beer               30
Smoked Beer                28
Chile Beer