In [47]:
# Import Dependencies
import sqlite3
import csv
import pandas as pd
import plotly.express as px
import hvplot.pandas
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score

In [2]:
# Display all columns
pd.set_option('display.max_columns', None)
# Do not display false positive warnings
pd.options.mode.chained_assignment = None  # default='warn'

In [24]:
# Connect to database
connection = sqlite3.connect('SQL/beer.sqlite')

In [25]:
# Read database table to dataframe
df = pd.read_sql_query("SELECT * from combined_beer_data_major_style", connection)

In [26]:
df.head()

Unnamed: 0,index,beer_id,beer_name,beer_style,style_key,brewery,description,abv,ave_rating,min_ibu,max_ibu,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,sum_of_factors,BA_Big_styles
0,0,2813,Golden Faro,Lambic - Faro,118.0,Van Dyck - Mad Jack Brewing,Notes:,4.2,3.88,0.0,10.0,0,0,0,0,0,0,0,0,0,0,0,0,Wild/Sour
1,1,2882,Doesjel,Lambic - Gueuze,120.0,Brouwerij 3 Fonteinen,Notes:,6.0,4.12,0.0,10.0,48,41,4,14,53,160,0,80,26,5,28,459,Wild/Sour
2,2,5393,Anodyne,Wheat Beer - Wheatwine,108.0,Revolver Brewing,Notes:,9.0,3.88,45.0,85.0,0,0,7,0,2,3,0,3,0,1,0,16,Strong Ales
3,3,1,Amber,Altbier,8.0,Alaskan Brewing Co.,"Notes:Richly malty and long on the palate, wit...",5.3,3.65,25.0,50.0,13,32,9,47,74,33,0,33,57,8,111,417,Brown Ales
4,4,2,Double Bag,Altbier,8.0,Long Trail Brewing Co.,"Notes:This malty, full-bodied double alt is al...",7.2,3.9,25.0,50.0,12,57,18,33,55,16,0,24,35,12,84,346,Brown Ales


### Filter Dataframe

In [27]:
# Filter out beers with abv > 13
original_length = len(df)
df = df[df.abv <= 13]
df = df[df.abv >= 3]
new_length = len(df)
print(f'Dropped {original_length - new_length} rows')
print(f'New length: {new_length}')

Dropped 213 rows
New length: 5587


In [28]:
# Remove duplicate beers
original_length = len(df)
df["beer_brewary"] = df.beer_name + df.brewery
df.drop_duplicates(subset=['beer_brewary'], inplace=True)
df.drop(columns = ["beer_brewary"], inplace=True)
new_length = len(df)
print(f'Dropped {original_length - new_length} rows')
print(f'New length: {new_length}')

Dropped 211 rows
New length: 5376


In [29]:
# Filter for entries with over 50 cumulative factors
original_length = len(df)
df = df[df['sum_of_factors'] > 50]
new_length = len(df)
print(f'Dropped {original_length - new_length} rows')
print(f'New length: {new_length}')

Dropped 352 rows
New length: 5024


In [30]:
df.columns

Index(['index', 'beer_id', 'beer_name', 'beer_style', 'style_key', 'brewery',
       'description', 'abv', 'ave_rating', 'min_ibu', 'max_ibu', 'astringency',
       'body', 'alcohol', 'bitter', 'sweet', 'sour', 'salty', 'fruits',
       'hoppy', 'spices', 'malty', 'sum_of_factors', 'BA_Big_styles'],
      dtype='object')

In [31]:
# Drop unnecessary index column
df.drop(columns = ["index"], inplace=True)

### K-Means
First k-means to obtain majoy groupings

In [209]:
# Drop columns unnecessary for analysis
new_df = df.drop(['beer_id', 'beer_name', 'beer_style', 'style_key', 'brewery',
       'description', 'ave_rating', "min_ibu", "max_ibu",
        "sum_of_factors", "BA_Big_styles"], axis=1)

In [210]:
# define standard scaler
scaler = StandardScaler()
# transform data
scaled_df = scaler.fit_transform(new_df)

In [211]:
# Create Elbow Plot
inertia = []
k = list(range(1, 21))

for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(scaled_df)
    inertia.append(km.inertia_)
    
# Define a DataFrame to plot the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [212]:
silhouette_avg = []
for num_clusters in range(2, 20):

    # Produce model
    model = KMeans(n_clusters=num_clusters)
    model.fit(scaled_df)
    cluster_labels = model.labels_

    # Get silhoutte score
    silhouette_avg.append(silhouette_score(scaled_df, cluster_labels))
print(f'Silhoutte average: {silhouette_avg}')

# Number of clusters for subclass will be number of clusters with greatest silhoutte score
max_silhouette_avg = max(silhouette_avg)
max_index = silhouette_avg.index(max_silhouette_avg)
k = max_index + 2
print("\n")
print(f'Best K: {k}; Max Silhoutte: {max_silhouette_avg}')

Silhoutte average: [0.18206123012985367, 0.20136045360157004, 0.18310459998135187, 0.20880750380064572, 0.21884155620609458, 0.22836783385332, 0.19443568406978867, 0.19414151039245123, 0.19421658910782114, 0.18998530209411083, 0.1914369453566042, 0.19204037270546975, 0.16586544923595514, 0.1637081423190597, 0.15818389281925915, 0.16008536419639466, 0.15551155442159098, 0.15344881737171975]


Best K: 7; Max Silhoutte: 0.22836783385332


In [213]:
# Drop columns unnecessary for analysis
minmax_df = df.drop(['beer_id', 'beer_name', 'beer_style', 'style_key', 'brewery',
       'description', 'ave_rating', "min_ibu", "max_ibu",
        "sum_of_factors", "BA_Big_styles"], axis=1)

In [214]:
# define min max scaler
scaler = MinMaxScaler()
# transform data
minmax_scaled_df = scaler.fit_transform(minmax_df)

In [215]:
# Create Elbow Plot
inertia = []
k = list(range(1, 21))

for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(minmax_scaled_df)
    inertia.append(km.inertia_)
    
# Define a DataFrame to plot the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [216]:
silhouette_avg = []
for num_clusters in range(2, 20):

    # Produce model
    model = KMeans(n_clusters=num_clusters)
    model.fit(minmax_scaled_df)
    cluster_labels = model.labels_

    # Get silhoutte score
    silhouette_avg.append(silhouette_score(minmax_scaled_df, cluster_labels))
print(f'Silhoutte average: {silhouette_avg}')

# Number of clusters for subclass will be number of clusters with greatest silhoutte score
max_silhouette_avg = max(silhouette_avg)
max_index = silhouette_avg.index(max_silhouette_avg)
k = max_index + 2
print("\n")
print(f'Best K: {k}; Max Silhoutte: {max_silhouette_avg}')

Silhoutte average: [0.21560618889493077, 0.18622869618263804, 0.20534219328642334, 0.22291567721468009, 0.1825373607969787, 0.19535613687358075, 0.1927836173644396, 0.18726691936142248, 0.18253560714142072, 0.18664639356102333, 0.18337175982902756, 0.18010769625441178, 0.17737833798783126, 0.15927093898945585, 0.1752758606720499, 0.17619675928788117, 0.15612624860616253, 0.1585321024086346]


Best K: 5; Max Silhoutte: 0.22291567721468009


Notes: Ran k-means using min/max and standard scaler; the standard scaler resulted in a slightly higher silhoutte score - will proceed with 7 clusters and scale with the standard scaler

In [217]:
# Initializing model with K = 7
clusters = 10
model = KMeans(n_clusters=clusters)

# Fit model
model.fit(scaled_df)

# Get predictions
predictions = model.predict(scaled_df)

# Add a new class column to the df
new_df["class"] = model.labels_
new_df.head()

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class
1,6.0,48,41,4,14,53,160,0,80,26,5,28,0
3,5.3,13,32,9,47,74,33,0,33,57,8,111,6
4,7.2,12,57,18,33,55,16,0,24,35,12,84,6
5,5.0,14,37,6,42,43,11,0,10,54,4,62,6
6,8.5,13,55,31,47,101,18,1,49,40,16,119,8


In [218]:
# Select columns from original df to add to newdf
combined_df = new_df.join(df[["beer_style", "ave_rating", "BA_Big_styles", 'beer_name', 'brewery']])

In [219]:
# View dataframe grouped by major style
combined_df.groupby(["BA_Big_styles"]).median().sort_values(["class"])

Unnamed: 0_level_0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class,ave_rating
BA_Big_styles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Wild/Sour,6.0,31.0,30.0,9.0,6.0,57.0,133.0,1.0,87.0,12.0,7.0,24.0,0,4.07
Porters,6.5,10.0,88.0,12.0,53.0,65.0,9.0,0.0,11.5,24.0,17.0,119.0,2,3.965
Stouts,6.8,11.0,91.0,12.0,60.5,62.0,9.0,0.0,10.0,24.0,11.0,117.5,2,3.96
Hybrid Beers,5.5,14.0,38.5,7.0,20.5,40.5,19.5,0.0,22.0,28.0,6.5,49.0,4,3.68
Specialty Beers,5.8,11.0,30.0,10.0,15.0,39.0,16.0,0.0,31.0,19.0,34.0,47.0,4,3.71
Wheat Beers,5.3,15.0,35.0,8.0,17.0,30.0,38.0,0.0,57.0,22.0,24.0,60.0,4,3.68
Pale Ales,5.5,19.0,36.0,9.0,39.0,48.0,39.0,1.0,42.0,60.0,9.0,64.0,5,3.75
Pale Lagers,5.2,21.0,28.0,7.0,32.0,29.0,16.0,2.0,17.0,50.0,5.0,57.0,5,3.54
Bock,7.4,12.0,50.0,20.0,27.0,77.0,23.0,0.0,37.0,27.0,11.0,94.0,6,3.82
Brown Ales,5.5,12.0,51.5,9.0,36.5,62.0,13.0,0.0,16.0,34.0,8.0,103.0,6,3.725


In [220]:
# Create DF for barcharts
bar_df = combined_df.groupby(["class"]).mean().sort_values(["class"])
bar_df.reset_index(inplace=True)
bar_df

Unnamed: 0,class,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,ave_rating
0,0,6.332973,35.783784,34.449631,11.813268,14.36855,74.162162,142.272727,1.734644,104.358722,23.452088,12.717445,32.267813,4.08317
1,1,6.631401,12.136187,52.568093,17.571984,25.750973,47.793774,18.256809,1.132296,34.536965,23.182879,98.046693,72.439689,3.760156
2,2,7.32574,13.095672,101.373576,16.904328,75.462415,77.840547,13.621868,0.530752,18.498861,36.08656,18.596811,142.104784,4.003576
3,3,10.66937,9.762963,69.262963,69.374074,34.537037,105.92963,27.996296,0.42963,52.518519,26.444444,31.47037,119.792593,4.073222
4,4,5.754089,10.00213,20.261981,6.2918,14.899894,23.626198,18.021299,0.642173,20.191693,19.774228,9.944622,33.206603,3.502993
5,5,5.199047,28.45,33.609375,7.814063,43.439062,34.939062,31.7375,2.992188,34.126562,68.117188,11.345313,66.629688,3.641438
6,6,5.551998,14.298454,54.502973,10.147444,39.318668,60.878716,16.525565,0.668252,20.022592,39.89893,10.976219,101.470868,3.663639
7,7,7.219583,20.326389,44.538194,15.555556,73.390625,55.300347,54.739583,1.032986,72.255208,99.262153,12.331597,65.904514,3.980503
8,8,8.396932,12.93864,43.20398,29.988391,23.363184,85.87728,39.854063,0.60199,59.686567,28.810945,23.451078,68.623549,3.781575
9,9,4.919231,28.961538,33.153846,9.75,14.173077,44.057692,112.346154,34.576923,80.326923,29.788462,11.480769,47.846154,3.914423


In [221]:
fig = px.bar(bar_df, x="class", y=["abv", "astringency", "body", "alcohol", "bitter", "sweet", "sour",
                                  "salty", "fruits", "hoppy", "spices", "malty"])
fig.show()

### K-Means on Classes

In [222]:
# Create copy of DF to work on

kmeans_df = combined_df.copy()

In [223]:
# Drop columns unnecessary for analysis
df_for_iteration = kmeans_df.drop(['beer_name', 'beer_style', 'brewery',
       'ave_rating', "BA_Big_styles"], axis=1)

In [224]:
df_for_iteration.head()

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class
1,6.0,48,41,4,14,53,160,0,80,26,5,28,0
3,5.3,13,32,9,47,74,33,0,33,57,8,111,6
4,7.2,12,57,18,33,55,16,0,24,35,12,84,6
5,5.0,14,37,6,42,43,11,0,10,54,4,62,6
6,8.5,13,55,31,47,101,18,1,49,40,16,119,8


In [225]:
try:

    del df_with_subclasses
    
except:
    print("dataframe empty already")
    
created_new_df = False

for k_class in range(0,clusters):
    
    # Filter for class and drop class column
    temp_df = df_for_iteration[df_for_iteration["class"] == k_class]
    temp_df.drop(columns = ["class"], inplace = True)
    
    # Subclass will be in columns after the first iteration
    if "subclass" in temp_df.columns:
        temp_df.drop(columns = ["subclass"], inplace = True)
    
    # define standard scaler
    scaler = StandardScaler()
    # transform data
    scaled_df = scaler.fit_transform(temp_df)
    
    silhouette_avg = []
    for num_clusters in range(2,16):
        
        # Produce model
        model = KMeans(n_clusters=num_clusters)
        model.fit(scaled_df)
        cluster_labels = model.labels_
        
        # Get silhoutte score
        silhouette_avg.append(silhouette_score(scaled_df, cluster_labels))
    
#     print(f'Silhoutte score for class {k_class}: {silhouette_avg}')
    
    # Number of clusters for subclass will be number of clusters with greatest silhoutte score
    max_silhouette_avg = max(silhouette_avg)
    max_index = silhouette_avg.index(max_silhouette_avg)
    k = max_index + 2
    
    # Run model with highest silhoutte score
    model = KMeans(n_clusters=k)
    model.fit(scaled_df)
    cluster_labels = model.labels_
    
    temp_df["class"] = k_class
    temp_df["subclass"] = cluster_labels
    
    print(f'Class: {k_class}; Number of Subclasses: {k}; Max Silhoutte: {max_silhouette_avg}')
        
    if created_new_df == False:
        df_with_subclasses = temp_df.copy()
        created_new_df = True
        
    else:
        df_with_subclasses = df_with_subclasses.append(temp_df)

print("Finished")

dataframe empty already
Class: 0; Number of Subclasses: 2; Max Silhoutte: 0.21507971797500494
Class: 1; Number of Subclasses: 3; Max Silhoutte: 0.2051430191651554
Class: 2; Number of Subclasses: 2; Max Silhoutte: 0.16611055228085994
Class: 3; Number of Subclasses: 2; Max Silhoutte: 0.16124147505731182
Class: 4; Number of Subclasses: 3; Max Silhoutte: 0.21039304246306165
Class: 5; Number of Subclasses: 2; Max Silhoutte: 0.21535910963959828
Class: 6; Number of Subclasses: 2; Max Silhoutte: 0.15930657945197874
Class: 7; Number of Subclasses: 6; Max Silhoutte: 0.17289364027429832
Class: 8; Number of Subclasses: 2; Max Silhoutte: 0.14441984392202178
Class: 9; Number of Subclasses: 2; Max Silhoutte: 0.4548375565619257
Finished


In [226]:
# Number of beers per class
df_with_subclasses['class'].value_counts()

4    939
6    841
5    640
8    603
7    576
2    439
0    407
3    270
1    257
9     52
Name: class, dtype: int64

In [227]:
# Number of beers per subclass
df_with_subclasses["class_subclass"] = df_with_subclasses["class"].astype(str) + df_with_subclasses["subclass"].astype(str)
df_with_subclasses['class_subclass'].value_counts()

60    520
50    465
42    376
40    361
80    357
61    321
01    264
21    256
81    246
41    202
70    186
20    183
51    175
11    151
00    143
30    142
31    128
72     96
73     89
71     85
10     84
75     69
74     51
90     48
12     22
91      4
Name: class_subclass, dtype: int64

In [228]:
# View resulting dataframe with subclasses
df_with_subclasses

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class,subclass,class_subclass
1,6.0,48,41,4,14,53,160,0,80,26,5,28,0,1,01
257,9.0,37,27,10,39,86,119,2,111,47,8,22,0,0,00
258,10.0,47,27,10,7,87,157,0,96,9,5,17,0,1,01
261,6.7,58,39,10,28,105,84,1,129,35,8,40,0,0,00
263,12.0,43,55,37,27,149,93,4,140,67,49,61,0,0,00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4457,4.5,59,23,9,7,58,146,25,141,14,10,48,9,0,90
4458,4.8,38,34,24,10,43,145,34,127,26,54,25,9,0,90
4459,4.2,43,34,10,20,31,159,40,123,10,9,37,9,0,90
4460,8.3,47,33,25,6,53,148,29,125,21,1,57,9,0,90


In [229]:
# Combine results with relevant data
results_df = df_with_subclasses.join(df[["beer_style", "ave_rating", "BA_Big_styles", 'beer_name', 'brewery']])

In [230]:
# Make a lower case beer name column, easier for searching
results_df["beer_name_lower"] = results_df.beer_name.apply(lambda x: x.lower())

In [231]:
results_df.head()

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class,subclass,class_subclass,beer_style,ave_rating,BA_Big_styles,beer_name,brewery,beer_name_lower
1,6.0,48,41,4,14,53,160,0,80,26,5,28,0,1,1,Lambic - Gueuze,4.12,Wild/Sour,Doesjel,Brouwerij 3 Fonteinen,doesjel
257,9.0,37,27,10,39,86,119,2,111,47,8,22,0,0,0,Bière de Champagne / Bière Brut,4.3,Hybrid Beers,Nelson Sauvignon (Nelson Sauvin Brut),Mikkeller ApS,nelson sauvignon (nelson sauvin brut)
258,10.0,47,27,10,7,87,157,0,96,9,5,17,0,1,1,Bière de Champagne / Bière Brut,4.51,Hybrid Beers,Blanc De Blancs,Side Project Brewing,blanc de blancs
261,6.7,58,39,10,28,105,84,1,129,35,8,40,0,0,0,Bière de Champagne / Bière Brut,3.94,Hybrid Beers,Two Lights,Allagash Brewing Company,two lights
263,12.0,43,55,37,27,149,93,4,140,67,49,61,0,0,0,Bière de Champagne / Bière Brut,3.88,Hybrid Beers,Jubilee,Victory Brewing Company - Downingtown,jubilee


### App Test

In [241]:
# Beer input
user_input = input("Input a beer: ")
input_beer = user_input.lower()
# input_beer = "Devil's Milk"
input_beer = input_beer.lower() # can remove when switching to manual input

number_to_recommend = 10

if input_beer not in results_df["beer_name_lower"].to_list():
    print("Hmm, that one isn't in the table, only good beers are in this database.")
    
else:
    print("Woo, that beer is in the table, good job!")
    
    duplicates = results_df[results_df.beer_name_lower == input_beer]
    if len(duplicates) > 1:
        duplicate_breweries = duplicates.brewery.values.tolist()
        print(f'There are multiple "{user_input}" beers; what brewery is it from?')
        for i in duplicate_breweries:
            print(i)
        input_brewery = input("Copy/Type the brewery here: ")
        
        while input_brewery not in duplicate_breweries:
            print("\n")
            print("Please input a correct brewery")
            print(f'There are multiple "{user_input}" beers; what brewery is it from?')
            for i in duplicate_breweries:
                print(i)
            input_brewery = input("Copy/Type the brewery here: ")
        
        # Get the input beer class and subclass
        beer_class = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["class"].item()
        
        beer_class = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["subclass"].item()
        print(f'Beer Class: {beer_class}')
        print(f'Beer Subclass: {beer_subclass}')
        
    else:
    
        # Get the input beer class and subclass
        beer_class = results_df.loc[results_df["beer_name_lower"] == input_beer]["class"].item()
        beer_subclass = results_df.loc[results_df["beer_name_lower"] == input_beer]["subclass"].item()
        input_brewery = results_df.loc[results_df["beer_name_lower"] == input_beer]["brewery"].item()
        print(f'Beer Class: {beer_class}')
        print(f'Beer Subclass: {beer_subclass}')
    
    print('\n')
    
    # Get dataframe of beers in same class and subclass
    similar_df = results_df[(results_df["class"] == beer_class) & (results_df["subclass"] == beer_subclass)]
    similar_df.sort_values("ave_rating", inplace=True, ascending=False)
    recommended_beers = similar_df.beer_name.to_list()
    recommended_brewery = similar_df.brewery.to_list()
    
    # Option 1: Return list of similar beers ranked by are rating
    
    if len(recommended_beers) <= number_to_recommend:
        print("Recommended Beers with similar taste profiles:")
        for x in range(0, number_to_recommend):
            print(f' {recommended_beers[x]} by {recommended_brewery[x]}')
     
    
    
    # Option 2: Calcualte differences from input beer from similar beers, return those with smallest differences
    else: 
        
        in_abv = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["abv"].item()
        in_astringency = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["astringency"].item()
        in_body = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["body"].item()
        in_bitter = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["bitter"].item()
        in_sweet = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["sweet"].item()
        in_sour = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["sour"].item()
        in_fruits = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["fruits"].item()
        in_hoppy = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["hoppy"].item()
        in_spices = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["spices"].item()
        in_malty = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["malty"].item()
        in_salty = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["salty"].item()
        in_alcohol = results_df.loc[
            (results_df["beer_name_lower"] == input_beer) & 
            (results_df["brewery"] == input_brewery)]["alcohol"].item()


        def calc_differences(abv, astringency, body, bitter, sweet, sour, fruits, hoppy, spices, malty, salty, alcohol):
                value = (abv - in_abv) + (astringency - in_astringency) + (body - in_body) + (bitter - in_bitter) + (sweet - in_sweet) + (sour - in_sour) + (fruits - in_fruits) + (hoppy - in_hoppy) + (spices - in_spices) + (malty - in_malty)+ (salty - in_salty) + (alcohol - in_alcohol)
                value = abs(value)
                return value


        if len(similar_df) > 10:
            similar_df["difference"] = similar_df.apply(lambda row: calc_differences(row['abv'],
                        row['astringency'], row['body'], row['bitter'],
                        row['sweet'], row['sour'], row['fruits'], row['hoppy'],
                        row['spices'], row['malty'], row['salty'], row['alcohol']), axis = 1)
            similar_df.sort_values("difference", inplace=True, ascending=True)
            recommended_beers = similar_df.beer_name.to_list()
            recommended_brewery = similar_df.brewery.to_list()
            recommended_difference = similar_df.difference.to_list()

            print("Recommended Beers with similar taste profiles:\n")
            for x in range(0, number_to_recommend + 1): # starting at index 1 bc index 0 should be the input beer
                if recommended_beers[x].lower() != input_beer.lower():
                    print(f'{recommended_beers[x]} by {recommended_brewery[x]}; difference: {recommended_difference[x]}')

Input a beer: Hoppy feet
Woo, that beer is in the table, good job!
Beer Class: 7
Beer Subclass: 0


Recommended Beers with similar taste profiles:

Centennial IPA by Founders Brewing Company; difference: 0.20000000000000284
Harvest Ale by Southern Tier Brewing Company; difference: 0.29999999999999716
Commodore Perry by Great Lakes Brewing Co.; difference: 0.7000000000000028
Lagunitas Sucks by Lagunitas Brewing Company; difference: 1.1499999999999986
8-Bit Pale Ale by Tallgrass Brewing Co. Tap House; difference: 1.7999999999999972
Anti-Hero IPA by Revolution Brewing; difference: 2.299999999999997
Hop JuJu by Fat Head's Brewery & Saloon; difference: 3.0
Sublimely Self-Righteous Black IPA by Stone Brewing; difference: 3.299999999999997
My Antonia by Dogfish Head Craft Brewery; difference: 3.5
Celebration Fresh Hop IPA by Sierra Nevada Brewing Co.; difference: 6.199999999999999


In [242]:
results_df[results_df.beer_name == "Celebration Fresh Hop IPA"]

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class,subclass,class_subclass,beer_style,ave_rating,BA_Big_styles,beer_name,brewery,beer_name_lower
1458,6.8,19,38,15,66,28,24,0,25,92,5,47,7,0,70,IPA - American,4.17,India Pale Ales,Celebration Fresh Hop IPA,Sierra Nevada Brewing Co.,celebration fresh hop ipa


In [181]:
results_df[results_df.beer_name == "Green Chili Beer"]

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class,subclass,class_subclass,beer_style,ave_rating,BA_Big_styles,beer_name,brewery,beer_name_lower
997,5.25,8,16,10,4,4,4,0,3,3,63,7,1,1,11,Chile Beer,3.29,Specialty Beers,Green Chili Beer,Flat Branch Pub & Brewery,green chili beer


In [182]:
results_df[results_df.beer_name == "Outdoor"]

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class,subclass,class_subclass,beer_style,ave_rating,BA_Big_styles,beer_name,brewery,beer_name_lower
2351,5.3,12,5,0,30,9,7,1,10,38,2,7,1,1,11,Lager - India Pale Lager (IPL),3.96,Pale Lagers,Outdoor,Fieldwork Brewing Co.,outdoor


In [243]:
results_df[results_df["class"] == 3]

Unnamed: 0,abv,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,class,subclass,class_subclass,beer_style,ave_rating,BA_Big_styles,beer_name,brewery,beer_name_lower
61,11.00,14,58,36,52,117,29,0,54,67,13,122,3,0,30,Barleywine - American,4.14,Strong Ales,Old Numbskull,AleSmith Brewing Company,old numbskull
62,9.30,17,74,41,66,143,38,1,78,72,11,130,3,0,30,Barleywine - American,4.15,Strong Ales,Flying Mouflan,Tröegs Brewing Company,flying mouflan
63,12.20,7,60,74,48,82,20,0,53,50,20,92,3,0,30,Barleywine - American,4.18,Strong Ales,Bigfoot - Barrel-Aged,Sierra Nevada Brewing Co.,bigfoot - barrel-aged
67,11.00,9,57,67,26,113,20,0,36,23,25,132,3,1,31,Barleywine - American,4.31,Strong Ales,Old Numbskull - Bourbon Barrel-Aged,AleSmith Brewing Company,old numbskull - bourbon barrel-aged
68,12.80,6,39,56,25,101,23,5,45,48,21,134,3,0,30,Barleywine - American,4.29,Strong Ales,Helldorado,Firestone Walker Brewing Co.,helldorado
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5373,13.00,7,41,57,22,54,49,0,58,15,56,77,3,0,30,Wheat Beer - Wheatwine,4.25,Strong Ales,The Revival - Barrel-Aged,Pipeworks Brewing Company,the revival - barrel-aged
5382,11.75,10,43,34,58,142,36,0,39,61,8,118,3,0,30,Wheat Beer - Wheatwine,3.75,Strong Ales,Vernal,Element Brewing Company,vernal
5388,10.00,7,53,52,22,81,30,0,46,23,22,93,3,0,30,Wheat Beer - Wheatwine,3.46,Strong Ales,Manly Men Beer Club El Hefe,Atlantic Brewing Company / Mainely Meat BBQ,manly men beer club el hefe
5529,9.40,7,59,118,36,73,12,0,27,47,45,140,3,1,31,Winter Warmer,3.64,Dark Ales,Barrel Aged Brrrbon (Brothers' Reserve Series),Widmer Brothers Brewing Company,barrel aged brrrbon (brothers' reserve series)


In [245]:
set(results_df.beer_style.to_list())

{'Altbier',
 'Barleywine - American',
 'Barleywine - English',
 'Bitter - English',
 'Bitter - English Extra Special / Strong Bitter (ESB)',
 'Bière de Champagne / Bière Brut',
 'Blonde Ale - American',
 'Blonde Ale - Belgian',
 'Bock - Doppelbock',
 'Bock - Eisbock',
 'Bock - Maibock',
 'Bock - Traditional',
 'Bock - Weizenbock',
 'Braggot',
 'Brett Beer',
 'Brown Ale - American',
 'Brown Ale - Belgian Dark',
 'Brown Ale - English',
 'California Common / Steam Beer',
 'Chile Beer',
 'Cream Ale',
 'Dubbel',
 'Farmhouse Ale - Bière de Garde',
 'Farmhouse Ale - Sahti',
 'Farmhouse Ale - Saison',
 'Fruit and Field Beer',
 'Gruit / Ancient Herbed Ale',
 'Happoshu',
 'Herb and Spice Beer',
 'IPA - American',
 'IPA - Belgian',
 'IPA - Black / Cascadian Dark Ale',
 'IPA - Brut',
 'IPA - English',
 'IPA - Imperial',
 'IPA - New England',
 'Kvass',
 'Kölsch',
 'Lager - Adjunct',
 'Lager - American',
 'Lager - American Amber / Red',
 'Lager - European / Dortmunder Export',
 'Lager - European Dar