In [1]:
# Machine Learning toolkit
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,OneHotEncoder
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint

# Python SQL toolkit and Object Relational Mapper
import sqlite3

In [2]:
con = sqlite3.connect("./beer_data.sqlite")
c = con.cursor()

In [3]:
beers_df = pd.read_sql_query("SELECT * FROM taste_profiles", con)
beer_stats = pd.read_sql_query("SELECT * FROM beer_stats", con).drop('Full_Beer_Name', axis=1, inplace=True)
reviews_df = pd.read_sql_query("SELECT * FROM reviews", con)

In [4]:
# Drop beer and brewery names (for simple test model)
beers_df.drop(['Full_Beer_Name', 'Brewery'], axis=1, inplace=True)
beers_df.head()

Unnamed: 0,Style,ABV,Min_IBU,Max_IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,Altbier,5.3,25,50,13,32,9,47,74,33,0,33,57,8,111
1,Altbier,7.2,25,50,12,57,18,33,55,16,0,24,35,12,84
2,Altbier,5.0,25,50,14,37,6,42,43,11,0,10,54,4,62
3,Altbier,8.5,25,50,13,55,31,47,101,18,1,49,40,16,119
4,Altbier,7.2,25,50,25,51,26,44,45,9,1,11,51,20,95


In [5]:
scaler = MinMaxScaler()

def scale_col_by_row(df, cols):
    # Scale values by row
    scaled_cols = pd.DataFrame(scaler.fit_transform(df[cols].T).T, columns=cols)
    df[cols] = scaled_cols
    return df

def scale_col_by_col(df, cols):
    # Scale values by column
    scaled_cols = pd.DataFrame(scaler.fit_transform(df[cols]), columns=cols)
    df[cols] = scaled_cols
    return df

# Scale values in tasting profile features (across rows)
beers_df = scale_col_by_row(beers_df, beers_df.columns[4:15])

# Scale values in tasting profile features (across columns)
beers_df = scale_col_by_col(beers_df, beers_df.columns[4:15])

# Scale values in chemical features (across columns)
beers_df = scale_col_by_col(beers_df, beers_df.columns[1:4])

# Peak at re-scaled data
beers_df.head()

Unnamed: 0,Style,ABV,Min_IBU,Max_IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,Fruits,Hoppy,Spices,Malty
0,Altbier,0.092174,0.384615,0.5,0.117117,0.288288,0.081081,0.423423,0.666667,0.297297,0.0,0.297297,0.513514,0.072072,1.0
1,Altbier,0.125217,0.384615,0.5,0.142857,0.678571,0.214286,0.392857,0.654762,0.190476,0.0,0.285714,0.416667,0.142857,1.0
2,Altbier,0.086957,0.384615,0.5,0.225806,0.596774,0.096774,0.677419,0.693548,0.177419,0.0,0.16129,0.870968,0.064516,1.0
3,Altbier,0.147826,0.384615,0.5,0.101695,0.457627,0.254237,0.389831,0.847458,0.144068,0.0,0.40678,0.330508,0.127119,1.0
4,Altbier,0.125217,0.384615,0.5,0.255319,0.531915,0.265957,0.457447,0.468085,0.085106,0.0,0.106383,0.531915,0.202128,1.0


In [6]:
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
encode_df = pd.DataFrame(enc.fit_transform(beers_df['Style'].values.reshape(-1,1)))

# Rename encoded columns
encode_df.columns = enc.get_feature_names(['Style'])
encode_df.head()



Unnamed: 0,Style_Altbier,Style_Barleywine,Style_Bitter,Style_Blonde Ale,Style_Bock,Style_Brown Ale,Style_Chile Beer,Style_Cream Ale,Style_Dubbel,Style_Farmhouse Ale,...,Style_Scotch Ale / Wee Heavy,Style_Scottish Ale,Style_Smoked Beer,Style_Sour,Style_Stout,Style_Strong Ale,Style_Tripel,Style_Wheat Beer,Style_Wild Ale,Style_Winter Warmer
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Merge the two DataFrames together and drop the Style column
encoded_styles_df = beers_df.merge(encode_df,left_index=True,right_index=True).drop("Style",1)
encoded_styles_df

  


Unnamed: 0,ABV,Min_IBU,Max_IBU,Astringency,Body,Alcohol,Bitter,Sweet,Sour,Salty,...,Style_Scotch Ale / Wee Heavy,Style_Scottish Ale,Style_Smoked Beer,Style_Sour,Style_Stout,Style_Strong Ale,Style_Tripel,Style_Wheat Beer,Style_Wild Ale,Style_Winter Warmer
0,0.092174,0.384615,0.5,0.117117,0.288288,0.081081,0.423423,0.666667,0.297297,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.125217,0.384615,0.5,0.142857,0.678571,0.214286,0.392857,0.654762,0.190476,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.086957,0.384615,0.5,0.225806,0.596774,0.096774,0.677419,0.693548,0.177419,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.147826,0.384615,0.5,0.101695,0.457627,0.254237,0.389831,0.847458,0.144068,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.125217,0.384615,0.5,0.255319,0.531915,0.265957,0.457447,0.468085,0.085106,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3192,0.153043,0.538462,0.5,0.202703,0.500000,0.324324,0.472973,0.621622,0.256757,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3193,0.104348,0.538462,0.5,0.107143,0.221429,0.164286,0.114286,0.385714,0.307143,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3194,0.118261,0.538462,0.5,0.083333,0.458333,0.250000,0.197917,0.541667,0.218750,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3195,0.130435,0.538462,0.5,0.100000,0.327273,0.454545,0.636364,0.654545,0.536364,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [8]:
# Split our preprocessed data into our features and target arrays
y = encoded_styles_df[encoded_styles_df.columns[14:]].values
X = encoded_styles_df[encoded_styles_df.columns[:14]].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=78)

In [9]:
# Define the checkpoint path and filenames
os.makedirs("./ML_Weight_Checkpoints/", exist_ok=True)
checkpoint_path = "./ML_Weight_Checkpoints/seg1_basic_model.h5"

# Create a callback that saves the model's weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch',
    period=10)



In [10]:
# Define the model
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 15
hidden_nodes_layer2 = 12
hidden_nodes_layer3 = 8

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=38, activation="softmax"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 15)                225       
                                                                 
 dense_1 (Dense)             (None, 12)                192       
                                                                 
 dense_2 (Dense)             (None, 8)                 104       
                                                                 
 dense_3 (Dense)             (None, 38)                342       
                                                                 
Total params: 863
Trainable params: 863
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Compile the model
nn.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [12]:
# Train the model
fit_model = nn.fit(X_train,y_train,epochs=200, callbacks=[cp_callback])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 10: saving model to ./ML_Weight_Checkpoints\seg1_basic_model.h5
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 20: saving model to ./ML_Weight_Checkpoints\seg1_basic_model.h5
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 30: saving model to ./ML_Weight_Checkpoints\seg1_basic_model.h5
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 40: saving model to ./ML_Weight_Checkpoints\seg1_basic_model.h5
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 50: saving model to ./ML_Weight_Checkpoints\seg1_basic_model.h5
Epoch 51/

In [13]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

25/25 - 0s - loss: 1.2603 - accuracy: 0.6062 - 279ms/epoch - 11ms/step
Loss: 1.260319471359253, Accuracy: 0.606249988079071


In [14]:
# Test model with data from a random beer, simulating user's preference data
user_input = beers_df.sample().iloc[0][1:15].values
user_input = user_input.reshape(1,-1)
user_input

array([[0.08695652173913043, 0.7692307692307693, 0.7000000000000001,
        0.10144927536231885, 0.5144927536231884, 0.08695652173913043,
        0.9420289855072463, 0.19565217391304343, 0.3043478260869565, 0.0,
        0.3550724637681159, 0.9999999999999998, 0.07971014492753623,
        0.5507246376811593]], dtype=object)

In [17]:
# Output beer style based on taste profile inputs
index = nn.predict(np.asarray(user_input).astype(np.float64)).argmax()
print(f"Predicted Beer Style: {encode_df.columns[index].split('_', 1)[1]}")

Predicted Beer Style: IPA
