In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import callbacks
import kerastuner as kt
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.metrics
from sklearn.metrics import f1_score, accuracy_score
import shutil
import warnings
warnings.filterwarnings('ignore')

  import kerastuner as kt


In [2]:
df = pd.read_csv('data/beer_reviews.csv')
df = df.drop(['brewery_id', 'review_profilename', 'beer_name', 'beer_beerid'], axis=1)
df['review_time'] = df['review_time'].astype(float)
df['beer_abv'].fillna((df['beer_abv'].mean()), inplace=True)
df = df.dropna()

In [3]:
features = df.drop('beer_style', axis=1)
target = df['beer_style']

categorical_features = ['brewery_name']
label_encoder = LabelEncoder()
for feature in categorical_features:
    features[feature] = label_encoder.fit_transform(features[feature])

scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

label_encoder = LabelEncoder()
encoded_target = label_encoder.fit_transform(target)
one_hot_target = to_categorical(encoded_target)

X_train, X_test, y_train, y_test = train_test_split(scaled_features, one_hot_target, test_size=0.2, random_state=42)

In [4]:
tuner_directory = 'tuner_directory/classification'

def build_model(hp):
    model = keras.Sequential()
    
    model.add(layers.InputLayer(input_shape=(X_train.shape[1],)))

    for i in range(hp.Int('num_layers', 1, 5)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i), 16, 256, 16),
                               activation='relu'))
        model.add(layers.Dropout(hp.Float('dropout_' + str(i), 0.0, 0.5, step=0.1)))  # Add dropout layer
    
    model.add(layers.Dense(len(label_encoder.classes_), activation='softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner = kt.Hyperband(build_model,
                     objective='val_accuracy',
                     max_epochs=30,
                     directory='tuner_directory',
                     project_name='multiclass')

tuner.search(X_train, y_train, epochs=30, validation_data=(X_test, y_test),
             callbacks=[early_stopping], batch_size=512)

best_model = tuner.get_best_models(num_models=1)[0]

loss, accuracy = best_model.evaluate(X_test, y_test)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')

#shutil.rmtree(tuner_directory)

Trial 80 Complete [00h 06m 13s]
val_accuracy: 0.4681362807750702

Best val_accuracy So Far: 0.4681362807750702
Total elapsed time: 01h 55m 21s
INFO:tensorflow:Oracle triggered exit
Test loss: 1.9613968133926392, Test accuracy: 0.4681362807750702


In [5]:
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                288       
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 144)               4752      
                                                                 
 dropout_1 (Dropout)         (None, 144)               0         
                                                                 
 dense_2 (Dense)             (None, 160)               23200     
                                                                 
 dropout_2 (Dropout)         (None, 160)               0         
                                                                 
 dense_3 (Dense)             (None, 256)               4

In [6]:
best_model.save('model_0.47.h5')