In [None]:
# loading the preprocess package from the drive...
import ipynb.fs.full.preprocess_eda as preprocess

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\agarw\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\agarw\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\agarw\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


The length of the training data:  7500
The length of the testing data:  4500
The length of the validation data:  3000


In [None]:
# importing the required packages...
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential

In [None]:
# importing keras tuner for hyperparameter tuning
import keras_tuner

In [None]:
# creating class objects for datasets 
train_obj = preprocess.pre_process(preprocess.train_df)
val_obj = preprocess.pre_process(preprocess.val_df)
test_obj = preprocess.pre_process(preprocess.test_df)

In [None]:
# preprocessing the data...
# training
x_train = train_obj.preprocess()
x_train = train_obj.lemmatise()
# validation
x_val = val_obj.preprocess()
x_val = val_obj.lemmatise()
# testing
x_test = test_obj.preprocess()
x_test = test_obj.lemmatise()

In [None]:
# encoding the tragets into one-hot
y_train = train_obj.encode_class()
y_val = val_obj.encode_class()
y_test = test_obj.encode_class()

In [None]:
#setting number of classes/tragets
num_class = y_train.shape[1]
# setting vocabulary size 
vocab_size = 50000
# setting the oputput sequence length for textvector layer
text_seq_len = 45

In [None]:
# creating a keras text vector layer 
textvector_layer = keras.layers.TextVectorization(max_tokens=vocab_size, 
                                                  ngrams=(1, 2, 3),
                                                  output_mode="int", 
                                                  output_sequence_length=text_seq_len)
# learning the vocabulary in the dataset from training data
textvector_layer.adapt(x_train)

In [None]:
# displaying the learned vocabulary
textvector_layer.get_vocabulary()

['',
 '[UNK]',
 'need',
 'please',
 'card',
 'tell',
 'know',
 'get',
 'credit',
 'account',
 'want',
 'would',
 'car',
 'list',
 'like',
 'change',
 'time',
 'go',
 'bank',
 'new',
 'many',
 'bill',
 'would like',
 'make',
 'reservation',
 'much',
 'day',
 'take',
 'credit card',
 'find',
 'name',
 'help',
 'long',
 'next',
 'visa',
 'oil',
 'set',
 'let',
 'use',
 'check',
 'pay',
 'flight',
 'date',
 'phone',
 'song',
 'shopping',
 'tire',
 'need know',
 'vacation',
 'number',
 'good',
 'last',
 'call',
 'one',
 'meeting',
 'add',
 'shopping list',
 'going',
 'order',
 'current',
 'march',
 'give',
 'put',
 'limit',
 'gas',
 'today',
 'travel',
 'transaction',
 'request',
 'insurance',
 'could',
 'score',
 'playlist',
 'calendar',
 'credit score',
 'tax',
 'say',
 'report',
 'point',
 'reminder',
 'right',
 'thing',
 'way',
 'schedule',
 'location',
 'credit limit',
 'ai',
 'rate',
 'kind',
 'bank account',
 'dollar',
 'let know',
 'pm',
 'lost',
 'setting',
 'see',
 'chase',
 'look

In [10]:
class models():
    def __init__(self, num_class, vocab_size, embedding_seq_length, 
                 textvector_layer=textvector_layer):
        # self.hp = hp
        self.num_class = num_class 
        self.vocab_size = vocab_size
        self.embedding_seq_length = embedding_seq_length
        self.textvector_layer = textvector_layer

    def base_layer(self):
        model = Sequential()
        model.add(self.textvector_layer)
        model.add(keras.layers.Embedding(self.vocab_size+2,
                                         self.embedding_seq_length,
                                         trainable="True"))
        return model
    
    def top_layer(self, model):
        model.add(keras.layers.Flatten())
        model.add(keras.layers.Dense(self.num_class, activation="softmax"))
    
    def single_lstm(self, lstm_units, rate, activation):
        model = self.base_layer()
        model.add(keras.layers.LSTM(lstm_units, 
                       dropout=rate,
                       recurrent_dropout=rate,
                       activation=activation))
        self.top_layer(model=model)

        return model
    
    def stacked_lstm(self, lstm_units1, lstm_units2, activation, 
                     rate1=0.2, rate2=0.2):
        model = self.base_layer()

        model.add(keras.layers.LSTM(lstm_units1,
                                    activation=activation,
                                    dropout=rate1,
                                    recurrent_dropout=rate1,
                                    return_sequences=True))

        model.add(keras.layers.LSTM(lstm_units2,
                                    activation=activation,
                                    dropout=rate2,
                                    recurrent_dropout=rate2))
        self.top_layer(model=model)

    def convo_lstm(self, convo_units, kernal_size, convo_rate, convo_activaton,
                   lstm_units, lstm_rate, lstm_activation):
        model = self.base_layer()

        model.add(keras.layers.Conv1D(units=convo_units, 
                                      kernal_size=kernal_size,
                                      activation=convo_activaton,
                                      padding="same"))
        model.dropout(convo_rate)
        model.add(keras.layers.LSTM(lstm_units,
                                    dropout=lstm_rate,
                                    recurrent_dropout=lstm_rate,
                                    activation=lstm_activation))
        self.top_layer(model=model)
        
        return model

In [11]:
# class hyper_model(keras_tuner.HyperModel):
#         def build(self, hp):
#             lstm = models(num_class=num_class, 
#                            vocab_size=vocab_size, 
#                            embedding_seq_length=hp.Int("embedding_seq_length", 
#                                                        min_value=10, 
#                                                        max_value=150, 
#                                                        step=10))
            
#             activations = ["relu", "tanh", "sigmoid",
#                            "elu", "exponential", "selu"]
            
                                
#             which_model = hp.Choice("which_model",
#                                     values=["stacked_lstm", 
#                                             "convo_lstm",
#                                             "single_lstm"])
#             if(which_model == "single_lstm"):
#                   model = lstm.single_lstm(lstm_units=hp.Int("lstm_units", 
#                                                              min_value=25, 
#                                                              max_value=200, 
#                                                              step=5), 
#                                            rate=hp.Float("rate", 
#                                                          min_value=0.2,
#                                                          max_value=0.7, 
#                                                          step=0.025),
#                                            activation=hp.Choice("activation", 
#                                                                 values=activations))

#             elif(which_model == "stacked_lstm"):
#                   model = lstm.stacked_lstm(lstm_units1=hp.Int("lstm_units1", 
#                                                              min_value=25, 
#                                                              max_value=200, 
#                                                              step=5),
#                                              lstm_units2=hp.Int("lstm_units2", 
#                                                              min_value=25, 
#                                                              max_value=200, 
#                                                              step=5),
#                                              rate1=hp.Float("rate1", 
#                                                          min_value=0.2,
#                                                          max_value=0.7, 
#                                                          step=0.025),
#                                              rate2=hp.Float("rate2", 
#                                                          min_value=0.2,
#                                                          max_value=0.7, 
#                                                          step=0.025),
#                                            activation=hp.Choice("activation", 
#                                                                 values=activations))
                  
#             elif(which_model == "convo_lstm"):
#                   convo_units = hp.Int("convo_uints", min_value=32, 
#                                        max_value=512, step=32)
#                   kernal_size = hp.Int("kernal_size", min_value=2, 
#                                        max_value=6, step=1)
#                   convo_rate = hp.Float("convo_rate", min_value=0.2, 
#                                         max_value=0.7, step=0.025)
#                   convo_activaton = hp.Choice("convo_activation", values=activations)
#                   lstm_units = hp.Int("lstm_units", min_value=25, 
#                                       max_value=200, step=5)
#                   lstm_rate = hp.Float("lstm_rate", min_value=0.2, 
#                                         max_value=0.7, step=0.025)
#                   lstm_activation = hp.Choice("lstm_activation", values=activations)

#                   model = lstm.convo_lstm(convo_units=convo_units, kernal_size=kernal_size, convo_rate=convo_rate,
#                                           convo_activaton=convo_activaton, lstm_units=lstm_units, lstm_rate=lstm_rate, 
#                                           lstm_activation=lstm_activation)

#             lr = hp.Float("learning_rate", min_value=0.001, 
#                           max_value=0.02, sampling="log")
            
#             optimizer_dict = {
#                   "SGD" : keras.optimizers.SGD(lr),
#                   "Adam" : keras.optimizers.Adam(lr),
#                   "Nadam" : keras.optimizers.Nadam(lr),
#                   "Adamax" : keras.optimizers.Adamax(lr),
#                   "RMSprop" : keras.optimizers.RMSprop(lr)
#             }

#             optimizer = hp.Choice("optimizer", 
#                                   values=["SGD", "Adam", "Nadam", 
#                                           "Adamax", "RMSprop"])

#             model.compile(optimizer=optimizer_dict[optimizer], 
#                           loss=keras.losses.CategoricalCrossentropy(),
#                           metrics=[keras.metrics.Accuracy(), 
#                                    keras.metrics.F1Score(),
#                                    keras.metrics.Precision()])
            
#             return model
        
#         def fit(self, hp, model, *args, **kwargs):
#               return model.fit(*args, 
#                                shuffle = hp.Boolean("shuffle"),
#                                **kwargs)

In [23]:
class lstm_hypermodel(keras_tuner.HyperModel):
        def build(self, hp):
            lstm = models(num_class=num_class,
                          vocab_size=vocab_size, 
                          embedding_seq_length=hp.Int("embedding_seq_length", 
                                                      min_value=10, 
                                                      max_value=150, 
                                                      step=10))
            
            activations = ["tanh", "sigmoid", "elu", "selu"]
        
            model = lstm.single_lstm(lstm_units=hp.Int("lstm_units", min_value=25, 
                                                       max_value=200, step=5), 
                                     rate=hp.Float("rate", min_value=0.2, 
                                                   max_value=0.7, step=0.025),
                                     activation=hp.Choice("activation", 
                                                          values=activations))

            lr = hp.Float("learning_rate", min_value=0.001, 
                          max_value=0.02, sampling="log") 
            
            optimizer_dict = {
                  "SGD" : keras.optimizers.SGD(lr),
                  "Adam" : keras.optimizers.Adam(lr),
                  "Nadam" : keras.optimizers.Nadam(lr),
                  "Adamax" : keras.optimizers.Adamax(lr),
                  "RMSprop" : keras.optimizers.RMSprop(lr)
            }

            optimizer = hp.Choice("optimizer", 
                                  values=["Adam", "SGD", "Nadam", 
                                          "Adamax", "RMSprop"])

            model.compile(optimizer=optimizer_dict[optimizer], 
                          loss=keras.losses.CategoricalCrossentropy(),
                          metrics=[keras.metrics.Accuracy(), 
                                   keras.metrics.F1Score(),
                                   keras.metrics.Precision()])
            
            return model
        
        def fit(self, hp, model, *args, **kwargs):
              return model.fit(*args, 
                               shuffle = hp.Boolean("shuffle", 
                                                    default=True),
                               **kwargs)

In [None]:
class stacked_lstm_hypermodel(keras_tuner.HyperModel):
        def build(self, hp):
            lstm = models(num_class=num_class, 
                           vocab_size=vocab_size, 
                           embedding_seq_length=hp.Int("embedding_seq_length", 
                                                       min_value=10, 
                                                       max_value=150, 
                                                       step=10))
            
            activations = ["tanh", "sigmoid", "elu", "selu"]


            model = lstm.stacked_lstm(lstm_units1=hp.Int("lstm_units1", min_value=25, max_value=200, step=5),
                                      lstm_units2=hp.Int("lstm_units2", min_value=25, max_value=200, step=5),
                                      rate1=hp.Float("rate1", min_value=0.2, max_value=0.7, step=0.025),
                                      rate2=hp.Float("rate2", min_value=0.2, max_value=0.7, step=0.025), 
                                      activation=hp.Choice("activation", values=activations))

            lr = hp.Float("learning_rate", min_value=0.001, 
                          max_value=0.02, sampling="log")
            
            optimizer_dict = {
                  "SGD" : keras.optimizers.SGD(lr),
                  "Adam" : keras.optimizers.Adam(lr),
                  "Nadam" : keras.optimizers.Nadam(lr),
                  "Adamax" : keras.optimizers.Adamax(lr),
                  "RMSprop" : keras.optimizers.RMSprop(lr)
            }

            optimizer = hp.Choice("optimizer", 
                                  values=["Adam", "SGD", "Nadam", 
                                          "Adamax", "RMSprop"])

            model.compile(optimizer=optimizer_dict[optimizer], 
                          loss=keras.losses.CategoricalCrossentropy(),
                          metrics=[keras.metrics.Accuracy(), 
                                   keras.metrics.F1Score(),
                                   keras.metrics.Precision()])
            
            return model
        
        def fit(self, hp, model, *args, **kwargs):
              return model.fit(*args, 
                               shuffle = hp.Boolean("shuffle", 
                                                    default=True),
                               **kwargs)

In [None]:
class convo_lstm_hypermodel(keras_tuner.HyperModel):
        def build(self, hp):
            lstm = models(num_class=num_class, 
                          vocab_size=vocab_size, 
                          embedding_seq_length=hp.Int("embedding_seq_length", 
                                                      min_value=10, max_value=150, 
                                                      step=10))
                      
            convo_units = hp.Int("convo_uints", min_value=32, 
                                max_value=512, step=32)
            kernal_size = hp.Int("kernal_size", min_value=2, 
                                max_value=6, step=1)
            convo_rate = hp.Float("convo_rate", min_value=0.2, 
                                max_value=0.7, step=0.025)
            convo_activaton = hp.Choice("convo_activation", 
                                        values=["relu", "tanh", 
                                                "sigmoid", "elu", 
                                                "exponential", "selu"])
            
            lstm_units = hp.Int("lstm_units", min_value=25, 
                                max_value=200, step=5)
            lstm_rate = hp.Float("lstm_rate", min_value=0.2, 
                                max_value=0.7, step=0.025)
            lstm_activation = hp.Choice("lstm_activation", 
                                        values=["tanh", "sigmoid", 
                                                "elu", "selu"])

            model = lstm.convo_lstm(convo_units=convo_units, 
                                    kernal_size=kernal_size, 
                                    convo_rate=convo_rate,
                                    convo_activaton=convo_activaton, 
                                    lstm_units=lstm_units, 
                                    lstm_rate=lstm_rate, 
                                    lstm_activation=lstm_activation)

            lr = hp.Float("learning_rate", min_value=0.001, 
                          max_value=0.02, sampling="log")
            
            optimizer_dict = {
                  "SGD" : keras.optimizers.SGD(lr),
                  "Adam" : keras.optimizers.Adam(lr),
                  "Nadam" : keras.optimizers.Nadam(lr),
                  "Adamax" : keras.optimizers.Adamax(lr),
                  "RMSprop" : keras.optimizers.RMSprop(lr)
            }

            optimizer = hp.Choice("optimizer", 
                                  values=["Adam", "SGD", "Nadam", 
                                          "Adamax", "RMSprop"])

            model.compile(optimizer=optimizer_dict[optimizer], 
                          loss=keras.losses.CategoricalCrossentropy(),
                          metrics=[keras.metrics.Accuracy(), 
                                   keras.metrics.F1Score(),
                                   keras.metrics.Precision()])
            
            return model
        
        def fit(self, hp, model, *args, **kwargs):
              return model.fit(*args, 
                               shuffle = hp.Boolean("shuffle", default=True),
                               **kwargs)

In [15]:
hp = keras_tuner.HyperParameters()

In [20]:
single_lstm_tuner = keras_tuner.BayesianOptimization(lstm_hypermodel(),
                                                     objective="val_accuracy",
                                                     max_trials=15,
                                                     executions_per_trial=4,
                                                     directory="lstm", 
                                                     project_name="single_lstm_model",
                                                     overwrite=True)

In [21]:
single_lstm_tuner.search_space_summary()

Search space summary
Default search space size: 6
embedding_seq_length (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 150, 'step': 10, 'sampling': 'linear'}
lstm_units (Int)
{'default': None, 'conditions': [], 'min_value': 25, 'max_value': 200, 'step': 5, 'sampling': 'linear'}
rate (Float)
{'default': 0.2, 'conditions': [], 'min_value': 0.2, 'max_value': 0.7, 'step': 0.025, 'sampling': 'linear'}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh', 'sigmoid', 'elu', 'exponential', 'selu'], 'ordered': False}
learning_rate (Float)
{'default': 0.001, 'conditions': [], 'min_value': 0.001, 'max_value': 0.02, 'step': None, 'sampling': 'log'}
optimizer (Choice)
{'default': 'Adam', 'conditions': [], 'values': ['Adam', 'SGD', 'Nadam', 'Adamax', 'RMSprop'], 'ordered': False}


In [22]:
single_lstm_tuner.search(x_train.iloc[:3000], y_train[:3000, :], epochs=30, validation_data=(x_val.iloc[:800], y_val[:800, ]))

Trial 4 Complete [00h 14m 00s]
val_accuracy: 0.0

Best val_accuracy So Far: 0.0
Total elapsed time: 00h 46m 35s

Search: Running Trial #5

Value             |Best Value So Far |Hyperparameter
110               |120               |embedding_seq_length
140               |170               |lstm_units
0.475             |0.625             |rate
exponential       |exponential       |activation
0.0056901         |0.015908          |learning_rate
Nadam             |Adam              |optimizer
False             |True              |shuffle

Epoch 1/30
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 151ms/step - accuracy: 0.0000e+00 - f1_score: 0.0000e+00 - loss: nan - precision: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_f1_score: 0.0000e+00 - val_loss: nan - val_precision: 0.0000e+00
Epoch 2/30
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 128ms/step - accuracy: 0.0000e+00 - f1_score: 0.0000e+00 - loss: nan - precision: 0.0000e+00 - val_accuracy: 0.0000e+0

KeyboardInterrupt: 

In [None]:
def fine_tune(model_name, trials=15, pre_trials=4, 
              epochs=30, size=3000):
    if model_name == "single_lstm":
        print("[INFO] Hyperparameter Tuning for Single LSTM...")
        hypermodel = lstm_hypermodel()
        directory = "lstm"
        project_name="single_lstm_model"
    
    elif model_name == "stacked_lstm":
        print("[INFO] Hyperparameter Tuning for Stacked LSTM...")
        hypermodel = stacked_lstm_hypermodel()
        directory = "lstm"
        project_name = "stacked_lstm_model"

    elif model_name == "convo_lstm":
        print("[INFO] Hyperparameter Tuning for Convo-LSTM...")
        hypermodel = convo_lstm_hypermodel()
        directory = "lstm"
        project_name = "convo_lstm_model"

    tuner = keras_tuner.BayesianOptimization(hypermodel= hypermodel,
                                             objective="val_accuracy",
                                             max_trials=trials,
                                             executions_per_trial=pre_trials,
                                             directory=directory, 
                                             project_name=project_name,
                                             overwrite=True)
    print("\t[INFO] Hyperparameter Search Space\n", tuner.search_space_summary())

    tuner.search(x_train.iloc[:size], y_train[:size, :], 
                 epochs=epochs, 
                 validation_data=(x_val.iloc[:int(size/3)], y_val[:int(size/3), ]))
    
    print("\t[INFO] Result Summary: ", tuner.results_summary())

    return tuner
