In [1]:
!pip install keras-tuner --upgrade -q
!pip install keras_preprocessing

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/128.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/128.9 kB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras_preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: keras_preprocessing
Successfully installed keras_preprocessing-1.1.2


In [2]:
# standard library
import numpy as np
import pandas as pd
import warnings
from sklearn import preprocessing
warnings.simplefilter('ignore')
import random

# neural network library
from tensorflow import keras
from tensorflow.keras import layers
# from tensorflow.keras import preprocessing
from keras_preprocessing.text import Tokenizer
import kerastuner as kt

In [3]:
import pandas as pd

# Load Data
train_file = 'https://raw.githubusercontent.com/BhavanishDhamnaskar/poem_d/main/Poem_classification%20-%20train_data.csv'
val_file = 'https://raw.githubusercontent.com/BhavanishDhamnaskar/poem_d/main/Poem_classification%20-%20test_data.csv'

train = pd.read_csv(train_file)
val = pd.read_csv(val_file, error_bad_lines=False, warn_bad_lines=True)

# Drop NULLs in training data
train.dropna(inplace=True)
train.reset_index(inplace=True, drop=True)

# Stats
print(f"Training records: {train.shape[0]}, Validation records: {val.shape[0]}")


Training records: 837, Validation records: 150


In [4]:
# params
max_features = int(sum([len(txt.split()) for txt in train['Poem']]) /2) # top n words to consider
maxlen = max([len(txt.split()) for txt in train['Poem']])  # first n words to consider

In [5]:
# label encoding
''' affection: 0, death: 1, environment: 2, music: 3 '''
label_encoder = preprocessing.LabelEncoder()
train['Genre_Code']= label_encoder.fit_transform(train['Genre'])
val['Genre_Code']= label_encoder.fit_transform(val['Genre'])

In [6]:
#Pre-processing
tokenizer = Tokenizer(num_words=max_features, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n')

# training data
tokenizer.fit_on_texts(train['Poem'])
x_train = np.array(tokenizer.texts_to_sequences(train['Poem']))
y_train = np.array(train['Genre_Code'])

# validation data
tokenizer.fit_on_texts(val['Poem'])
x_val = np.array(tokenizer.texts_to_sequences(val['Poem']))
y_val = np.array(val['Genre_Code'])

x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

In [7]:
# Input for variable-length sequences of integers
inputs = keras.Input(shape=(None,), dtype="int32")
# Embed each integer in a 128-dimensional vector
x = layers.Embedding(max_features, 128)(inputs)
# Add 2 bidirectional LSTMs
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
x = layers.Bidirectional(layers.LSTM(64))(x)
# Add a classifier
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
# model.summary()

In [8]:
#Hyperparameter tuning the base model
def build_model(hp):
  # Choose an optimal value between 32-512
  hp_units = hp.Int('units', min_value=32, max_value=512, step=32)

  inputs = keras.Input(shape=(None,), dtype="int32")
  x = layers.Embedding(max_features, hp_units)(inputs)
  x = layers.Bidirectional(layers.LSTM(hp_units, return_sequences=True))(x)
  x = layers.Bidirectional(layers.LSTM(hp_units))(x)
  outputs = layers.Dense(1, activation="sigmoid")(x)
  model = keras.Model(inputs, outputs)

  hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
  model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss=keras.losses.SparseCategoricalCrossentropy(),
                metrics=['accuracy'])
  return model

In [9]:
# Instantiate the tuner
tuner = kt.Hyperband(
                      build_model, # the hypermodel
                      objective='val_accuracy', # objective to optimize
                      max_epochs=10,
                      factor=3, # factor which you have seen above
                    )

In [10]:
# start the search and get the best model
stop_early = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
tuner.search(x_train, y_train, epochs=10, validation_data=(x_val, y_val), callbacks=[stop_early])
best_param = tuner.get_best_hyperparameters()[0]

Trial 30 Complete [00h 00m 36s]
val_accuracy: 0.6666666865348816

Best val_accuracy So Far: 0.6666666865348816
Total elapsed time: 00h 14m 03s


In [12]:
print("best parameters are:\n",best_param.values)

best parameters are:
 {'units': 224, 'learning_rate': 0.01, 'tuner/epochs': 2, 'tuner/initial_epoch': 0, 'tuner/bracket': 2, 'tuner/round': 0}


#Build model with optimal parameters

In [14]:
def get_lr(lr):
  if lr == 0.01:
    return 1e-2
  elif lr == 0.001:
    return 1e-3
  elif lr == 0.0001:
    return  1e-4

In [16]:
# params for optimization
unit = best_param.values['units']
epoch = best_param.values['tuner/epochs']
size = 32
lr = get_lr(best_param.values['learning_rate'])

In [17]:
# updating the base model
inputs = keras.Input(shape=(None,), dtype="int32")
x = layers.Embedding(max_features, unit)(inputs)
x = layers.Bidirectional(layers.LSTM(unit, return_sequences=True))(x)
x = layers.Bidirectional(layers.LSTM(unit))(x)
# Add a classifier
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
# model.summary()

In [18]:
# compiling & fitting the model
model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss=keras.losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])
history = model.fit(x_train, y_train, batch_size=size, epochs=epoch, validation_data=(x_val, y_val))

Epoch 1/2
Epoch 2/2


In [19]:
acc = "{:.2f}".format(history.history.get('val_accuracy')[0])
print(f"Model's Optimized Accuracy is: {acc}")

Model's Optimized Accuracy is: 0.67
