In [1]:
# Imports 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from keras import backend as K
import matplotlib.pyplot as plt 
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense 
from tensorflow.keras.models import clone_model

In [2]:
# Import data
data = pd.read_csv("../../../data/side_information.csv",encoding='unicode_escape')
checkpoint_path = "C:/Users/jack-/Documents/University/Project/src/deep_learning/bert_tests/checkpoints"
feature_names = ['Sentence',
                 'Length in Words', 'Length in Characters', 'F-score', 'I-score',
                 'Lexical Density','FK Reading Ease', 'FOG Scale', 'SMOG Index', 'ARI',
                 'CL Index', 'LW Formula', 'DC Score', 'Readability Consensus',
                 'Spache Formula']

samples = data[feature_names]
labels = data["Formality"]
train_samples, test_samples, train_labels,test_labels = train_test_split(samples, labels, test_size=0.2,random_state=5)

bert_train_samples = np.array(train_samples["Sentence"])
bert_test_samples = np.array(test_samples["Sentence"])
side_train_samples = np.array(train_samples[feature_names[1:]])
side_test_samples = np.array(test_samples[feature_names[1:]])

train_samples = np.array(train_samples)
test_samples = np.array(test_samples)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

In [3]:
# Model setup

# Attention layer
class peel_the_layer(tf.keras.layers.Layer): 

    def __init__(self,units=1):    
        ##Nothing special to be done here
        super(peel_the_layer, self).__init__()
        
    def build(self, input_shape):
        ##Define the shape of the weights and bias in this layer
        ##This is a 1 unit layer. 
        units=1
        ##last index of the input_shape is the number of dimensions of the prev
        ##RNN layer. last but 1 index is the num of timesteps
        self.w=self.add_weight(name="att_weights", shape=(input_shape[-1], units), initializer="normal") #name property is useful for avoiding RuntimeError: Unable to create link.
        self.b=self.add_weight(name="att_bias", shape=(input_shape[-2], units), initializer="zeros")
        super(peel_the_layer,self).build(input_shape)

    def call(self, x):
        ##x is the input tensor..each word that needs to be attended to
        ##Below is the main processing done during training
        ##K is the Keras Backend import
        e = K.tanh(K.dot(x,self.w)+self.b)
        a = K.softmax(e, axis=1)
        output = x*a

        ##return the ouputs. 'a' is the set of attention weights
        ##the second variable is the 'attention adjusted o/p state' or context
        return a, K.sum(output, axis=1)


bert_model_name = 'small_bert/bert_en_uncased_L-4_H-512_A-8' 



tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/4'
tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3'
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)
bert_model = hub.KerasLayer(tfhub_handle_encoder)

In [4]:
normaliser = tf.keras.layers.Normalization()
normaliser.adapt(side_train_samples)

In [5]:
# -- NORMALISED SIDE INFORMATION MODEL --

# Bert model
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
encoder_inputs = preprocessing_layer(text_input)
encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
outputs = encoder(encoder_inputs)
net = outputs['pooled_output']
reshaped = tf.reshape(net,[-1, 768, 1])
lstm = tf.keras.layers.LSTM(512,return_sequences=True)(reshaped)
normalised_bert = tf.keras.Model(text_input, lstm)

# Side information model
side_input = tf.keras.layers.Input(shape=(14))
normalised = normaliser(side_input)
reshaped = tf.reshape(normalised,[-1, 1, 14])
lstm_1 = tf.keras.layers.LSTM(512,return_sequences=True)(reshaped)
lstm_2 = tf.keras.layers.LSTM(512,return_sequences=True)(lstm_1)
normalised_side = tf.keras.Model(side_input, lstm_2)

# Combine models and predict
combined = tf.keras.layers.concatenate([normalised_bert.output, normalised_side.output],axis=1)
a, context = peel_the_layer()(combined)
dense = tf.keras.layers.Dense(1)(context)
normalised_model = tf.keras.Model(inputs=[normalised_bert.input, normalised_side.input], outputs=dense)

In [6]:
# -- PRETRAINED SIDE MODEL -- 

# Side information model
side_input = tf.keras.layers.Input(shape=(14),name="Side Information")
reshaped = tf.reshape(side_input,[-1, 1, 14])
lstm_1 = tf.keras.layers.LSTM(512,return_sequences=True)(reshaped)
lstm_2 = tf.keras.layers.LSTM(512,return_sequences=True)(lstm_1)
pretrained_side = tf.keras.Model(side_input, lstm_2)

# Pre-train side model
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3,min_delta=0.01)
pretrained_side.compile(optimizer=Adam(learning_rate=0.0001),loss='mean_squared_error',metrics=[tf.keras.losses.MeanAbsoluteError(),tf.keras.losses.MeanAbsolutePercentageError()])
pretrained_side.fit(x=side_train_samples,y=train_labels,batch_size=5,epochs=100,verbose=0,callbacks=[callback])


# Bert using pretrained side model
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
side_input = tf.keras.layers.Input(shape=(14),)
preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
encoder_inputs = preprocessing_layer(text_input)
encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
outputs = encoder(encoder_inputs)
net = outputs['pooled_output']
reshaped = tf.reshape(net,[-1, 768, 1])
lstm = tf.keras.layers.LSTM(512,return_sequences=True)(reshaped)
a, context = peel_the_layer()(lstm)

trained_side_input = pretrained_side(side_input)
trained_side_input = tf.reshape(trained_side_input,[-1, 512])

concat = tf.keras.layers.concatenate([context, trained_side_input]) 


dense = tf.keras.layers.Dense(1)(concat)

pretrained_model = tf.keras.Model([text_input,side_input],dense)

In [7]:
# -- AVERAGE LAYER INSTEAD OF CONCATENATION --

# Bert model
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess)
encoder_inputs = preprocessing_layer(text_input)
encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True)
outputs = encoder(encoder_inputs)
net = outputs['pooled_output']
reshaped = tf.reshape(net,[-1, 768, 1])
lstm = tf.keras.layers.LSTM(512,return_sequences=True)(reshaped)
average_bert = tf.keras.Model(text_input, lstm)

# Side information model
side_input = tf.keras.layers.Input(shape=(14))
reshaped = tf.reshape(side_input,[-1, 1, 14])
lstm_1 = tf.keras.layers.LSTM(512,return_sequences=True)(reshaped)
lstm_2 = tf.keras.layers.LSTM(512,return_sequences=True)(lstm_1)
average_side = tf.keras.Model(side_input, lstm_2)

# Combine models and predict
combined = tf.keras.layers.average([average_bert.output, average_side.output])
a, context = peel_the_layer()(combined)
dense = tf.keras.layers.Dense(1)(context)
average_model = tf.keras.Model(inputs=[average_bert.input, average_side.input], outputs=dense)

In [8]:
# -- NORMALISED TRAINING AND TESTING --
normalised_results = pd.DataFrame()
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3,min_delta=0.01)

normalised_model.save_weights('normalised.h5')
normalised_model.compile(optimizer=Adam(learning_rate=0.0001),loss='mean_squared_error',metrics=[tf.keras.losses.MeanAbsoluteError(),tf.keras.losses.MeanAbsolutePercentageError()])
normalised_model.fit(x=[bert_train_samples,side_train_samples],y=train_labels,batch_size=32,epochs=8,verbose=2,callbacks=[callback])
scores = normalised_model.evaluate(x=[bert_test_samples,side_test_samples],y=test_labels)
normalised_results["E8 B32"] = scores
print("Completed 8 Epochs")
print(scores)

normalised_model.load_weights("normalised.h5")
normalised_model.compile(optimizer=Adam(learning_rate=0.0001),loss='mean_squared_error',metrics=[tf.keras.losses.MeanAbsoluteError(),tf.keras.losses.MeanAbsolutePercentageError()])
normalised_model.fit(x=[bert_train_samples,side_train_samples],y=train_labels,batch_size=32,epochs=20,verbose=2,callbacks=[callback])
scores = normalised_model.evaluate(x=[bert_test_samples,side_test_samples],y=test_labels)
normalised_results["E8 B32"] = scores
print("Completed 20 Epochs")
print(scores)

normalised_results.T

Epoch 1/8
176/176 - 3058s - loss: 2.4430 - mean_absolute_error: 1.0333 - mean_absolute_percentage_error: 27.8895
Epoch 2/8
176/176 - 2876s - loss: 0.4441 - mean_absolute_error: 0.5321 - mean_absolute_percentage_error: 14.8510
Epoch 3/8
176/176 - 2871s - loss: 0.2997 - mean_absolute_error: 0.4348 - mean_absolute_percentage_error: 12.0294
Epoch 4/8
176/176 - 2873s - loss: 0.1921 - mean_absolute_error: 0.3419 - mean_absolute_percentage_error: 9.3433
Epoch 5/8
176/176 - 2871s - loss: 0.1227 - mean_absolute_error: 0.2715 - mean_absolute_percentage_error: 7.3048
Epoch 6/8
176/176 - 2874s - loss: 0.0807 - mean_absolute_error: 0.2209 - mean_absolute_percentage_error: 5.9655
Epoch 7/8
176/176 - 2869s - loss: 0.0613 - mean_absolute_error: 0.1935 - mean_absolute_percentage_error: 5.3141
Epoch 8/8
176/176 - 2865s - loss: 0.0466 - mean_absolute_error: 0.1677 - mean_absolute_percentage_error: 4.6752
Completed 8 Epochs
[0.5396922826766968, 0.5855347514152527, 16.275794982910156]
Epoch 1/20
176/176 - 

Unnamed: 0,0,1,2
E8 B32,0.509776,0.571344,16.014868


In [10]:
# -- SAVE NORMALISED RESULTS --
normalised_results = normalised_results.T
normalised_results.to_csv("./lstm_side_information_tests/normalised.csv")

In [11]:
# --PRETRAINED TRAINING AND TESTING --
pretrained_results = pd.DataFrame()
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3,min_delta=0.01)

pretrained_model.save_weights('pretrained.h5')
pretrained_model.compile(optimizer=Adam(learning_rate=0.0001),loss='mean_squared_error',metrics=[tf.keras.losses.MeanAbsoluteError(),tf.keras.losses.MeanAbsolutePercentageError()])
pretrained_model.fit(x=[bert_train_samples,side_train_samples],y=train_labels,batch_size=32,epochs=8,verbose=2,callbacks=[callback])
scores = pretrained_model.evaluate(x=[bert_test_samples,side_test_samples],y=test_labels)
pretrained_results["E8 B32"] = scores
print("Completed 8 Epochs")
print(scores)

pretrained_model.load_weights('pretrained.h5')
pretrained_model.compile(optimizer=Adam(learning_rate=0.0001),loss='mean_squared_error',metrics=[tf.keras.losses.MeanAbsoluteError(),tf.keras.losses.MeanAbsolutePercentageError()])
pretrained_model.fit(x=[bert_train_samples,side_train_samples],y=train_labels,batch_size=32,epochs=20,verbose=2,callbacks=[callback])
scores = pretrained_model.evaluate(x=[bert_test_samples,side_test_samples],y=test_labels)
pretrained_results["E8 B32"] = scores
print("Completed 20 Epochs")
print(scores)

pretrained_results.T

Epoch 1/8
176/176 - 3437s - loss: 2.8942 - mean_absolute_error: 1.2067 - mean_absolute_percentage_error: 33.1861
Epoch 2/8
176/176 - 3526s - loss: 0.5185 - mean_absolute_error: 0.5779 - mean_absolute_percentage_error: 16.3963
Epoch 3/8
176/176 - 3188s - loss: 0.3432 - mean_absolute_error: 0.4655 - mean_absolute_percentage_error: 13.0815
Epoch 4/8
176/176 - 3295s - loss: 0.2184 - mean_absolute_error: 0.3653 - mean_absolute_percentage_error: 10.1201
Epoch 5/8
176/176 - 3426s - loss: 0.1294 - mean_absolute_error: 0.2802 - mean_absolute_percentage_error: 7.7037
Epoch 6/8
176/176 - 3647s - loss: 0.0773 - mean_absolute_error: 0.2159 - mean_absolute_percentage_error: 5.9346
Epoch 7/8
176/176 - 3508s - loss: 0.0522 - mean_absolute_error: 0.1767 - mean_absolute_percentage_error: 4.8852
Epoch 8/8
176/176 - 3089s - loss: 0.0423 - mean_absolute_error: 0.1584 - mean_absolute_percentage_error: 4.3352
Completed 8 Epochs
[0.5333935022354126, 0.5753917694091797, 16.179872512817383]
Epoch 1/20
176/176 -

Unnamed: 0,0,1,2
E8 B32,0.703471,0.667417,19.697212


In [12]:
# -- SAVE PRETRAINED RESULTS --
pretrained_results = pretrained_results.T
pretrained_results.to_csv("/lstm_side_information_tests/pretrained.csv")

FileNotFoundError: [Errno 2] No such file or directory: '/lstm_side_information_tests/pretrained.csv'

In [13]:
# --AVERAGE LAYER TRAINING AND TESTING --
average_results = pd.DataFrame()
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3,min_delta=0.01)


average_model.save_weights('average.h5')
average_model.compile(optimizer=Adam(learning_rate=0.0001),loss='mean_squared_error',metrics=[tf.keras.losses.MeanAbsoluteError(),tf.keras.losses.MeanAbsolutePercentageError()])
average_model.fit(x=[bert_train_samples,side_train_samples],y=train_labels,batch_size=32,epochs=8,verbose=2,callbacks=[callback])
scores = average_model.evaluate(x=[bert_test_samples,side_test_samples],y=test_labels)
average_results["E8 B32"] = scores
print("Completed 8 Epochs")
print(scores)

average_model.load_weights('average.h5')
average_model.compile(optimizer=Adam(learning_rate=0.0001),loss='mean_squared_error',metrics=[tf.keras.losses.MeanAbsoluteError(),tf.keras.losses.MeanAbsolutePercentageError()])
average_model.fit(x=[bert_train_samples,side_train_samples],y=train_labels,batch_size=32,epochs=20,verbose=2,callbacks=[callback])
scores = average_model.evaluate(x=[bert_test_samples,side_test_samples],y=test_labels)
average_results["E8 B32"] = scores
print("Completed 20 Epochs")
print(scores)

average_results.T

Epoch 1/8
176/176 - 2931s - loss: 2.7868 - mean_absolute_error: 1.1611 - mean_absolute_percentage_error: 31.2592
Epoch 2/8
176/176 - 2870s - loss: 0.7489 - mean_absolute_error: 0.6970 - mean_absolute_percentage_error: 20.0374
Epoch 3/8
176/176 - 2869s - loss: 0.7428 - mean_absolute_error: 0.6928 - mean_absolute_percentage_error: 19.8413
Epoch 4/8
176/176 - 2914s - loss: 0.7306 - mean_absolute_error: 0.6866 - mean_absolute_percentage_error: 19.6622
Epoch 5/8
176/176 - 2870s - loss: 0.7253 - mean_absolute_error: 0.6853 - mean_absolute_percentage_error: 19.5648
Epoch 6/8
176/176 - 2877s - loss: 0.7214 - mean_absolute_error: 0.6822 - mean_absolute_percentage_error: 19.5078
Epoch 7/8
176/176 - 2875s - loss: 0.7207 - mean_absolute_error: 0.6826 - mean_absolute_percentage_error: 19.4445
Completed 8 Epochs
[0.755474328994751, 0.6876088976860046, 20.332841873168945]
Epoch 1/20
176/176 - 2919s - loss: 2.6856 - mean_absolute_error: 1.1496 - mean_absolute_percentage_error: 31.1790
Epoch 2/20
176/1

Unnamed: 0,0,1,2
E8 B32,0.693508,0.669828,18.773012


In [14]:
# -- SAVE AVERAGE RESULTS --
average_results = average_results.T
average_results.to_csv("./lstm_side_information_tests/average.csv")