In [None]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import numpy as np
import pandas as pd
import nltk
import re
import math
import json
import pickle
import shutil
import os
import tensorflow as tf
import keras_tuner as kt

from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.metrics import precision_score, recall_score

from sklearn.model_selection import KFold


from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dropout, Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding, Bidirectional, SpatialDropout1D
from tensorflow.keras.layers import Concatenate, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

import gensim.downloader as api


from google.colab import drive

path = '/content/drive/MyDrive/Text_Mining/BILSTM/'
drive.mount('/content/drive')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Mounted at /content/drive


## **Load pre-trained embedding**

In [None]:
# This loads pretrained GloVe 300d Vectors. Note that this may take 1-3 minutes to load so please wait.
print("Loading GloVe 300d vectors...")
glove_vectors = api.load("glove-wiki-gigaword-300")
print("GloVe embeddings loaded.")

Loading GloVe 300d vectors...
GloVe embeddings loaded.


## **Data Retrieval**

In [None]:
# manually loading the data set
splits = {'train': 'train-00000-of-00001.parquet', 'test': 'test-00000-of-00001.parquet'}

train = pd.read_parquet(path+splits["train"])
test = pd.read_parquet(path+splits["test"])
print(train['relation'].value_counts())
print(train['relation'].value_counts())
train

relation
18    1410
6      844
1      659
13     612
8      568
14     490
3      471
2      470
11     407
17     394
4      374
0      344
16     323
5      166
9      148
15     144
10      97
12      78
7        1
Name: count, dtype: int64
relation
18    1410
6      844
1      659
13     612
8      568
14     490
3      471
2      470
11     407
17     394
4      374
0      344
16     323
5      166
9      148
15     144
10      97
12      78
7        1
Name: count, dtype: int64


Unnamed: 0,sentence,relation
0,The system as described above has its greatest...,3
1,The <e1>child</e1> was carefully wrapped and b...,18
2,The <e1>author</e1> of a keygen uses a <e2>dis...,11
3,A misty <e1>ridge</e1> uprises from the <e2>su...,18
4,The <e1>student</e1> <e2>association</e2> is t...,12
...,...,...
7995,When the <e1>notice</e1> is sent by <e2>fax</e...,18
7996,The <e1>herbicide</e1> is derived from a natur...,8
7997,"To test this, we placed a kitchen <e1>match</e...",6
7998,The farmers and city officials in the region h...,18


## **Data Pre-processing**

In [None]:
# method to one hot encode the relation label
def one_hot_encode_relations(relation_label, num_classes=19):
  one_hot = np.zeros(num_classes)
  one_hot[relation_label] = 1
  return one_hot

# method to produce relative postion encodings for sentences
def get_relative_positions(sentence):

  tokens = re.findall(r'<e\d+>|</e\d+>|\S+', sentence)

  # Identify entity indices and words inside tags
  e1_index = None
  e2_index = None
  inside_e1 = False
  inside_e2 = False

  for i, token in enumerate(tokens):
      if token == "<e1>":
          e1_index = i  # Start of entity
          inside_e1 = True
      elif token == "</e1>":
          inside_e1 = False
      elif token == "<e2>":
          e2_index = i  # Start of entity
          inside_e2 = True
      elif token == "</e2>":
          inside_e2 = False

  # make sure valid indices were found in the sentence
  if e1_index is None or e2_index is None:
      raise ValueError("Both <e1> and <e2> entities must be present.")

  pos1 = []
  pos2 = []
  inside_e1 = False
  inside_e2 = False

  # calculate the relative positions based on tokens
  for i, token in enumerate(tokens):
      if token == "<e1>":
          pos1.append(-1)
          inside_e1 = True
      elif token == "</e1>":
          pos1.append(1)
          inside_e1 = False
      elif inside_e1:
          pos1.append(0)
      elif pos1 and pos1[-1] >= 0:
          pos1.append(pos1[-1] + 1)
      else:
          pos1.append(i - e1_index)

      if token == "<e2>":
          pos2.append(-1)
          inside_e2 = True
      elif token == "</e2>":
          pos2.append(1)
          inside_e2 = False
      elif inside_e2:
          pos2.append(0)
      elif pos2 and pos2[-1] >= 0:
          pos2.append(pos2[-1] + 1)
      else:
          pos2.append(i - e2_index)

  return tokens, pos1, pos2

# method to produce token and positional encodings for sentences
def preprocess_dataset(dataset, tokenizer, max_seq_length=128, num_classes=19):

  X_word, X_pos1, X_pos2, Y = [], [], [], []

  for _, sample in dataset.iterrows():
    sentence = sample['sentence']
    relation = sample['relation']

    # tokens, entity1_pos, entity2_pos, close_entity1_pos, close_entity2_pos = preprocess_sentence(sentence)

    tokens, pos1, pos2 = get_relative_positions(sentence)

    token_ids = tokenizer.texts_to_sequences([tokens])[0] #convert to indices?


    # pos1, pos2 = compute_relative_positions(len(token_ids), entity1_pos, entity2_pos, close_entity1_pos, close_entity2_pos)

    token_ids = pad_sequences([token_ids], maxlen=max_seq_length, padding='post')[0]
    pos1 = pad_sequences([pos1], maxlen=max_seq_length, padding='post')[0]
    pos2 = pad_sequences([pos2], maxlen=max_seq_length, padding='post')[0]


    relation_one_hot = one_hot_encode_relations(relation, num_classes)

    X_word.append(token_ids)
    X_pos1.append(pos1)
    X_pos2.append(pos2)
    Y.append(relation_one_hot)

  return np.array(X_word), np.array(X_pos1), np.array(X_pos2), np.array(Y)

# method to delimit entity markers with spaces for proper tokenization later on
def replace_entity_markers(text):
    text = text.replace("<e1>", " <e1> ").replace("</e1>", " </e1> ")
    text = text.replace("<e2>", " <e2> ").replace("</e2>", " </e2> ")
    return text

# **Bi-LSTM**

Hyper Parameter Selection via Bayesian Optimisation

In [None]:
# method to generate a pre-defined embedding matrix for the model based on the GloVe vectors
def create_embedding_matrix(word_index, vocab_size, embedding_dim=300):
  embedding_matrix = np.random.normal(size=(vocab_size, embedding_dim))
  for word, i in word_index.items():
      if word in glove_vectors:
          embedding_matrix[i] = glove_vectors[word]
  return embedding_matrix

# This function is used to create BiLSTM model with tunable hyperparameters for Bayesian Optimisation
def build_model(hp, vocab_size, word_index, max_seq_length, num_classes):

  # tuneable hyperparameters
  vector_length = 300
  position_embedding_dim = hp.Int('embedding_dim', 50, 300, step=50)
  lstm_units = hp.Int('lstm_units', 32, 128, step=32)
  dropout_rate = hp.Float('dropout_rate', 0.1, 0.9, step=0.1)
  learning_rate = hp.Choice('learning_rate', [0.1, 0.01, 0.001, 0.0001])
  bilstm_dropout = hp.Float('bilstm_dropout', 0.1, 0.9, step=0.1)
  kernel_regularizer = hp.Choice('kernel_regularizer', [0.0001, 0.001, 0.01])

  # word embedding layer
  embedding_matrix = create_embedding_matrix(word_index, vocab_size)


  word_input = Input(shape=(max_seq_length,), dtype=tf.int32, name="word_input")
  embedding_layer = Embedding(vocab_size, vector_length, weights=[embedding_matrix], input_length=max_seq_length, trainable=True)(word_input)

  position_vocab_size = max_seq_length * 2
  position_embedding_dim = 300

  # positional embedding layesr
  pos1_input = Input(shape=(max_seq_length,), dtype=tf.int32, name="pos1_input")
  pos2_input = Input(shape=(max_seq_length,), dtype=tf.int32, name="pos2_input")

  pos1_embedding = Embedding(position_vocab_size, position_embedding_dim, input_length=max_seq_length, trainable=True)(pos1_input)
  pos2_embedding = Embedding(position_vocab_size, position_embedding_dim, input_length=max_seq_length, trainable=True)(pos2_input)

  # concantenate embeddings before passing into spatial dropout layer
  concatenated_embeddings = Concatenate()([embedding_layer, pos1_embedding, pos2_embedding])

  x = SpatialDropout1D(dropout_rate)(concatenated_embeddings)

  # sinlge layer BiLSTM
  x = Bidirectional(LSTM(lstm_units, return_sequences=False, dropout=bilstm_dropout, kernel_regularizer=tf.keras.regularizers.l2(kernel_regularizer)))(x)

  # (Dense) Output Layer
  output = Dense(num_classes, activation="softmax")(x)

  model = Model(inputs=[word_input, pos1_input, pos2_input], outputs=output)

  optimizer = Adam(learning_rate=learning_rate, clipnorm=1.0)
  model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

  return model

# method to create a tuner for Bayesian Optimisation
def create_tuner(vocab_size, word_index, max_seq_length, num_classes):

  tuner_dir = 'bilstm_tuning'

  if os.path.exists(tuner_dir):
      shutil.rmtree(tuner_dir)
      print(f"Deleted previous tuning directory: {tuner_dir}")

  tuner = kt.BayesianOptimization(
      lambda hp: build_model(hp, vocab_size, word_index, max_seq_length, num_classes),
      objective='val_accuracy',
      max_trials=20,
      executions_per_trial=1,
      directory='bilstm_tuning',
      project_name='bilstm_bayesian_cv'
  )
  return tuner

In [None]:
tf.keras.backend.clear_session()

# method to delimit entity markers with spaces
train["sentence"] = train["sentence"].apply(replace_entity_markers)
test["sentence"] = test["sentence"].apply(replace_entity_markers)
# train

# randomize the train and test datasets
train = train.sample(frac=1)
test = test.sample(frac=1)
sentences_train = train['sentence']


max_seq_length = 256
num_classes = 19

# build tokenizer by filtering out all punctuation and special characters except <,>,\ that are required in tags
tokenizer = Tokenizer(filters='!"#$%&()*+,-.:;=?@[\\]^_`{|}~\t\n', num_words=50000, oov_token="<UNK>")
tokenizer.fit_on_texts(sentences_train)

words = tokenizer.word_index
vocab_size = len(words) + 1
print(vocab_size)

tuner = create_tuner(vocab_size, words, max_seq_length, num_classes)

# preprocess the dataset
X_train, X_train_pos1, X_train_pos2, Y_train = preprocess_dataset(train, tokenizer, max_seq_length)
X_test, X_test_pos1, X_test_pos2, Y_test = preprocess_dataset(test, tokenizer, max_seq_length)

# start bayesian optimisation to find best hyperparameters
tuner = create_tuner(vocab_size, words, max_seq_length, num_classes)

tuner.search(
    [X_train, X_train_pos1, X_train_pos2],
    Y_train,
    epochs=30,
    batch_size=10,
    validation_split=0.2,
    verbose=1,
    # callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)]
)

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters:", best_hps.values)

Trial 20 Complete [00h 07m 30s]
val_accuracy: 0.6643750071525574

Best val_accuracy So Far: 0.7275000214576721
Total elapsed time: 02h 27m 45s
Best Hyperparameters: {'embedding_dim': 300, 'lstm_units': 96, 'dropout_rate': 0.8, 'learning_rate': 0.001, 'bilstm_dropout': 0.4, 'kernel_regularizer': 0.0001}


could maybe run K-Fold Cross Validation on tagged/swapped dataset to see how that does.

**K-Fold Cross Validation Over Training Set**

In [None]:
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

accuracy_scores = []
precision_scores = []
recall_scores = []
macro_f1_scores = []

for foldNo, (train_idx, val_idx) in enumerate(kf.split(train)):
    train_data = train.iloc[train_idx]

    X_train_fold = [X_train[train_idx], X_train_pos1[train_idx], X_train_pos2[train_idx]]
    Y_train_fold = Y_train[train_idx]

    X_val_fold = [X_train[val_idx], X_train_pos1[val_idx], X_train_pos2[val_idx]]
    Y_val_fold = Y_train[val_idx]

    model = tuner.hypermodel.build(best_hps)

    history = model.fit(
        X_train_fold, Y_train_fold,
        epochs=30,
        batch_size=10,
        validation_data=(X_val_fold, Y_val_fold),
        # callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=6, restore_best_weights=True)]
    )

    y_pred_prob = model.predict(X_val_fold)
    y_pred = np.argmax(y_pred_prob, axis=1)
    y_true = np.argmax(Y_val_fold, axis=1)

    accuracy = np.mean(y_pred == y_true)
    precision = precision_score(y_true, y_pred, average="macro", zero_division=0)
    recall = recall_score(y_true, y_pred, average="macro", zero_division=0)
    macro_f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)

    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    macro_f1_scores.append(macro_f1)

    print(f"Fold {foldNo+1} - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, Macro-F1: {macro_f1:.4f}")

mean_accuracy = np.mean(accuracy_scores)
mean_precision = np.mean(precision_scores)
mean_recall = np.mean(recall_scores)
mean_macro_f1 = np.mean(macro_f1_scores)

print("Results for K-Fold Cross Validation on training set:")
print(f"Mean Accuracy: {mean_accuracy:.4f}")
print(f"Mean Precision: {mean_precision:.4f}")
print(f"Mean Recall: {mean_recall:.4f}")
print(f"Mean Macro-F1: {mean_macro_f1:.4f}")

Epoch 1/30




[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.1442 - loss: 2.8634 - val_accuracy: 0.1775 - val_loss: 2.7595
Epoch 2/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 23ms/step - accuracy: 0.2209 - loss: 2.5448 - val_accuracy: 0.3713 - val_loss: 2.0767
Epoch 3/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 22ms/step - accuracy: 0.3489 - loss: 2.1330 - val_accuracy: 0.4569 - val_loss: 1.7853
Epoch 4/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 23ms/step - accuracy: 0.4492 - loss: 1.8641 - val_accuracy: 0.5188 - val_loss: 1.6826
Epoch 5/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 22ms/step - accuracy: 0.4788 - loss: 1.7484 - val_accuracy: 0.5650 - val_loss: 1.5424
Epoch 6/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 22ms/step - accuracy: 0.5337 - loss: 1.5767 - val_accuracy: 0.5719 - val_loss: 1.5453
Epoch 7/30
[1m640/640[0m 



[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.1417 - loss: 2.8565 - val_accuracy: 0.2475 - val_loss: 2.5362
Epoch 2/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.2376 - loss: 2.5101 - val_accuracy: 0.3644 - val_loss: 2.0407
Epoch 3/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 24ms/step - accuracy: 0.3316 - loss: 2.1583 - val_accuracy: 0.4831 - val_loss: 1.6956
Epoch 4/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 24ms/step - accuracy: 0.4354 - loss: 1.8875 - val_accuracy: 0.5450 - val_loss: 1.5302
Epoch 5/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 24ms/step - accuracy: 0.4827 - loss: 1.7337 - val_accuracy: 0.5900 - val_loss: 1.4672
Epoch 6/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 24ms/step - accuracy: 0.5334 - loss: 1.6057 - val_accuracy: 0.6044 - val_loss: 1.4146
Epoch 7/30
[1m640/640[0m 



[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 24ms/step - accuracy: 0.1463 - loss: 2.8536 - val_accuracy: 0.1944 - val_loss: 2.7390
Epoch 2/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.2254 - loss: 2.5270 - val_accuracy: 0.3706 - val_loss: 2.0053
Epoch 3/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.3570 - loss: 2.1322 - val_accuracy: 0.5006 - val_loss: 1.6851
Epoch 4/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.4566 - loss: 1.8159 - val_accuracy: 0.5719 - val_loss: 1.4847
Epoch 5/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.5186 - loss: 1.6461 - val_accuracy: 0.5900 - val_loss: 1.4470
Epoch 6/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.5464 - loss: 1.5237 - val_accuracy: 0.6044 - val_loss: 1.4246
Epoch 7/30
[1m640/640[0m 



[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 24ms/step - accuracy: 0.1628 - loss: 2.8378 - val_accuracy: 0.2250 - val_loss: 2.6420
Epoch 2/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.2435 - loss: 2.5253 - val_accuracy: 0.3844 - val_loss: 2.0430
Epoch 3/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.3561 - loss: 2.1347 - val_accuracy: 0.4888 - val_loss: 1.7037
Epoch 4/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.4294 - loss: 1.8746 - val_accuracy: 0.5437 - val_loss: 1.5822
Epoch 5/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.5039 - loss: 1.6871 - val_accuracy: 0.5975 - val_loss: 1.4481
Epoch 6/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.5358 - loss: 1.5736 - val_accuracy: 0.6162 - val_loss: 1.4194
Epoch 7/30
[1m640/640[0m 



[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 24ms/step - accuracy: 0.1447 - loss: 2.8572 - val_accuracy: 0.2525 - val_loss: 2.5025
Epoch 2/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.2409 - loss: 2.4999 - val_accuracy: 0.4044 - val_loss: 1.8891
Epoch 3/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.3683 - loss: 2.0993 - val_accuracy: 0.5188 - val_loss: 1.5763
Epoch 4/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.4621 - loss: 1.8154 - val_accuracy: 0.5906 - val_loss: 1.4097
Epoch 5/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.5065 - loss: 1.6620 - val_accuracy: 0.5969 - val_loss: 1.3828
Epoch 6/30
[1m640/640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - accuracy: 0.5420 - loss: 1.5332 - val_accuracy: 0.6119 - val_loss: 1.3523
Epoch 7/30
[1m640/640[0m 

### **Train and Test Model**

In [None]:
best_model = tuner.hypermodel.build(best_hps)

best_model.fit([X_train, X_train_pos1, X_train_pos2],
               Y_train,
              #  validation_split=0.2,
               epochs=30,
               batch_size=10,
              #  callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=8, restore_best_weights=True)]
               )

test_loss, test_acc = best_model.evaluate([X_test, X_test_pos1, X_test_pos2], Y_test, batch_size=10)
print(f"Final Test Accuracy: {test_acc}")

Epoch 1/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 20ms/step - accuracy: 0.1437 - loss: 2.8299
Epoch 2/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.2755 - loss: 2.4059
Epoch 3/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.4187 - loss: 1.9350
Epoch 4/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.4985 - loss: 1.6924
Epoch 5/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.5318 - loss: 1.5807
Epoch 6/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.5655 - loss: 1.4691
Epoch 7/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.5965 - loss: 1.3996
Epoch 8/30
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.6160 - loss: 1.3433
Epoch 9/30
[1m800/800[

### **Model Evaluation**

In [None]:
y_pred_prob = best_model.predict([X_test, X_test_pos1, X_test_pos2], batch_size=10)
y_pred = np.argmax(y_pred_prob, axis=1)
y_true = np.argmax(Y_test, axis=1)

f1 = f1_score(y_true, y_pred, average='macro')
print("F1 Score:", f1)

f1_scores = f1_score(y_true, y_pred, average=None)

print("F1 Scores per class:", f1_scores)

print(classification_report(y_true, y_pred))

[1m272/272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step
F1 Score: 0.7042425911559542
F1 Scores per class: [0.89138577 0.87958115 0.79375    0.63157895 0.82253521 0.76315789
 0.8729097  0.         0.83069977 0.83673469 0.53333333 0.70758123
 0.51851852 0.84918794 0.8209607  0.76       0.7706422  0.68548387
 0.41256831]
              precision    recall  f1-score   support

           0       0.89      0.89      0.89       134
           1       0.89      0.87      0.88       194
           2       0.80      0.78      0.79       162
           3       0.62      0.64      0.63       150
           4       0.72      0.95      0.82       153
           5       0.78      0.74      0.76        39
           6       0.85      0.90      0.87       291
           7       0.00      0.00      0.00         1
           8       0.79      0.87      0.83       211
           9       0.80      0.87      0.84        47
          10       0.52      0.55      0.53        22
          11

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## **Save Best Model**

In [None]:
best_model.save(path + "models/"+"best_bilstm_models.keras")

with open(path + 'models/best_bilstm_tokenizer.pickle', 'wb') as handle:
  pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

## **Interact with Model**

In [None]:
class_label_mapping = {
    0: "Cause-Effect(e1,e2)", 1: "Cause-Effect(e2,e1)",
    2: "Component-Whole(e1,e2)", 3: "Component-Whole(e2,e1)",
    4: "Content-Container(e1,e2)", 5: "Content-Container(e2,e1)",
    6: "Entity-Destination(e1,e2)", 7: "Entity-Destination(e2,e1)",
    8: "Entity-Origin(e1,e2)", 9: "Entity-Origin(e2,e1)",
    10: "Instrument-Agency(e1,e2)", 11: "Instrument-Agency(e2,e1)",
    12: "Member-Collection(e1,e2)", 13: "Member-Collection(e2,e1)",
    14: "Message-Topic(e1,e2)", 15: "Message-Topic(e2,e1)",
    16: "Product-Producer(e1,e2)", 17: "Product-Producer(e2,e1)",
    18: "Other"
}

def sentence__relation_prediction(sentence, tokenizer, model, max_seq_length=256):

  sentence = replace_entity_markers(sentence)

  tokenizer = Tokenizer(filters='!"#$%&()*+,-.:;=?@[\\]^_`{|}~\t\n', num_words=50000, oov_token="<UNK>")
  tokenizer.fit_on_texts(sentences_train)

  words = tokenizer.word_index
  vocab_size = len(words) + 1
  print(vocab_size)

  sentence_df = pd.DataFrame([[sentence, 18]], columns=["sentence", "relation"])

  tokenized_sentence, e1_pos_encoding, e2_pos_encoding, _ = preprocess_dataset(sentence_df, tokenizer, max_seq_length)

  y_pred_prob = model.predict([tokenized_sentence, e1_pos_encoding, e2_pos_encoding])

  predicted_class = np.argmax(y_pred_prob, axis=1)[0]

  predicted_relation = class_label_mapping.get(predicted_class)

  return predicted_relation

In [None]:
test_sentence = 'The <e1>bottle</e1> was filled with <e2>water</e2> and placed on the table.'
predicted_relation = sentence__relation_prediction(test_sentence, tokenizer, best_model)
print(predicted_relation)

19572
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
Content-Container(e2,e1)


In [None]:
test_sentence = input("Please Enter a sentence with two entities in the format provided in the previous cell: ")
predicted_relation = sentence__relation_prediction(test_sentence, tokenizer, best_model)
print(predicted_relation)

Please Enter a sentence with two entities in the format provided in the previous cell: 'The <e2>bottle</e2> was filled with <e1>water</e1> and placed on the table.'
19572
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Content-Container(e2,e1)
