In [None]:
# Import the necessary libraries
import numpy as np
from datasets import load_dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Bidirectional, TimeDistributed, Embedding, Dense, Dropout
from matplotlib import pyplot as plt

In [None]:
# Fetch CoNLL-2003 through `datasets`; trust_remote_code=True is passed so the
# dataset's own loading script is allowed to run.
dataset = load_dataset('conll2003', trust_remote_code=True)

# Keep handles on the two splits used by the rest of the notebook
train_data, test_data = dataset['train'], dataset['test']

# Create a function to extract sentences and labels from the dataset
def get_sentences_and_labels(data):
    """Return the sentences and NER tags held in a dataset split.

    Args:
        data: Mapping-like split exposing a 'tokens' column (one list of
            token strings per sentence) and a 'ner_tags' column.

    Returns:
        A `(sentences, labels)` tuple: `sentences` is a list with each
        sentence's tokens joined by single spaces, `labels` is the
        'ner_tags' column returned as-is.
    """
    joined_sentences = []
    for token_list in data['tokens']:
        joined_sentences.append(" ".join(token_list))
    return joined_sentences, data['ner_tags']

# Get sentences and labels for training and test data
train_sentences, train_labels = get_sentences_and_labels(train_data)
test_sentences, test_labels = get_sentences_and_labels(test_data)

# Tokenize the sentences, convert them to sequences, and pad the sequences.
# The sentences are the dataset tokens re-joined with single spaces, and every
# token carries exactly one NER tag, so the tokenizer must reproduce those
# tokens one-for-one:
#   * filters='' / split=' ' -> split on spaces only. The default filters strip
#     punctuation, which removes tokens such as "." and breaks "U.S." in two.
#   * oov_token              -> words unseen during fitting map to a reserved id
#     instead of being skipped.
# Without both settings the word ids drift out of step with the tags.
max_len = 50
word_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', split=' ', oov_token='<OOV>')
word_tokenizer.fit_on_texts(train_sentences)
train_sequences = word_tokenizer.texts_to_sequences(train_sentences)
test_sequences = word_tokenizer.texts_to_sequences(test_sentences)

# Fail loudly if any sentence does not have exactly one word id per NER tag
assert all(len(seq) == len(tags) for seq, tags in zip(train_sequences, train_labels)), \
    "training word ids and NER tags are misaligned"
assert all(len(seq) == len(tags) for seq, tags in zip(test_sequences, test_labels)), \
    "test word ids and NER tags are misaligned"

# pad_sequences truncates from the front by default; words and labels both use
# that default below, so over-long sentences stay aligned.
X_train = pad_sequences(train_sequences, maxlen=max_len, padding='post')
X_test = pad_sequences(test_sequences, maxlen=max_len, padding='post')

# Encode the training and test labels
label_encoder = LabelEncoder()
label_encoder.fit([item for sublist in train_labels for item in sublist])
train_labels_enc = [label_encoder.transform(label) for label in train_labels]
test_labels_enc = [label_encoder.transform(label) for label in test_labels]

# Pad the training and test labels.
# Padding positions are marked with -1 and one extra class is reserved for them.
# NOTE(review): the one-hot step relies on to_categorical using NumPy-style
# indexing, so that -1 lands in the LAST column (the extra padding class) --
# confirm on the installed Keras version.
train_labels_padded = pad_sequences(train_labels_enc, maxlen=max_len, padding='post', value=-1)
test_labels_padded = pad_sequences(test_labels_enc, maxlen=max_len, padding='post', value=-1)
num_classes = len(label_encoder.classes_) + 1
train_labels_onehot = [to_categorical(i, num_classes=num_classes) for i in train_labels_padded]
test_labels_onehot = [to_categorical(i, num_classes=num_classes) for i in test_labels_padded]

# Stack the per-sentence one-hot matrices into (sentences, max_len, num_classes)
y_train = np.array(train_labels_onehot)
y_test = np.array(test_labels_onehot)

In [None]:
# The eight taggers share one skeleton (embedding -> recurrent stack ->
# per-token softmax) and differ only in the recurrent layer type, the depth of
# the stack and whether each layer is bidirectional, so one helper builds them.
def build_tagger(recurrent_layer, num_layers=1, bidirectional=False):
    """Build and compile one sequence-tagging model.

    Args:
        recurrent_layer: Recurrent layer class to stack (GRU or LSTM).
        num_layers: Number of stacked recurrent layers.
        bidirectional: When True, wrap every recurrent layer in Bidirectional.

    Returns:
        A compiled Sequential model: a 64-dimensional embedding, `num_layers`
        recurrent layers of 64 units that return full sequences, and a
        time-distributed softmax over `num_classes`.
    """
    model = Sequential()
    # +1 because index 0 is used for padding by pad_sequences
    model.add(Embedding(input_dim=len(word_tokenizer.word_index) + 1, output_dim=64, input_length=max_len))
    for _ in range(num_layers):
        recurrent = recurrent_layer(units=64, return_sequences=True)
        model.add(Bidirectional(recurrent) if bidirectional else recurrent)
    model.add(TimeDistributed(Dense(num_classes, activation='softmax')))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def train_tagger(model):
    """Fit `model` on the training arrays and return the Keras History.

    NOTE(review): validation_data is (X_test, y_test), so any model selection
    based on these curves is made on the test data -- consider a separate
    validation split.
    NOTE(review): the Embedding sets no mask, so padded time steps presumably
    count towards both the loss and the reported accuracy -- confirm before
    comparing accuracies.
    """
    return model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_test, y_test))


# Each model is built and trained before the next one is created, and the
# model_N / history_N names are kept for the cells that follow.

# Model 1 - GRU single layer
model_1 = build_tagger(GRU, num_layers=1, bidirectional=False)
history_1 = train_tagger(model_1)

# Model 2 - GRU two layers
model_2 = build_tagger(GRU, num_layers=2, bidirectional=False)
history_2 = train_tagger(model_2)

# Model 3 - Bidirectional GRU single layer
model_3 = build_tagger(GRU, num_layers=1, bidirectional=True)
history_3 = train_tagger(model_3)

# Model 4 - Bidirectional GRU two layers
model_4 = build_tagger(GRU, num_layers=2, bidirectional=True)
history_4 = train_tagger(model_4)

# Model 5 - LSTM single layer
model_5 = build_tagger(LSTM, num_layers=1, bidirectional=False)
history_5 = train_tagger(model_5)

# Model 6 - LSTM two layers
model_6 = build_tagger(LSTM, num_layers=2, bidirectional=False)
history_6 = train_tagger(model_6)

# Model 7 - Bidirectional LSTM single layer
model_7 = build_tagger(LSTM, num_layers=1, bidirectional=True)
history_7 = train_tagger(model_7)

# Model 8 - Bidirectional LSTM two layers
model_8 = build_tagger(LSTM, num_layers=2, bidirectional=True)
history_8 = train_tagger(model_8)

In [None]:
# Create a function to plot the training and validation loss and accuracy
def plot_history(history, model_name):
    """Show training vs. validation curves for one training run.

    Draws a 12x6 figure with two side-by-side panels: accuracy on the left
    and loss on the right, each with a 'Train' and a 'Val' line.

    Args:
        history: Object whose `.history` dict provides the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series.
        model_name: Text placed at the start of both panel titles.
    """
    curves = history.history
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

    # Left panel: accuracy per epoch
    acc_ax.plot(curves['accuracy'])
    acc_ax.plot(curves['val_accuracy'])
    acc_ax.set_title(f'{model_name} Model Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend(['Train', 'Val'], loc='upper left')

    # Right panel: loss per epoch
    loss_ax.plot(curves['loss'])
    loss_ax.plot(curves['val_loss'])
    loss_ax.set_title(f'{model_name} Model Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend(['Train', 'Val'], loc='upper left')

    fig.tight_layout()
    plt.show()

# Plot the accuracy and loss curves for each model, in training order
labelled_histories = (
    (history_1, "Model 1"),
    (history_2, "Model 2"),
    (history_3, "Model 3"),
    (history_4, "Model 4"),
    (history_5, "Model 5"),
    (history_6, "Model 6"),
    (history_7, "Model 7"),
    (history_8, "Model 8"),
)
for run_history, run_label in labelled_histories:
    plot_history(run_history, run_label)

# Create a function to calculate mean validation accuracy
def mean_validation_accuracy(history):
    """Average the per-epoch validation accuracy of one training run.

    Args:
        history: Object whose `.history` dict has a 'val_accuracy' series.

    Returns:
        The arithmetic mean of that series, as computed by `np.mean`.
    """
    return np.mean(history.history['val_accuracy'])

# Calculate the mean validation accuracy for each model (same order as training)
(
    mean_acc_1,
    mean_acc_2,
    mean_acc_3,
    mean_acc_4,
    mean_acc_5,
    mean_acc_6,
    mean_acc_7,
    mean_acc_8,
) = map(
    mean_validation_accuracy,
    (history_1, history_2, history_3, history_4, history_5, history_6, history_7, history_8),
)

In [None]:
# Print mean validation accuracies for each model, one line per model
accuracy_report = (
    f"Mean Validation Accuracy - Model 1: {mean_acc_1:.4f}",
    f"Mean Validation Accuracy - Model 2: {mean_acc_2:.4f}",
    f"Mean Validation Accuracy - Model 3: {mean_acc_3:.4f}",
    f"Mean Validation Accuracy - Model 4: {mean_acc_4:.4f}",
    f"Mean Validation Accuracy - Model 5: {mean_acc_5:.4f}",
    f"Mean Validation Accuracy - Model 6: {mean_acc_6:.4f}",
    f"Mean Validation Accuracy - Model 7: {mean_acc_7:.4f}",
    f"Mean Validation Accuracy - Model 8: {mean_acc_8:.4f}",
)
for report_line in accuracy_report:
    print(report_line)