In [1]:
import os
import random
import numpy as np
import tensorflow as tf
# Use one seed for every random number generator this notebook relies on
# (Python's `random`, NumPy and TensorFlow) so runs are repeatable.
seed = 1
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here presumably only affects child processes - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [2]:
import tensorflow_hub as hub
import pandas as pd

In [3]:
import tensorflow_hub as hub
import tensorflow_addons as tfa
import tensorflow_text as tf_text
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Dense, Conv1D, GlobalMaxPooling1D

In [4]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [5]:
def build_model(input_shape=(75, 1024), num_classes=6, filters=32, kernel_size=5):
    """Build the (uncompiled) 1-D CNN classifier that consumes pre-computed embeddings.

    Architecture: Input -> Conv1D (ReLU) -> GlobalMaxPooling1D -> Dense (softmax).
    Calling it with no arguments yields exactly the original fixed architecture.

    Args:
        input_shape: Shape of a single sample, without the batch dimension.
            Defaults to (75, 1024) - presumably (tokens per question, embedding
            width); confirm against the saved embeddings.
        num_classes: Number of units in the softmax output layer.
        filters: Number of convolution filters.
        kernel_size: Width of the convolution window.

    Returns:
        A `Sequential` model; the caller is responsible for compiling it.
    """
    model = Sequential()
    model.add(Input(shape=input_shape, name='embedding'))
    model.add(Conv1D(filters, kernel_size, activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [6]:
def get_embedding(questions, model_path="D:/elmo_model/elmo"):
    """Embed a collection of question strings with a locally stored ELMo model.

    Args:
        questions: The texts to embed; passed straight to `tf.constant`.
        model_path: Location of the TF-Hub ELMo module to load. The default is
            the original machine-specific path, kept for backward compatibility;
            pass a different path when running elsewhere.

    Returns:
        The 'elmo' entry of the module's "default" signature output.
        NOTE(review): presumably shaped (n_questions, max_tokens, 1024) - confirm.
    """
    # The model is loaded on every call, so embed everything in a single call.
    elmo_model = hub.load(model_path)
    embeddings = elmo_model.signatures["default"](tf.constant(questions))
    return embeddings['elmo']

In [7]:
def evaluate_model(model, X_test, y_test):
    """Print accuracy, weighted F1 and a per-class report for `model` on a test set.

    Args:
        model: Object exposing `predict(X_test)` that returns per-class scores.
        X_test: Inputs handed to `model.predict`.
        y_test: One-hot encoded true labels (arg-maxed along axis 1).

    Returns:
        None; the metrics are printed.
    """
    probabilities = model.predict(X_test)
    # Collapse score / one-hot rows into integer class ids.
    y_true = np.argmax(y_test, axis=1)
    y_pred = np.argmax(probabilities, axis=1)

    print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
    print(f"F1 score: {f1_score(y_true, y_pred, average='weighted')}")
    print(classification_report(y_true, y_pred))

In [8]:
def _plot_metric(log_data, metric, title, ylabel):
    """Draw the train and validation curves of one logged metric in its own figure."""
    plt.figure(figsize=(10, 6))
    plt.plot(log_data[metric], label='train')
    plt.plot(log_data[f'val_{metric}'], label='test')
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend()
    plt.show()


def plot_result(log_data):
    """Plot loss, accuracy and F1 score per epoch from a training log.

    Args:
        log_data: Table indexable by column name that contains the columns
            'loss', 'accuracy', 'f1_score' and their 'val_'-prefixed counterparts.

    Returns:
        None; three figures are shown, one per metric.
    """
    # (log column, figure title, y-axis label) for each figure, in display order.
    panels = (
        ('loss', 'Model loss', 'Loss'),
        ('accuracy', 'Model Accuracy', 'Accuracy'),
        ('f1_score', 'Model F1 Score', 'F1 Score'),
    )
    for metric, title, ylabel in panels:
        _plot_metric(log_data, metric, title, ylabel)

In [9]:
# Load the question dataset; later cells read its 'QUESTION' and 'BT LEVEL' columns.
questions = pd.read_csv('data/combined_dataset.csv')

In [10]:
# Raw question text; this is what gets embedded.
X = questions['QUESTION']
# Level name -> integer class id (presumably Bloom's taxonomy levels, per the column name).
cognitive_level = {"Knowledge": 0, "Comprehension": 1, "Application": 2, "Analysis": 3, "Synthesis": 4, "Evaluation": 5}
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the chained in-place form warns on recent pandas and does
# not update `questions` at all under Copy-on-Write.
questions["BT LEVEL"] = questions["BT LEVEL"].replace(cognitive_level)
y = questions['BT LEVEL']

In [None]:
# Embed every question once and cache the result on disk so the expensive
# ELMo pass does not have to be repeated.
embedding = get_embedding(X)
# np.save does not create missing folders; make sure the target exists so the
# freshly computed embeddings are not lost to a FileNotFoundError.
os.makedirs('embedding/elmo', exist_ok=True)
np.save('embedding/elmo/embeddings.npy', embedding)

In [None]:
# Reload the cached embeddings written by the previous cell.
embedding = np.load('embedding/elmo/embeddings.npy')

In [None]:
# Sanity check: build_model() expects each sample to have shape (75, 1024).
embedding.shape

In [None]:
# Positional row ids; splitting these lets embeddings and labels be sliced consistently.
all_indices = list(range(len(embedding)))

In [None]:
# Hold out 10% of the rows for testing, keeping the class proportions of
# 'BT LEVEL' the same in both parts; the fixed random_state makes it repeatable.
train_ind, test_ind = train_test_split(
    all_indices,
    test_size=0.10,
    shuffle=True,
    stratify=questions['BT LEVEL'],
    random_state=1,
)

In [None]:
# Make sure `embedding` is a NumPy array so it can be indexed with index lists.
# np.asarray (unlike np.array) does not duplicate data that is already an ndarray.
embedding = np.asarray(embedding)

In [None]:
# Slice the embeddings into train / test parts using the split indices.
X_train = embedding[train_ind]
X_test = embedding[test_ind]

In [None]:
# Pick the labels at the same positions as the embeddings (positional indexing).
y_train = y.iloc[train_ind]
y_test = y.iloc[test_ind]

In [None]:
# One-hot encode the labels. They are taken from `y` directly so re-running this
# cell does not encode an already one-hot matrix a second time, and num_classes
# is fixed so both splits get the same width whatever labels they contain.
y_train = to_categorical(y.iloc[train_ind], num_classes=len(cognitive_level))
y_test = to_categorical(y.iloc[test_ind], num_classes=len(cognitive_level))

In [None]:
# Build the CNN classifier with its default architecture.
model = build_model()
# Compile with RMSprop and categorical cross-entropy; track accuracy and the
# weighted F1 score over the 6 classes.
model.compile(
    optimizer='RMSprop',
    loss='categorical_crossentropy',
    metrics=['accuracy', tfa.metrics.F1Score(num_classes=6, average='weighted')],
)
# Show the layer-by-layer summary.
model.summary()

In [None]:
# Make sure the log folder exists so the logger can open its file when training starts.
os.makedirs('log/Elmo/elmo', exist_ok=True)
# Per-epoch metrics are written as CSV; append=False overwrites any previous log.
training_logger = CSVLogger('log/Elmo/elmo/training.log', separator=',', append=False)

In [None]:
# Train the model, logging every epoch to CSV; the held-out split is also
# used as the validation set.
model.fit(
    X_train,
    y_train,
    batch_size=8,
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[training_logger],
    verbose=1,
)
# Persist the trained model to disk.
model.save('saved_models/Elmo/elmo/EQCM.h5')

In [None]:
# Read back the per-epoch metrics written by the CSVLogger callback.
log_data = pd.read_csv('log/Elmo/elmo/training.log', sep = ',', engine = 'python')
# Display up to the first 50 rows (training ran for 50 epochs).
log_data.head(50)

In [None]:
# Load the weights from the file saved after training, then print test-set metrics.
model.load_weights("saved_models/Elmo/elmo/EQCM.h5")
evaluate_model(model, X_test, y_test)

In [None]:
# Summary statistics of validation accuracy across the logged epochs.
log_data['val_accuracy'].describe()

In [None]:
# Summary statistics of validation F1 score across the logged epochs.
log_data['val_f1_score'].describe()

In [None]:
# Row label of the highest validation accuracy, plus one to turn the 0-based
# default row index into a 1-based epoch number.
log_data[['val_accuracy']].idxmax() + 1

In [None]:
# Plot the loss, accuracy and F1 curves from the training log.
plot_result(log_data)