In [None]:
import os
import random
import numpy as np
import tensorflow as tf
# Seed every random number generator this notebook touches (Python's `random`,
# NumPy's legacy global generator and TensorFlow's global generator) with the
# same value.
seed = 1
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(seed)

In [None]:
import tensorflow_hub as hub
import tensorflow_addons as tfa
import tensorflow_text as tf_text
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Dense, Dropout

In [None]:
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Load the question dataset; the path is relative to the notebook's working
# directory.
questions = pd.read_csv("Data/combined_dataset.csv")
# Preview only the first rows instead of rendering the whole frame.
questions.head()

In [None]:
# Model input: the raw question text column.
X = questions['QUESTION']

# Cognitive-level name -> integer class id used as the training target.
cognitive_level = {"Knowledge": 0, "Comprehension": 1, "Application": 2, "Analysis": 3, "Synthesis": 4, "Evaluation": 5}

# Encode the label column and assign the result back to the frame.
# The earlier chained `questions["BT LEVEL"].replace(..., inplace=True)` acts on
# a temporary Series: pandas 2.x warns about it and, with Copy-on-Write enabled,
# the frame is left unchanged, so `y` would still hold strings.
# Values that are not keys of the mapping (e.g. ids that are already encoded
# because this cell was re-run) pass through untouched, as `replace` did.
bt_level_encoded = questions["BT LEVEL"].map(lambda level: cognitive_level.get(level, level))

# Fail early, with a readable message, on labels the mapping does not cover
# (missing values included) instead of failing later during one-hot encoding.
unknown_levels = set(bt_level_encoded.unique()) - set(cognitive_level.values())
if unknown_levels:
    raise ValueError(f"Unmapped BT LEVEL values: {sorted(unknown_levels, key=str)}")

questions["BT LEVEL"] = bt_level_encoded.astype("int64")
y = questions['BT LEVEL'].values

In [None]:
# Hold out 10% of the rows as a test split. The split is shuffled with a fixed
# random_state and stratified on the "BT LEVEL" column of `questions`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    stratify=questions['BT LEVEL'],
    shuffle=True,
    random_state=1,
)

In [None]:
# One-hot encode the integer class ids for the categorical cross-entropy loss.
# * `num_classes=6` pins the width to the six cognitive levels (matching the
#   model's 6-unit softmax) instead of letting each split infer it from its own
#   maximum label, which would yield mismatched shapes if a split lacked the
#   highest class.
# * The `ndim` guard keeps the cell idempotent: re-running it must not encode
#   labels that are already one-hot.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes=6)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes=6)

In [None]:
# Root folder that holds the locally stored TF-Hub models. Set the
# BERT_MODEL_DIR environment variable to point somewhere else; the default
# keeps the original hard-coded location, so existing setups are unaffected.
bert_model_dir = os.environ.get("BERT_MODEL_DIR", "D:/bert_models")
preprocess_path = f"{bert_model_dir}/bert_en_uncased_preprocess_3"
bert_path = f"{bert_model_dir}/bert_en_uncased_L-12_H-768_A-12_4"

# Text preprocessing layer, loaded with KerasLayer's defaults.
preprocessor = hub.KerasLayer(preprocess_path)
# BERT encoder layer; trainable=True so its weights are updated during fit.
encoder = hub.KerasLayer(bert_path, trainable=True)

In [None]:
# Build the classifier graph: raw strings -> preprocessor -> encoder ->
# dropout -> 16-unit ReLU layer -> 6-way softmax.

# Scalar string input: each example is a single piece of text.
text_input = Input(shape=(), dtype=tf.string)
encoder_inputs = preprocessor(text_input)
embedding = encoder(encoder_inputs)

# Only the encoder's 'pooled_output' entry feeds the classification head.
pooled_output = embedding['pooled_output']
dropout = Dropout(0.3)(pooled_output)
dense_layer = Dense(16, activation='relu')(dropout)
output = Dense(6, activation='softmax')(dense_layer)

model = tf.keras.Model(inputs=[text_input], outputs=[output])
model.summary()

In [None]:
# Compile with Adam at a learning rate of 1e-5 and categorical cross-entropy;
# track accuracy and a weighted F1 score over the 6 classes.
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy', tfa.metrics.F1Score(num_classes=6, average='weighted')],
)
# Train for 10 epochs with a batch size of 8.
# NOTE(review): the held-out test split is also used as validation data here,
# so the reported validation metrics are not an independent test estimate.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=8,
    validation_data=(X_test, y_test),
    verbose=1,
)