In [None]:
import os
import random
import numpy as np
import tensorflow as tf
# Single seed value shared by every generator seeded in this cell.
seed = 1

# Environment values must be text, hence the str() conversion.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so assigning it
# here presumably only affects child processes — confirm if hash stability matters.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python's, NumPy's and TensorFlow's global generators with the same value.
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(seed)

In [None]:
import pandas as pd
import tensorflow_addons as tfa
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, set_seed
# Reuse the notebook-wide `seed` defined in the first cell rather than a second
# hard-coded value: transformers' set_seed re-seeds random, NumPy and TensorFlow,
# so a different literal here would silently override the earlier seeding.
set_seed(seed)

## Read Dataset

In [None]:
# Load the question dataset; the path is relative to the working directory.
dataset_path = "Data/combined_dataset.csv"
questions = pd.read_csv(dataset_path)

# Bare last expression so the notebook renders a preview of the frame.
questions

In [None]:
# Model input: the text of each question.
X = questions['QUESTION']

# Integer code for each of the six cognitive-level labels found in "BT LEVEL".
cognitive_level = {"Knowledge": 0, "Comprehension": 1, "Application": 2, "Analysis": 3, "Synthesis": 4, "Evaluation": 5}

# Assign the encoded column back instead of calling replace(..., inplace=True) on the
# column selection: that chained in-place form does not update `questions` under pandas
# Copy-on-Write, which would leave the labels as strings.
# replace() leaves already-encoded integers untouched, so re-running this cell is safe.
# The int64 cast fails loudly here if a label is missing from `cognitive_level`.
questions["BT LEVEL"] = questions["BT LEVEL"].replace(cognitive_level).astype("int64")

# Target vector as a plain NumPy array of integer class ids.
y = questions['BT LEVEL'].values

In [None]:
# Hold out 10% of the questions, stratified on the "BT LEVEL" column so both splits
# keep the same class proportions; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    stratify=questions['BT LEVEL'],
    shuffle=True,
    random_state=1,
)

In [None]:
# One-hot encode the integer class ids.
# num_classes is fixed to the size of the label mapping so both splits always have one
# column per class, even if a split happens to contain no example of the highest class
# (otherwise the width is inferred from the largest label present in that split).
# NOTE(review): this cell overwrites y_train / y_test, so it must run exactly once per
# split — re-running it would one-hot encode the already encoded arrays.
y_train = to_categorical(y_train, num_classes=len(cognitive_level))
y_test = to_categorical(y_test, num_classes=len(cognitive_level))

In [None]:
# Tokenizer for the same checkpoint name that the classifier is loaded from.
tokenizer_checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

In [None]:
# Tokenize the training questions into fixed-length NumPy arrays.
# truncation=True is required alongside padding='max_length': without it any question
# longer than 128 tokens keeps its full length, producing ragged rows that cannot be
# stacked into a single array.
tokenized_data_train = tokenizer(
    X_train.to_list(),
    return_tensors="np",
    padding='max_length',
    truncation=True,
    max_length=128,
)

In [None]:
# Tokenize the held-out questions into fixed-length NumPy arrays.
# truncation=True is required alongside padding='max_length': without it any question
# longer than 128 tokens keeps its full length, producing ragged rows that cannot be
# stacked into a single array.
tokenized_data_test = tokenizer(
    X_test.to_list(),
    return_tensors="np",
    padding='max_length',
    truncation=True,
    max_length=128,
)

In [None]:
# Copy the one-hot label matrices into the arrays that are passed to model.fit.
labels_train, labels_test = map(np.array, (y_train, y_test))

In [None]:
# Load the pretrained checkpoint with a classification head sized for six labels.
model = TFAutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=6,
)

In [None]:
# Cross-entropy over one-hot targets. from_logits=True tells Keras the predictions are
# unnormalised scores, so the loss applies the softmax itself.
# NOTE(review): assumes the model emits raw logits — confirm.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

In [None]:
# RMSprop with a small fine-tuning learning rate; track accuracy and a weighted F1
# computed over the six classes.
optimizer = RMSprop(learning_rate=3e-5)
weighted_f1 = tfa.metrics.F1Score(num_classes=6, average='weighted')
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=['accuracy', weighted_f1],
)

In [None]:
# Fine-tune for 5 epochs in batches of 16. The tokenizer outputs are converted to plain
# dicts before being handed to Keras.
# NOTE(review): the held-out test split doubles as the validation set here, so the
# validation metrics are also the only test metrics — confirm that is intended.
train_inputs = dict(tokenized_data_train)
eval_inputs = dict(tokenized_data_test)
model.fit(
    train_inputs,
    labels_train,
    validation_data=(eval_inputs, labels_test),
    batch_size=16,
    epochs=5,
)