In [None]:
"""
This notebook contains a minimal implementation of the HAN (Hierarchical Attention Network) model together with its training and evaluation functions.
This notebook was not used for training, validation, inference, or visualization in our research project.
The implementation of the HAN architecture also works with other datasets.
"""

In [None]:
import os
import re
import urllib.request
import zipfile

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from torch import nn
from torch.nn.utils.rnn import pad_sequence
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, random_split
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

In [None]:
# Shared torchtext tokenizer: used both to build the vocabulary (yield_tokens)
# and to encode sentences at batch time (text_pipeline).
tokenizer = get_tokenizer('basic_english')

In [None]:
class CommentDataset(Dataset):
    """Map-style dataset that serves ONE split (train, test or valid) of the comment CSV.

    The CSV is read in full, shuffled, label-encoded (``'LEFT'`` -> 0 and
    ``'RIGHT'`` -> 1 in the ``Authors Biasness`` column) and split 80/10/10 into
    train/test/valid. Every item is ``(comments, label)`` where ``comments`` is
    the ``Authors Comment`` cell split on the ``"-|-"`` separator.

    Args:
        folder_path: Path of the CSV file (despite the name, a file, not a folder).
        train: Serve the training split (sorted by ``Num of Comments``, descending).
        test: Serve the test split.
        valid: Serve the validation split.
        seed: Seed of the initial shuffle. It must be identical for every
            instance built from the same CSV: the split itself is seeded, but it
            operates on row *positions*, so an unseeded shuffle would give each
            instance a different partition and let training rows leak into the
            validation/test instances.

    Raises:
        Exception: If none, or more than one, of ``train``/``test``/``valid`` is set.
    """

    def __init__(self, folder_path, train=False, test=False, valid=False, seed=0):

        # Exactly one split flag must be set; reject everything else before any I/O.
        if not (train or test or valid):
            raise Exception('One of the `train`, `test` or `valid` needs to be True, got `train = {}` `test = {}` and `valid = {}`'.format(train, test, valid))
        if train and test and valid:
            raise Exception('Only one of the `train`, `test` or `valid` needs to be True, got `train = {}` `test = {}` and `valid = {}`'.format(train, test, valid))
        if train and test:
            raise Exception('Only one of the `train` or `test` needs to be True, got `train = {}`, and `test = {}`'.format(train, test))
        if train and valid:
            raise Exception('Only one of the `train` or `valid` needs to be True, got `train = {}`, and `valid = {}`'.format(train, valid))
        if test and valid:
            raise Exception('Only one of the `test` or `valid` needs to be True, got `test = {}`, and `valid = {}`'.format(test, valid))

        # boolean values
        self.train = train
        self.test = test
        self.valid = valid

        # Only the list belonging to the selected split is populated below.
        self.train_comment = []
        self.test_comment = []
        self.val_comment = []

        # Read the dataset and shuffle it reproducibly (see `seed` above).
        self.data = pd.read_csv(folder_path, sep=",")
        self.data = shuffle(self.data, random_state=seed).reset_index(drop=True)

        # Assign the encoded column back instead of calling an in-place method
        # on `self.data[...]`: that chained form operates on a temporary under
        # pandas Copy-on-Write and would leave the labels as strings.
        self.data['Authors Biasness'] = self.data['Authors Biasness'].replace({'LEFT': 0, 'RIGHT': 1})

        # split the dataset into train (80%), test (10%) and valid (10%).
        self.train_df, held_out_df = train_test_split(self.data, test_size=0.2, random_state=11)
        self.test_df, self.valid_df = train_test_split(held_out_df, test_size=0.5, random_state=96)

        if self.train:
            # Sort the dataframe according to the number of comments on documents.
            self.train_df = self.train_df.sort_values(by=['Num of Comments'], ascending=False)
            self.train_comment = self._split_comments(self.train_df)
            self.data_selected = self.train_df
            self.comment_selected = self.train_comment
        elif self.test:
            # no need to sort
            self.test_comment = self._split_comments(self.test_df)
            self.data_selected = self.test_df
            self.comment_selected = self.test_comment
        else:
            # no need to sort
            self.val_comment = self._split_comments(self.valid_df)
            self.data_selected = self.valid_df
            self.comment_selected = self.val_comment

    @staticmethod
    def _split_comments(frame):
        """Split every ``Authors Comment`` cell on ``"-|-"``, dropping the last piece."""
        return [cell.split("-|-")[:-1] for cell in frame["Authors Comment"]]

    def __getitem__(self, idx):
        """Return ``(list_of_comment_strings, label)`` for position ``idx`` of the selected split."""
        # Index the label column directly rather than materialising the whole row.
        label = self.data_selected["Authors Biasness"].iloc[idx]
        sentence = self.comment_selected[idx]
        return sentence, label

    def __len__(self):
        """Number of documents in the selected split."""
        return len(self.comment_selected)

In [None]:
# Path of the CSV that holds every split (the variable is named "folder" but it is a file).
data_folder = "16. Training Dataset revisit.csv"

# One CommentDataset instance per split: the flag picks which part of the CSV
# the instance serves. Flags left out keep their default of False.
dataset_train = CommentDataset(data_folder, train=True)
dataset_valid = CommentDataset(data_folder, valid=True)

In [None]:
def yield_tokens(data_iter):
    """Lazily yield one token list per sentence found in ``data_iter``.

    ``data_iter`` yields ``(sentences, label)`` pairs; the label is ignored and
    each sentence is passed through the module-level ``tokenizer``.
    """
    for sentences, _label in data_iter:
        yield from (tokenizer(sentence) for sentence in sentences)

In [None]:
# Build the vocabulary from the training split only. "<unk>" is registered as a
# special token and set as the default index, so looking up a token that is not
# in the vocabulary returns the "<unk>" index.
vocab = build_vocab_from_iterator(yield_tokens(dataset_train), specials=["<unk>"])
vocab.set_default_index(vocab["<unk>"])

In [None]:
# Number of entries in the vocabulary (special tokens included); used to size
# the embedding layer and to derive the padding id in collate_batch.
vocab_size = len(vocab)
vocab_size

In [None]:
def text_pipeline(x):
    """Tokenize the raw string ``x`` and map every token to its vocabulary index."""
    return vocab(tokenizer(x))


def label_pipeline(x):
    """Coerce a label value to a plain Python ``int``."""
    return int(x)

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# works perfectly. backup
def collate_batch(batch):
    """Turn a list of ``(sentences, label)`` samples into two padded tensors.

    Every sentence is encoded with ``text_pipeline``; documents are then padded
    to the largest sentence count in the batch and sentences to the largest
    token count in the batch.

    Returns:
        texts: int64 tensor ``[BATCH_SIZE, NUM_SENTENCES, MAX_WORDS]``.
        labels: int64 tensor ``[BATCH_SIZE]``.
    """
    labels = torch.tensor([label_pipeline(raw_label) for _, raw_label in batch],
                          dtype=torch.int64)

    # One list of 1-D id tensors per document (ragged in both dimensions).
    encoded_docs = [
        [torch.tensor(text_pipeline(sentence), dtype=torch.int64) for sentence in sentences]
        for sentences, _ in batch
    ]

    max_sentences, max_words = get_max_length(encoded_docs)

    # NOTE(review): the padding id is the id of the LAST vocabulary entry, i.e. a
    # real token, so padding and that token share one embedding row.
    pad_index = vocab_size - 1

    # Start from an all-padding block and copy the real token ids over it.
    texts = torch.full((len(encoded_docs), max_sentences, max_words),
                       pad_index, dtype=torch.int64)
    for doc_pos, sentences in enumerate(encoded_docs):
        for sent_pos, token_ids in enumerate(sentences):
            texts[doc_pos, sent_pos, :token_ids.shape[0]] = token_ids

    return texts, labels

In [None]:
def get_max_length(doc):
    """Return the padding targets for a batch of documents.

    Args:
        doc: A batch, i.e. a list of documents; each document is a list of
            sentences and each sentence is any sized sequence of tokens.

    Returns:
        ``(max_sentences_per_document, max_tokens_per_sentence)``. A missing
        level (empty batch, or no sentences at all) yields ``0`` for that value
        instead of raising ``IndexError``.

    Example:
        doc = [
            [
                [1,2,3,4,5],
                [1,2,3,4],
                [1,2,3,4,5,6,7,8],
                [1,2,3,4,5]
            ],
            [
                [1,2],
                [1,2,3,4,5,6,7,8,9],
                [1,2,3,4,5],
                [1,2,3,4],
                [1,2,3,4,5,6]
            ]
        ]
        get_max_length(doc)  ->  (5, 9)
    """
    # max() is enough here; sorting the full length lists only to read the last
    # element was wasted work.
    max_sentences = max((len(sentences) for sentences in doc), default=0)
    max_words = max((len(words) for sentences in doc for words in sentences), default=0)
    return max_sentences, max_words

In [None]:
def Download_and_extract():
    """Download the GloVe 840B/300d archive into the working directory and unpack it.

    The archive is first written to ``glove.840B.300d.zip.part`` and renamed only
    after the transfer finished, so an interrupted download never leaves a
    truncated ``glove.840B.300d.zip`` that later runs would mistake for a
    complete one. A failed download raises (``urllib.error.URLError`` /
    ``OSError``) instead of silently continuing to the extraction step.
    """
    url = 'https://nlp.stanford.edu/data/glove.840B.300d.zip'
    archive_path = 'glove.840B.300d.zip'
    partial_path = archive_path + '.part'

    print("This might take some time...")
    print("Downloading...")
    try:
        # Pure standard library: no dependency on an external `wget` binary.
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, archive_path)
    finally:
        # Only still present when the download or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    Extract()
    
def Extract():
    """Unpack ``glove.840B.300d.zip`` from the working directory into the working directory."""
    print("Extracting...")
    archive = zipfile.ZipFile('glove.840B.300d.zip', 'r')
    with archive:
        # extract and save to the same directory.
        archive.extractall("./")
    print("Done!")
    
def load_pretrained_embedding_matrix():
    """Make sure ``glove.840B.300d.txt`` exists locally, fetching only what is missing.

    Despite its name this function loads nothing into memory and returns
    ``None``; it only prepares the file that ``GloveModel`` reads afterwards.

    The extracted text file is checked first, so deleting the (large) zip after
    extraction no longer triggers a full re-download.
    """
    # this might take some time........... ~5 mins.
    if os.path.isfile('glove.840B.300d.txt'):
        print("Already Downloaded and extracted!")
    elif os.path.isfile('glove.840B.300d.zip'):
        # Archive is present but has not been unpacked yet.
        Extract()
    else:
        Download_and_extract()

# https://github.com/MohammadWasil/Visual-Question-Answering-VQA/blob/master/2.%20Dataset%20Used%20in%20Training..ipynb
def GloveModel(file_path, vocab, embedding_dim=300):
    """Build an embedding matrix for ``vocab`` from a GloVe-format text file.

    Args:
        file_path: Text file with one ``<token> <v1> ... <vN>`` entry per line.
        vocab: Object exposing ``get_stoi()`` and ``get_itos()``.
        embedding_dim: Number of vector components per line (300 for
            ``glove.840B.300d.txt``).

    Returns:
        ``numpy`` array of shape ``(len(vocab.get_stoi()) + 1, embedding_dim)``.
        Row ``i`` holds the vector of ``vocab.get_itos()[i]``; tokens missing
        from the file, and the one extra trailing row, stay all-zero.
    """
    embedding_index = {}

    # `with` guarantees the (multi-GB) file handle is released, also on errors.
    with open(file_path, 'r', encoding='utf8') as f:
        print("Opened!")
        for line in f:
            # Split from the RIGHT: some GloVe tokens contain spaces themselves,
            # so splitting from the left would put token fragments into the
            # vector and break the float conversion.
            parts = line.rstrip().rsplit(' ', embedding_dim)
            if len(parts) != embedding_dim + 1:
                # Blank or malformed line: nothing usable to load.
                continue
            embedding_index[parts[0]] = np.asarray(parts[1:], dtype='float32')

    print("Done.",len(embedding_index)," words loaded!")
    embedding_matrix = np.zeros((len(vocab.get_stoi()) + 1, embedding_dim))
    print(embedding_matrix.shape)

    for index, word in enumerate(vocab.get_itos()):
        embedding_vector = embedding_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[index] = embedding_vector
    return embedding_matrix

In [None]:
# can change this accordingly.
# Switch: set to True to initialise the embedding layer from the GloVe vectors.
USE_PRETRAINED_EMBEDDING_MATRIX = False

# Without pretrained vectors there is no matrix to hand to the model.
embedding_matrix = None
if USE_PRETRAINED_EMBEDDING_MATRIX:
    # Download and extract the GloVe file if that has not happened yet ...
    load_pretrained_embedding_matrix()
    # ... then build the embedding matrix for our vocabulary from it.
    embedding_matrix = GloveModel("glove.840B.300d.txt", vocab)

In [None]:
BATCH_SIZE = 32

# Both loaders keep the dataset order (shuffle=False) and pad through collate_batch.
train_dataloader = DataLoader(
    dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_batch,
)

# The validation loader serves one document per batch (DataLoader's default
# batch size, spelled out here).
val_dataloader = DataLoader(
    dataset_valid,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_batch,
)

In [None]:
class Encoder(nn.Module):
    """Word-level encoder: embedding -> bidirectional GRU -> attention over the words.

    Args:
        VOCAB_SIZE: Number of embedding rows (ignored when a pretrained matrix is used).
        EMBEDDING_DIMENSION: Embedding width (ignored when a pretrained matrix is used).
        num_class: Stored on the module only.
        ENCODER_HIDDEN_DIMENSION: GRU hidden size per direction.
        DECODER_HIDDEN_DIMENSION: Stored on the module only.
        USE_PRETRAINED_EMBEDDING_MATRIX: When true, initialise the (trainable)
            embedding from ``embedding_matrix`` and take both sizes from its shape.
        embedding_matrix: 2-D array ``[vocab, dim]``; required when the flag is true.

    Raises:
        ValueError: If the pretrained flag is set but no matrix is given.
    """

    def __init__(self, VOCAB_SIZE, EMBEDDING_DIMENSION, num_class, ENCODER_HIDDEN_DIMENSION, DECODER_HIDDEN_DIMENSION, USE_PRETRAINED_EMBEDDING_MATRIX, embedding_matrix):
        super().__init__()

        self.vocab_size = VOCAB_SIZE
        self.embed_dim = EMBEDDING_DIMENSION

        self.encoder_hidden_dim = ENCODER_HIDDEN_DIMENSION
        self.decoder_hidden_dim = DECODER_HIDDEN_DIMENSION

        self.num_class = num_class

        if USE_PRETRAINED_EMBEDDING_MATRIX:
            if embedding_matrix is None:
                raise ValueError("USE_PRETRAINED_EMBEDDING_MATRIX is set but `embedding_matrix` is None")
            weights = torch.tensor(embedding_matrix, dtype=torch.float32)
            self.vocab_size = weights.shape[0]
            self.embed_dim = weights.shape[1]
            # freeze=False keeps the pretrained vectors trainable.
            self.embedding = nn.Embedding.from_pretrained(weights, freeze=False)
        else:
            self.embedding = nn.Embedding(self.vocab_size, self.embed_dim)

        # batch_first=True: forward() receives batch-major input. Without it the
        # GRU treats the batch axis as time and runs its recurrence across the
        # samples of a batch instead of across the words of a sentence.
        self.gru = nn.GRU(self.embed_dim, self.encoder_hidden_dim, bidirectional=True, batch_first=True)

        self.attention = Attention(self.encoder_hidden_dim*2)

    def forward(self, text):
        """Encode the same sentence position of every document in the batch.

        Args:
            text: Token ids, ``[BATCH_SIZE, MAX_WORDS]``.

        Returns:
            alpha: word attention weights, ``[BATCH_SIZE, 1, MAX_WORDS]``.
            s_i: sentence vector, ``[BATCH_SIZE, 1, ENCODER_HIDDEN_DIMENSION*2]``.
            gru_out: per-word GRU states, ``[BATCH_SIZE, MAX_WORDS, ENCODER_HIDDEN_DIMENSION*2]``.
        """
        # [BATCH_SIZE, MAX_WORDS] -> [BATCH_SIZE, MAX_WORDS, EMBEDDING_DIMENSION]
        embedded = self.embedding(text)

        # -> [BATCH_SIZE, MAX_WORDS, ENCODER_HIDDEN_DIMENSION*2]; the final hidden state is unused.
        gru_out, _hidden = self.gru(embedded)

        alpha, s_i = self.attention(gru_out)  # from the diagram, it is s_i.

        return alpha, s_i, gru_out

class Sentence_Encoder(nn.Module):
    """Sentence-level encoder: bidirectional GRU over sentence vectors -> attention.

    Args:
        VOCAB_SIZE: Stored on the module only.
        EMBEDDING_DIMENSION: Stored on the module only.
        num_class: Stored on the module only.
        ENCODER_HIDDEN_DIMENSION: GRU hidden size per direction; the expected
            input width is ``2 * ENCODER_HIDDEN_DIMENSION``.
        DECODER_HIDDEN_DIMENSION: Stored on the module only.
    """

    def __init__(self, VOCAB_SIZE, EMBEDDING_DIMENSION, num_class, ENCODER_HIDDEN_DIMENSION, DECODER_HIDDEN_DIMENSION):
        super().__init__()

        self.vocab_size = VOCAB_SIZE
        self.embed_dim = EMBEDDING_DIMENSION

        self.encoder_hidden_dim = ENCODER_HIDDEN_DIMENSION
        self.decoder_hidden_dim = DECODER_HIDDEN_DIMENSION

        self.num_class = num_class

        # batch_first=True: forward() receives [BATCH, NUM_SENTENCES, FEATURES].
        # Without it the GRU would iterate over the batch axis, mixing different
        # documents instead of reading the sentences of one document in order.
        self.gru = nn.GRU(self.encoder_hidden_dim*2, self.encoder_hidden_dim, bidirectional=True, batch_first=True)

        self.attention = Attention(self.encoder_hidden_dim*2)

    def forward(self, word_embed):
        """Encode each document from its sentence vectors.

        Args:
            word_embed: ``[BATCH, NUM_SENTENCES, ENCODER_HIDDEN_DIMENSION*2]``.

        Returns:
            alpha: sentence attention weights, ``[BATCH, 1, NUM_SENTENCES]``.
            v: document vector, ``[BATCH, 1, ENCODER_HIDDEN_DIMENSION*2]``.
            gru_out: per-sentence GRU states, ``[BATCH, NUM_SENTENCES, ENCODER_HIDDEN_DIMENSION*2]``.
        """
        # The final hidden state is unused; attention pools over all sentence states.
        gru_out, _hidden = self.gru(word_embed)

        alpha, v = self.attention(gru_out)  # from the diagram, it is v.
        return alpha, v, gru_out
      
class Attention(nn.Module):
    """Attention pooling over axis 1 of a ``[BATCH, LENGTH, FEATURES]`` tensor.

    Each position is scored with ``context(tanh(linear(h)))``; the scores are
    softmax-normalised along LENGTH and used to form a weighted sum of the inputs.

    Args:
        ENCODER_HIDDEN_DIMENSION: Size of the FEATURES axis of the input.
    """

    def __init__(self, ENCODER_HIDDEN_DIMENSION):
        super().__init__()

        self.encoder_hidden_dim = ENCODER_HIDDEN_DIMENSION

        feature_dim = self.encoder_hidden_dim
        self.linear = nn.Linear(feature_dim, feature_dim)
        self.context = nn.Linear(feature_dim, 1)

    def forward(self, gru_out):
        """Pool ``gru_out`` along its LENGTH axis.

        Args:
            gru_out: ``[BATCH, LENGTH, FEATURES]``.

        Returns:
            alpha: attention weights, ``[BATCH, 1, LENGTH]`` (sum to 1 over LENGTH).
            a: weighted sum of the inputs, ``[BATCH, 1, FEATURES]``.
        """
        # One unnormalised score per position: [BATCH, LENGTH, 1]
        scores = self.context(torch.tanh(self.linear(gru_out)))

        # Normalise over LENGTH, then move LENGTH last: [BATCH, 1, LENGTH]
        alpha = F.softmax(scores, dim=1).transpose(1, 2)

        # [BATCH, 1, LENGTH] x [BATCH, LENGTH, FEATURES] -> [BATCH, 1, FEATURES]
        pooled = torch.matmul(alpha, gru_out)
        return alpha, pooled
    
class HierarchicalAttentionNetwork(nn.Module):
    """Hierarchical Attention Network classifier: word encoder -> sentence encoder -> linear head.

    Args:
        VOCAB_SIZE: Vocabulary size handed to the word encoder.
        EMBEDDING_DIMENSION: Embedding width handed to the word encoder.
        num_class: Number of output classes.
        ENCODER_HIDDEN_DIMENSION: GRU hidden size per direction in both encoders.
        DECODER_HIDDEN_DIMENSION: Forwarded to the encoders.
        USE_PRETRAINED_EMBEDDING_MATRIX: Forwarded to the word encoder.
            Defaults to ``False`` so the five-argument construction works.
        embedding_matrix: Pretrained matrix for the word encoder (default ``None``).
        device: Optional device the two encoders are moved to at construction.
            Calling ``.to(device)`` on the whole model afterwards has the same
            effect, so this may be left as ``None``.
    """

    def __init__(self, VOCAB_SIZE, EMBEDDING_DIMENSION, num_class, ENCODER_HIDDEN_DIMENSION, DECODER_HIDDEN_DIMENSION, USE_PRETRAINED_EMBEDDING_MATRIX=False, embedding_matrix=None, device=None):
        super().__init__()

        self.vocab_size = VOCAB_SIZE
        self.embedding_size = EMBEDDING_DIMENSION
        self.num_class = num_class
        self.ENCODER_HIDDEN_DIMENSION = ENCODER_HIDDEN_DIMENSION
        self.DECODER_HIDDEN_DIMENSION = DECODER_HIDDEN_DIMENSION
        self.embedding_matrix = embedding_matrix
        self.USE_PRETRAINED_EMBEDDING_MATRIX = USE_PRETRAINED_EMBEDDING_MATRIX

        self.model = Encoder(self.vocab_size, self.embedding_size, self.num_class, self.ENCODER_HIDDEN_DIMENSION, self.DECODER_HIDDEN_DIMENSION, self.USE_PRETRAINED_EMBEDDING_MATRIX, self.embedding_matrix)
        self.sent_model = Sentence_Encoder(self.vocab_size, self.embedding_size, self.num_class, self.ENCODER_HIDDEN_DIMENSION, self.DECODER_HIDDEN_DIMENSION)
        if device is not None:
            self.model = self.model.to(device)
            self.sent_model = self.sent_model.to(device)

        self.linear = nn.Linear(self.ENCODER_HIDDEN_DIMENSION*2, self.num_class)

    def forward(self, text):
        """Classify a batch of documents.

        Args:
            text: Token ids, ``[BATCH_SIZE, NUM_SENTENCES, MAX_WORDS]``.

        Returns:
            classifier: class PROBABILITIES (softmax already applied),
                ``[BATCH_SIZE, num_class]``. ``nn.CrossEntropyLoss`` expects raw
                logits, so it should not be fed these values unchanged.
            word_a_list: list of NUM_SENTENCES word-attention tensors, one per
                sentence position, as returned by the word encoder.
            alpha_sentence: sentence-attention weights as returned by the
                sentence encoder.
        """
        # [BATCH_SIZE, NUM_SENTENCES, MAX_WORDS] -> [NUM_SENTENCES, BATCH_SIZE, MAX_WORDS]
        text = text.permute(1, 0, 2)
        word_a_list, word_s_list = [], []

        # Encode sentence position by sentence position, across the whole batch.
        for sent in text:
            # sent: [BATCH_SIZE, MAX_WORDS]
            alpha_word, word_s, gru_out = self.model(sent)
            # word_s: [BATCH_SIZE, 1, ENCODER_HIDDEN_DIMENSION*2]

            word_a_list.append(alpha_word)
            word_s_list.append(word_s)

        word_s_list = torch.cat(word_s_list, dim=1)
        # [BATCH_SIZE, NUM_SENTENCES, ENCODER_HIDDEN_DIMENSION*2]

        alpha_sentence, v, gru_out_sentence = self.sent_model(word_s_list)
        # v: [BATCH_SIZE, 1, ENCODER_HIDDEN_DIMENSION*2]

        v_output = self.linear(v)
        # v_output: [BATCH_SIZE, 1, num_class]

        classifier = F.softmax(v_output, dim=2).squeeze(1)
        # classifier: [BATCH_SIZE, num_class]

        return classifier, word_a_list, alpha_sentence

In [None]:
#from datetime import datetime
def save_model(epoch, model, optimizer, train_loss_list, val_loss_list, train_accu_list, val_accu_list, path):
    """Write a training checkpoint with ``torch.save``.

    Args:
        epoch: Epoch number; stored in the checkpoint and substituted into ``path``.
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        train_loss_list, val_loss_list, train_accu_list, val_accu_list:
            Metric histories stored as given.
        path: Target file name (``.pth``/``.pt``); ``path.format(epoch)`` is
            applied, so it may contain a ``{}`` placeholder for the epoch.
    """
    checkpoint = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        train_loss_list=train_loss_list,
        val_loss_list=val_loss_list,
        train_accu_list=train_accu_list,
        val_accu_list=val_accu_list,
    )
    target_file = path.format(epoch)
    torch.save(checkpoint, target_file)

In [None]:
# Model hyper-parameters.
num_class = 2
embedding_size = 100
ENCODER_HIDDEN_DIMENSION = 64
DECODER_HIDDEN_DIMENSION = 32

# Pass the pretrained-embedding settings and the device as well: the model's
# constructor takes them after the five size arguments, and leaving them out
# would also mean the USE_PRETRAINED_EMBEDDING_MATRIX switch is never honoured.
han_model = HierarchicalAttentionNetwork(
    vocab_size,
    embedding_size,
    num_class,
    ENCODER_HIDDEN_DIMENSION,
    DECODER_HIDDEN_DIMENSION,
    USE_PRETRAINED_EMBEDDING_MATRIX,
    embedding_matrix,
    device,
).to(device)

def _nll_on_probabilities(probabilities, target):
    """Negative log-likelihood for a model that already outputs softmax probabilities."""
    # The clamp keeps log() finite when a probability underflows to exactly 0.
    return F.nll_loss(torch.log(probabilities.clamp(min=1e-12)), target)


def train(han_model, train_dataloader, val_dataloader, dataset_train, dataset_valid,
          num_epochs=20, learning_rate=0.0001, checkpoint_path="han_model_epoch_{}.pt"):
    """Train ``han_model`` with Adam, validating and checkpointing after every epoch.

    Args:
        han_model: Model returning ``(class_probabilities, word_alphas, sentence_alpha)``.
        train_dataloader: Yields ``(text, label)`` training batches.
        val_dataloader: Validation batches, handed to ``evaluate``.
        dataset_train: Training dataset; its length is the accuracy denominator.
        dataset_valid: Validation dataset, handed to ``evaluate``.
        num_epochs: Number of passes over ``train_dataloader``.
        learning_rate: Adam learning rate.
        checkpoint_path: Checkpoint file name handed to ``save_model``; ``{}`` is
            replaced by the zero-based epoch index.

    Returns:
        ``(train_loss_list, val_loss_list, train_accu_list, val_accu_list)``,
        each with one entry per epoch (accuracies in percent).
    """
    optimizer = Adam(han_model.parameters(), learning_rate)
    # The model's forward pass ends in a softmax. nn.CrossEntropyLoss would apply
    # a second (log-)softmax on top of those probabilities, flattening loss and
    # gradients, so take the negative log-likelihood of the probabilities instead.
    loss_function = _nll_on_probabilities

    train_loss_list = []
    val_loss_list = []
    train_accu_list = []
    val_accu_list = []

    han_model.to(device)

    for epoch in range(num_epochs):
        # evaluate() switches the model to eval mode, so re-enable train mode each epoch.
        han_model.train()
        train_loss = 0
        accuracy = 0

        for text, label in train_dataloader:
            text = text.to(device)
            label = label.to(device)

            optimizer.zero_grad()

            predicted_label, alpha_word, alpha_sentence = han_model(text)

            loss = loss_function(predicted_label, label)

            loss.backward()
            optimizer.step()

            actual = label.reshape(-1)
            predicted_label = torch.argmax(predicted_label, dim=1)
            accuracy += torch.eq(predicted_label, actual).sum().item()

            train_loss += loss.item()

        # Mean loss per batch; accuracy in percent over the whole training set.
        train_loss = train_loss / len(train_dataloader)
        accuracy = accuracy * 100.0 / len(dataset_train)

        EPOCH_VAL_ACC, EPOCH_VAL_LOSS, F1_score = evaluate(val_dataloader, han_model, dataset_valid, loss_function)

        print(f'Epoch: {epoch+1} | Train Loss: {train_loss} | Accuracy: {accuracy} | Val Accuracy: {EPOCH_VAL_ACC} | Val Loss: {EPOCH_VAL_LOSS} | F1 Score: {F1_score}')
        train_loss_list.append(train_loss)
        val_loss_list.append(EPOCH_VAL_LOSS)
        train_accu_list.append(accuracy)
        val_accu_list.append(EPOCH_VAL_ACC)

        # save the model (the function's own arguments, not undefined globals).
        save_model(epoch, han_model, optimizer, train_loss_list, val_loss_list, train_accu_list, val_accu_list, checkpoint_path)

    return train_loss_list, val_loss_list, train_accu_list, val_accu_list


In [None]:
def evaluate(val_dataloader, model, dataset_valid=None, loss_function=None):
    """Evaluate ``model`` on ``val_dataloader``.

    Args:
        val_dataloader: Sized iterable of ``(text, label)`` batches.
        model: Module returning ``(class_scores, word_alphas, sentence_alpha)``.
        dataset_valid: Optional sized object whose length is the accuracy
            denominator. When ``None``, the number of labels actually seen is used.
        loss_function: Callable ``(class_scores, labels) -> scalar tensor``. Required.

    Returns:
        ``(accuracy_percent, mean_loss_per_batch, f1_score)``.

    Raises:
        ValueError: If ``loss_function`` is not given.
    """
    if loss_function is None:
        raise ValueError("evaluate() requires a `loss_function`")

    model.eval()
    correct = 0
    seen = 0
    val_loss = 0

    # for f1 score
    prediction_labels = []
    actual_labels = []

    with torch.no_grad():
        for text, label in val_dataloader:
            text = text.to(device)
            label = label.to(device)

            # feed the validation text into the model, and get the class scores.
            predicted_label, alpha_word, alpha_sentence = model(text)

            val_loss += loss_function(predicted_label, label).item()

            # validation accuracy
            actual = label.reshape(-1)
            predicted_label = torch.argmax(predicted_label, dim=1)
            correct += torch.eq(predicted_label, actual).sum().item()
            seen += actual.numel()

            # to calculate the f1 score.
            prediction_labels.append(predicted_label)
            actual_labels.append(actual)

    # mean loss per batch
    val_loss = val_loss / len(val_dataloader)

    # Merge the per-batch tensors (possibly of unequal length) into flat CPU arrays.
    actual_labels = torch.cat(actual_labels).cpu().numpy()
    prediction_labels = torch.cat(prediction_labels).cpu().numpy()

    F1_score = f1_score(actual_labels, prediction_labels)

    total = len(dataset_valid) if dataset_valid is not None else seen

    # returns the accuracy of the model (in percent), the loss and the F1 score
    return correct * 100.0 / total, val_loss, F1_score

In [None]:
# Run the training loop; each returned list holds one value per epoch.
train_loss_list, val_loss_list, train_accu_list, val_accu_list = train(
    han_model=han_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    dataset_train=dataset_train,
    dataset_valid=dataset_valid,
)

In [None]:
# Build the held-out test split and its loader here: nothing earlier in the
# notebook defines them.
dataset_test = CommentDataset(data_folder, test=True)
test_dataloader = DataLoader(dataset_test, shuffle=False, collate_fn=collate_batch)

# The model outputs softmax probabilities, so score them with the negative
# log-likelihood of those probabilities rather than with nn.CrossEntropyLoss,
# which expects raw logits. The clamp keeps log() finite for a probability of 0.
loss_function = lambda probabilities, target: F.nll_loss(torch.log(probabilities.clamp(min=1e-12)), target)

# evaluate() takes the dataset as its third argument (accuracy denominator).
TEST_ACC, TEST_LOSS, F1_score = evaluate(test_dataloader, han_model, dataset_test, loss_function)
print("The test accuracy is: {:.2f}%".format(TEST_ACC))
print("F1 Score on Test data is: {:.2f}".format(F1_score))
print("Loss on Test Data is: {:.2f}".format(TEST_LOSS))

In [None]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

# One x position per recorded epoch, derived from the history itself so the
# plot still works when training ran for a number of epochs other than 20.
epochs = range(1, len(train_loss_list) + 1)

fig, ax1 = plt.subplots(1, 2, figsize=(14, 6))

fig.suptitle('Loss and accuracy for HAN Model.')

# accuracy plot (left panel)
train_accu, = ax1[0].plot(epochs, train_accu_list, label="Training Accuracy")
val_accu, = ax1[0].plot(epochs, val_accu_list, label="Validation Accuracy")

ax1[0].legend(handles=[train_accu, val_accu])
ax1[0].set_xlabel("Epochs")
ax1[0].set_ylabel("Accuracy")
ax1[0].set_title("Accuracy for every Epochs")
ax1[0].set_xticks(epochs)

# loss plot (right panel)
train_loss, = ax1[1].plot(epochs, train_loss_list, label="Training Loss")
val_loss, = ax1[1].plot(epochs, val_loss_list, label="Validation Loss")

ax1[1].legend(handles=[train_loss, val_loss])
ax1[1].set_xlabel("Epochs")
ax1[1].set_ylabel("Loss")
ax1[1].set_title("Loss for every Epochs")
ax1[1].set_xticks(epochs)

plt.show()