In [1]:
import pickle
import torch.nn as nn
import torch.optim as optim
import torch
import numpy as np
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

In [2]:
with open("data.pkl",'rb') as fp:
    data = pickle.load(fp)

random.shuffle(data)
train_data, test_data = train_test_split(data, test_size=0.2)
print(len(data))
print(len(train_data))
print(len(test_data))

5808
4646
1162


In [3]:
def create_tensors(data_list):
    # create tensor that is compatible to load and train in the language model
    ds = {}
    keys = ['skills','subtests','questions','answers','y']
    for key in keys:
        ds[key] = []
    
    for entry in data_list:
        ds['skills'].append(entry[0])
        ds['subtests'].append(entry[1])
        ds['questions'].append(entry[2])
        ds['answers'].append(entry[3])
        ds['y'].append(entry[4])
    
    ds['skills'] = torch.tensor(ds['skills']).type(torch.float)
    ds['subtests'] = torch.tensor(ds['subtests'])
    ds['questions'] = torch.tensor(ds['questions'])
    ds['answers'] = torch.tensor(ds['answers'])
    ds['y'] = torch.tensor(ds['y']).type(torch.float)

    return ds

In [4]:
# skills = []
# subtests = []
# questions = []
# answers = []
# y = []
# for entry in data:
#     skills.append(entry[0])
#     subtests.append(entry[1])
#     questions.append(entry[2])
#     answers.append(entry[3])
#     y.append(entry[4])

# skills = torch.tensor(skills).type(torch.float)
# subtests = torch.tensor(subtests)
# questions = torch.tensor(questions)
# answers = torch.tensor(answers)
# y = torch.tensor(y).type(torch.float)
# # print((y[4000:]==0).sum())

In [5]:
class BertModel(nn.Module):
    def __init__(self, sentence_dim, skill_dim, dropout):
        super().__init__()
        self.fc_test = nn.Linear(768,sentence_dim)
        self.fc_question = nn.Linear(768,sentence_dim)
        self.fc_answer = nn.Linear(768,sentence_dim)
        self.fc_skill = nn.Linear(skill_dim,skill_dim*2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(3*sentence_dim+skill_dim*2,128)
        self.out = nn.Linear(128,1)
        self.sig = nn.Sigmoid()
    
    def forward(self, skills,test,question,answer):
        x1 = self.fc_skill(skills)
        x2 = self.fc_test(test)
        x3 = self.fc_question(question)
        x4 = self.fc_answer(answer)
        x = torch.cat((x1,x2,x3,x4),dim=1)
        x = self.fc2(self.relu(x))
        pred = self.out(x)
        
        return pred

In [6]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [7]:
def confusion_matrix(preds, Y):
    rounded_preds = torch.round(preds.sigmoid())
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    for i, value in enumerate(rounded_preds):
        if value == Y[i] and value == 1:
            TP += 1
        elif value == Y[i] and value == 0:
            TN += 1
        elif value != Y[i] and value == 0:
            FN += 1
        elif value != Y[i] and value == 1:
            FP += 1
        else:
            print(value,Y[i])
    print(f'TP: {TP}\tFN: {FN}')
    print(f'FP: {FP}\tTN: {TN}')

In [8]:

def precision(preds,y):
    rounded_preds = torch.round(preds.sigmoid())
#     print((rounded_preds==1).sum())
#     print((y==1).sum())
    return precision_score(y,rounded_preds)

def recall(preds,y):
    rounded_preds = torch.round(preds.sigmoid())
    return recall_score(y,rounded_preds)
    

In [11]:
def train(model, data, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
        
    optimizer.zero_grad()
    
    predictions = model(data['skills'],data['subtests'],data['questions'],data['answers']).squeeze(1)

    loss = criterion(predictions, data['y'])
    

    acc = binary_accuracy(predictions, data['y'])
    

    loss.backward()

    optimizer.step()

#     epoch_loss += loss.item()
#     epoch_acc += acc.item()
#     print(loss)
#     print(acc)
    return loss,acc 


In [12]:
def evaluate(model, data, criterion, matrix=False):
    
    model.eval()
    
    with torch.no_grad():

        predictions = model(data['skills'],data['subtests'],data['questions'],data['answers']).squeeze(1)
        
#         print('eval pred',predictions)

        loss = criterion(predictions, data['y'])
#         print('eval1',data[4])
#         print('eval',(data[4]==0).sum())
        acc = binary_accuracy(predictions, data['y'])
        
        prec = precision(predictions, data['y'])
        
        rec = recall(predictions, data['y'])
        
        confusion_matrix(predictions, data['y'])
        
#         print(f"Number of positives: {(data[4]==1).sum()}")
#         print(f"Number of negatives: {(data[4]==0).sum()}")

        
    return loss, acc, prec, rec

In [13]:
max_epochs = 400
ep_log_interval = 25
lrn_rate = 0.002
sentence_dim = 128
skill_dim = 9
dropout = 0.1

net = BertModel(sentence_dim, skill_dim, dropout)
criterion = nn.BCEWithLogitsLoss() #BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=lrn_rate)
train_ds = create_tensors(train_data)
test_ds = create_tensors(test_data)

for epoch in range(max_epochs):
    train_loss, train_acc = train(net,train_ds,optimizer,criterion)

    if epoch % ep_log_interval == 0:
        valid_loss, valid_acc, prec, rec = evaluate(net,test_ds,criterion,matrix=True)
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
        print('precision: ', prec)
        print('recall: ', rec)
        try:
            print(f"f1 = {2*(prec*rec)/(prec+rec)}")
        except:
            continue

  _warn_prf(average, modifier, msg_start, len(result))
  print(f"f1 = {2*(prec*rec)/(prec+rec)}")


TP: 0	FN: 672
FP: 0	TN: 490
	Train Loss: 0.671 | Train Acc: 57.04%
	 Val. Loss: 1.810 |  Val. Acc: 42.17%
precision:  0.0
recall:  0.0
f1 = nan
TP: 573	FN: 99
FP: 276	TN: 214
	Train Loss: 0.632 | Train Acc: 65.02%
	 Val. Loss: 0.623 |  Val. Acc: 67.73%
precision:  0.6749116607773852
recall:  0.8526785714285714
f1 = 0.7534516765285996
TP: 541	FN: 131
FP: 250	TN: 240
	Train Loss: 0.599 | Train Acc: 68.02%
	 Val. Loss: 0.607 |  Val. Acc: 67.21%
precision:  0.683944374209861
recall:  0.8050595238095238
f1 = 0.7395762132604239
TP: 517	FN: 155
FP: 202	TN: 288
	Train Loss: 0.588 | Train Acc: 69.29%
	 Val. Loss: 0.598 |  Val. Acc: 69.28%
precision:  0.7190542420027817
recall:  0.7693452380952381
f1 = 0.7433501078360892
TP: 533	FN: 139
FP: 208	TN: 282
	Train Loss: 0.580 | Train Acc: 69.48%
	 Val. Loss: 0.590 |  Val. Acc: 70.14%
precision:  0.7192982456140351
recall:  0.7931547619047619
f1 = 0.754423213021939
TP: 531	FN: 141
FP: 206	TN: 284
	Train Loss: 0.576 | Train Acc: 69.72%
	 Val. Loss: 0.5