In [2]:
from EduKTM import DKT
import torch
import torch.utils.data as Data
import numpy as np
import logging
import tqdm
from sklearn.metrics import roc_auc_score

In [3]:
# First argument to the DKT constructor; also the modulus/divisor that
# process_raw_pred applies (via % and //) to encoded column indices.
NUM_QUESTIONS = 512
# Mini-batch size handed to both the train and test data loaders.
BATCH_SIZE = 64
# Second and third DKT constructor arguments. Presumably the recurrent hidden
# width and the number of stacked recurrent layers -- confirm against EduKTM.
HIDDEN_SIZE = 10
NUM_LAYERS = 1

def get_data_loader(data_path, batch_size, shuffle=False):
    """Build a ``DataLoader`` over the array stored in a ``.npy`` file.

    The array is read with ``np.load``, converted to a float32 tensor and
    handed to ``DataLoader`` as the dataset, so samples are taken along the
    tensor's first dimension.

    Args:
        data_path: Path of the ``.npy`` file to read.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples on every pass.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding float32 tensors.
    """
    samples = torch.from_numpy(np.load(data_path)).float()
    return Data.DataLoader(samples, batch_size=batch_size, shuffle=shuffle)

In [4]:
# Only the training split is shuffled; the test split keeps its stored order.
train_loader = get_data_loader('./train_data.npy', BATCH_SIZE, shuffle=True)
test_loader = get_data_loader('./test_data.npy', BATCH_SIZE, shuffle=False)

# Raise the root logger to INFO so INFO-level records (e.g. the save/load
# messages seen in later cell outputs) are emitted.
logging.root.setLevel(logging.INFO)

# Untrained model instance. Note that the training cell instantiates DKT
# again before fitting.
dkt = DKT(NUM_QUESTIONS, HIDDEN_SIZE, NUM_LAYERS)

In [5]:
# Build a fresh model here so that re-running this cell trains from newly
# initialised weights instead of continuing from a previous run.
dkt = DKT(NUM_QUESTIONS, HIDDEN_SIZE, NUM_LAYERS)
# Fit for 20 epochs; the cell output reports a LogisticLoss value per epoch.
dkt.train(train_loader, epoch=20)
# Persist the trained parameters; the evaluation cell reloads this file.
dkt.save("dkt.params")


Epoch 0: 100%|██████████| 17/17 [00:01<00:00,  8.81it/s]


[Epoch 0] LogisticLoss: 42.961213


Epoch 1: 100%|██████████| 17/17 [00:01<00:00,  9.76it/s]


[Epoch 1] LogisticLoss: 41.452244


Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 11.18it/s]


[Epoch 2] LogisticLoss: 39.596258


Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 10.42it/s]


[Epoch 3] LogisticLoss: 37.594331


Epoch 4: 100%|██████████| 17/17 [00:01<00:00, 11.45it/s]


[Epoch 4] LogisticLoss: 36.126020


Epoch 5: 100%|██████████| 17/17 [00:01<00:00, 11.33it/s]


[Epoch 5] LogisticLoss: 35.217559


Epoch 6: 100%|██████████| 17/17 [00:01<00:00, 10.63it/s]


[Epoch 6] LogisticLoss: 34.582698


Epoch 7: 100%|██████████| 17/17 [00:01<00:00,  9.27it/s]


[Epoch 7] LogisticLoss: 33.852721


Epoch 8: 100%|██████████| 17/17 [00:01<00:00, 11.62it/s]


[Epoch 8] LogisticLoss: 31.726310


Epoch 9: 100%|██████████| 17/17 [00:01<00:00,  9.84it/s]


[Epoch 9] LogisticLoss: 29.656526


Epoch 10: 100%|██████████| 17/17 [00:01<00:00, 11.88it/s]


[Epoch 10] LogisticLoss: 27.684287


Epoch 11: 100%|██████████| 17/17 [00:01<00:00, 11.50it/s]


[Epoch 11] LogisticLoss: 25.920891


Epoch 12: 100%|██████████| 17/17 [00:01<00:00, 12.03it/s]


[Epoch 12] LogisticLoss: 24.572429


Epoch 13: 100%|██████████| 17/17 [00:01<00:00, 11.97it/s]


[Epoch 13] LogisticLoss: 23.552952


Epoch 14: 100%|██████████| 17/17 [00:01<00:00, 11.59it/s]


[Epoch 14] LogisticLoss: 22.700881


Epoch 15: 100%|██████████| 17/17 [00:01<00:00, 12.21it/s]


[Epoch 15] LogisticLoss: 21.897940


Epoch 16: 100%|██████████| 17/17 [00:01<00:00, 11.76it/s]


[Epoch 16] LogisticLoss: 21.176840


Epoch 17: 100%|██████████| 17/17 [00:01<00:00, 11.14it/s]


[Epoch 17] LogisticLoss: 20.532189


Epoch 18: 100%|██████████| 17/17 [00:01<00:00, 11.13it/s]


[Epoch 18] LogisticLoss: 19.970276


Epoch 19: 100%|██████████| 17/17 [00:01<00:00, 11.44it/s]
INFO:root:save parameters to dkt.params


[Epoch 19] LogisticLoss: 19.481976


In [6]:
# Reload the parameters written by dkt.save so the score below reflects the
# checkpoint on disk rather than whatever weights are currently in memory.
dkt.load("dkt.params")
# dkt.eval returns a single number, reported here as the AUC on the test split.
auc = dkt.eval(test_loader)
print("auc: %.6f" % auc)

  self.dkt_model.load_state_dict(torch.load(filepath))
INFO:root:load parameters from dkt.params
evaluating: 100%|██████████| 8/8 [00:00<00:00, 36.74it/s]

auc: 0.922298





In [7]:
def process_raw_pred(raw_question_matrix, raw_pred, num_questions: int) -> tuple:
    """Pair one sequence's predictions with the entries they are scored against.

    Args:
        raw_question_matrix: 2-D tensor whose non-zero entries mark the
            interactions of one sequence. Each non-zero column index ``c`` is
            decoded as question ``c % num_questions`` and label
            ``c // num_questions``. (Presumably one one-hot row per time step
            with ``2 * num_questions`` columns -- confirm against the data
            encoder.)
        raw_pred: 2-D tensor of per-row scores; dimension 1 is indexed by
            question id.
        num_questions: Modulus/divisor used to decode column indices.

    Returns:
        ``(pred, truth)``: two 1-D tensors of equal length. ``pred[i]`` is
        ``raw_pred[i, q]`` where ``q`` is the question of the ``(i + 1)``-th
        non-zero entry, and ``truth[i]`` is that entry's label.
    """
    # Locate the non-zero entries once and reuse them for both the question
    # ids and the labels. The first entry is dropped, so row i of raw_pred is
    # paired with non-zero entry i + 1 (presumably because the output at step
    # t predicts the answer at step t + 1 -- confirm against the model).
    encoded_columns = torch.nonzero(raw_question_matrix)[1:, 1]
    questions = encoded_columns % num_questions
    truth = encoded_columns // num_questions

    # Keep as many prediction rows as there are targets, then pick from each
    # row the score of the question that was actually asked.
    pred = raw_pred[: questions.shape[0]]
    pred = pred.gather(1, questions.view(-1, 1)).flatten()

    return pred, truth

def eval(model, test_data, num_questions=None) -> list:
    """Run ``model`` over ``test_data`` and collect flat predictions and labels.

    NOTE: this name shadows Python's builtin ``eval``; it is kept unchanged
    because other cells call it by this name.

    Args:
        model: Callable network; it is switched to evaluation mode and then
            called once per batch.
        test_data: Iterable of batch tensors whose first dimension indexes
            the sequences in the batch.
        num_questions: Value forwarded to ``process_raw_pred``. Defaults to
            the notebook-level ``NUM_QUESTIONS`` when omitted.

    Returns:
        ``[y_pred, y_truth]``: two 1-D tensors holding the concatenated
        outputs of ``process_raw_pred`` for every sequence in ``test_data``.
        Both are empty float tensors when ``test_data`` yields nothing.
    """
    if num_questions is None:
        num_questions = NUM_QUESTIONS

    model.eval()
    # Each list starts with an empty float tensor so that empty input still
    # returns tensors and dtype promotion matches concatenating onto
    # ``torch.Tensor([])`` step by step.
    pred_chunks = [torch.Tensor([])]
    truth_chunks = [torch.Tensor([])]
    # Inference only: do not record an autograd graph for every batch.
    with torch.no_grad():
        for batch in tqdm.tqdm(test_data, "evaluating"):
            integrated_pred = model(batch)
            for student in range(batch.shape[0]):
                pred, truth = process_raw_pred(
                    batch[student], integrated_pred[student], num_questions)
                pred_chunks.append(pred)
                truth_chunks.append(truth)
    # Concatenate once at the end instead of re-copying the growing result on
    # every iteration.
    return [torch.cat(pred_chunks), torch.cat(truth_chunks)]

In [8]:
# Work with the underlying network held by the DKT wrapper.
model = dkt.dkt_model

y_pred, y_truth = eval(model, test_loader)

# Peek at a few scores and check how many (prediction, label) pairs there are.
print(y_pred[:10])
print(y_truth.size())

# Threshold the scores at 0.5 to obtain hard 0/1 predictions.
y_pred_binary = y_pred.ge(0.5).float()
print("Binary predictions:", y_pred_binary)

# Fraction of thresholded predictions that equal the label.
print("Accuracy:", y_pred_binary.eq(y_truth).sum() / len(y_truth))
print("y_pred_binary shape", y_pred_binary.size())
# Left as the cell's final expression so the notebook displays the AUC.
roc_auc_score(y_truth.detach().numpy(), y_pred.detach().numpy())

evaluating: 100%|██████████| 8/8 [00:00<00:00, 32.75it/s]

tensor([0.0957, 0.0515, 0.0477, 0.1115, 0.0554, 0.0382, 0.1549, 0.6931, 0.3757,
        0.3828], grad_fn=<SliceBackward0>)
torch.Size([13050])
Binary predictions: tensor([0., 0., 0.,  ..., 1., 1., 1.])
Accuracy: tensor(0.8579)
y_pred_binary shape torch.Size([13050])





np.float64(0.9222978826273354)

In [9]:
# Work with the underlying network held by the DKT wrapper.
model = dkt.dkt_model

# Take the first sequence of the first test batch and give it back a leading
# batch dimension of size 1 so it can be fed to the model on its own.
first_batch = next(iter(test_loader))
first_batch_element = first_batch[0]
first_batch_size_1 = first_batch_element.unsqueeze(0)
model.eval()

# Inference only: skip autograd bookkeeping so the outputs carry no graph.
with torch.no_grad():
    pred = model(first_batch_size_1)

print(pred.shape)

# Named `rounded_pred` rather than `round`: assigning to `round` at notebook
# level would shadow the builtin round() for the rest of the kernel session.
rounded_pred = torch.round(pred)

torch.save(rounded_pred, 'round_predictions.pt')

# Scores for every question at the final position of the sequence.
last_pred = pred[0, -1, :]

print(pred)

# new_dataset = first_batch_element.unsqueeze(0)

# new_test_loader = Data.DataLoader(new_dataset, batch_size=1, shuffle=False)
# print(new_test_loader.dataset.shape)
# pred_y, true_y = eval(model, new_test_loader)
# print("Predictions:", pred_y)
# print("Ground truth:", true_y)
# print("ROC AUC:", roc_auc_score(true_y.detach().numpy(), pred_y.detach().numpy()))


torch.Size([1, 30, 512])
tensor([[[0.3820, 0.5953, 0.4590,  ..., 0.3167, 0.6619, 0.5179],
         [0.3185, 0.6817, 0.4508,  ..., 0.2351, 0.7373, 0.4988],
         [0.2903, 0.6939, 0.5015,  ..., 0.2341, 0.7832, 0.4544],
         ...,
         [0.2656, 0.7330, 0.5728,  ..., 0.1759, 0.8425, 0.4089],
         [0.2656, 0.7308, 0.5700,  ..., 0.1802, 0.8428, 0.4232],
         [0.2679, 0.7281, 0.5724,  ..., 0.1816, 0.8452, 0.4193]]],
       grad_fn=<SigmoidBackward0>)
