# demo2
## Performance on Recommendation scores

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
%matplotlib inline
import Dina
from Dina import McmcHoDina
from Dina import MlDina
import pandas as pd
from utils import r4beta
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import progressbar
from transformer import MyDataSet, Transformer

In [3]:
def load_data(path, ratio):
    """Read a response matrix and its Q-matrix, then split the responses.

    Both files are read as header-less, tab-separated tables:
    ``path + 'data.txt'`` (responses) and ``path + 'q.txt'`` (Q-matrix).
    The file names are appended by plain string concatenation, so ``path``
    must already end with a path separator.

    Args:
        path: Directory prefix containing ``data.txt`` and ``q.txt``.
        ratio: Fraction of rows (taken from the top, no shuffling) that
            goes into the training split; the remaining rows form the
            test split.

    Returns:
        Tuple ``(train_data, test_data, q_matrix, num_samples, num_items,
        num_skills)``. Both splits are passed through ``binary_classify``.
        ``num_samples`` is the row count of the *whole* response matrix
        (before splitting), ``num_items`` its column count, and
        ``num_skills`` the column count of the Q-matrix.
    """
    responses = pd.read_csv(path + 'data.txt', header=None, sep='\t').values
    q_matrix = pd.read_csv(path + 'q.txt', header=None, sep='\t').values

    num_samples, num_items = responses.shape[0], responses.shape[1]
    num_skills = q_matrix.shape[1]

    # Row index at which the training rows end and the test rows begin.
    split_at = int(num_samples * ratio)
    train_data = binary_classify(responses[:split_at])
    test_data = binary_classify(responses[split_at:])

    return train_data, test_data, q_matrix, num_samples, num_items, num_skills

def binary_classify(data):
    """Binarize scores at 0.5 without modifying the input.

    Values strictly greater than 0.5 become 1; everything else (including
    exactly 0.5 and NaN) becomes 0.

    The result is built from a comparison rather than by masked assignment
    into ``data``, so the caller's array is left untouched and read-only
    arrays or views are accepted as well.

    Args:
        data: Array-like supporting elementwise ``>`` and ``.astype``
            (e.g. a NumPy array).

    Returns:
        A new object of the same shape with dtype ``np.int64`` holding
        only 0 and 1.
    """
    return (data > 0.5).astype(np.int64)

def get_scores(pred_scores, true_scores):
    """Compute classification metrics for predictions against ground truth.

    Note the argument order: predictions first, ground truth second. Every
    sklearn call below receives them as ``(true_scores, pred_scores)``.

    The same ``pred_scores`` values feed both the label-based metrics and
    the ROC computations.
    NOTE(review): if callers pass hard 0/1 labels, the ROC curve and AUC
    are computed from those hard labels rather than from continuous
    scores — confirm this is intended.

    Args:
        pred_scores: Flat sequence of predicted values.
        true_scores: Flat sequence of ground-truth labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, fpr, tpr, thresholds,
        roc_auc)``.
    """
    roc_points = metrics.roc_curve(true_scores, pred_scores)
    fpr, tpr, thresholds = roc_points

    # accuracy, precision, recall, f1 — evaluated in this order.
    label_metrics = [
        score_fn(true_scores, pred_scores)
        for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
    ]
    accuracy, precision, recall, f1 = label_metrics

    roc_auc = roc_auc_score(true_scores, pred_scores)

    return accuracy, precision, recall, f1, fpr, tpr, thresholds, roc_auc

In [5]:
def get_prior_skills(x, y, q_matrix):
    """Estimate a skill matrix for ``y`` using DINA parameters fitted on ``x``.

    Steps:
      1. Draw initial guess and slip parameters with ``r4beta`` (one value
         per column of ``x``).
      2. Run ``Dina.EmDina(...).em()`` on ``x`` with at most 100 iterations
         and tolerance 1e-5.
      3. Feed the fitted parameters to ``Dina.MlDina`` together with ``y``
         and return ``get_skills_by_Ml()``.

    Args:
        x: 2-D response matrix used to fit the item parameters.
        y: Response matrix whose skills are estimated.
        q_matrix: Q-matrix passed through to both DINA estimators.

    Returns:
        Whatever ``Dina.MlDina.get_skills_by_Ml()`` returns.
    """
    num_items = x.shape[1]
    init_guess = r4beta(1, 2, 0, 0.6, (1, num_items))
    init_slip = r4beta(1, 2, 0, 0.6, (1, num_items))
    max_iter, tol = 100, 1e-5

    em_estimator = Dina.EmDina(init_guess, init_slip, max_iter, tol, q_matrix, x)
    # em() is unpacked as (slip, guess), i.e. the reverse of the constructor's
    # (guess, slip) order — TODO confirm against Dina.EmDina.em.
    slip_hat, guess_hat = em_estimator.em()

    ml_estimator = Dina.MlDina(guess_hat, slip_hat, q_matrix, y)
    return ml_estimator.get_skills_by_Ml()

In [6]:
# Load the three datasets with the same 80/20 train/test split.
# Suffix 1 = FrcSub, 2 = Math1, 3 = Math2.
(train_data1, test_data1, q_matrix1,
 num_samples1, num_items1, num_skills1) = load_data('./FrcSub/', 0.8)

(train_data2, test_data2, q_matrix2,
 num_samples2, num_items2, num_skills2) = load_data('./Math1/', 0.8)

(train_data3, test_data3, q_matrix3,
 num_samples3, num_items3, num_skills3) = load_data('./Math2/', 0.8)

In [43]:
# Select the FrcSub dataset (suffix 1). Inputs and targets are the same
# response matrix: the model is trained to reproduce the responses.
x_train, y_train = train_data1, train_data1
x_valid, y_valid = test_data1, test_data1
q_matrix = q_matrix1
num_samples, num_items, num_skills = num_samples1, num_items1, num_skills1

src_len = num_items  # enc_input max sequence length
tgt_len = num_items  # dec_input(=dec_output) max sequence length

# DINA-estimated skills are used as the decoder input.
est_skills = get_prior_skills(x_train, y_train, q_matrix)

enc_inputs = torch.LongTensor(x_train)
dec_inputs = torch.LongTensor(est_skills)
dec_outputs = torch.FloatTensor(y_train)

# num_samples is the size of the whole dataset as returned by load_data
# (train + test), not len(x_train). Integer division keeps this an int, and
# the clamp avoids batch_size == 0 (rejected by DataLoader) when the dataset
# has fewer than 50 rows.
batch_size = max(1, num_samples // 50)
print(batch_size)

10


In [40]:
# Configuration 1: the largest setup tried (6 layers, d_ff=2048, d_k=d_v=64).
# NOTE(review): this cell's execution count (In [40]) is later than that of the
# timing cell below it (In [17]); re-run top to bottom to confirm the recorded
# timing belongs to this configuration.
model = Transformer(
    num_items,
    num_skills,
    d_model=512,
    d_ff=2048,
    d_k=64,
    d_v=64,
    n_layers=6,
    n_heads=8,
    batch_size=10,
)

In [17]:
%%time
# One forward pass over the full enc_inputs/dec_inputs tensors (no mini-batching),
# timed by the %%time magic at the top of this cell.
outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)

CPU times: user 1min 4s, sys: 21.4 s, total: 1min 26s
Wall time: 1min 18s


In [23]:
# Configuration 2: minimal sizes (d_model=2, d_k=d_v=2) with 3 layers and d_ff=64.
# NOTE(review): batch_size=50 here differs from the batch_size=10 used in the
# first configuration — TODO confirm whether Transformer uses this argument in
# forward(); if it does, the timings are not directly comparable.
model = Transformer(
    num_items,
    num_skills,
    d_model=2,
    d_ff=64,
    d_k=2,
    d_v=2,
    n_layers=3,
    n_heads=8,
    batch_size=50,
)

In [24]:
%%time
# Time one full-tensor forward pass for the minimal configuration defined in the previous cell.
outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)

CPU times: user 1.58 s, sys: 1.79 s, total: 3.37 s
Wall time: 2.34 s


In [25]:
# Configuration 3: same as the minimal setup but with d_model raised to 512.
model = Transformer(
    num_items,
    num_skills,
    d_model=512,
    d_ff=64,
    d_k=2,
    d_v=2,
    n_layers=3,
    n_heads=8,
    batch_size=50,
)

In [26]:
%%time
# Time one full-tensor forward pass with d_model=512 and d_k=d_v=2.
outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)

CPU times: user 3.38 s, sys: 3.26 s, total: 6.64 s
Wall time: 3.12 s


In [27]:
# Configuration 4: d_model=512 with d_k=d_v raised to 32.
model = Transformer(
    num_items,
    num_skills,
    d_model=512,
    d_ff=64,
    d_k=32,
    d_v=32,
    n_layers=3,
    n_heads=8,
    batch_size=50,
)

In [28]:
%%time
# Time one full-tensor forward pass with d_k=d_v=32.
outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)

CPU times: user 10.1 s, sys: 5.33 s, total: 15.4 s
Wall time: 14.5 s


In [30]:
# Configuration 5: d_model=512 with d_k=d_v=16 (the setup reused for training below).
model = Transformer(
    num_items,
    num_skills,
    d_model=512,
    d_ff=64,
    d_k=16,
    d_v=16,
    n_layers=3,
    n_heads=8,
    batch_size=50,
)

In [31]:
%%time
# Time one full-tensor forward pass with d_k=d_v=16.
outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)

CPU times: user 6.6 s, sys: 4.55 s, total: 11.2 s
Wall time: 9.35 s


### Based on the forward-pass timings above, we use the last configuration (d_k = d_v = 16) for the demo

In [48]:
# Train the chosen configuration on shuffled mini-batches of the training
# tensors, then evaluate it once on the held-out split.
loader = Data.DataLoader(
    MyDataSet(enc_inputs, dec_inputs, dec_outputs),
    batch_size=batch_size,
    shuffle=True,
)
model = Transformer(num_items, num_skills, d_model=512, d_ff=64, d_k=16, d_v=16, n_layers=3, n_heads=8, batch_size=batch_size)
loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

loss_his = []  # per-batch training loss, in iteration order
load = 1  # not used anywhere in this cell; kept in case a later cell reads it

model.train()
for epoch in range(50):

    for enc_inputs_batch, dec_inputs_batch, dec_outputs_batch in loader:

        outputs_batch, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs_batch, dec_inputs_batch)

        loss = loss_func(outputs_batch, dec_outputs_batch)
        loss_his.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Reports the loss of the LAST mini-batch of the epoch, not an epoch average.
    print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(loss.item()))

# test on validation dataset:
# Switch to evaluation mode so that any dropout / normalisation layers inside
# the model behave deterministically while scoring.
model.eval()
with torch.no_grad():
    # NOTE: est_skills / enc_inputs / dec_inputs / dec_outputs are rebound to the
    # validation split here (same names as the training tensors above). Re-run
    # the tensor-preparation cell before re-running this one.
    est_skills = get_prior_skills(x_valid, y_valid, q_matrix)
    enc_inputs = torch.LongTensor(x_valid)
    dec_inputs = torch.LongTensor(est_skills)
    dec_outputs = torch.FloatTensor(y_valid)
    outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)

    # Threshold into a NEW array: Tensor.numpy() shares memory with the tensor,
    # so in-place masking would overwrite the raw scores held in `outputs`.
    predict = (outputs.detach().numpy() > 0.5).astype(np.int64)
    pred = predict.reshape(-1)
    true = y_valid.reshape(-1)


accuracy, precision, recall, f1, fpr, tpr, thresholds, roc_auc = get_scores(pred, true)
print("Accuracy: {:4.6f} | Precision: {:4.6f} | Recall: {:4.6f} | F1: {:4.6f} | AUC: {:4.6f}".format(accuracy, precision, recall, f1, roc_auc))

Epoch: 0001 loss = 0.528552
Epoch: 0002 loss = 0.271489
Epoch: 0003 loss = 0.246579
Epoch: 0004 loss = 0.212890
Epoch: 0005 loss = 0.175607
Epoch: 0006 loss = 0.122475
Epoch: 0007 loss = 0.163708
Epoch: 0008 loss = 0.112872
Epoch: 0009 loss = 0.086000
Epoch: 0010 loss = 0.101872
Epoch: 0011 loss = 0.089428
Epoch: 0012 loss = 0.146569
Epoch: 0013 loss = 0.111729
Epoch: 0014 loss = 0.108395
Epoch: 0015 loss = 0.151216
Epoch: 0016 loss = 0.093755
Epoch: 0017 loss = 0.089184
Epoch: 0018 loss = 0.116921
Epoch: 0019 loss = 0.096985
Epoch: 0020 loss = 0.131669
Epoch: 0021 loss = 0.097061
Epoch: 0022 loss = 0.112368
Epoch: 0023 loss = 0.110377
Epoch: 0024 loss = 0.078567
Epoch: 0025 loss = 0.106326
Epoch: 0026 loss = 0.110957
Epoch: 0027 loss = 0.086605
Epoch: 0028 loss = 0.098845
Epoch: 0029 loss = 0.085907
Epoch: 0030 loss = 0.098897
Epoch: 0031 loss = 0.124780
Epoch: 0032 loss = 0.093945
Epoch: 0033 loss = 0.133154
Epoch: 0034 loss = 0.138789
Epoch: 0035 loss = 0.093397
Epoch: 0036 loss = 0