# DINA model scores on recommendation

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
%matplotlib inline
import Dina
from Dina import McmcHoDina
from Dina import MlDina
import pandas as pd
from utils import r4beta
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
import progressbar

In [2]:
# Load the two tab-separated, header-less FrcSub files as NumPy arrays.
# Later cells read train_scores.shape[1] as the number of items and
# Qmatrix.shape[1] as the number of skills.
train_scores, Qmatrix = (
    pd.read_csv(path, header=None, sep='\t').values
    for path in ('./FrcSub/data.txt', './FrcSub/q.txt')
)

In [3]:
# --- Fit DINA with EM, then binarise the fitted success probabilities ---
# Random starting values for guess and slip, one per item column of
# train_scores (r4beta is a project helper; presumably a four-parameter beta
# sampler -- TODO confirm in utils).
guess = r4beta(1, 2, 0, 0.6, (1, train_scores.shape[1]))
slip = r4beta(1, 2, 0, 0.6, (1, train_scores.shape[1]))

# Presumably the EM iteration cap and convergence tolerance -- confirm in Dina.EmDina.
max_iter, tol = 100, 1e-5
EmDina = Dina.EmDina(guess, slip, max_iter, tol, Qmatrix, train_scores)
est_s, est_g = EmDina.em()

# NOTE(review): MlDina receives (est_g, est_s) while get_P_success below
# receives (est_s, est_g) -- confirm both argument orders against Dina.
dina_est = Dina.MlDina(est_g, est_s, Qmatrix, train_scores)
est_skills = dina_est.get_skills_by_Ml()

eta = EmDina.get_eta(est_skills)
predict = EmDina.get_P_success(eta, est_s, est_g)
# Hard 0/1 decisions with a 0.5 cut-off, written in place.
predict[predict > 0.5] = 1
predict[predict <= 0.5] = 0

# Flatten predictions and observed responses for element-wise scoring.
pred = predict.astype('int64').ravel()
true = train_scores.ravel()

In [4]:
%matplotlib inline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
def get_scores(pred_scores, true_scores):
    '''
    Score predictions against ground truth with scikit-learn metrics.

    pred_scores: predicted labels/scores, flattened
    true_scores: observed labels, flattened

    Note the argument order: predictions come first here, but every
    scikit-learn call below receives (true, pred).

    Returns the tuple
    (accuracy, precision, recall, f1, fpr, tpr, thresholds, roc_auc).
    '''
    # ROC points and AUC are computed from pred_scores exactly as given; the
    # callers in this notebook pass already-thresholded 0/1 values.
    fpr, tpr, thresholds = metrics.roc_curve(true_scores, pred_scores)

    accuracy, precision, recall, f1 = (
        metric(true_scores, pred_scores)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    )

    roc_auc = roc_auc_score(true_scores, pred_scores)

    return accuracy, precision, recall, f1, fpr, tpr, thresholds, roc_auc

In [5]:
# Score the thresholded DINA predictions against the observed responses and
# print the scalar metrics on one line (the ROC arrays are kept but not printed).
(accuracy, precision, recall, f1,
 fpr, tpr, thresholds, roc_auc) = get_scores(pred, true)
print(
    "Accuracy: {:4.6f} | Precision: {:4.6f} | Recall: {:4.6f} | F1: {:4.6f} | AUC: {:4.6f}".format(
        accuracy, precision, recall, f1, roc_auc
    )
)

Accuracy: 0.888993 | Precision: 0.887105 | Recall: 0.907566 | F1: 0.897219 | AUC: 0.887643


# Ho-DINA model scores on recommendation

In [6]:
# Fit the higher-order DINA model with the project's McmcHoDina sampler.
# Presumably max_iter is the chain length and burn the number of initial draws
# that are discarded -- TODO confirm in Dina.McmcHoDina.
HoDina = McmcHoDina(Q=Qmatrix, X=train_scores, max_iter=10000, burn=8000)
# NOTE: this rebinds est_skills, est_s and est_g, replacing the values left by
# the EM/ML DINA cell; cells executed afterwards see the MCMC estimates.
est_lam0, est_lam1, est_theta, est_skills, est_s, est_g = HoDina.mcmc()

100% |########################################################################|


In [8]:
# Turn the Ho-DINA estimates into hard 0/1 predictions (cut-off 0.5).
eta = HoDina.get_eta(est_skills)
predict = HoDina.get_P_success(eta, est_s, est_g)
predict[predict > 0.5] = 1
predict[predict <= 0.5] = 0

# Flatten predictions and observed responses for element-wise scoring.
pred = predict.astype('int64').ravel()
true = train_scores.ravel()

In [9]:
# Score the thresholded Ho-DINA predictions against the observed responses and
# print the scalar metrics on one line (the ROC arrays are kept but not printed).
(accuracy, precision, recall, f1,
 fpr, tpr, thresholds, roc_auc) = get_scores(pred, true)
print(
    "Accuracy: {:4.6f} | Precision: {:4.6f} | Recall: {:4.6f} | F1: {:4.6f} | AUC: {:4.6f}".format(
        accuracy, precision, recall, f1, roc_auc
    )
)

Accuracy: 0.818563 | Precision: 0.923163 | Recall: 0.720077 | F1: 0.809070 | AUC: 0.825718


# transformer model scores on recommendation

In [12]:
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
import torch.utils.data as Data
import matplotlib.pyplot as plt
%matplotlib inline
import Dina
from Dina import McmcHoDina
from Dina import MlDina
import pandas as pd
from utils import r4beta
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
import progressbar

In [13]:
# Reload the tab-separated, header-less FrcSub response matrix and Q-matrix
# as NumPy arrays for the Transformer experiment.
train_scores, Qmatrix = (
    pd.read_csv(path, header=None, sep='\t').values
    for path in ('./FrcSub/data.txt', './FrcSub/q.txt')
)

In [14]:
# Sizes and hyper-parameters for the Transformer experiment. The model classes
# defined in this notebook read these names as module-level globals.
#
# Padding should be zero: get_attn_pad_mask treats the value 0 as PAD.
# NOTE(review): the encoder is fed train_scores directly, so every 0 entry in
# the responses is masked out of attention as if it were padding -- confirm
# that this is intended.
X = train_scores
Q = Qmatrix
num_item = X.shape[1]  # number of columns of the response matrix
num_skills = Q.shape[1]  # number of columns of the Q-matrix

src_len = num_item # enc_input max sequence length
# NOTE(review): Transformer.projection takes num_skills * d_model input
# features, so the decoder sequence length it supports is num_skills rather
# than num_item. tgt_len is not read by any code visible in this notebook.
tgt_len = num_item # dec_input(=dec_output) max sequence length

# Transformer Parameters
d_model = 512  # Embedding Size
d_ff = 2048 # FeedForward dimension (hidden width of PoswiseFeedForwardNet)
d_k = d_v = 64  # per-head dimension of K(=Q), V
n_layers = 6  # number of EncoderLayer blocks and of DecoderLayer blocks
n_heads = 8  # number of heads in Multi-Head Attention
batch_size = 10  # mini-batch size handed to the DataLoader

In [22]:
def get_attn_pad_mask(seq_q, seq_k):
    '''
    Build the key-padding mask used by the attention layers.

    seq_q: [batch_size, len_q]  (only its shape is used)
    seq_k: [batch_size, len_k]  (entries equal to 0 are treated as PAD)
    len_q and len_k may differ (e.g. decoder queries over encoder keys).

    Returns a bool tensor [batch_size, len_q, len_k] that is True wherever the
    key position is PAD, i.e. True marks the positions to be masked out.
    '''
    n_batch, n_query = seq_q.size()
    n_batch, n_key = seq_k.size()
    # [batch_size, len_k] -> [batch_size, 1, len_k]; True where the key is PAD
    key_is_pad = (seq_k.data == 0)[:, None, :]
    # Broadcast the same key mask to every query position.
    return key_is_pad.expand(n_batch, n_query, n_key)

def get_attn_subsequence_mask(seq):
    '''
    Build the causal ("no peeking ahead") mask for decoder self-attention.

    seq: [batch_size, tgt_len]  (only its shape and device are used)

    Returns a uint8 tensor [batch_size, tgt_len, tgt_len] holding 1 strictly
    above the diagonal (query i must not attend to key j > i) and 0 elsewhere.
    The mask is created on the same device as seq so it can be combined with
    the padding mask without a device mismatch.
    '''
    n_batch, seq_len = seq.size(0), seq.size(1)
    # Strictly upper-triangular matrix of ones, built once for a single sample.
    future = torch.triu(
        torch.ones(seq_len, seq_len, dtype=torch.uint8, device=seq.device),
        diagonal=1,
    )
    # Materialise one independent copy per sample: [batch_size, tgt_len, tgt_len]
    return future.unsqueeze(0).repeat(n_batch, 1, 1)

class ScaledDotProductAttention(nn.Module):
    '''
    Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V with masking.

    The module has no parameters of its own.
    '''
    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()

    def forward(self, Q, K, V, attn_mask):
        '''
        Q: [batch_size, n_heads, len_q, d_k]
        K: [batch_size, n_heads, len_k, d_k]
        V: [batch_size, n_heads, len_v(=len_k), d_v]
        attn_mask: [batch_size, n_heads, len_q, len_k], non-zero/True where
                   attention must be suppressed (bool or integer dtype)

        Returns (context, attn):
        context: [batch_size, n_heads, len_q, d_v]
        attn:    [batch_size, n_heads, len_q, len_k], rows sum to 1
        '''
        # Scale by the width of the keys actually passed in, so the layer does
        # not depend on a module-level constant matching the tensor shapes.
        scale = K.size(-1) ** 0.5
        scores = torch.matmul(Q, K.transpose(-1, -2)) / scale # scores : [batch_size, n_heads, len_q, len_k]
        # masked_fill needs a boolean mask; casting also accepts uint8 masks.
        # A large negative score makes the softmax weight of masked keys vanish.
        scores = scores.masked_fill(attn_mask.bool(), -1e9)

        attn = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn, V) # [batch_size, n_heads, len_q, d_v]
        return context, attn

class MultiHeadAttention(nn.Module):
    '''
    Multi-head attention followed by a residual connection and layer norm.

    Layer sizes are read from the module-level globals d_model, d_k, d_v and
    n_heads.
    '''
    def __init__(self):
        super(MultiHeadAttention, self).__init__()
        self.W_Q = nn.Linear(d_model, d_k * n_heads, bias=False)
        self.W_K = nn.Linear(d_model, d_k * n_heads, bias=False)
        self.W_V = nn.Linear(d_model, d_v * n_heads, bias=False)
        self.fc = nn.Linear(n_heads * d_v, d_model, bias=False)
        # Created once and registered as a submodule so that its weight and
        # bias are returned by parameters() (hence optimised, saved and moved
        # between devices with the model) instead of being rebuilt with fresh
        # default values on every forward pass.
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, input_Q, input_K, input_V, attn_mask):
        '''
        input_Q: [batch_size, len_q, d_model]
        input_K: [batch_size, len_k, d_model]
        input_V: [batch_size, len_v(=len_k), d_model]
        attn_mask: [batch_size, len_q, len_k], True where attention is blocked

        Returns (output, attn):
        output: [batch_size, len_q, d_model]
        attn:   [batch_size, n_heads, len_q, len_k]
        '''
        residual, batch_size = input_Q, input_Q.size(0)
        # (B, S, D) -proj-> (B, S, D_new) -split-> (B, S, H, W) -trans-> (B, H, S, W)
        Q = self.W_Q(input_Q).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # Q: [batch_size, n_heads, len_q, d_k]
        K = self.W_K(input_K).view(batch_size, -1, n_heads, d_k).transpose(1,2)  # K: [batch_size, n_heads, len_k, d_k]
        V = self.W_V(input_V).view(batch_size, -1, n_heads, d_v).transpose(1,2)  # V: [batch_size, n_heads, len_v(=len_k), d_v]

        # The same mask applies to every head.
        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1) # attn_mask : [batch_size, n_heads, len_q, len_k]

        # context: [batch_size, n_heads, len_q, d_v], attn: [batch_size, n_heads, len_q, len_k]
        context, attn = ScaledDotProductAttention()(Q, K, V, attn_mask)
        # Merge the heads back into one feature axis.
        context = context.transpose(1, 2).reshape(batch_size, -1, n_heads * d_v) # context: [batch_size, len_q, n_heads * d_v]
        output = self.fc(context) # [batch_size, len_q, d_model]
        return self.layer_norm(output + residual), attn

class PoswiseFeedForwardNet(nn.Module):
    '''
    Position-wise feed-forward block (Linear -> ReLU -> Linear) followed by a
    residual connection and layer norm.

    Layer sizes are read from the module-level globals d_model and d_ff.
    '''
    def __init__(self):
        super(PoswiseFeedForwardNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(d_model, d_ff, bias=False),
            nn.ReLU(),
            nn.Linear(d_ff, d_model, bias=False)
        )
        # Created once and registered as a submodule so that its weight and
        # bias are returned by parameters() (hence optimised, saved and moved
        # between devices with the model) instead of being rebuilt with fresh
        # default values on every forward pass.
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, inputs):
        '''
        inputs: [batch_size, seq_len, d_model]

        Returns a tensor of the same shape.
        '''
        residual = inputs
        output = self.fc(inputs)
        return self.layer_norm(output + residual) # [batch_size, seq_len, d_model]

class EncoderLayer(nn.Module):
    '''
    One encoder block: multi-head self-attention, then the position-wise
    feed-forward network.
    '''
    def __init__(self):
        super().__init__()
        self.enc_self_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, enc_inputs, enc_self_attn_mask):
        '''
        enc_inputs: [batch_size, src_len, d_model]
        enc_self_attn_mask: [batch_size, src_len, src_len]

        Returns (enc_outputs, attn):
        enc_outputs: [batch_size, src_len, d_model]
        attn:        [batch_size, n_heads, src_len, src_len]
        '''
        # Self-attention: the same sequence supplies queries, keys and values.
        attended, attn = self.enc_self_attn(
            enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask
        )
        return self.pos_ffn(attended), attn

class DecoderLayer(nn.Module):
    '''
    One decoder block: masked self-attention, attention over the encoder
    output, then the position-wise feed-forward network.
    '''
    def __init__(self):
        super().__init__()
        self.dec_self_attn = MultiHeadAttention()
        self.dec_enc_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):
        '''
        dec_inputs: [batch_size, tgt_len, d_model]
        enc_outputs: [batch_size, src_len, d_model]
        dec_self_attn_mask: [batch_size, tgt_len, tgt_len]
        dec_enc_attn_mask: [batch_size, tgt_len, src_len]

        Returns (dec_outputs, dec_self_attn, dec_enc_attn):
        dec_outputs:   [batch_size, tgt_len, d_model]
        dec_self_attn: [batch_size, n_heads, tgt_len, tgt_len]
        dec_enc_attn:  [batch_size, n_heads, tgt_len, src_len]
        '''
        # Step 1: the decoder sequence attends to itself.
        self_attended, dec_self_attn = self.dec_self_attn(
            dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask
        )
        # Step 2: decoder states query the encoder output (keys and values).
        cross_attended, dec_enc_attn = self.dec_enc_attn(
            self_attended, enc_outputs, enc_outputs, dec_enc_attn_mask
        )
        # Step 3: position-wise feed-forward.
        return self.pos_ffn(cross_attended), dec_self_attn, dec_enc_attn

class Encoder(nn.Module):
    '''
    Embedding lookup followed by a stack of n_layers EncoderLayer blocks.

    The embedding table has num_item rows. No positional encoding is added to
    the embeddings.
    '''
    def __init__(self):
        super().__init__()
        self.src_emb = nn.Embedding(num_item, d_model)
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])

    def forward(self, enc_inputs):
        '''
        enc_inputs: [batch_size, src_len] integer indices

        Returns (enc_outputs, enc_self_attns):
        enc_outputs:    [batch_size, src_len, d_model]
        enc_self_attns: list with one [batch_size, n_heads, src_len, src_len]
                        attention tensor per layer
        '''
        hidden = self.src_emb(enc_inputs) # [batch_size, src_len, d_model]
        # Positions whose input value is 0 are hidden from attention.
        pad_mask = get_attn_pad_mask(enc_inputs, enc_inputs) # [batch_size, src_len, src_len]

        enc_self_attns = []
        for block in self.layers:
            hidden, block_attn = block(hidden, pad_mask)
            enc_self_attns.append(block_attn)
        return hidden, enc_self_attns

class Decoder(nn.Module):
    '''
    Embedding lookup followed by a stack of n_layers DecoderLayer blocks.

    The embedding table has num_item rows. No positional encoding is added to
    the embeddings.
    '''
    def __init__(self):
        super().__init__()
        self.tgt_emb = nn.Embedding(num_item, d_model)
        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])

    def forward(self, dec_inputs, enc_inputs, enc_outputs):
        '''
        dec_inputs: [batch_size, tgt_len] integer indices
        enc_inputs: [batch_size, src_len] integer indices (used for masking only)
        enc_outputs: [batch_size, src_len, d_model]

        Returns (dec_outputs, dec_self_attns, dec_enc_attns):
        dec_outputs:    [batch_size, tgt_len, d_model]
        dec_self_attns: list of [batch_size, n_heads, tgt_len, tgt_len], one per layer
        dec_enc_attns:  list of [batch_size, n_heads, tgt_len, src_len], one per layer
        '''
        hidden = self.tgt_emb(dec_inputs) # [batch_size, tgt_len, d_model]

        # Self-attention is blocked where the key is PAD (value 0) or lies in
        # the future of the query position.
        pad_mask = get_attn_pad_mask(dec_inputs, dec_inputs) # [batch_size, tgt_len, tgt_len]
        future_mask = get_attn_subsequence_mask(dec_inputs) # [batch_size, tgt_len, tgt_len]
        dec_self_attn_mask = (pad_mask + future_mask) > 0 # [batch_size, tgt_len, tgt_len]

        # Attention over the encoder output is blocked where the encoder input is PAD.
        dec_enc_attn_mask = get_attn_pad_mask(dec_inputs, enc_inputs) # [batch_size, tgt_len, src_len]

        dec_self_attns = []
        dec_enc_attns = []
        for block in self.layers:
            hidden, self_attn, cross_attn = block(
                hidden, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask
            )
            dec_self_attns.append(self_attn)
            dec_enc_attns.append(cross_attn)
        return hidden, dec_self_attns, dec_enc_attns
    
class Transformer(nn.Module):
    '''
    Encoder-decoder model that outputs one value per item for each sample.

    The decoder output of each sample is flattened and passed through a single
    linear layer with num_skills * d_model input features, so the decoder
    sequence length must equal num_skills.
    '''
    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.projection = nn.Linear(num_skills * d_model, num_item, bias=False)

    def forward(self, enc_inputs, dec_inputs):
        '''
        enc_inputs: [batch_size, src_len]
        dec_inputs: [batch_size, tgt_len]

        Returns (dec_logits, enc_self_attns, dec_self_attns, dec_enc_attns):
        dec_logits: [batch_size, num_item]
        the three attention results are per-layer lists as returned by the
        encoder and decoder.
        '''
        # memory: [batch_size, src_len, d_model]
        memory, enc_self_attns = self.encoder(enc_inputs)
        # decoded: [batch_size, tgt_len, d_model]
        decoded, dec_self_attns, dec_enc_attns = self.decoder(dec_inputs, enc_inputs, memory)

        # Flatten (tgt_len, d_model) per sample before the linear projection.
        n_samples = decoded.shape[0]
        dec_logits = self.projection(decoded.view(n_samples, -1)) # [batch_size, num_item]
        n_out = dec_logits.size(-1)
        return dec_logits.view(-1, n_out), enc_self_attns, dec_self_attns, dec_enc_attns
    


In [25]:
# Tensors fed to the Transformer:
#   enc_inputs  - the responses, used as integer indices into the encoder embedding
#   dec_inputs  - the estimated skills, used as integer indices into the decoder embedding
#   dec_outputs - the same responses as floats, used as the MSE regression target
# NOTE(review): est_skills is whatever the most recently executed DINA cell left
# behind (the EM/ML fit or the Ho-DINA MCMC fit) -- confirm which one is intended.
# NOTE(review): both embedding tables have num_item rows, so every value in
# enc_inputs and dec_inputs must lie in [0, num_item).
enc_inputs = torch.LongTensor(train_scores)
dec_inputs = torch.LongTensor(est_skills)
dec_outputs = torch.FloatTensor(train_scores)

In [26]:
class MyDataSet(Data.Dataset):
    '''
    Dataset yielding aligned (enc_input, dec_input, dec_output) triples.

    The three containers are indexed along their first axis; the dataset
    length is taken from enc_inputs.
    '''
    def __init__(self, enc_inputs, dec_inputs, dec_outputs):
        super().__init__()
        self.enc_inputs = enc_inputs
        self.dec_inputs = dec_inputs
        self.dec_outputs = dec_outputs

    def __len__(self):
        # Number of samples = size of the first axis of the encoder inputs.
        return self.enc_inputs.shape[0]

    def __getitem__(self, idx):
        # Same index into all three containers, returned as a 3-tuple.
        return tuple(
            part[idx]
            for part in (self.enc_inputs, self.dec_inputs, self.dec_outputs)
        )

# Mini-batch loader over the full data set, in fixed order (no shuffling).
loader = Data.DataLoader(
    dataset=MyDataSet(enc_inputs, dec_inputs, dec_outputs),
    batch_size=batch_size,
    shuffle=False,
)
# Model, mean-squared-error objective and plain SGD optimiser.
model = Transformer()
loss_func = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

In [27]:
# Train for 10 epochs, recording the loss of every mini-batch in loss_his.
loss_his = []
for epoch in range(10):

    for enc_inputs_batch, dec_inputs_batch, dec_outputs_batch in loader:

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass; outputs_batch holds one prediction per item for each sample.
        outputs_batch, enc_self_attns, dec_self_attns, dec_enc_attns = model(
            enc_inputs_batch, dec_inputs_batch
        )

        loss = loss_func(outputs_batch, dec_outputs_batch)
        loss_his.append(loss.item())

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

    # The value printed is the loss of the last mini-batch of the epoch,
    # not an average over the epoch.
    print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(loss))

Epoch: 0001 loss = 0.126008
Epoch: 0002 loss = 0.096073
Epoch: 0003 loss = 0.084547
Epoch: 0004 loss = 0.079194
Epoch: 0005 loss = 0.076297
Epoch: 0006 loss = 0.074429
Epoch: 0007 loss = 0.073017
Epoch: 0008 loss = 0.071845
Epoch: 0009 loss = 0.070793
Epoch: 0010 loss = 0.069833


In [28]:
# Predict for the whole data set in one forward pass. Gradients are not needed
# for evaluation, so autograd is switched off to avoid building (and keeping)
# the computation graph for the full batch.
with torch.no_grad():
    outputs, enc_self_attns, dec_self_attns, dec_enc_attns = model(enc_inputs, dec_inputs)
predict = outputs.detach().numpy()
# Hard 0/1 decisions with a 0.5 cut-off, written in place.
predict[predict > 0.5] = 1
predict[predict <= 0.5] = 0
predict = predict.astype(np.int64)
# Flatten predictions and observed responses for element-wise scoring.
pred = predict.reshape(-1)
true = train_scores.reshape(-1)

In [29]:
# Score the thresholded Transformer predictions against the observed responses
# and print the scalar metrics on one line (the ROC arrays are kept but not printed).
(accuracy, precision, recall, f1,
 fpr, tpr, thresholds, roc_auc) = get_scores(pred, true)
print(
    "Accuracy: {:4.6f} | Precision: {:4.6f} | Recall: {:4.6f} | F1: {:4.6f} | AUC: {:4.6f}".format(
        accuracy, precision, recall, f1, roc_auc
    )
)

Accuracy: 0.893190 | Precision: 0.884318 | Recall: 0.920322 | F1: 0.901961 | AUC: 0.891219
