In [1]:
import riiideducation
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
env = riiideducation.make_env()

In [2]:
data_path = '/kaggle/input/riiid-test-answer-prediction/'

# ARGS参数

In [3]:
class Args():
    name = 'train'
    model = 'SAKT'
    num_layers = 1
    hidden_dim=100
    input_dim = 100
    dropout = 0.2
    num_head = 5
    
    random_seed = 1
    num_epochs = 1
    lr = 0.01
    seq_size = 20
    warm_up_step_count = 4000
    eval_steps = 50000
    train_steps = 50000
    train_batch=2048
    num_workers=1
    device='cpu'
    
    
ARGS = Args()

In [4]:
PAD_INDEX = 0

QUESTION_NUM = {
    'riii':13523
}

In [5]:
import torch
from torch.utils.data import Dataset
import os
import gc
import time
from tqdm import tqdm

import torch.nn as nn
import torch.nn.functional as F
import copy
import math
import random
from itertools import repeat, chain, islice
from torch.utils import data

In [6]:
class UserSepDataSet_Rii_Train(Dataset):

    def __init__(self,user_dict):

        self.user_dict = user_dict
#         self.question_df = question_df

#         self.combine = self.get_combine()
        self.question_ids, self.target_ids, self.labels = self.get_data_for_train()


    def get_data_for_train(self):
        all_question_ids = []
        all_labels= []
        all_target_qids = []

        for k,v in self.user_dict.items():
            user_id = k
            if len(v) != 2:
                print(v)
                continue
            
            question_ids = v[0]
            answers = v[1]

            assert len(question_ids) == len(answers)


            for target_index in range(0, len(question_ids)):
                qids = question_ids[:target_index + 1]
                ans_flags = answers[:target_index + 1]

                length = len(qids)
                if length > ARGS.seq_size + 1:
                    qids = qids[-(ARGS.seq_size + 1):]
                    ans_flags = ans_flags[-(ARGS.seq_size + 1):]
                    pad_counts = 0
                else:
                    pad_counts = ARGS.seq_size + 1 - length

                input_list = []
                for idx in range(len(qids)):
                    # print(f"idx is:{idx}, qids[idx] is:{qids[idx]}")
                    tag_id = qids[idx]
                    is_correct = ans_flags[idx]

                    if idx == len(qids) - 1:
                        last_is_correct = is_correct
                        target_id = tag_id
                    else:
                        if is_correct:
                            input_list.append(tag_id)
                        else:
                            input_list.append(tag_id + QUESTION_NUM['riii'])

                paddings = [PAD_INDEX]*pad_counts
                input_list = paddings + input_list
                assert len(input_list) == ARGS.seq_size

                all_question_ids.append(input_list)
                all_target_qids.append([target_id])
                all_labels.append([last_is_correct])

        return torch.LongTensor(all_question_ids), \
            torch.LongTensor(all_target_qids),\
            torch.LongTensor(all_labels)


    def __len__(self):
        return len(self.question_ids)

    def __getitem__(self, index):
        return self.question_ids[index], self.target_ids[index], self.labels[index]



In [7]:
class UserSepDataSet_Rii_Test(Dataset):

    def __init__(self, test_df, user_dict, is_test=True):
        self.test_df = test_df
        
        self.user_dict = user_dict
        self.is_test = is_test
        if self.is_test:
            self.question_ids, self.target_ids = self.get_data_for_test()
        else:
            self.question_ids, self.target_ids, self.labels = self.get_data_for_valid()


    def get_data_for_test(self):
        all_question_ids = []
        all_target_ids = []

        for row in self.test_df.itertuples():
            user_id = getattr(row, 'user_id')
            q_id = getattr(row, 'content_id')
           
            # ans = getattr(row, 'answered_correctly')
            # last_is_correct = ans
            target_id = int(float(q_id))

#             train_user = self.train_combine.loc[self.train_combine['user_id']==user_id]
            if user_id in self.user_dict.keys():
#                 timestamps = train_user['timestamp'].item().strip().split(',')
#                 timestamps = [int(float(e.strip())) for e in timestamps]

                qids = self.user_dict[user_id][0]
#                 qids = [int(float(e.strip())) for e in q_ids]

#                 answers = train_user['answered_correctly'].item().strip().split(',')
                ans_flags = self.user_dict[user_id][1]

                assert len(qids) == len(ans_flags)
               

                length = len(qids)
                if length > ARGS.seq_size:
                    qids = qids[-ARGS.seq_size:]
                    ans_flags =  ans_flags[-ARGS.seq_size:]
                    pad_counts = 0
                else:
                    pad_counts = ARGS.seq_size - length

                input_list = []
                for idx in range(len(qids)):
                    tag_id = qids[idx]
                    is_correct = ans_flags[idx]

                    if is_correct:
                        input_list.append(tag_id)
                    else:
                        input_list.append(tag_id + QUESTION_NUM['riii'])

                paddings = [PAD_INDEX]*pad_counts
                input_list = paddings + input_list
                assert len(input_list) == ARGS.seq_size

            else:
                input_list = [PAD_INDEX] * ARGS.seq_size

            all_question_ids.append(input_list)
            all_target_ids.append([target_id])

        return torch.LongTensor(all_question_ids), \
            torch.LongTensor(all_target_ids),



    def get_data_for_valid(self):
        all_question_ids = []
        all_labels = []
        all_target_ids = []

        for row in self.test_df.itertuples():
            user_id = getattr(row, 'user_id')
            q_id = getattr(row, 'content_id')
#             timestamp = getattr(row, 'timestamp')
            ans = getattr(row, 'answered_correctly')
            last_is_correct = ans
            target_id = int(float(q_id))
            if target_id > 13523:
                print("user_id:{}, q_id:{}".format(user_id, q_id))

#             train_user = self.train_combine.loc[self.train_combine['user_id']==user_id]
            if user_id in self.user_dict.keys():
                # print("Bingo")
#                 timestamps = train_user['timestamp'].item().strip().split(',')
#                 timestamps = [int(float(e.strip())) for e in timestamps]

#                 q_ids = train_user['question_id'].item().strip().split(',')
                qids = self.user_dict[user_id][0]

#                 answers = train_user['answered_correctly'].item().strip().split(',')
                ans_flags = self.user_dict[user_id][1]
    
                assert len(qids)  == len(ans_flags)


                length = len(qids)
                if length > ARGS.seq_size:
                    qids = qids[-ARGS.seq_size:]
                    ans_flags =  ans_flags[-ARGS.seq_size:]
                    pad_counts = 0
                else:
                    pad_counts = ARGS.seq_size - length

                input_list = []
                for idx in range(len(qids)):
                    tag_id = qids[idx]
                    is_correct = ans_flags[idx]

                    if is_correct:
                        input_list.append(tag_id)
                    else:
                        input_list.append(tag_id + QUESTION_NUM['riii'])

                paddings = [PAD_INDEX]*pad_counts
                input_list = paddings + input_list
                assert len(input_list) == ARGS.seq_size

            else:
                input_list = [PAD_INDEX] * ARGS.seq_size

            all_question_ids.append(input_list)
            all_target_ids.append([target_id])
            all_labels.append([last_is_correct])

        return torch.LongTensor(all_question_ids), \
            torch.LongTensor(all_target_ids), \
            torch.LongTensor(all_labels)

    def __len__(self):
        return len(self.question_ids)

    def __getitem__(self, index):
        if self.is_test:
            return self.question_ids[index], self.target_ids[index]
        else:
            return self.question_ids[index], self.target_ids[index], self.labels[index]


In [8]:
train_csv = '/kaggle/input/riiid-test-answer-prediction/train.csv'
que_csv = '/kaggle/input/riiid-test-answer-prediction/questions.csv'

In [9]:
train_df = pd.read_csv(train_csv, usecols = [1,2,3,4,7],
                   dtype={'timestamp':'int64',
                         'used_id':'int16',
                         'content_id':'int16',
                         'content_type_id':'int8',
                         'answered_correctly':'int8'})

train_df['content_id'] = train_df['content_id'] + 1
train_df = train_df[train_df.content_type_id == False]
train_df = train_df.sort_values(['timestamp'], ascending=True).reset_index(drop=True)

In [10]:
train_df = train_df.sample(frac=1.0)
train = train_df.iloc[-5000:-2500]
valid = train_df.iloc[-2500:]

del(train_df)
gc.collect()

# train = train.sort_values(['timestamp'], ascending=True).reset_index(drop=True)
# valid = valid.sort_values(['timestamp'], ascending=True).reset_index(drop=True)

20

In [11]:

question_df = pd.read_csv(que_csv)
question_df['question_id'] = question_df['question_id'] + 1

In [12]:
train.head(10)

Unnamed: 0,timestamp,user_id,content_id,content_type_id,answered_correctly
31701388,943638867,1065517323,4534,0,1
15761351,159124138,290633882,10497,0,0
26202860,598693305,360387024,7954,0,0
54151021,3336806199,603646233,6263,0,1
48249668,2493718857,461944680,835,0,1
82202008,15214906155,1675115071,6125,0,1
53558770,3234591008,1752599884,6269,0,1
26943552,627850457,1599559259,8979,0,1
86903000,19610475452,271136436,2102,0,0
23357514,440371408,2071214979,5461,0,1


In [13]:
train_user = pd.merge(train, question_df,left_on="content_id", right_on='question_id', how='left')
user_dict = train_user.groupby('user_id')[['question_id','answered_correctly']].apply(
    lambda g:g.values.tolist()).to_dict()
del(train)
del(question_df)
gc.collect()

0

In [14]:
# user_dict.keys()

In [15]:
len(user_dict)

2432

In [16]:

user_dict2 = dict()
for k, v in user_dict.items():
    ques = []
    ans = []
    
    for e in v:
        ques.append(e[0])
        ans.append(e[1])
    user_dict2[k] = [ques, ans]

In [17]:
train_data = UserSepDataSet_Rii_Train(user_dict2)


In [18]:
del(user_dict)
gc.collect()

20

In [19]:
val_data = UserSepDataSet_Rii_Test(valid, user_dict2, is_test=False)

In [20]:
len(train_data)

2500

In [21]:
len(val_data)

2500

# SAKT模型

In [22]:
def get_pad_mask(seq, pad_idx):
    return (seq != pad_idx).unsqueeze(-2)


def get_subsequent_mask(seq):
    ''' For masking out the subsequent info. '''
    sz_b, len_s = seq.size()
    subsequent_mask = (1 - torch.triu(torch.ones((1, len_s, len_s), device=seq.device), diagonal=1)).bool()
    return subsequent_mask


def clones(module, N):
    "Produce N identical layers."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

In [23]:
def attention(query, key, value, mask=None, dropout=None):
    "Compute 'Scaled Dot Product Attention'"
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

In [24]:


class MultiHeadedAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0.1):
        "Take in model size and number of heads."
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model, bias=False), 4) # Q, K, V, last
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        "Implements Figure 2"
        if mask is not None:
            # Same mask applied to all h heads.
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)

        # 1) Do all the linear projections in batch from d_model => h x d_k
        query, key, value = \
            [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
             for l, x in zip(self.linears, (query, key, value))]

        # 2) Apply attention on all the projected vectors in batch.
        x, self.attn = attention(query, key, value, mask=mask,
                                 dropout=self.dropout)

        # 3) "Concat" using a view and apply a final linear.
        x = x.transpose(1, 2).contiguous() \
            .view(nbatches, -1, self.h * self.d_k)
        return self.linears[-1](x)


class PositionwiseFeedForward(nn.Module):
    "Implements FFN equation."
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(F.relu(self.w_1(x))))


class SAKTLayer(nn.Module):
    """
    Single Encoder block of SAKT
    """
    def __init__(self, hidden_dim, num_head, dropout):
        super().__init__()
        self._self_attn = MultiHeadedAttention(num_head, hidden_dim, dropout)
        self._ffn = PositionwiseFeedForward(hidden_dim, hidden_dim, dropout)
        self._layernorms = clones(nn.LayerNorm(hidden_dim, eps=1e-6), 2)

    def forward(self, query, key, mask=None):
        """
        query: question embeddings
        key: interaction embeddings
        """
        # self-attention block
        output = self._self_attn(query=query, key=key, value=key, mask=mask)
        output = self._layernorms[0](key + output)
        # feed-forward block
        output = self._layernorms[1](output + self._ffn(output))
        return output


class SAKT(nn.Module):
    """
    Transformer-based
    all hidden dimensions (d_k, d_v, ...) are the same as hidden_dim
    """
    def __init__(self, hidden_dim, question_num, num_layers, num_head, dropout):
        super().__init__()
        self._hidden_dim = hidden_dim
        self._question_num = question_num
        # question_num的值是110
        # Blocks
        self._layers = clones(SAKTLayer(hidden_dim, num_head, dropout), num_layers)

        # prediction layer
        self._prediction = nn.Linear(hidden_dim, 1)

        # Embedding layers
        self._positional_embedding = nn.Embedding(ARGS.seq_size+1, hidden_dim, padding_idx=PAD_INDEX)
        self._interaction_embedding = nn.Embedding(2*question_num+1, hidden_dim, padding_idx=PAD_INDEX)
        # 这个就是包含了qid对错信息的矩阵
        self._question_embedding = nn.Embedding(question_num+1, hidden_dim, padding_idx=PAD_INDEX)

    def _transform_interaction_to_question_id(self, interaction):
        """
        get question_id from interaction index
        if interaction index is a number in [0, question_num], then leave it as-is
        if interaction index is bigger than question_num (in [question_num + 1, 2 * question_num]
        then subtract question_num
        interaction: integer tensor of shape (batch_size, sequence_size)
        """
        return interaction - self._question_num * (interaction > self._question_num).long()

    def _get_position_index(self, question_id):
        """
        [0, 0, 0, 4, 12] -> [0, 0, 0, 1, 2]
        """
        batch_size = question_id.shape[0]
        position_indices = []
        for i in range(batch_size):
            non_padding_num = (question_id[i] != PAD_INDEX).sum(-1).item()
            position_index = [0] * (ARGS.seq_size - non_padding_num) + list(range(1, non_padding_num+1))
            position_indices.append(position_index)
        return torch.tensor(position_indices, dtype=int).to(ARGS.device)

    def forward(self, interaction_id, target_id):
        """
        Query: Question (skill, exercise, ...) embedding
        Key, Value: Interaction embedding + positional embedding
        """
        question_id = self._transform_interaction_to_question_id(interaction_id)
        # 这个question_id就是把以前把统一qid由于对错不同对应不同id，转换成同一qid
        # 也就是把qid数值大于q_num的，减去q_num
        question_id = torch.cat([question_id[:, 1:], target_id], dim=-1)
        # question_id的原来的维度是(batch, seq_len)，
        # 然后那两个:，第一个表示在第一个维度全选
        # 第二个1:，表示第二个维度从第一个元素选起，这主要是为了和target_id合并在一起

        # 这时question_id和interaction_id存在着一个错位的问题，
        # 也就是question_id包含了当前target_id,而interaction_id中不包含

        interaction_vector = self._interaction_embedding(interaction_id)
        question_vector = self._question_embedding(question_id)

        position_index = self._get_position_index(question_id)
        # 对于question_id获取position的下标
        position_vector = self._positional_embedding(position_index)

        mask = get_pad_mask(question_id, PAD_INDEX) & get_subsequent_mask(question_id)
        x = interaction_vector + position_vector
        # 这个position_vector只加给了interaction_vector向量
        # x的维度是(batch, seq_len, hidden)
        for layer in self._layers:
            x = layer(query=question_vector, key=x, mask=mask)

        output = self._prediction(x)
        # 这里的output为什么会是一个三维的向量呢
        output = output[:, -1, :]
        # output的最初维度是(batch, seq_len, 1)
        # 然后用[:,-1,:]只取seq_len的最后一个值
        return output


In [25]:

ARGS.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch.manual_seed(ARGS.random_seed)
torch.cuda.manual_seed(ARGS.random_seed)
torch.cuda.manual_seed_all(ARGS.random_seed)
random.seed(ARGS.random_seed)


In [26]:
 model = SAKT(ARGS.hidden_dim, QUESTION_NUM['riii'], ARGS.num_layers,
                     ARGS.num_head, ARGS.dropout).to(ARGS.device)

# Trainer

In [27]:

class ScheduledOptim():
    '''A simple wrapper class for learning rate scheduling'''
    def __init__(self, optimizer, d_model, n_warmup_steps):
        self._optimizer = optimizer
        self.n_warmup_steps = n_warmup_steps
        self.n_current_steps = 0
        self.init_lr = np.power(d_model, -0.5)

    def step_and_update_lr(self):
        "Step with the inner optimizer"
        self._update_learning_rate()
        self._optimizer.step()

    def zero_grad(self):
        "Zero out the gradients by the inner optimizer"
        self._optimizer.zero_grad()

    def _get_lr_scale(self):
        return np.min([
            np.power(self.n_current_steps, -0.5),
            np.power(self.n_warmup_steps, -1.5) * self.n_current_steps
        ])

    def _update_learning_rate(self):
        ''' Learning rate scheduling per step '''

        self.n_current_steps += 1
        lr = self.init_lr * self._get_lr_scale()

        for param_group in self._optimizer.param_groups:
            param_group['lr'] = lr


class NoamOpt:
    "Optim wrapper that implements rate."

    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        "Update parameters and rate"
        self._step += 1
        rate = self.rate()
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def rate(self, step=None):
        "Implement `lrate` above"
        if step is None:
            step = self._step
        return self.factor * \
               (self.model_size ** (-0.5) *
                min(step ** (-0.5), step * self.warmup ** (-1.5)))


In [28]:
class NoamOptimizer:
    def __init__(self, model, lr, model_size, warmup):
        self._adam = torch.optim.Adam(model.parameters(), lr=lr)
        self._opt = NoamOpt(
            model_size=model_size, factor=1, warmup=warmup, optimizer=self._adam)

    def step(self, loss):
        self._opt.zero_grad()
        loss.backward()
        self._opt.step()


class Trainer:
    def __init__(self, model, device, warm_up_step_count,
                 d_model, num_epochs, weight_path, lr,
                 train_data, val_data, test_data=None):
        self._device = device
        self._num_epochs = num_epochs
        self._weight_path = weight_path

        self._model = model
        self._loss_fn = nn.BCEWithLogitsLoss(reduction='none')
        self._model.to(device)

        self._train_data = train_data
        self._val_data = val_data
        self._test_data = test_data

        self._opt = NoamOptimizer(model=model, lr=lr, model_size=d_model, warmup=warm_up_step_count)

        self.step = 0
        self._threshold = 0.5
        self.max_step = 0
        self.max_acc = 0.0
        self.max_auc = 0.0

        self.test_acc = 0.0
        self.test_auc = 0.0

    # train model and choose weight with max auc on validation dataset
    def train(self):
        train_gen = self._train_data
        val_gen = self._val_data

        # will train self._num_epochs copies of train data
        to_train = chain.from_iterable(repeat(train_gen, self._num_epochs))
        # consisting of total_steps batches
        total_steps = len(train_gen) * self._num_epochs
        print("total_steps:",total_steps)

        self.step = 0
        while self.step < total_steps:
            rem_steps = total_steps - self.step
            num_steps = min(rem_steps, ARGS.train_steps)
            self.step += num_steps

            # take num_steps batches from to_train stream
            train_batches = islice(to_train, num_steps)
            # print(f'Step: {self.step}')
#            print(f"Current Training step is: {self.step}")
            self._train(train_batches, num_steps)
            if self.step % ARGS.eval_steps == 0:
                cur_weight = self._model.state_dict()
#                 torch.save(cur_weight, f'{self._weight_path}{self.step}.pt')
                self._test('Validation', val_gen)
            # print(f'Current best weight: {self.max_step}.pt, best auc: {self.max_auc:.4f}')
            # remove all weight file except {self.max_step}.pt
                print(f"Validation-- Best validaction acc is: {self.max_acc:.4f},"
                  f"Best auc is:{self.max_auc:.4f}.\n")
#             weight_list = os.listdir(self._weight_path)
            # for w in weight_list:
            #     if int(w[:-3]) != self.max_step:
            #         os.unlink(f'{self._weight_path}{w}')
        self._test('Validation', val_gen)
    # get test results
    def test(self, weight_num):
        test_gen = data.DataLoader(
            dataset=self._test_data, shuffle=False,
            batch_size=ARGS.test_batch, num_workers=ARGS.num_workers)

        # load best weight
        if self.max_step != 0:
            weight_num = self.max_step
        weight_path = f'{ARGS.weight_path}{weight_num}.pt'
        print(f'best weight: {weight_path}')
        self._model.load_state_dict(torch.load(weight_path))
        self._test('Test', test_gen)

    def _forward(self, batch):
#        batch = {k: t.to(self._device) for k, t in batch.items()}
#        label = batch['label']  # shape: (batch_size, 1)

#        output = self._model(batch['input'], batch['target_id'])

        batch = tuple(t.to(self._device) for t in batch)
        question_id, target_id, label = batch

        output = self._model(question_id, target_id)
        pred = (torch.sigmoid(output) >= self._threshold).long()  # shape: (batch_size, 1)
        # 感觉这里的sigmoid加在output之前是不是会更好

        return label, output, pred

    def _get_loss(self, label, output):
        # 这里的label我可以理解为是一个[0,1]序列，但是output却是上面_forward的output，不是一个（0，1）范围的值
        # 是因为loss_fn里面有sigmoid
        loss = self._loss_fn(output, label.float())
        return loss.mean()

    # takes iterator
    def _train(self, batch_iter, num_batches):
        start_time = time.time()
        self._model.train()

        losses = []
        num_corrects = 0
        num_total = 0
        labels = []
        outs = []

        for batch in tqdm(batch_iter, total=num_batches):
            label, out, pred = self._forward(batch)
            train_loss = self._get_loss(label, out)
            losses.append(train_loss.item())

            self._opt.step(train_loss)

            num_corrects += (pred == label).sum().item()
            num_total += len(label)

            labels.extend(label.squeeze(-1).data.cpu().numpy())
            outs.extend(out.squeeze(-1).data.cpu().numpy())

        acc = num_corrects / num_total
        auc = roc_auc_score(labels, outs)
        loss = np.mean(losses)
        training_time = time.time() - start_time

        if self.step % ARGS.train_steps:
            print(f"Current Training step is: {self.step}")
            print(f'Training correct predict num is: {num_corrects}, total num is: {num_total}')
            print(f'[Training]     time: {training_time:.2f}, loss: {loss:.4f}, acc: {acc:.4f}, auc: {auc:.4f}')

    # takes iterable
    def _test(self, name, batches):
        start_time = time.time()
        self._model.eval()

        losses = []
        num_corrects = 0
        num_total = 0
        labels = []
        outs = []

        with torch.no_grad():
            for batch in tqdm(batches):
                label, out, pred = self._forward(batch)
                test_loss = self._get_loss(label, out)
                losses.append(test_loss.item())

                num_corrects += (pred == label).sum().item()
                num_total += len(label)

                labels.extend(label.squeeze(-1).data.cpu().numpy())
                outs.extend(out.squeeze(-1).data.cpu().numpy())

        acc = num_corrects / num_total
        auc = roc_auc_score(labels, outs)
        loss = np.mean(losses)
        # training_time = time.time() - start_time

        print(f'correct predict num is: {num_corrects}, total num is: {num_total}')
        print(f' loss is: {loss:.4f}, acc: {acc:.4f}, auc: {auc:.4f}')

        if name == 'Validation':
            if self.max_auc < auc:
                self.max_auc = auc
                self.max_acc = acc
                self.max_step = self.step
                torch.save(self._model.state_dict(), self._weight_path)

        elif name == 'Test':
            self.test_acc = acc
            self.test_auc = auc


       


In [29]:
from torch.utils import data
train_dataloader = data.DataLoader(
            dataset=train_data, shuffle=True,
            batch_size=ARGS.train_batch, num_workers=ARGS.num_workers)
valid_dataloader = data.DataLoader(
            dataset=val_data, shuffle=False,
            batch_size=ARGS.train_batch, num_workers=ARGS.num_workers)

In [30]:
del(val_data)

In [31]:

del(train_data)
gc.collect()


60

In [32]:
trainer = Trainer(model,ARGS.device, ARGS.warm_up_step_count,
                 ARGS.hidden_dim, ARGS.num_epochs, 'rid_model.pt',
                 ARGS.lr, train_dataloader, valid_dataloader, None)

In [33]:
trainer.train()

  0%|          | 0/2 [00:00<?, ?it/s]

total_steps: 2


100%|██████████| 2/2 [00:01<00:00,  1.19it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

Current Training step is: 2
Training correct predict num is: 936, total num is: 2500
[Training]     time: 1.70, loss: 0.7224, acc: 0.3744, auc: 0.5219


100%|██████████| 2/2 [00:00<00:00,  2.96it/s]


correct predict num is: 916, total num is: 2500
 loss is: 0.7179, acc: 0.3664, auc: 0.5139


In [34]:
del(train_dataloader)
del(valid_dataloader)
gc.collect()

20

# 模型解码

In [35]:
 model = SAKT(ARGS.hidden_dim, QUESTION_NUM['riii'], ARGS.num_layers,
                     ARGS.num_head, ARGS.dropout).to(ARGS.device)

In [36]:
model.load_state_dict(torch.load('rid_model.pt'))
model.eval()

SAKT(
  (_layers): ModuleList(
    (0): SAKTLayer(
      (_self_attn): MultiHeadedAttention(
        (linears): ModuleList(
          (0): Linear(in_features=100, out_features=100, bias=False)
          (1): Linear(in_features=100, out_features=100, bias=False)
          (2): Linear(in_features=100, out_features=100, bias=False)
          (3): Linear(in_features=100, out_features=100, bias=False)
        )
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (_ffn): PositionwiseFeedForward(
        (w_1): Linear(in_features=100, out_features=100, bias=True)
        (w_2): Linear(in_features=100, out_features=100, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (_layernorms): ModuleList(
        (0): LayerNorm((100,), eps=1e-06, elementwise_affine=True)
        (1): LayerNorm((100,), eps=1e-06, elementwise_affine=True)
      )
    )
  )
  (_prediction): Linear(in_features=100, out_features=1, bias=True)
  (_positional_embedding): Embedding(21, 100, pa

In [37]:
iter_test = env.iter_test()


In [38]:
for(test_df, sample_prediction_df) in iter_test:
    test_df['content_id'] = test_df['content_id'] + 1
#     test_df = pd.merge(test_df, question_df, left_on='content_id', 
#                        right_on='question_id', how='left')
    test_df = test_df.loc[:,['content_id','content_type_id',
                             'row_id','user_id','timestamp']]
#     test_df.drop(['content_id'], axis=1)
#     test_df['answered_correctly_user'].fillna(0.5, inplace=True)
#     test_df['answered_correctly_content'].fillna(0.5, inplace=True)
#     test_df.drop(['part','prior_question_elapsed_time','prior_question_had_explanation'],axis=1)
#     test_df['part'].fillna(4, inplace=True)
#     test_df['prior_question_elapsed_time'].fillna(0.0, inplace = True)
#     test_df['prior_question_had_explanation'].fillna(False, inplace=True)
#     test_df.fillna(0.0,inplace=True)
#     question_df.fillna(0.0,inplace=True)
#     train_data.combine.fillna(0.0,inplace=True)
    test_df = test_df.loc[test_df['content_type_id'] == 0].reset_index(drop=True)
    
    test_dataset = UserSepDataSet_Rii_Test(test_df, user_dict2, is_test=True)
    test_dataloader = data.DataLoader(
            dataset=test_dataset, shuffle=False,
            batch_size=ARGS.train_batch, num_workers=ARGS.num_workers)
    model =  SAKT(ARGS.hidden_dim, QUESTION_NUM['riii'], ARGS.num_layers,
                     ARGS.num_head, ARGS.dropout).to(ARGS.device)
    model.load_state_dict(torch.load('rid_model.pt'))
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in tqdm(test_dataloader):
            batch = tuple(t.to(ARGS.device) for t in batch)
            question_id, target_id = batch
            output = model(question_id, target_id)
            pred = (torch.sigmoid(output) >= 0.5).long()
            pred = pred.view(-1)
            preds.extend(pred)
        preds = [int(e) for e in preds]
        test_df['answered_correctly'] =  preds
        env.predict(test_df.loc[test_df['content_type_id'] == 0, ['row_id', 'answered_correctly']])
            

100%|██████████| 1/1 [00:00<00:00, 13.01it/s]
100%|██████████| 1/1 [00:00<00:00, 12.42it/s]
100%|██████████| 1/1 [00:00<00:00, 12.85it/s]
100%|██████████| 1/1 [00:00<00:00, 10.93it/s]


In [39]:
sub = pd.read_csv("./submission.csv")
sub.shape

(104, 2)