In [None]:
import gc
import os
import random
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import re
import pickle

import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.utils.rnn as rnn_utils
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [None]:
# Mount Google Drive so that files under /content/drive can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Default window length (interactions per sample) used as the max_seq default of SAKTDatasetR.
MAX_SEQ = 100

In [None]:
%%time
# Load the interaction log, drop rows that lack a member id, change date or chapter name,
# and sort by chg_dt so that the per-member sequences built later keep that row order.
# NOTE(review): the file name in the path is empty ('.csv') — confirm the intended file.
df = pd.read_csv('/content/drive/MyDrive/.csv')\
            .dropna(subset=["memb_no", "chg_dt","chapter_nm"]).sort_values(by=["chg_dt"])



CPU times: user 48.8 s, sys: 5.59 s, total: 54.4 s
Wall time: 1min 8s


## Preprocess

In [None]:
# Cast the member id and the chapter_nm4 unit label to strings, and derive a
# binary correctness flag: 1 where ox_yn equals "O", 0 everywhere else.
df["memb_no"] = df["memb_no"].astype("str")
df["unit"] = df["chapter_nm4"].astype("str")
df["correct"] = (df["ox_yn"] == "O").astype("int64")



In [None]:

# Keep only the text after the last "." of each unit label, then remove any
# parenthesised number such as "(3)" and surrounding whitespace.
# The pattern is a raw string: "\(", "\s" and "\d" are invalid escape sequences
# in an ordinary string literal and trigger a warning on recent Python versions.
df["unit"] = df["unit"].apply(
    lambda x: re.sub(r'\(\s*\d+\s*\)', '', x.split(".")[-1].strip()).strip()
)
df.unit.nunique()

922

In [None]:
# Distinct member ids, and distinct units in subject_cd / chapter_cd order.
u_list = np.unique(df["memb_no"].values)
q_list = df.sort_values(by=["subject_cd", "chapter_cd"])['unit'].unique()

u2idx = {u: idx for idx, u in enumerate(u_list)}
# Unit ids start at 1. The datasets zero-pad short sequences and the train/val
# loops drop every position whose target id is 0, so id 0 has to stay reserved
# for padding; otherwise the first unit would be silently excluded from the
# loss and the metrics. The model's embedding tables (2*n_skill+1 and
# n_skill+1 rows) already have room for ids up to n_skill.
q2idx = {q: idx for idx, q in enumerate(q_list, start=1)}

In [None]:
# Translate each unit label to its integer id with a direct dictionary lookup.
# `map` does one hash lookup per row and the explicit cast guarantees an integer
# column; a label missing from q2idx fails loudly at the cast instead of passing through.
df['content_id'] = df['unit'].map(q2idx).astype('int64')

In [None]:
def _member_sequences(rows):
    """Return one member's (content_id array, correct array) in row order."""
    return rows['content_id'].values, rows['correct'].values


# One entry per member: a tuple of that member's content ids and correctness flags.
group = df[['memb_no', 'content_id', 'correct']].groupby('memb_no').apply(_member_sequences)

In [None]:
SEED = 0


def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) RNGs and make cuDNN deterministic.

    Also exports ``seed`` as the PYTHONHASHSEED environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


seed_everything(SEED)

In [None]:
class SAKTDataset(Dataset):
    """Fixed-length (input, target, label) samples for SAKT, one per member.

    ``group`` maps a member id to a ``(content_ids, correct_flags)`` pair and must
    expose the ids through ``group.index``. Members with fewer than 10
    interactions are skipped.
    """

    def __init__(self, group, n_skill, max_seq=100):
        super().__init__()
        self.max_seq = max_seq
        self.n_skill = n_skill
        self.samples = group

        self.user_ids = []
        for uid in group.index:
            questions, _ = group[uid]
            if len(questions) >= 10:
                self.user_ids.append(uid)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, index):
        """Return ``(x, target_id, label)``, each of length ``max_seq - 1``."""
        history_q, history_a = self.samples[self.user_ids[index]]
        window = self.max_seq
        n = len(history_q)

        q = np.zeros(window, dtype=int)
        qa = np.zeros(window, dtype=int)
        if n < window:
            # Short history: right-align it, leaving zeros as left padding.
            q[-n:] = history_q
            qa[-n:] = history_a
        else:
            # Long history: keep the most recent `window` interactions.
            q[:] = history_q[-window:]
            qa[:] = history_a[-window:]

        # Inputs are all steps but the last, with n_skill added where the answer
        # was correct; targets and labels are all steps but the first.
        x = q[:-1] + (qa[:-1] == 1) * self.n_skill
        return x, q[1:], qa[1:]

In [None]:
# n_skill sizes the model's embedding tables and is the offset the datasets add
# for correct answers. Derive it here so that this cell runs on a fresh kernel
# without relying on a cell further down the notebook.
n_skill = len(q2idx)

# 80/20 split over members: each entry of `group` is one member's whole history.
train, val = train_test_split(group, test_size=0.2)

train_dataset = SAKTDataset(train, n_skill)
train_dataloader = DataLoader(train_dataset, batch_size=2048, shuffle=True, num_workers=8)
del train

val_dataset = SAKTDataset(val, n_skill)
val_dataloader = DataLoader(val_dataset, batch_size=2048, shuffle=True, num_workers=8)
del val



In [None]:
class FFN(nn.Module):
    """Feed-forward block: Linear -> ReLU -> Linear -> Dropout(0.2), width preserved."""

    def __init__(self, state_size=200):
        super().__init__()
        self.state_size = state_size

        # Attribute names and creation order are kept as they were: they define
        # the state_dict keys and the order in which parameters are initialised.
        self.lr1 = nn.Linear(state_size, state_size)
        self.relu = nn.ReLU()
        self.lr2 = nn.Linear(state_size, state_size)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        hidden = self.relu(self.lr1(x))
        return self.dropout(self.lr2(hidden))

def future_mask(seq_length):
    """Return a (seq_length, seq_length) bool tensor that is True strictly above the diagonal."""
    upper = np.triu(np.ones((seq_length, seq_length)), k=1)
    return torch.from_numpy(upper.astype('bool'))


class SAKTModel(nn.Module):
    """Single attention block for knowledge tracing.

    The embedded exercise ids (``question_ids``) are the attention queries and
    the embedded past interactions (``x``) are the keys and values.
    ``forward`` returns one logit per position plus the attention weights.
    """

    def __init__(self, n_skill, max_seq=100, embed_dim=128):
        super().__init__()
        self.n_skill = n_skill
        self.embed_dim = embed_dim

        # Table sizes: interaction ids 0..2*n_skill, positions 0..max_seq-2,
        # exercise ids 0..n_skill.
        self.embedding = nn.Embedding(2 * n_skill + 1, embed_dim)
        self.pos_embedding = nn.Embedding(max_seq - 1, embed_dim)
        self.e_embedding = nn.Embedding(n_skill + 1, embed_dim)

        self.multi_att = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=8, dropout=0.2)

        self.dropout = nn.Dropout(0.2)
        self.layer_normal = nn.LayerNorm(embed_dim)

        self.ffn = FFN(embed_dim)
        self.pred = nn.Linear(embed_dim, 1)

    def forward(self, x, question_ids):
        device = x.device
        seq_len = x.size(1)

        positions = torch.arange(seq_len).unsqueeze(0).to(device)
        interactions = self.embedding(x) + self.pos_embedding(positions)
        queries = self.e_embedding(question_ids)

        # [bs, s_len, embed] => [s_len, bs, embed] for the attention layer.
        interactions = interactions.permute(1, 0, 2)
        queries = queries.permute(1, 0, 2)

        # Mask that is True strictly above the diagonal (later positions).
        causal = future_mask(seq_len).to(device)
        attended, att_weight = self.multi_att(queries, interactions, interactions, attn_mask=causal)

        # Residual + norm, then back to [bs, s_len, embed].
        attended = self.layer_normal(attended + queries).permute(1, 0, 2)

        # The same LayerNorm instance is reused after the feed-forward residual.
        logits = self.pred(self.layer_normal(self.ffn(attended) + attended))
        return logits.squeeze(-1), att_weight


In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Baseline model on the chapter_nm4 units, Adam optimiser, logit-based binary cross-entropy.
model = SAKTModel(n_skill, embed_dim=128).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

criterion.to(device)

BCEWithLogitsLoss()

In [None]:
def train_epoch(model, train_iterator, optim, criterion, device="cpu"):
    """Run one optimisation pass over ``train_iterator``.

    Each batch is indexed as ``(x, target_id, label)``. Positions whose target
    id is 0 are excluded from the loss and from the metrics.

    Returns ``(mean batch loss, accuracy, ROC AUC)``; the AUC is computed on the
    raw logits.
    """
    model.train()

    batch_losses = []
    n_correct = 0
    n_seen = 0
    all_labels = []
    all_logits = []

    progress = tqdm(train_iterator)
    for batch in progress:
        x = batch[0].to(device).long()
        target_id = batch[1].to(device).long()
        answers = batch[2].to(device).float()
        keep = target_id != 0

        optim.zero_grad()
        logits, _ = model(x, target_id)

        logits = torch.masked_select(logits, keep)
        answers = torch.masked_select(answers, keep)

        loss = criterion(logits, answers)
        loss.backward()
        optim.step()

        loss_value = loss.item()
        batch_losses.append(loss_value)

        # Threshold the predicted probability at 0.5 for the accuracy count.
        hard_pred = (torch.sigmoid(logits) >= 0.5).long()
        n_correct += (hard_pred == answers).sum().item()
        n_seen += len(answers)

        all_labels.extend(answers.view(-1).detach().cpu().numpy())
        all_logits.extend(logits.view(-1).detach().cpu().numpy())

        progress.set_description('loss - {:.4f}'.format(loss_value))

    return np.average(batch_losses), n_correct / n_seen, roc_auc_score(all_labels, all_logits)

In [None]:
def val_epoch(model, val_iterator, criterion, device="cpu"):
    """Evaluate ``model`` on ``val_iterator`` without updating its weights.

    Each batch is indexed as ``(x, target_id, label)``. Positions whose target
    id is 0 are excluded from the loss and from the metrics.

    Returns ``(mean batch loss, accuracy, ROC AUC)``; the AUC is computed on the
    raw logits.
    """
    model.eval()

    batch_losses = []
    n_correct = 0
    n_seen = 0
    all_labels = []
    all_logits = []

    progress = tqdm(val_iterator)
    for batch in progress:
        x = batch[0].to(device).long()
        target_id = batch[1].to(device).long()
        answers = batch[2].to(device).float()
        keep = target_id != 0

        # Forward pass only; no graph is recorded.
        with torch.no_grad():
            logits, _ = model(x, target_id)

        logits = torch.masked_select(logits, keep)
        answers = torch.masked_select(answers, keep)

        loss_value = criterion(logits, answers).item()
        batch_losses.append(loss_value)

        hard_pred = (torch.sigmoid(logits) >= 0.5).long()
        n_correct += (hard_pred == answers).sum().item()
        n_seen += len(answers)

        all_labels.extend(answers.view(-1).detach().cpu().numpy())
        all_logits.extend(logits.view(-1).detach().cpu().numpy())

        progress.set_description('loss - {:.4f}'.format(loss_value))

    return np.average(batch_losses), n_correct / n_seen, roc_auc_score(all_labels, all_logits)

In [None]:
epochs = 50

# Early stopping: over_fit counts consecutive epochs without a new best
# validation AUC (kept in last_auc); training stops once it reaches 2.
over_fit = 0
last_auc = 0
for epoch in range(epochs):
    train_loss, train_acc, train_auc = train_epoch(model, train_dataloader, optimizer, criterion, device)
    print("epoch - {} train_loss - {:.2f} acc - {:.3f} auc - {:.3f}".format(epoch, train_loss, train_acc, train_auc))

    val_loss, avl_acc, val_auc = val_epoch(model, val_dataloader, criterion, device)
    print("epoch - {} val_loss - {:.2f} acc - {:.3f} auc - {:.3f}".format(epoch, val_loss, avl_acc, val_auc))

    if val_auc > last_auc:
        last_auc, over_fit = val_auc, 0
        continue

    over_fit += 1
    if over_fit >= 2:
        print("early stop epoch ", epoch)
        break

loss - 0.5595: 100%|██████████| 12/12 [00:08<00:00,  1.41it/s]


epoch - 0 train_loss - 0.61 acc - 0.703 auc - 0.504


loss - 0.5506: 100%|██████████| 3/3 [00:01<00:00,  2.43it/s]


epoch - 0 val_loss - 0.56 acc - 0.752 auc - 0.580


loss - 0.5384: 100%|██████████| 12/12 [00:03<00:00,  3.34it/s]


epoch - 1 train_loss - 0.55 acc - 0.752 auc - 0.593


loss - 0.5392: 100%|██████████| 3/3 [00:01<00:00,  2.51it/s]


epoch - 1 val_loss - 0.54 acc - 0.757 auc - 0.614


loss - 0.5600: 100%|██████████| 12/12 [00:03<00:00,  3.12it/s]


epoch - 2 train_loss - 0.54 acc - 0.753 auc - 0.623


loss - 0.5338: 100%|██████████| 3/3 [00:01<00:00,  1.72it/s]


epoch - 2 val_loss - 0.53 acc - 0.757 auc - 0.637


loss - 0.5406: 100%|██████████| 12/12 [00:03<00:00,  3.25it/s]


epoch - 3 train_loss - 0.54 acc - 0.754 auc - 0.642


loss - 0.5310: 100%|██████████| 3/3 [00:01<00:00,  2.52it/s]


epoch - 3 val_loss - 0.53 acc - 0.757 auc - 0.653


loss - 0.5325: 100%|██████████| 12/12 [00:03<00:00,  3.31it/s]


epoch - 4 train_loss - 0.53 acc - 0.755 auc - 0.660


loss - 0.5218: 100%|██████████| 3/3 [00:01<00:00,  1.64it/s]


epoch - 4 val_loss - 0.52 acc - 0.759 auc - 0.669


loss - 0.5281: 100%|██████████| 12/12 [00:04<00:00,  2.91it/s]


epoch - 5 train_loss - 0.52 acc - 0.758 auc - 0.678


loss - 0.5149: 100%|██████████| 3/3 [00:01<00:00,  2.68it/s]


epoch - 5 val_loss - 0.51 acc - 0.764 auc - 0.685


loss - 0.5095: 100%|██████████| 12/12 [00:03<00:00,  3.25it/s]


epoch - 6 train_loss - 0.51 acc - 0.764 auc - 0.696


loss - 0.5069: 100%|██████████| 3/3 [00:01<00:00,  2.03it/s]


epoch - 6 val_loss - 0.51 acc - 0.769 auc - 0.702


loss - 0.4991: 100%|██████████| 12/12 [00:04<00:00,  2.68it/s]


epoch - 7 train_loss - 0.50 acc - 0.769 auc - 0.711


loss - 0.5018: 100%|██████████| 3/3 [00:01<00:00,  2.71it/s]


epoch - 7 val_loss - 0.50 acc - 0.773 auc - 0.714


loss - 0.5126: 100%|██████████| 12/12 [00:03<00:00,  3.29it/s]


epoch - 8 train_loss - 0.50 acc - 0.773 auc - 0.723


loss - 0.4955: 100%|██████████| 3/3 [00:01<00:00,  2.61it/s]


epoch - 8 val_loss - 0.49 acc - 0.776 auc - 0.722


loss - 0.4913: 100%|██████████| 12/12 [00:04<00:00,  2.60it/s]


epoch - 9 train_loss - 0.49 acc - 0.776 auc - 0.731


loss - 0.4841: 100%|██████████| 3/3 [00:01<00:00,  2.48it/s]


epoch - 9 val_loss - 0.49 acc - 0.778 auc - 0.727


loss - 0.4861: 100%|██████████| 12/12 [00:03<00:00,  3.34it/s]


epoch - 10 train_loss - 0.49 acc - 0.778 auc - 0.737


loss - 0.4863: 100%|██████████| 3/3 [00:01<00:00,  2.68it/s]


epoch - 10 val_loss - 0.49 acc - 0.780 auc - 0.732


loss - 0.4715: 100%|██████████| 12/12 [00:03<00:00,  3.17it/s]


epoch - 11 train_loss - 0.48 acc - 0.780 auc - 0.742


loss - 0.4873: 100%|██████████| 3/3 [00:01<00:00,  1.73it/s]


epoch - 11 val_loss - 0.48 acc - 0.781 auc - 0.736


loss - 0.4716: 100%|██████████| 12/12 [00:03<00:00,  3.23it/s]


epoch - 12 train_loss - 0.48 acc - 0.782 auc - 0.746


loss - 0.4806: 100%|██████████| 3/3 [00:01<00:00,  2.56it/s]


epoch - 12 val_loss - 0.48 acc - 0.781 auc - 0.739


loss - 0.4819: 100%|██████████| 12/12 [00:03<00:00,  3.31it/s]


epoch - 13 train_loss - 0.48 acc - 0.783 auc - 0.749


loss - 0.4777: 100%|██████████| 3/3 [00:01<00:00,  1.75it/s]


epoch - 13 val_loss - 0.48 acc - 0.783 auc - 0.741


loss - 0.4897: 100%|██████████| 12/12 [00:04<00:00,  2.88it/s]


epoch - 14 train_loss - 0.48 acc - 0.784 auc - 0.752


loss - 0.4770: 100%|██████████| 3/3 [00:01<00:00,  2.45it/s]


epoch - 14 val_loss - 0.48 acc - 0.783 auc - 0.742


loss - 0.4838: 100%|██████████| 12/12 [00:03<00:00,  3.35it/s]


epoch - 15 train_loss - 0.48 acc - 0.784 auc - 0.754


loss - 0.4824: 100%|██████████| 3/3 [00:01<00:00,  1.90it/s]


epoch - 15 val_loss - 0.48 acc - 0.783 auc - 0.743


loss - 0.4695: 100%|██████████| 12/12 [00:04<00:00,  2.66it/s]


epoch - 16 train_loss - 0.47 acc - 0.785 auc - 0.756


loss - 0.4771: 100%|██████████| 3/3 [00:01<00:00,  2.61it/s]


epoch - 16 val_loss - 0.48 acc - 0.784 auc - 0.745


loss - 0.4691: 100%|██████████| 12/12 [00:03<00:00,  3.28it/s]


epoch - 17 train_loss - 0.47 acc - 0.786 auc - 0.758


loss - 0.4791: 100%|██████████| 3/3 [00:01<00:00,  2.56it/s]


epoch - 17 val_loss - 0.48 acc - 0.784 auc - 0.746


loss - 0.4832: 100%|██████████| 12/12 [00:04<00:00,  2.61it/s]


epoch - 18 train_loss - 0.47 acc - 0.787 auc - 0.760


loss - 0.4766: 100%|██████████| 3/3 [00:01<00:00,  2.33it/s]


epoch - 18 val_loss - 0.48 acc - 0.784 auc - 0.747


loss - 0.4604: 100%|██████████| 12/12 [00:03<00:00,  3.25it/s]


epoch - 19 train_loss - 0.47 acc - 0.787 auc - 0.761


loss - 0.4770: 100%|██████████| 3/3 [00:01<00:00,  2.60it/s]


epoch - 19 val_loss - 0.48 acc - 0.784 auc - 0.747


loss - 0.4777: 100%|██████████| 12/12 [00:04<00:00,  2.99it/s]


epoch - 20 train_loss - 0.47 acc - 0.788 auc - 0.763


loss - 0.4805: 100%|██████████| 3/3 [00:01<00:00,  2.28it/s]


epoch - 20 val_loss - 0.48 acc - 0.784 auc - 0.748


loss - 0.4695: 100%|██████████| 12/12 [00:03<00:00,  3.19it/s]


epoch - 21 train_loss - 0.47 acc - 0.788 auc - 0.764


loss - 0.4826: 100%|██████████| 3/3 [00:01<00:00,  2.46it/s]


epoch - 21 val_loss - 0.48 acc - 0.785 auc - 0.748


loss - 0.4623: 100%|██████████| 12/12 [00:03<00:00,  3.03it/s]


epoch - 22 train_loss - 0.47 acc - 0.789 auc - 0.765


loss - 0.4815: 100%|██████████| 3/3 [00:01<00:00,  1.73it/s]


epoch - 22 val_loss - 0.48 acc - 0.785 auc - 0.749


loss - 0.4712: 100%|██████████| 12/12 [00:03<00:00,  3.37it/s]


epoch - 23 train_loss - 0.47 acc - 0.789 auc - 0.766


loss - 0.4792: 100%|██████████| 3/3 [00:01<00:00,  2.67it/s]


epoch - 23 val_loss - 0.48 acc - 0.785 auc - 0.749


loss - 0.4756: 100%|██████████| 12/12 [00:03<00:00,  3.26it/s]


epoch - 24 train_loss - 0.47 acc - 0.790 auc - 0.767


loss - 0.4729: 100%|██████████| 3/3 [00:01<00:00,  1.67it/s]


epoch - 24 val_loss - 0.48 acc - 0.785 auc - 0.749


loss - 0.4681: 100%|██████████| 12/12 [00:04<00:00,  2.91it/s]


epoch - 25 train_loss - 0.47 acc - 0.790 auc - 0.768


loss - 0.4745: 100%|██████████| 3/3 [00:01<00:00,  2.54it/s]


epoch - 25 val_loss - 0.48 acc - 0.785 auc - 0.750


loss - 0.4575: 100%|██████████| 12/12 [00:03<00:00,  3.35it/s]


epoch - 26 train_loss - 0.46 acc - 0.790 auc - 0.769


loss - 0.4786: 100%|██████████| 3/3 [00:01<00:00,  1.85it/s]


epoch - 26 val_loss - 0.48 acc - 0.785 auc - 0.750


loss - 0.4781: 100%|██████████| 12/12 [00:04<00:00,  2.67it/s]


epoch - 27 train_loss - 0.47 acc - 0.791 auc - 0.770


loss - 0.4727: 100%|██████████| 3/3 [00:01<00:00,  2.49it/s]


epoch - 27 val_loss - 0.48 acc - 0.785 auc - 0.750


loss - 0.4572: 100%|██████████| 12/12 [00:03<00:00,  3.30it/s]


epoch - 28 train_loss - 0.46 acc - 0.791 auc - 0.771


loss - 0.4818: 100%|██████████| 3/3 [00:01<00:00,  2.56it/s]


epoch - 28 val_loss - 0.48 acc - 0.785 auc - 0.750


loss - 0.4607: 100%|██████████| 12/12 [00:04<00:00,  2.62it/s]


epoch - 29 train_loss - 0.46 acc - 0.791 auc - 0.772


loss - 0.4704: 100%|██████████| 3/3 [00:01<00:00,  2.55it/s]


epoch - 29 val_loss - 0.48 acc - 0.785 auc - 0.750


loss - 0.4484: 100%|██████████| 12/12 [00:03<00:00,  3.37it/s]


epoch - 30 train_loss - 0.46 acc - 0.792 auc - 0.773


loss - 0.4790: 100%|██████████| 3/3 [00:01<00:00,  2.66it/s]


epoch - 30 val_loss - 0.48 acc - 0.785 auc - 0.750
early stop epoch  30


In [None]:
class SAKTDatasetR(Dataset):
    """SAKT dataset that randomises which part of a member's history is used.

    ``group`` maps a member id to a ``(content_ids, correct_flags)`` pair; members
    with fewer than 2 interactions are skipped. Every ``__getitem__`` call draws
    one uniform number: when it exceeds 0.1 the sample is a random window (long
    histories) or a random-length prefix (short histories); otherwise it is the
    most recent ``max_seq`` interactions, or the whole short history.
    """

    def __init__(self, group, n_skill, max_seq=MAX_SEQ):
        super().__init__()
        self.max_seq = max_seq
        self.n_skill = n_skill
        self.samples = group

        self.user_ids = []
        for uid in group.index:
            questions, _ = group[uid]
            if len(questions) >= 2:
                self.user_ids.append(uid)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, index):
        """Return ``(x, target_id, label)``, each of length ``max_seq - 1``."""
        hist_q, hist_a = self.samples[self.user_ids[index]]
        n = len(hist_q)
        window = self.max_seq

        q = np.zeros(window, dtype=int)
        qa = np.zeros(window, dtype=int)

        # One draw per sample decides between the random and the fixed variant.
        randomise = random.random() > 0.1
        if n >= window:
            # Random start offset, or the offset of the most recent window.
            start = random.randint(0, n - window) if randomise else n - window
            q[:] = hist_q[start:start + window]
            qa[:] = hist_a[start:start + window]
        else:
            # Random-length prefix (at least 2 steps), or the full history;
            # either way right-aligned with zeros as left padding.
            keep = random.randint(2, n) if randomise else n
            q[-keep:] = hist_q[:keep]
            qa[-keep:] = hist_a[:keep]

        # Inputs are all steps but the last, with n_skill added where the answer
        # was correct; targets and labels are all steps but the first.
        x = q[:-1] + (qa[:-1] == 1) * self.n_skill
        return x, q[1:], qa[1:]

In [None]:
# Second experiment on the same groups, using the randomised dataset.
# NOTE(review): SAKTDatasetR also randomises the validation samples, so
# validation metrics vary from epoch to epoch for the same weights.
train2, val2 = train_test_split(group, test_size=0.2)

train_dataset2 = SAKTDatasetR(train2, n_skill)
train_dataloader2 = DataLoader(train_dataset2, batch_size=64, shuffle=True, num_workers=8)
# Drop this cell's own split; `train` / `val` were already deleted by the first split cell.
del train2

val_dataset2 = SAKTDatasetR(val2, n_skill)
val_dataloader2 = DataLoader(val_dataset2, batch_size=64, shuffle=True, num_workers=8)
del val2



In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fresh model for the randomised-window experiment; optimizer and criterion are re-created for it.
model2 = SAKTModel(n_skill, embed_dim=128).to(device)
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

criterion.to(device)

BCEWithLogitsLoss()

In [None]:
epochs = 50

# Early stopping: over_fit counts consecutive epochs without a new best
# validation AUC (kept in last_auc); training stops once it reaches 2.
over_fit = 0
last_auc = 0
for epoch in range(epochs):
    train_loss, train_acc, train_auc = train_epoch(model2, train_dataloader2, optimizer, criterion, device)
    print("epoch - {} train_loss - {:.2f} acc - {:.3f} auc - {:.3f}".format(epoch, train_loss, train_acc, train_auc))

    val_loss, avl_acc, val_auc = val_epoch(model2, val_dataloader2, criterion, device)
    print("epoch - {} val_loss - {:.2f} acc - {:.3f} auc - {:.3f}".format(epoch, val_loss, avl_acc, val_auc))

    if val_auc > last_auc:
        last_auc, over_fit = val_auc, 0
        continue

    over_fit += 1
    if over_fit >= 2:
        print("early stop epoch ", epoch)
        break

loss - 0.5265: 100%|██████████| 364/364 [00:07<00:00, 48.66it/s]


epoch - 0 train_loss - 0.51 acc - 0.772 auc - 0.664


loss - 0.5568: 100%|██████████| 91/91 [00:02<00:00, 33.18it/s]


epoch - 0 val_loss - 0.49 acc - 0.782 auc - 0.716


loss - 0.2780: 100%|██████████| 364/364 [00:07<00:00, 45.90it/s]


epoch - 1 train_loss - 0.48 acc - 0.786 auc - 0.724


loss - 0.4264: 100%|██████████| 91/91 [00:01<00:00, 48.61it/s]


epoch - 1 val_loss - 0.48 acc - 0.783 auc - 0.730


loss - 0.4308: 100%|██████████| 364/364 [00:09<00:00, 39.21it/s]


epoch - 2 train_loss - 0.47 acc - 0.789 auc - 0.737


loss - 0.4195: 100%|██████████| 91/91 [00:01<00:00, 48.86it/s]


epoch - 2 val_loss - 0.48 acc - 0.788 auc - 0.737


loss - 0.4027: 100%|██████████| 364/364 [00:08<00:00, 40.80it/s]


epoch - 3 train_loss - 0.47 acc - 0.792 auc - 0.743


loss - 0.4599: 100%|██████████| 91/91 [00:01<00:00, 48.07it/s]


epoch - 3 val_loss - 0.47 acc - 0.788 auc - 0.741


loss - 0.3928: 100%|██████████| 364/364 [00:08<00:00, 41.13it/s]


epoch - 4 train_loss - 0.47 acc - 0.791 auc - 0.745


loss - 0.4397: 100%|██████████| 91/91 [00:02<00:00, 40.02it/s]


epoch - 4 val_loss - 0.47 acc - 0.791 auc - 0.743


loss - 0.4250: 100%|██████████| 364/364 [00:07<00:00, 47.39it/s]


epoch - 5 train_loss - 0.47 acc - 0.793 auc - 0.747


loss - 0.4537: 100%|██████████| 91/91 [00:02<00:00, 36.95it/s]


epoch - 5 val_loss - 0.47 acc - 0.789 auc - 0.743


loss - 0.4923: 100%|██████████| 364/364 [00:08<00:00, 43.97it/s]


epoch - 6 train_loss - 0.46 acc - 0.794 auc - 0.749


loss - 0.4324: 100%|██████████| 91/91 [00:01<00:00, 48.19it/s]


epoch - 6 val_loss - 0.47 acc - 0.790 auc - 0.745


loss - 0.3499: 100%|██████████| 364/364 [00:09<00:00, 37.66it/s]


epoch - 7 train_loss - 0.46 acc - 0.794 auc - 0.750


loss - 0.5159: 100%|██████████| 91/91 [00:01<00:00, 47.46it/s]


epoch - 7 val_loss - 0.47 acc - 0.791 auc - 0.746


loss - 0.4321: 100%|██████████| 364/364 [00:10<00:00, 35.57it/s]


epoch - 8 train_loss - 0.46 acc - 0.796 auc - 0.752


loss - 0.4060: 100%|██████████| 91/91 [00:01<00:00, 48.65it/s]


epoch - 8 val_loss - 0.47 acc - 0.792 auc - 0.748


loss - 0.4430: 100%|██████████| 364/364 [00:08<00:00, 44.54it/s]


epoch - 9 train_loss - 0.46 acc - 0.796 auc - 0.753


loss - 0.4976: 100%|██████████| 91/91 [00:02<00:00, 40.96it/s]


epoch - 9 val_loss - 0.47 acc - 0.791 auc - 0.746


loss - 0.4863: 100%|██████████| 364/364 [00:07<00:00, 46.30it/s]


epoch - 10 train_loss - 0.46 acc - 0.796 auc - 0.755


loss - 0.4741: 100%|██████████| 91/91 [00:02<00:00, 33.03it/s]


epoch - 10 val_loss - 0.47 acc - 0.793 auc - 0.745
early stop epoch  10


In [None]:
# Same preprocessing as before, but the unit label now comes from chapter_nm5.
# The correctness flag is 1 where ox_yn equals "O", 0 everywhere else.
df["memb_no"] = df["memb_no"].astype("str")
df["unit"] = df["chapter_nm5"].astype("str")
df["correct"] = (df["ox_yn"] == "O").astype("int64")

In [None]:
# Keep only the text after the last "." of each unit label, then remove any
# parenthesised number such as "(3)" and surrounding whitespace.
# The pattern is a raw string: "\(", "\s" and "\d" are invalid escape sequences
# in an ordinary string literal and trigger a warning on recent Python versions.
df["unit"] = df["unit"].apply(
    lambda x: re.sub(r'\(\s*\d+\s*\)', '', x.split(".")[-1].strip()).strip()
)
df.unit.nunique()

9985

In [None]:
# Distinct member ids, and distinct units in subject_cd / chapter_cd order.
u_list = np.unique(df["memb_no"].values)
q_list = df.sort_values(by=["subject_cd", "chapter_cd"])['unit'].unique()

u2idx = {u: idx for idx, u in enumerate(u_list)}
# Unit ids start at 1. The datasets zero-pad short sequences and the train/val
# loops drop every position whose target id is 0, so id 0 has to stay reserved
# for padding; otherwise the first unit would be silently excluded from the
# loss and the metrics. The model's embedding tables (2*n_skill+1 and
# n_skill+1 rows) already have room for ids up to n_skill.
q2idx = {q: idx for idx, q in enumerate(q_list, start=1)}

In [None]:
# Translate each unit label to its integer id with a direct dictionary lookup.
# `map` does one hash lookup per row and the explicit cast guarantees an integer
# column; a label missing from q2idx fails loudly at the cast instead of passing through.
df['content_id'] = df['unit'].map(q2idx).astype('int64')

In [None]:
def _member_sequences(rows):
    """Return one member's (content_id array, correct array) in row order."""
    return rows['content_id'].values, rows['correct'].values


# One entry per member: a tuple of that member's content ids and correctness flags.
group = df[['memb_no', 'content_id', 'correct']].groupby('memb_no').apply(_member_sequences)

In [None]:
# Display the per-member (content_id array, correct array) tuples.
group

In [None]:
# Number of distinct units. It sizes the model's embedding tables and is the
# offset the datasets add to an exercise id to encode a correct answer.
n_skill = len(q2idx)
n_skill

In [None]:
# 80/20 split over members for the chapter_nm5 units.
train, val = train_test_split(group, test_size=0.2)

train_dataset = SAKTDataset(train, n_skill)
val_dataset = SAKTDataset(val, n_skill)
# The datasets keep their own reference to the split, so the names can go.
del train, val

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True, num_workers=8)



In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fresh model sized for the chapter_nm5 units; optimizer and criterion are re-created for it.
model3 = SAKTModel(n_skill, embed_dim=128).to(device)
optimizer = torch.optim.Adam(model3.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

criterion.to(device)

BCEWithLogitsLoss()

In [None]:
epochs = 50

# Early stopping: over_fit counts consecutive epochs without a new best
# validation AUC (kept in last_auc); training stops once it reaches 2.
over_fit = 0
last_auc = 0
for epoch in range(epochs):
    train_loss, train_acc, train_auc = train_epoch(model3, train_dataloader, optimizer, criterion, device)
    print("epoch - {} train_loss - {:.2f} acc - {:.3f} auc - {:.3f}".format(epoch, train_loss, train_acc, train_auc))

    val_loss, avl_acc, val_auc = val_epoch(model3, val_dataloader, criterion, device)
    print("epoch - {} val_loss - {:.2f} acc - {:.3f} auc - {:.3f}".format(epoch, val_loss, avl_acc, val_auc))

    if val_auc > last_auc:
        last_auc, over_fit = val_auc, 0
        continue

    over_fit += 1
    if over_fit >= 2:
        print("early stop epoch ", epoch)
        break

loss - 0.4695: 100%|██████████| 361/361 [00:10<00:00, 35.93it/s]


epoch - 0 train_loss - 0.52 acc - 0.761 auc - 0.664


loss - 0.6125: 100%|██████████| 91/91 [00:01<00:00, 66.15it/s] 


epoch - 0 val_loss - 0.49 acc - 0.772 auc - 0.739


loss - 0.4810: 100%|██████████| 361/361 [00:07<00:00, 51.16it/s]


epoch - 1 train_loss - 0.46 acc - 0.788 auc - 0.771


loss - 0.4315: 100%|██████████| 91/91 [00:01<00:00, 64.44it/s]


epoch - 1 val_loss - 0.47 acc - 0.784 auc - 0.776


loss - 0.4642: 100%|██████████| 361/361 [00:06<00:00, 56.39it/s]


epoch - 2 train_loss - 0.44 acc - 0.798 auc - 0.797


loss - 0.3959: 100%|██████████| 91/91 [00:02<00:00, 40.98it/s]


epoch - 2 val_loss - 0.46 acc - 0.788 auc - 0.785


loss - 0.4225: 100%|██████████| 361/361 [00:07<00:00, 47.30it/s]


epoch - 3 train_loss - 0.43 acc - 0.804 auc - 0.809


loss - 0.4333: 100%|██████████| 91/91 [00:01<00:00, 61.57it/s]


epoch - 3 val_loss - 0.46 acc - 0.790 auc - 0.787


loss - 0.4382: 100%|██████████| 361/361 [00:08<00:00, 41.93it/s]


epoch - 4 train_loss - 0.42 acc - 0.808 auc - 0.818


loss - 0.5107: 100%|██████████| 91/91 [00:01<00:00, 62.78it/s]


epoch - 4 val_loss - 0.46 acc - 0.789 auc - 0.787


loss - 0.4193: 100%|██████████| 361/361 [00:06<00:00, 53.41it/s]


epoch - 5 train_loss - 0.42 acc - 0.812 auc - 0.827


loss - 0.4781: 100%|██████████| 91/91 [00:01<00:00, 47.89it/s]


epoch - 5 val_loss - 0.46 acc - 0.789 auc - 0.786


loss - 0.4138: 100%|██████████| 361/361 [00:06<00:00, 56.68it/s]


epoch - 6 train_loss - 0.41 acc - 0.816 auc - 0.834


loss - 0.5549: 100%|██████████| 91/91 [00:01<00:00, 64.79it/s]


epoch - 6 val_loss - 0.47 acc - 0.786 auc - 0.784
early stop epoch  6


In [None]:
# 80/20 split over members for the randomised-window dataset on chapter_nm5 units.
train5, val5 = train_test_split(group, test_size=0.2)

train_dataset5 = SAKTDatasetR(train5, n_skill)
val_dataset5 = SAKTDatasetR(val5, n_skill)
# The datasets keep their own reference to the split, so the names can go.
del train5, val5

train_dataloader5 = DataLoader(train_dataset5, batch_size=64, shuffle=True, num_workers=8)
val_dataloader5 = DataLoader(val_dataset5, batch_size=64, shuffle=True, num_workers=8)



In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fresh model for the randomised-window experiment on chapter_nm5 units.
model5 = SAKTModel(n_skill, embed_dim=128).to(device)
optimizer = torch.optim.Adam(model5.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

criterion.to(device)

BCEWithLogitsLoss()

In [None]:
epochs = 50

# Early stopping: over_fit counts consecutive epochs without a new best
# validation AUC (kept in last_auc); training stops once it reaches 2.
over_fit = 0
last_auc = 0
for epoch in range(epochs):
    # model5 must use the SAKTDatasetR loaders built for it (train_dataloader5 /
    # val_dataloader5); the un-suffixed loaders belong to the model3 experiment.
    train_loss, train_acc, train_auc = train_epoch(model5, train_dataloader5, optimizer, criterion, device)
    print("epoch - {} train_loss - {:.2f} acc - {:.3f} auc - {:.5f}".format(epoch, train_loss, train_acc, train_auc))

    val_loss, avl_acc, val_auc = val_epoch(model5, val_dataloader5, criterion, device)
    print("epoch - {} val_loss - {:.2f} acc - {:.3f} auc - {:.5f}".format(epoch, val_loss, avl_acc, val_auc))

    if val_auc > last_auc:
        last_auc = val_auc
        over_fit = 0
    else:
        over_fit += 1


    if over_fit >= 2:
        print("early stop epoch ", epoch)
        break

loss - 0.4787: 100%|██████████| 361/361 [00:07<00:00, 48.44it/s]


epoch - 0 train_loss - 0.52 acc - 0.760 auc - 0.66070


loss - 0.4803: 100%|██████████| 91/91 [00:01<00:00, 64.95it/s]


epoch - 0 val_loss - 0.50 acc - 0.771 auc - 0.73624


loss - 0.4631: 100%|██████████| 361/361 [00:07<00:00, 47.20it/s]


epoch - 1 train_loss - 0.46 acc - 0.787 auc - 0.76976


loss - 0.5366: 100%|██████████| 91/91 [00:01<00:00, 66.54it/s] 


epoch - 1 val_loss - 0.47 acc - 0.785 auc - 0.77549


loss - 0.4601: 100%|██████████| 361/361 [00:06<00:00, 56.97it/s]


epoch - 2 train_loss - 0.44 acc - 0.798 auc - 0.79677


loss - 0.4838: 100%|██████████| 91/91 [00:02<00:00, 42.54it/s]


epoch - 2 val_loss - 0.46 acc - 0.789 auc - 0.78465


loss - 0.4310: 100%|██████████| 361/361 [00:06<00:00, 51.86it/s]


epoch - 3 train_loss - 0.43 acc - 0.804 auc - 0.80885


loss - 0.4078: 100%|██████████| 91/91 [00:01<00:00, 66.32it/s] 


epoch - 3 val_loss - 0.46 acc - 0.789 auc - 0.78663


loss - 0.4518: 100%|██████████| 361/361 [00:07<00:00, 47.37it/s]


epoch - 4 train_loss - 0.43 acc - 0.808 auc - 0.81793


loss - 0.4435: 100%|██████████| 91/91 [00:02<00:00, 40.75it/s]


epoch - 4 val_loss - 0.46 acc - 0.790 auc - 0.78751


loss - 0.4301: 100%|██████████| 361/361 [00:07<00:00, 50.85it/s]


epoch - 5 train_loss - 0.42 acc - 0.812 auc - 0.82611


loss - 0.4224: 100%|██████████| 91/91 [00:01<00:00, 46.19it/s]


epoch - 5 val_loss - 0.46 acc - 0.786 auc - 0.78543


loss - 0.4489: 100%|██████████| 361/361 [00:06<00:00, 55.24it/s]


epoch - 6 train_loss - 0.41 acc - 0.816 auc - 0.83396


loss - 0.4692: 100%|██████████| 91/91 [00:01<00:00, 64.43it/s]


epoch - 6 val_loss - 0.47 acc - 0.788 auc - 0.78372
early stop epoch  6


In [None]:
import pickle
# Replace `group` with a precomputed table loaded from Drive. The dataset class
# defined right after this cell unpacks each entry as (q, qa, diff_level).
# NOTE(review): pickle.load can execute arbitrary code — only load files you trust.
with open('/content/drive/MyDrive/group_nm5_with_diff.pkl', 'rb') as f:
    group = pickle.load(f)

In [None]:
class SAKTDataset(Dataset):
    """SAKT samples with a difficulty feature appended to the input.

    ``group`` maps a member id to ``(content_ids, correct_flags, diff_levels)``
    and must expose the ids through ``group.index``. Members with fewer than 10
    interactions are skipped.

    Assumes the three sequences of a member have the same length — TODO confirm
    against the code that builds the pickle. A mismatch raises ``ValueError``
    when the sample is built.
    """

    def __init__(self, group, n_skill, max_seq=100):
        super(SAKTDataset, self).__init__()
        self.max_seq = max_seq
        self.n_skill = n_skill
        self.samples = group

        self.user_ids = []
        self.diff_levels = []  # difficulty sequences of the kept members, parallel to user_ids
        for user_id in group.index:
            q, qa, diff_level = group[user_id]

            if len(q) < 10:
                continue
            self.user_ids.append(user_id)
            self.diff_levels.append(diff_level)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, index):
        """Return ``(x_with_diff, target_id, label)``.

        ``x_with_diff`` always has length ``2 * (max_seq - 1)``: the interaction
        ids followed by the difficulty levels of the same steps.
        """
        user_id = self.user_ids[index]
        q_, qa_, diff_ = self.samples[user_id]

        seq_len = len(q_)

        q = np.zeros(self.max_seq, dtype=int)
        qa = np.zeros(self.max_seq, dtype=int)
        diff = np.zeros(self.max_seq, dtype=int)
        if seq_len >= self.max_seq:
            # Long history: keep the most recent max_seq steps of all three sequences.
            q[:] = q_[-self.max_seq:]
            qa[:] = qa_[-self.max_seq:]
            diff[:] = diff_[-self.max_seq:]
        else:
            # Short history: right-align, leaving zeros as left padding.
            q[-seq_len:] = q_
            qa[-seq_len:] = qa_
            diff[-seq_len:] = diff_

        target_id = q[1:]
        label = qa[1:]

        x = q[:-1].copy()
        x += (qa[:-1] == 1) * self.n_skill

        # The difficulty feature goes through the same window and padding as x,
        # so every sample has the same length (batchable) and position i of the
        # difficulty half describes the same step as position i of x.
        # The last step is dropped because it is not an input.
        x_with_diff = np.concatenate([x, diff[:-1]], axis=0)

        return x_with_diff, target_id, label



In [None]:
import torch
import torch.nn as nn
import numpy as np

class SAKTDataset(Dataset):
    """SAKT samples with a difficulty feature appended to the input.

    This is a data container, so it derives from ``Dataset`` rather than
    ``nn.Module``.

    ``group`` maps a member id to ``(content_ids, correct_flags, diff_levels)``
    and must expose the ids through ``group.index``. Members with fewer than 10
    interactions are skipped.

    Assumes the three sequences of a member have the same length — TODO confirm
    against the code that builds the pickle. A mismatch raises ``ValueError``
    when the sample is built.
    """

    def __init__(self, group, n_skill, max_seq=100):
        super(SAKTDataset, self).__init__()
        self.max_seq = max_seq
        self.n_skill = n_skill
        self.samples = group

        self.user_ids = []
        self.diff_levels = []  # difficulty sequences of the kept members, parallel to user_ids
        for user_id in group.index:
            q, qa, diff_level = group[user_id]

            if len(q) < 10:
                continue

            self.user_ids.append(user_id)
            self.diff_levels.append(diff_level)

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, index):
        """Return ``(x_with_diff, target_id, label)``.

        ``x_with_diff`` always has length ``2 * (max_seq - 1)``: the interaction
        ids followed by the difficulty levels of the same steps.
        """
        user_id = self.user_ids[index]
        q_, qa_, diff_ = self.samples[user_id]

        seq_len = len(q_)

        q = np.zeros(self.max_seq, dtype=int)
        qa = np.zeros(self.max_seq, dtype=int)
        diff = np.zeros(self.max_seq, dtype=int)
        if seq_len >= self.max_seq:
            # Long history: keep the most recent max_seq steps of all three sequences.
            q[:] = q_[-self.max_seq:]
            qa[:] = qa_[-self.max_seq:]
            diff[:] = diff_[-self.max_seq:]
        else:
            # Short history: right-align, leaving zeros as left padding.
            q[-seq_len:] = q_
            qa[-seq_len:] = qa_
            diff[-seq_len:] = diff_

        target_id = q[1:]
        label = qa[1:]

        x = q[:-1].copy()
        x += (qa[:-1] == 1) * self.n_skill

        # The difficulty feature goes through the same window and left padding
        # as x. Padding it separately on the right would shift it against x for
        # short histories, and a history longer than max_seq would hand np.pad
        # a negative width. The last step is dropped because it is not an input.
        x_with_diff = np.concatenate([x, diff[:-1]], axis=0)

        return x_with_diff, target_id, label


class FFN(nn.Module):
    """Feed-forward block: Linear -> ReLU -> Dropout(0.2) -> Linear.

    Both linear layers map ``state_size`` features to ``state_size`` features,
    so the output has the same shape as the input.

    Args:
        state_size: Size of the last (feature) dimension of the input.
    """

    def __init__(self, state_size=200):
        super().__init__()
        self.state_size = state_size

        self.lr1 = nn.Linear(state_size, state_size)
        self.relu = nn.ReLU()
        self.lr2 = nn.Linear(state_size, state_size)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Apply the two-layer transform to ``x`` and return the result."""
        hidden = self.dropout(self.relu(self.lr1(x)))
        return self.lr2(hidden)


def future_mask(seq_length):
    """Return a ``(seq_length, seq_length)`` boolean mask hiding future positions.

    Entry ``[i, j]`` is ``True`` exactly when ``j > i`` (strict upper triangle),
    i.e. for the positions that step ``i`` must not attend to.
    """
    all_true = torch.ones((seq_length, seq_length), dtype=torch.bool)
    return all_true.triu(diagonal=1)


class SAKTModel(nn.Module):
    """Single-block self-attentive knowledge-tracing model with a difficulty feature.

    The target question embedding is the attention query, the embedded past
    interactions are keys and values. After the attention and feed-forward
    sub-layers a scalar difficulty level is appended to every position's
    feature vector before the final linear prediction layer.

    Args:
        n_skill: Number of distinct question/skill ids. Interaction ids are
            expected in ``[0, 2 * n_skill]`` and question ids in ``[0, n_skill]``.
        max_seq: Window length used by the dataset; the model receives
            sequences of at most ``max_seq - 1`` steps.
        embed_dim: Embedding width; must be divisible by the 8 attention heads.
    """

    def __init__(self, n_skill, max_seq=100, embed_dim=128):
        super(SAKTModel, self).__init__()
        self.n_skill = n_skill
        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(2*n_skill+1, embed_dim)
        self.pos_embedding = nn.Embedding(max_seq-1, embed_dim)
        self.e_embedding = nn.Embedding(n_skill+1, embed_dim)

        self.multi_att = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=8, dropout=0.2)

        self.dropout = nn.Dropout(0.2)
        self.layer_normal = nn.LayerNorm(embed_dim)

        self.ffn = FFN(embed_dim)
        # One extra input feature: the difficulty level appended in forward().
        self.pred = nn.Linear(embed_dim + 1, 1)

    def forward(self, x, question_ids, diff_level=None):
        """Score every target question in the sequence.

        Args:
            x: Integer tensor ``[batch, seq_len]`` of past-interaction ids. It
                may also be the packed ``[batch, 2 * seq_len]`` layout in which
                the second half carries the difficulty levels; that layout is
                split automatically when ``diff_level`` is not given.
            question_ids: Integer tensor ``[batch, seq_len]`` of target
                question ids.
            diff_level: Optional tensor ``[batch, seq_len]`` with one
                difficulty value per step. When it is omitted and ``x`` is not
                packed, a zero feature is used.

        Returns:
            ``(logits, att_weight)`` where ``logits`` is ``[batch, seq_len]``
            and ``att_weight`` is whatever ``nn.MultiheadAttention`` returns as
            its attention weights.
        """
        device = x.device
        seq_len = question_ids.size(1)

        # Accept the dataset's packed layout: [interaction ids | difficulty levels].
        if diff_level is None and x.size(1) == 2 * seq_len:
            x, diff_level = x[:, :seq_len], x[:, seq_len:]

        x = self.embedding(x)
        pos_id = torch.arange(x.size(1), device=device).unsqueeze(0)
        # The [1, seq_len, embed] positional term broadcasts over the batch.
        x = x + self.pos_embedding(pos_id)

        e = self.e_embedding(question_ids)

        x = x.permute(1, 0, 2)  # x: [bs, s_len, embed] => [s_len, bs, embed]
        e = e.permute(1, 0, 2)

        # After the permute the sequence length is dimension 0 (dimension 1 is
        # the batch size), so the causal mask has to be sized from size(0).
        att_mask = torch.triu(
            torch.ones((e.size(0), x.size(0)), dtype=torch.bool, device=device),
            diagonal=1,
        )

        att_output, att_weight = self.multi_att(e, x, x, attn_mask=att_mask)
        att_output = self.layer_normal(att_output + e)
        att_output = att_output.permute(1, 0, 2)  # att_output: [s_len, bs, embed] => [bs, s_len, embed]

        x = self.ffn(att_output)
        x = self.layer_normal(x + att_output)

        # Append the difficulty level as one extra feature per position.
        if diff_level is None:
            diff_feature = x.new_zeros(x.size(0), x.size(1), 1)
        else:
            diff_feature = diff_level.to(device=device, dtype=x.dtype).unsqueeze(-1)
        x_with_diff = torch.cat([x, diff_feature], dim=-1)

        x_with_diff = self.dropout(x_with_diff)

        x = self.pred(x_with_diff)

        return x.squeeze(-1), att_weight


In [None]:
# Hold out 20% of the users for validation. random_state pins the split so that
# re-running this cell yields the same partition.
train, val = train_test_split(group, test_size=0.2, random_state=SEED)

# NOTE(review): the original positional call SAKTDataset(train, 9, n_skill)
# binds n_skill=9 and max_seq=n_skill. The keywords below keep exactly that
# binding but make it visible -- confirm that 9 and n_skill are not swapped.
train_dataset = SAKTDataset(train, n_skill=9, max_seq=n_skill)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
del train

val_dataset = SAKTDataset(val, n_skill=9, max_seq=n_skill)
# Evaluation does not need a shuffled order; a fixed order keeps runs comparable.
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=8)
del val

In [None]:
epochs = 50
# Number of consecutive epochs without a validation-AUC improvement that ends training.
early_stop_patience = 2

over_fit = 0  # consecutive epochs without improvement
last_auc = 0  # best validation AUC seen so far
for epoch in range(epochs):
    train_loss, train_acc, train_auc = train_epoch(model, train_dataloader, optimizer, criterion, device)
    print("epoch - {} train_loss - {:.2f} acc - {:.3f} auc - {:.5f}".format(epoch, train_loss, train_acc, train_auc))

    val_loss, val_acc, val_auc = val_epoch(model, val_dataloader, criterion, device)
    print("epoch - {} val_loss - {:.2f} acc - {:.3f} auc - {:.5f}".format(epoch, val_loss, val_acc, val_auc))

    if val_auc > last_auc:
        last_auc = val_auc
        over_fit = 0
    else:
        over_fit += 1

    if over_fit >= early_stop_patience:
        print("early stop epoch ", epoch)
        break