In [1]:
import pickle

In [2]:
def read_pickle(path):
    with open(path, "rb") as f:
        return pickle.load(f)

In [205]:
train_dataset_raw = read_pickle("linear_model_dataset/dataset_train_new.pkl")
valid_dataset_raw = read_pickle("linear_model_dataset/dataset_valid_new.pkl")
task_emb = read_pickle("tasks_embeddings.pkl").T

In [206]:
TASK_EMB_LENGTH = task_emb.shape[1]
TASK_EMB_LENGTH

64

In [207]:
import torch
import numpy as np
import itertools
from tqdm import tqdm

from torch.utils.data import Dataset
from torchvision import datasets
import random
from torch import nn


In [208]:
all_tasks_set = set()

for i in train_dataset_raw:
    for j in i[0]:
        all_tasks_set.add(j)

print(min(all_tasks_set), max(all_tasks_set))

0 2828


In [209]:
task_emb[0] = np.array([0] * 64)

In [211]:
class CustomDataset(Dataset):
    HISTORY_LENGTH = 200
    LENGTH_SAMPLE_RANDOM_RANGE = (199, 200)

    def __init__(self, raw_dataset, emb_tasks):
        # raw_dataset  List[Tuple[List_answers, List_results]]
        self.raw_dataset = raw_dataset
        self.emb_tasks = emb_tasks

    def __len__(self):
        return len(self.raw_dataset)


    def add_padding(self, arr):
        assert len(arr) <= self.HISTORY_LENGTH
        return list(arr) + [0] * (self.HISTORY_LENGTH - len(arr))

    def __getitem__(self, idx):
        # не используем индекс - каждый раз берем историю случайного человека
        rand_idx = random.randint(0, len(self.raw_dataset)-1)
        raw_sample = self.raw_dataset[rand_idx]
        raw_sample_len = len(raw_sample[0])

        if len(raw_sample[0]) < self.LENGTH_SAMPLE_RANDOM_RANGE[0]:
            return self.__getitem__(-1)

        # случайно сэмплируем длину истории
        sample_length = random.randint(self.LENGTH_SAMPLE_RANDOM_RANGE[0],
                                       min(self.LENGTH_SAMPLE_RANDOM_RANGE[1], raw_sample_len))

        # случайно сэмплируем место начала
        start_idx = random.randint(0, raw_sample_len - sample_length)
        seq_tasks = raw_sample[0][start_idx:start_idx+sample_length]
        seq_results = raw_sample[1][start_idx:start_idx+sample_length]

        train_tasks, loss_task = seq_tasks[:-1], seq_tasks[-1]
        train_results, loss_result = seq_results[:-1], seq_results[-1]

        # добавляем паддинг из нулей
        seq_tasks = self.add_padding(train_tasks)
        seq_results = self.add_padding(train_results)

        # заменяем номера заданий их ембедингами
        seq_tasks_emb = np.array([self.emb_tasks[i] for i in seq_tasks]).flatten()
        seq_resutls = np.array(list(map(int, seq_results)))
        x_sample = np.append(seq_tasks_emb, seq_resutls, 0)
        x_sample = torch.FloatTensor(x_sample)

        return x_sample, loss_task, loss_result

In [212]:
train_dataset = CustomDataset(train_dataset_raw, task_emb)
valid_dataset = CustomDataset(valid_dataset_raw, task_emb)

In [215]:
x_sample, loss_task, loss_result = train_dataset[0]
x_sample.shape

torch.Size([13000])

In [216]:
class LinearModelDKT(nn.Module):
    def __init__(self, in_features_length=13000, tasks_count=2829):
        super(LinearModelDKT, self).__init__()
        self.tasks_count = tasks_count
        self.in_features_length = in_features_length

        self.l1 = nn.Linear(self.in_features_length, 4096)
        self.l2 = nn.Linear(4096, 4096)
        self.l3 = nn.Linear(4096, 2829)

    def forward(self, x):
        res = nn.functional.relu(self.l1(x))
        res = nn.functional.relu(self.l2(res))
        res = self.l3(res)

        return res


In [218]:
model = LinearModelDKT()

In [219]:
model

LinearModelDKT(
  (l1): Linear(in_features=13000, out_features=4096, bias=True)
  (l2): Linear(in_features=4096, out_features=4096, bias=True)
  (l3): Linear(in_features=4096, out_features=2829, bias=True)
)

In [220]:
import torch.optim as optim
lr = 0.001
optimizer = optim.Adam(model.parameters(), lr)
loss_function = torch.nn.BCEWithLogitsLoss()

In [221]:
EPOCH_COUNT = 5
DEVIDE = "cuda"

model = model.to(DEVIDE)

In [223]:
for epoch in range(EPOCH_COUNT):
    loss_sum = 0
    count_epoch = 0
    answers = []

    for ind in tqdm(range(len(train_dataset))[:200]):
        x, y_task, y_ans = train_dataset[ind]
        x = x.to(DEVIDE)
        answers.append(y_ans)
        target = torch.FloatTensor([y_ans]).to(DEVIDE)
        y_task = torch.LongTensor([y_task]).to(DEVIDE)

        res = model(x)
        need_result = res[y_task]

        # print(abs(need_result - target))
        loss = loss_function(need_result, target)

        loss_sum += abs(loss)
        count_epoch += 1

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            res = model(x)
            need_result = res[y_task]
            # print(abs(need_result - target))

    print("mean loss: ", loss_sum / count_epoch)

    valid_results = []

    for ind in tqdm(range(len(valid_dataset))):
        x, y_task, y_ans = train_dataset[ind]
        x = x.to(DEVIDE)
        answers.append(y_ans)
        target = torch.FloatTensor([y_ans]).to(DEVIDE)
        y_task = torch.LongTensor([y_task]).to(DEVIDE)

        res = model(x)
        need_result = res[y_task]
        valid_results.append((need_result > 0) == y_ans)

    print("valid acc: ", sum(valid_results) / len(valid_results))


  target = torch.FloatTensor([y_ans]).to(DEVIDE)
100%|██████████| 200/200 [00:11<00:00, 18.14it/s]


mean loss:  tensor(0.7265, device='cuda:0', grad_fn=<DivBackward0>)


  target = torch.FloatTensor([y_ans]).to(DEVIDE)
100%|██████████| 2427/2427 [00:06<00:00, 384.91it/s]


valid acc:  tensor([0.5097], device='cuda:0')


100%|██████████| 200/200 [00:10<00:00, 18.70it/s]


mean loss:  tensor(0.7629, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 2427/2427 [00:06<00:00, 383.26it/s]


valid acc:  tensor([0.5187], device='cuda:0')


 39%|███▉      | 78/200 [00:04<00:06, 18.61it/s]


KeyboardInterrupt: 