### <span style="color:darkblue">**Riiid - transformer**</span>

In [1]:
# IMPORTS

import os
import pickle

import torch

# my packages
from dataset import RiiidData
from model_trans import SaintNikolaTransformer
from train import train_model
from utils import seed_everything, get_ma

# dataset config
from config_trans import max_quest, num_quest, start_response_token, sequel_response_token, batch_size
# model config
from config_trans import head_dim, nhead, dim_feedforward, num_encoder_layers
# training config
from config_trans import lr, epochs, warmup_steps

from config_trans import random_seed
# Fix the random seed up front, before the sampling and model-initialisation
# cells further down run, so repeated runs start from the same state.
# NOTE(review): presumably seeds the Python / NumPy / torch RNGs — confirm in
# utils.seed_everything.
seed_everything(random_seed)

# visible GPU card
# CUDA_VISIBLE_DEVICES is only honoured when it is set before CUDA is first
# initialised in this process, so it is exported before any CUDA query below.
gpu_idx = 1
os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_idx}"

# Use the selected card when it is actually usable; otherwise fall back to the
# CPU with a visible warning instead of failing later with an opaque CUDA error
# (e.g. on a machine that has no GPU with index `gpu_idx`).
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    print(f"WARNING: GPU {gpu_idx} is not available - falling back to CPU.")
    device = torch.device("cpu")

import matplotlib.pyplot as plt
%matplotlib inline 

In [None]:
# # FORMAT THE DATA - DO ONLY ONCE

# import pickle
# import pandas as pd
# from utils import csv_to_dict

# df = pd.read_parquet("../data/riiid_train.parquet")
# questions_container = csv_to_dict(df)

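# # save the container
# # NOTE: the loading cell below reads "../models/riiid_container/questions_container.pickle",
# # so either write the file there or move it after saving.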
# with open("./questions_container.pickle", "wb") as handle:
#     pickle.dump(questions_container, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [2]:
%%time

# Read the pre-built questions container back from disk.
# NOTE: unpickling can execute arbitrary code — only load files produced by
# this project's own formatting step.
container_path = "../models/riiid_container/questions_container.pickle"
with open(container_path, mode="rb") as pickle_file:
    questions_container = pickle.load(pickle_file)


CPU times: user 3min 3s, sys: 10.3 s, total: 3min 14s
Wall time: 3min 14s


In [3]:
%%time

# BUILD THE TRAIN / VALIDATION DATALOADERS
# val_size passed to get_dataloaders — presumably the fraction of the data held
# out for validation; confirm in dataset.RiiidData.
val_fraction = 0.025

data = RiiidData(
    questions_container,
    max_quest,
    num_quest,
    start_response_token,
    sequel_response_token,
    batch_size,
)
# sampling_process() is invoked before the dataloaders are requested.
data.sampling_process()
train_dataloader, val_dataloader = data.get_dataloaders(val_size=val_fraction)


CPU times: user 7min 15s, sys: 24.1 s, total: 7min 39s
Wall time: 7min 37s


In [None]:
# BUILD THE MODEL AND MOVE IT TO THE TARGET DEVICE
model = SaintNikolaTransformer(
    device,
    num_quest,
    max_quest,
    head_dim,
    nhead,
    dim_feedforward,
    num_encoder_layers,
)
model.init_weights()
model.to(device)

# Report the parameter count of the freshly initialised model.
param_count = model.num_parameters()
print(f"The model has {param_count} of trainable parameters!")

In [None]:
# RUN TRAINING
# train_model returns the model together with the training loss that the
# plotting cell below smooths and draws.
model, train_loss = train_model(
    model,
    device,
    train_dataloader,
    val_dataloader,
    lr,
    epochs,
    warmup_steps,
)

In [None]:
# MOVING-AVERAGE TRAINING LOSS CURVE
# Smooth the recorded loss with a window of 40 entries.
ma_loss = get_ma(train_loss, window_size=40)
plt.rcParams["figure.figsize"] = (20, 5)

# Skip the first 100 smoothed points; slice once and reuse it for both the
# curve and the extent of the reference line.
ma_tail = ma_loss[100:]
plt.plot(ma_tail)
# Red horizontal reference line at 0.50 across the plotted range.
plt.hlines(0.50, xmin=0, xmax=len(ma_tail), colors="r");