# Testing phase

In [46]:
import pathlib

# Kaggle input directory that this notebook reads module.py, the pickled
# extractors (*.pkl) and the LightGBM model (*.lgb) from.
PIPELINE_DIR = '/kaggle/input/riiid-test-answer-prediction-dataset'

pipeline = pathlib.Path(PIPELINE_DIR)

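Run the module that contains the source code for the extractors. Pickle stores only a reference to each object's class, not the class code itself, so the extractors cannot be unpickled until these definitions exist in the session.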

In [47]:
# Path (as a plain string) to the module.py file stored in the pipeline directory.
mod_path = str(pipeline.joinpath('module.py'))
# IPython expands $mod_path to the variable's value and executes that file,
# making the names it defines available in this notebook session.
%run $mod_path

Load the extractors.

In [49]:
import pickle

# Deserialize every '*.pkl' file in the pipeline directory into a list of
# extractors. The list order is whatever order glob() yields the files in.
# NOTE(review): unpickling can execute arbitrary code -- make sure these
# files come from a trusted source.
extractors = [
    pickle.loads(pkl_path.read_bytes())
    for pkl_path in pipeline.glob('*.pkl')
]

Load the model.

In [53]:
import lightgbm as lgb

# Locate the serialized LightGBM model in the pipeline directory. Fail with
# an explicit message (rather than a bare IndexError) when no '*.lgb' file
# is present; if several match, the first one yielded by glob() is used.
model_paths = list(pipeline.glob('*.lgb'))
if not model_paths:
    raise FileNotFoundError(f"No '*.lgb' model file found in {pipeline}")

model = lgb.Booster(model_file=str(model_paths[0]))

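Iterate over the test set: for each batch, feed the previous batch's answers to the stateful extractors, compute features for the new questions, and submit the model's predictions.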

In [4]:
import ast

import pandas as pd
import riiideducation

env = riiideducation.make_env()

iter_test = env.iter_test()

# Rows of the previously served batch. Their answers only arrive together
# with the next batch, so the frame is kept around until then.
prev_group = pd.DataFrame()

for (group, _) in iter_test:

    # Only rows with content_type_id == 0 are scored and submitted.
    is_question = group['content_type_id'].eq(0)
    questions = group[is_question]

    # Add the answer info to the previous group now that it's available.
    # The first row of each prior_group_* column holds a stringified list.
    # It is parsed with ast.literal_eval instead of eval so that text coming
    # from the external test API can never be executed as code.
    prev_group['user_answer'] = ast.literal_eval(
        group.pop('prior_group_responses').iloc[0]
    )
    prev_group['answered_correctly'] = ast.literal_eval(
        group.pop('prior_group_answers_correct').iloc[0]
    )

    # Online learning: let the stateful extractors see the current questions
    # together with the now-labelled previous batch.
    for ex in extractors:
        if isinstance(ex, StatefulExtractor):
            ex.update(questions, prev_group)

    if questions.empty:
        # A batch without any question rows: there is nothing to featurize,
        # so avoid asking the model to predict on zero rows and submit an
        # empty prediction frame instead.
        y_pred = pd.Series(dtype=float, index=questions.index)
    else:
        # Extract features for the questions and make predictions. Columns
        # are concatenated in the order of the `extractors` list.
        features = pd.concat(
            (ex.transform(questions) for ex in extractors),
            axis='columns'
        ).astype(float)
        y_pred = model.predict(features)

    # Submit
    env.predict(pd.DataFrame(
        {
            'row_id': questions['row_id'],
            'answered_correctly': y_pred
        },
        index=questions.index
    ))

    # Keep this batch (minus the popped prior_group_* columns) so its answers
    # can be attached on the next iteration.
    prev_group = group