# Testing phase

In [46]:
import pathlib

# Folder of the attached Kaggle dataset; module.py, the pickled extractors (*.pkl)
# and the LightGBM model file (*.lgb) are all read from here by the cells below.
pipeline = pathlib.Path('/kaggle/input/riiid-test-answer-prediction-dataset')

Run the module that contains the source code for the extractors. It's not possible to unpickle them without this.

In [47]:
# Build the path to module.py inside the dataset folder as a plain string.
mod_path = str(pipeline.joinpath('module.py'))
# IPython expands $mod_path before executing the file with %run, so the names it
# defines become available in this kernel (needed before unpickling the extractors).
%run $mod_path

Load the extractors.

In [49]:
import pickle

# Collect every pickled extractor found directly inside the dataset folder.
# NOTE(review): glob() gives no ordering guarantee, and the order of this list
# determines the column order of the feature matrix built later -- confirm it
# matches the order used at training time.
# NOTE(review): pickle.load executes arbitrary code; only use with trusted files.
extractors = []

for path in pipeline.glob('*.pkl'):
    with path.open('rb') as f:
        extractor = pickle.load(f)
    extractors.append(extractor)

Load the model.

In [53]:
import lightgbm as lgb

# Load the booster from the first '*.lgb' file that glob yields in the dataset folder.
# Indexing with [0] raises IndexError if no such file exists.
model = lgb.Booster(model_file=str(list(pipeline.glob('*.lgb'))[0]))

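Run inference on the test set: iterate over the batches served by the competition API, update the stateful extractors, compute the features, and submit the predictions.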

In [4]:
import pandas as pd
import riiideducation

# Create the competition environment; predictions are submitted through env.predict below.
env = riiideducation.make_env()

# Iterator over the test batches; the loop below unpacks each item as
# a (test_df, sample_prediction_df) pair.
iter_test = env.iter_test()

def make_prev_group(test_df):
    """Build a frame with the previous group's outcomes carried by ``test_df``.

    For every user that has at least one non-null
    ``prior_group_answers_correct`` entry, the first such entry is parsed as a
    Python list literal and expanded into one row per answer.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Current test batch. Must contain the ``user_id`` and
        ``prior_group_answers_correct`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``answered_correctly``, ``user_id`` and ``content_type_id``
        (always 0). When the batch carries no prior answers an empty frame
        with those three columns is returned instead of raising.

    Raises
    ------
    ValueError, SyntaxError
        If an entry is not a valid Python literal.
    """
    # Local import so this definition works regardless of which cells ran before it.
    import ast

    columns = ['answered_correctly', 'user_id', 'content_type_id']

    has_prior = test_df['prior_group_answers_correct'].notnull()
    first_per_user = test_df[has_prior].groupby('user_id').first()

    prev_correct = []

    # NOTE(review): every answer in a parsed list is attributed to the user whose
    # row carries the list. If the list actually covers all rows of the previous
    # batch (several users), this attribution is wrong -- confirm against the API.
    for user, row in first_per_user.iterrows():
        # literal_eval only accepts Python literals, so a malformed or malicious
        # string cannot execute code the way eval() would.
        answered_correctly = ast.literal_eval(row['prior_group_answers_correct'])
        prev_correct.append(
            pd.DataFrame(
                {
                    'answered_correctly': answered_correctly,
                    'user_id': user,
                    'content_type_id': 0,
                }
            )
        )

    # pd.concat raises ValueError on an empty sequence; return an empty frame
    # with the expected schema so callers can proceed uniformly.
    if not prev_correct:
        return pd.DataFrame(columns=columns)

    return pd.concat(prev_correct)

# Serve predictions one batch at a time, in the order the API hands them out.
for (test_df, sample_prediction_df) in iter_test:

    # Keep only the rows with content_type_id == 0 (the questions).
    is_question = test_df['content_type_id'] == 0
    questions = test_df.loc[is_question]
    prev_group = make_prev_group(test_df)

    # Stateful extractors are updated with the previous group's outcomes
    # before any feature is computed for the current batch.
    for ex in extractors:
        if not isinstance(ex, StatefulExtractor):
            continue
        ex.update(questions, prev_group)

    # One feature block per extractor, joined side by side and cast to float.
    features = pd.concat(
        [ex.transform(questions) for ex in extractors],
        axis=1,
    ).astype(float)
    y_pred = model.predict(features)

    # The submission frame keeps the index of the question rows.
    prediction_df = pd.DataFrame(
        {'row_id': questions['row_id'], 'answered_correctly': y_pred},
        index=questions.index,
    )

    env.predict(prediction_df)