In [1]:
import abc

class Extractor(abc.ABC):
    """Abstract base for feature extractors.

    Concrete subclasses must implement ``transform``. The string form of an
    extractor is the name of its concrete class.
    """

    @abc.abstractmethod
    def transform(self, questions):
        """Produce this extractor's output for ``questions`` (subclass hook)."""

    def __str__(self):
        return type(self).__name__
    
class StatefulExtractor(Extractor):
    """Extractor that also exposes an ``update`` hook.

    The prediction loop in this notebook calls ``update(questions, prev_group)``
    on every stateful extractor before asking it to ``transform``.
    """

    @abc.abstractmethod
    def update(self, questions, prev_group):
        """Absorb ``questions`` and ``prev_group`` into internal state (subclass hook)."""
    
class AvgCorrect(StatefulExtractor):
    """Running per-user average of ``answered_correctly``, seeded with a prior.

    ``self.stats`` has one row per known user and two columns: ``mean`` (the
    current average) and ``size`` (the number of observations behind it, the
    prior's pseudo-observations included). Every user starts at ``prior_mean``
    with weight ``prior_size``.
    """

    def __init__(self, prior_mean, prior_size):
        self.prior_mean = prior_mean
        self.prior_size = prior_size
        # Float columns from the start, so in-place updates never need to upcast.
        self.stats = pd.DataFrame(columns=['mean', 'size'], dtype=float)

    def __str__(self):
        return f'{self.__class__.__name__}_prior_mean={self.prior_mean}_prior_size={self.prior_size}'

    def _register_users(self, user_ids):
        """Give every user in ``user_ids`` without a row in ``self.stats`` the prior."""
        unseen = pd.Index(user_ids).difference(self.stats.index)
        if len(unseen) == 0:
            return

        prior = pd.DataFrame(
            {'mean': self.prior_mean, 'size': self.prior_size},
            index=unseen
        ).astype(float)

        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        # The still-empty frame is skipped so it cannot influence result dtypes.
        self.stats = prior if self.stats.empty else pd.concat([self.stats, prior])

    def update(self, questions, prev_group):
        """Fold the answers in ``prev_group`` into the per-user statistics.

        Parameters
        ----------
        questions : DataFrame with a ``user_id`` column; its users are
            registered with the prior if they are not known yet.
        prev_group : DataFrame with ``content_type_id``, ``user_id`` and
            ``answered_correctly`` columns. Only rows whose
            ``content_type_id`` equals 0 are used. May be empty.
        """
        # Initialize statistics for new users
        self._register_users(questions['user_id'])

        # Nothing to do if nothing happened before
        if len(prev_group) == 0:
            return

        # Per-user mean and count of the newly revealed answers
        batch = (
            prev_group
            .query('content_type_id == 0')
            .groupby('user_id')['answered_correctly']
            .agg(['mean', 'size'])
        )
        if batch.empty:
            return

        users = batch.index
        # prev_group can name users that have no row in `questions` (and hence
        # no statistics yet); give them the prior before merging so the .loc
        # lookups below cannot fail on a missing label.
        self._register_users(users)

        # Merge two averages: new = old + m * (batch_mean - old) / (old_size + m)
        m = batch['size']
        old_mean = self.stats.loc[users, 'mean']
        n = self.stats.loc[users, 'size'] + m
        self.stats.loc[users, 'size'] = n
        self.stats.loc[users, 'mean'] = old_mean + m * (batch['mean'] - old_mean) / n

    def transform(self, questions):
        """Return each row's user average as a Series named ``avg_correct``.

        The result is indexed like ``questions``. Users are looked up with a
        strict ``.loc``, so they must already be known (``update`` registers
        every user of the ``questions`` it receives).
        """
        avgs = self.stats.loc[questions['user_id'], 'mean'].rename('avg_correct')
        avgs.index = questions.index
        return avgs
    
class QuestionDifficulty(Extractor):
    """Per-question Bayesian average of ``answered_correctly``, computed once from ``train``.

    Each question's observed mean is shrunk toward ``prior_mean`` with a weight
    of ``prior_size`` pseudo-observations::

        (mean * size + prior_mean * prior_size) / (size + prior_size)
    """

    def __init__(self, train, prior_mean=.6, prior_size=100):
        """Compute the shrunk averages.

        Parameters
        ----------
        train : DataFrame with ``content_type_id``, ``content_id`` and
            ``answered_correctly`` columns; only rows whose
            ``content_type_id`` equals 0 are used.
        prior_mean, prior_size : value and weight of the prior. The defaults
            reproduce the prior that used to be hard-coded here.
        """
        stats = (
            train
            .query('content_type_id == 0')
            .groupby('content_id')['answered_correctly']
            .agg(['mean', 'size'])
        )
        shrunk = (
            (stats['mean'] * stats['size'] + prior_mean * prior_size)
            / (stats['size'] + prior_size)
        )
        self.bayes_avg = shrunk.rename('question_difficulty')

    def transform(self, questions):
        """Return each row's question average as a Series named ``question_difficulty``.

        The result is indexed like ``questions``. Lookup is a strict ``.loc`` on
        ``content_id``, so every id is expected to have appeared in ``train``.
        """
        avgs = self.bayes_avg.loc[questions['content_id']]
        avgs.index = questions.index
        return avgs

In [2]:
import pathlib
import pickle

# Restore every pickled extractor found in the dataset directory, in the order
# the directory listing yields the files.
# NOTE(review): unpickling runs code embedded in the file, so this is only safe
# while the dataset is trusted. The glob order is not guaranteed to be stable,
# and the order of `extractors` later becomes the feature column order --
# confirm it matches the order used at training time.
extractors = [
    pickle.loads(pkl_path.read_bytes())
    for pkl_path in pathlib.Path('/kaggle/input/lofitest').glob('*.pkl')
]

In [3]:
import lightgbm as lgb

# Restore the LightGBM booster from its saved model file; the prediction loop
# further down calls model.predict on the extracted features.
model = lgb.Booster(model_file='/kaggle/input/lofitest/model.lgb')

In [4]:
import pandas as pd
import riiideducation

# Create the competition environment and the iterator that yields the
# (test_df, sample_prediction_df) pairs consumed by the prediction loop below.
env = riiideducation.make_env()

iter_test = env.iter_test()

def make_prev_group(test_df):
    """Rebuild the previous group's answers from ``prior_group_answers_correct``.

    For each user, the first non-null ``prior_group_answers_correct`` value in
    ``test_df`` is parsed as a Python list literal and expanded to one row per
    answer.

    Returns
    -------
    DataFrame with columns ``answered_correctly``, ``user_id`` and
    ``content_type_id`` (always 0). When no row carries a value, an empty
    frame with those columns is returned instead of raising.
    """
    import ast  # local import keeps the function self-contained

    columns = ['answered_correctly', 'user_id', 'content_type_id']

    has_answers = test_df['prior_group_answers_correct'].notnull()
    first_per_user = (
        test_df[has_answers]
        .groupby('user_id')['prior_group_answers_correct']
        .first()
    )

    # ast.literal_eval parses the list literal without executing code, unlike
    # the bare eval() it replaces (the text comes from outside the notebook).
    #
    # NOTE(review): the whole decoded list is attributed to the user whose row
    # carries it, and every entry is labelled content_type_id == 0. If the list
    # actually covers all rows of the previous group (several users, possibly
    # non-question rows), per-user statistics built from it will be off --
    # confirm against the competition's data description.
    frames = [
        pd.DataFrame({
            'answered_correctly': ast.literal_eval(raw),
            'user_id': user,
            'content_type_id': 0,
        })
        for user, raw in first_per_user.items()
    ]

    # pd.concat raises on an empty list; hand back an empty, well-formed frame.
    if not frames:
        return pd.DataFrame(columns=columns)

    return pd.concat(frames)

# Feature order the booster reports for itself. The extractors are loaded in
# directory-listing order, which need not match it.
trained_feature_order = model.feature_name()

for (test_df, sample_prediction_df) in iter_test:

    # Only question rows (content_type_id == 0) are scored.
    is_question = test_df['content_type_id'].eq(0)
    questions = test_df[is_question]
    prev_group = make_prev_group(test_df)

    # Stateful extractors absorb the previous group's answers before being
    # asked for features.
    for ex in extractors:
        if isinstance(ex, StatefulExtractor):
            ex.update(questions, prev_group)

    features = pd.concat(
        [ex.transform(questions) for ex in extractors],
        axis='columns'
    ).astype(float)

    # Put the columns in the booster's own order, but only when the names line
    # up exactly; otherwise keep the positional order unchanged.
    if (
        features.columns.is_unique
        and len(trained_feature_order) == len(features.columns)
        and set(trained_feature_order) == set(features.columns)
    ):
        features = features[trained_feature_order]

    # A batch without question rows has nothing to score; skip the model call
    # and submit an empty prediction frame.
    y_pred = model.predict(features) if len(features) > 0 else []

    prediction_df = pd.DataFrame(
        {
            'row_id': questions['row_id'],
            'answered_correctly': y_pred
        },
        index=questions.index
    )

    env.predict(prediction_df)