In [1]:
import os
import gc
import sys
import datetime
import pickle
import numpy as np
import pandas as pd
import polars as pl
import lightgbm as lgbm
import joblib

from catboost import CatBoostClassifier, Pool



In [2]:
# Directory that holds one serialized classifier per question.
model_path = '/kaggle/input/models-kaggle/'

# models[i] is the classifier for question i + 1 (questions are numbered 1..18).
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- only point model_path at trusted artifacts.
models = []
for question in range(1, 19):
    artifact = f'{model_path}best_model_ques_{question}.pkl'
    with open(artifact, 'rb') as handle:
        models.append(joblib.load(handle))
    
# Per-question decision thresholds: entry i belongs to question i + 1 and is
# looked up as threshold[q-1] by the inference loop, where a predicted
# probability above the threshold is submitted as 1.
# The trailing notes are the author's experiment log (model family, F1 and any
# alternative threshold that was tried) -- presumably validation scores.
threshold = [
    0.46875,# CatBoost with F1=0.6414
    0.375, # CatBoost with F1=0.5336, 0.5 # LightGBM with F1=0.55
    0.53125, # LightGBM with F1=0.568, 0.5390625 # CatBoost with F1=0.56
    0.4453125, # CatBoost with F1=0.657
    0.4921875, # CatBoost with F1=0.637, 0.5234375 # LightGBM with F1=0.6336 -*-
    0.453125, # CatBoost with F1=0.6362, 0.4375 # LightGBM with F1=0.6341 -*-
    0.4765625, # CatBoost with F1=0.6143
    0.4765625, # CatBoost with F1=0.5619 ~ LightGBM
    0.4453125, # CatBoost with F1=0.6239, 0.4921875 # LightGBM with F1=0.6177 (performs better)
    0.515625, # CatBoost with F1=0.6167
    0.4921875, # CatBoost with F1=0.5969 ~ LightGBM
    0.4609375, # CatBoost with F1=0.5767
    0.5546875, # CatBoost with F1=0.6213
    0.484375, # CatBoost with F1=0.629
    0.5, # CatBoost with F1=0.6351 ~ LightGBM
    0.5, # LightGBM with F1=0.5421
    0.4765625, # LightGBM with F1=0.5529 ~ CatBoost performs better, but (test) 0.484375 with F1=0.5458
    0.5, # LightGBM with F1=0.5728, CatBoost 0.4765625 with F1=0.567
]


In [3]:
def feature_engineer(df):
    """Add row-level features to a raw event frame.

    Parameters
    ----------
    df : pl.DataFrame
        Event rows. The columns read here are ``page``, ``level``,
        ``fullscreen``, ``hq``, ``music``, ``index``, ``hover_duration``,
        ``elapsed_time``, ``screen_coor_x/y``, ``room_coor_x/y``, ``fqid``,
        ``text_fqid``, ``text`` and ``session_id``.

    Returns
    -------
    pl.DataFrame
        The input with
        * the small integer columns down-cast,
        * ``is_hover`` / ``is_click`` / ``is_page`` flags (1 when the source
          column is non-null),
        * ``elapsed_time_diff`` and absolute coordinate deltas between
          consecutive rows, computed per (session_id, level),
        * nulls in ``fqid`` / ``text_fqid`` / ``page`` replaced by sentinels,
        * ``text_*`` punctuation indicator columns appended.
    """
    window = ["session_id", "level"]

    def _presence_flag(source, name):
        # 1 when `source` holds a value on that row, 0 when it is null.
        return (
            pl.when(pl.col(source).is_null())
            .then(0)
            .otherwise(1)
            .alias(name)
            .cast(pl.Int8)
        )

    def _abs_step(source, name):
        # Absolute change of `source` versus the previous row of the window.
        return (
            (pl.col(source) - pl.col(source).shift(1))
            .abs()
            .over(window)
            .alias(name)
        )

    columns = [
        pl.col("page").cast(pl.Int8),
        pl.col("level").cast(pl.Int8),
        pl.col("fullscreen").cast(pl.Int8),
        pl.col("hq").cast(pl.Int8),
        pl.col("music").cast(pl.Int8),
        pl.col("index").cast(pl.UInt16),
        _presence_flag("hover_duration", "is_hover"),
        _presence_flag("room_coor_x", "is_click"),
        _presence_flag("page", "is_page"),
        (
            (pl.col("elapsed_time") - pl.col("elapsed_time").shift(1))
            .fill_null(0)
            .clip(0, 1e9)
            .over(window)
            .alias("elapsed_time_diff")
        ),
        # The "corr"/"dff" spellings are kept: downstream code refers to
        # these exact column names.
        _abs_step("screen_coor_x", "screen_corr_x_dff"),
        _abs_step("screen_coor_y", "screen_corr_y_dff"),
        _abs_step("room_coor_x", "room_corr_x_diff"),
        _abs_step("room_coor_y", "room_corr_y_diff"),
    ]
    df = df.with_columns(columns)

    # Replace nulls with explicit sentinel categories. This runs after the
    # flags above, which need the original nulls.
    df = df.with_columns([
        pl.col('fqid').fill_null('fqid_None'),
        pl.col('text_fqid').fill_null('text_fqid_None'),
        pl.col('page').fill_null(-1),
    ])

    def extract_text_feature(df: pl.DataFrame) -> pl.DataFrame:
        """Append one UInt8 indicator per punctuation pattern found in ``text``."""
        # (column suffix, regex). Raw strings keep the regex escapes intact
        # without relying on Python's deprecated invalid-escape fallback.
        patterns = [
            ('question', r'\?'),
            ('exclamatory', '!'),
            ('comma', ','),
            ('unicode', r'\\u0'),   # a literal backslash followed by "u0"
            ('full_stop', r'\.'),
            ('ellipsis', r'\.\.\.'),
        ]
        indicators = [
            df['text'].str.contains(pattern).cast(pl.UInt8).rename(f"text_{label}")
            for label, pattern in patterns
        ]
        return pl.concat([df, pl.DataFrame(indicators)], how="horizontal")

    return extract_text_feature(df)

In [4]:
# Categorical event columns; model_data_1 builds one count feature per known
# value of each of them.
CATS = ['event_name', 'name', 'fqid', 'room_fqid', 'text_fqid', 'level', 'page']
# Numeric columns (raw values plus the deltas added by feature_engineer) that
# are summarised per session.
NUMS = ['room_coor_x', 'room_coor_y', 'screen_coor_x',
        'screen_coor_y', 'hover_duration', 'elapsed_time_diff',
        'screen_corr_x_dff', 'screen_corr_y_dff', 'room_corr_x_diff', 'room_corr_y_diff']
SNO = ['session_id', 'index']
# Levels that belong to each level group.
LEVEL = {
    '0-4'  : [0, 1, 2, 3, 4],
    '5-12' : [5, 6, 7, 8, 9, 10, 11, 12],
    '13-22': [13, 14, 15, 16, 17, 18, 19, 20, 21, 22],
}
# NOTE(review): model_data_1 only derives year/month/day/hour from session_id;
# 'minute' and 'second' are not produced there.
DATE = [ 'year', 'month', 'day', 'hour', 'minute', 'second']
# 0/1 flag columns created by feature_engineer.
DUO = ["is_click", "is_page", "is_hover"]
# Backslashes are written explicitly: "\?" and "\." are invalid escape
# sequences that Python only tolerates with a warning. The values are unchanged.
# NOTE(review): these names do not match the text_* columns feature_engineer
# creates (text_question, text_exclamatory, ...) and nothing visible here reads
# TEXT -- confirm whether it is still needed.
TEXT = ["text_\\?", "text_!", "text_,", "text_\\u0", "text_\\."]

In [5]:
class GRP_0_4:
    """Known categorical values for level group "0-4".

    model_data_1 instantiates this class and emits one
    ``<value>_<column>_counts`` feature per entry, in list order, so the
    order of every list determines the order of the generated columns.
    """
    def __init__(self):
        # Each attribute lists the values counted for the column of the same
        # (lower-cased) name; 'fqid_None', 'text_fqid_None' and -1 are the
        # null sentinels written by feature_engineer.
        self.EVENT_NAME = ['person_click', 'object_hover', 'map_click', 'notebook_click', 'checkpoint', 'notification_click', 'object_click', 'navigate_click', 'cutscene_click', 'observation_click', 'map_hover']
        self.NAME = ['open', 'next', 'prev', 'close', 'basic', 'undefined']
        self.FQID = ['block_tocollection', 'tostacks', 'directory', 'chap1_finale', 'report', 'notebook', 'tocloset', 'plaque', 'tunic.kohlcenter', 'photo', 'doorblock', 'tomap', 'block_0', 'wells', 'intro', 'tunic', 'tunic.capitol_0', 'tunic.historicalsociety', 'boss', 'block_tomap2', 'tocollection', 'outtolunch', 'block_tomap1', 'tobasement', 'toentry', 'groupconvo', 'gramps', 'teddy', 'tunic.hub.slip', 'fqid_None', 'janitor', 'plaque.face.date', 'togrampa', 'retirement_letter', 'chap1_finale_c', 'cs']
        self.ROOM_FQID = ['tunic.historicalsociety.stacks', 'tunic.historicalsociety.entry', 'tunic.capitol_0.hall', 'tunic.historicalsociety.basement', 'tunic.historicalsociety.closet', 'tunic.historicalsociety.collection', 'tunic.kohlcenter.halloffame']
        self.TEXT_FQID = ['tunic.historicalsociety.closet.intro', 'tunic.historicalsociety.closet.photo', 'tunic.historicalsociety.closet.teddy.intro_0_cs_5', 'tunic.historicalsociety.closet.retirement_letter.hub', 'tunic.historicalsociety.closet.doorblock', 'tunic.historicalsociety.entry.wells.talktogramps', 'tunic.capitol_0.hall.chap1_finale_c', 'tunic.kohlcenter.halloffame.block_0', 'tunic.historicalsociety.closet.notebook', 'text_fqid_None', 'tunic.historicalsociety.basement.janitor', 'tunic.historicalsociety.collection.tunic.slip', 'tunic.historicalsociety.collection.tunic', 'tunic.historicalsociety.collection.gramps.found', 'tunic.historicalsociety.entry.block_tocollection', 'tunic.kohlcenter.halloffame.plaque.face.date', 'tunic.historicalsociety.collection.gramps.lost', 'tunic.historicalsociety.collection.gramps.look_0', 'tunic.historicalsociety.stacks.outtolunch', 'tunic.historicalsociety.entry.block_tomap1', 'tunic.historicalsociety.closet.teddy.intro_0_cs_0', 'tunic.historicalsociety.entry.boss.talktogramps', 'tunic.kohlcenter.halloffame.togrampa', 'tunic.historicalsociety.entry.block_tomap2', 'tunic.historicalsociety.collection.cs', 'tunic.historicalsociety.entry.gramps.hub', 'tunic.historicalsociety.entry.groupconvo', 'tunic.historicalsociety.closet.gramps.intro_0_cs_0']
        self.LEVEL = [0, 1, 2, 3, 4]
        self.PAGE = [-1, 0, 1]

class GRP_5_12:
    """Known categorical values for level group "5-12".

    model_data_1 instantiates this class and emits one
    ``<value>_<column>_counts`` feature per entry, in list order, so the
    order of every list determines the order of the generated columns.
    """
    def __init__(self):
        # Each attribute lists the values counted for the column of the same
        # (lower-cased) name; 'fqid_None', 'text_fqid_None' and -1 are the
        # null sentinels written by feature_engineer.
        self.EVENT_NAME = ['person_click', 'object_hover', 'map_click', 'notebook_click', 'checkpoint', 'notification_click', 'object_click', 'navigate_click', 'cutscene_click', 'observation_click', 'map_hover']
        self.NAME = ['open', 'next', 'prev', 'close', 'basic', 'undefined']
        self.FQID = ['trigger_scarf', 'tocollection', 'businesscards', 'businesscards.card_1.next', 'tunic.capitol_1', 'tofrontdesk', 'block_badge', 'block_1', 'tunic.historicalsociety', 'block_0', 'businesscards.card_bingo.bingo', 'block', 'reader.paper1.next', 'journals', 'boss', 'tohallway', 'tunic.library', 'reader.paper1.prev', 'tomap', 'tunic.humanecology', 'plaque', 'tunic', 'tunic.capitol_0', 'chap2_finale', 'door_block_clean', 'block_magnify', 'logbook.page.bingo', 'journals.pic_0.next', 'tobasement', 'logbook', 'tocloset_dirty', 'door_block_talk', 'reader.paper2.next', 'tostacks', 'photo', 'archivist', 'block_badge_2', 'reader.paper2.bingo', 'janitor', 'journals.pic_1.next', 'tunic.hub.slip', 'chap2_finale_c', 'journals.hub.topics', 'tunic.kohlcenter', 'directory', 'gramps', 'journals.pic_2.bingo', 'worker', 'outtolunch', 'tomicrofiche', 'businesscards.card_bingo.next', 'trigger_coffee', 'toentry', 'what_happened', 'journals.pic_2.next', 'fqid_None', 'magnify', 'reader.paper0.prev', 'wellsbadge', 'tunic.drycleaner', 'reader', 'reader.paper2.prev', 'businesscards.card_0.next', 'reader.paper0.next']
        self.ROOM_FQID = ['tunic.library.microfiche', 'tunic.historicalsociety.entry', 'tunic.capitol_0.hall', 'tunic.historicalsociety.stacks', 'tunic.historicalsociety.collection', 'tunic.historicalsociety.frontdesk', 'tunic.historicalsociety.basement', 'tunic.drycleaner.frontdesk', 'tunic.kohlcenter.halloffame', 'tunic.humanecology.frontdesk', 'tunic.library.frontdesk', 'tunic.capitol_1.hall', 'tunic.historicalsociety.closet_dirty']
        self.TEXT_FQID = ['tunic.historicalsociety.closet_dirty.trigger_scarf', 'tunic.historicalsociety.closet_dirty.photo', 'tunic.library.frontdesk.block_badge_2', 'tunic.historicalsociety.closet_dirty.what_happened', 'tunic.library.frontdesk.worker.wells_recap', 'tunic.historicalsociety.frontdesk.magnify', 'tunic.historicalsociety.frontdesk.archivist.foundtheodora', 'tunic.library.frontdesk.wellsbadge.hub', 'tunic.humanecology.frontdesk.worker.intro', 'tunic.drycleaner.frontdesk.worker.done2', 'tunic.historicalsociety.frontdesk.archivist.newspaper', 'tunic.library.frontdesk.worker.droppedbadge', 'tunic.humanecology.frontdesk.block_0', 'tunic.historicalsociety.closet_dirty.trigger_coffee', 'tunic.historicalsociety.closet_dirty.gramps.news', 'tunic.library.frontdesk.worker.hello_short', 'tunic.historicalsociety.stacks.outtolunch', 'tunic.historicalsociety.frontdesk.archivist.need_glass_1', 'tunic.capitol_1.hall.chap2_finale_c', 'tunic.historicalsociety.frontdesk.archivist.have_glass', 'tunic.historicalsociety.frontdesk.block_magnify', 'tunic.drycleaner.frontdesk.worker.hub', 'tunic.historicalsociety.closet_dirty.door_block_talk', 'tunic.historicalsociety.frontdesk.archivist.hello', 'tunic.library.frontdesk.worker.hello', 'tunic.historicalsociety.closet_dirty.gramps.helpclean', 'tunic.library.frontdesk.worker.wells', 'tunic.historicalsociety.stacks.block', 'text_fqid_None', 'tunic.historicalsociety.basement.janitor', 'tunic.drycleaner.frontdesk.block_0', 'tunic.historicalsociety.closet_dirty.gramps.nothing', 'tunic.capitol_0.hall.boss.talktogramps', 'tunic.historicalsociety.stacks.journals.pic_2.bingo', 'tunic.historicalsociety.closet_dirty.gramps.archivist', 'tunic.historicalsociety.frontdesk.archivist.have_glass_recap', 'tunic.drycleaner.frontdesk.worker.done', 'tunic.drycleaner.frontdesk.logbook.page.bingo', 'tunic.library.microfiche.block_0', 'tunic.capitol_1.hall.boss.haveyougotit', 'tunic.drycleaner.frontdesk.block_1', 'tunic.library.frontdesk.block_badge', 
'tunic.humanecology.frontdesk.worker.badger', 'tunic.historicalsociety.closet_dirty.door_block_clean', 'tunic.humanecology.frontdesk.businesscards.card_bingo.bingo', 'tunic.drycleaner.frontdesk.worker.takealook', 'tunic.library.frontdesk.worker.preflag', 'tunic.humanecology.frontdesk.block_1', 'tunic.historicalsociety.frontdesk.archivist.newspaper_recap', 'tunic.historicalsociety.frontdesk.archivist.need_glass_0', 'tunic.library.microfiche.reader.paper2.bingo']
        self.LEVEL = [5, 6, 7, 8, 9, 10, 11, 12]
        self.PAGE = [-1, 0, 1, 2, 3]

class GRP_13_22:
    """Known categorical values for level group "13-22".

    model_data_1 instantiates this class and emits one
    ``<value>_<column>_counts`` feature per entry, in list order, so the
    order of every list determines the order of the generated columns.
    """
    def __init__(self):
        # Each attribute lists the values counted for the column of the same
        # (lower-cased) name; 'fqid_None', 'text_fqid_None' and -1 are the
        # null sentinels written by feature_engineer.
        self.EVENT_NAME = ['person_click', 'object_hover', 'map_click', 'notebook_click', 'checkpoint', 'notification_click', 'object_click', 'navigate_click', 'cutscene_click', 'observation_click', 'map_hover']
        self.NAME = ['open', 'next', 'prev', 'close', 'basic', 'undefined']
        self.FQID = ['tunic.wildlife', 'wells', 'businesscards.card_1.next', 'tunic', 'reader_flag.paper0.next', 'journals_flag.pic_0.next', 'journals_flag.pic_2.next', 'tunic.historicalsociety', 'logbook', 'tunic.kohlcenter', 'journals_flag.pic_2.bingo', 'gramps', 'reader_flag.paper1.prev', 'tomap', 'journals_flag.pic_1.next', 'journals_flag.hub.topics_old', 'journals.pic_2.next', 'businesscards.card_0.next', 'boss', 'tocollection', 'fox', 'expert', 'block_nelson', 'reader', 'need_glasses', 'journals_flag.pic_0_old.next', 'tunic.humanecology', 'photo', 'tracks.hub.deer', 'journals_flag.pic_0.bingo', 'reader_flag.paper2.prev', 'tunic.hub.slip', 'crane_ranger', 'journals_flag.pic_1_old.next', 'directory.closeup.archivist', 'journals_flag.pic_2_old.next', 'teddy', 'tofrontdesk', 'reader_flag.paper0.prev', 'businesscards', 'reader.paper1.prev', 'tostacks', 'colorbook', 'businesscards.card_bingo.next', 'reader.paper2.prev', 'reader_flag.paper1.next', 'tunic.flaghouse', 'journals.pic_0.next', 'journals.pic_2.bingo', 'plaque', 'reader_flag.paper2.bingo', 'tracks', 'key', 'tobasement', 'tunic.capitol_1', 'tunic.drycleaner', 'chap4_finale_c', 'ch3start', 'groupconvo_flag', 'journals_flag.hub.topics', 'tunic.capitol_2', 'outtolunch', 'worker', 'toentry', 'confrontation', 'glasses', 'unlockdoor', 'reader_flag.paper2.next', 'journals', 'reader.paper1.next', 'archivist_glasses', 'journals.hub.topics', 'tocloset_dirty', 'tocage', 'reader_flag', 'directory', 'reader.paper0.next', 'reader.paper2.next', 'tocollectionflag', 'seescratches', 'fqid_None', 'remove_cup', 'journals_flag.pic_1.bingo', 'reader.paper0.prev', 'tunic.library', 'tohallway', 'journals_flag', 'tomicrofiche', 'flag_girl', 'savedteddy', 'journals.pic_1.next', 'lockeddoor', 'coffee']
        self.ROOM_FQID = ['tunic.capitol_1.hall', 'tunic.capitol_2.hall', 'tunic.historicalsociety.stacks', 'tunic.humanecology.frontdesk', 'tunic.historicalsociety.cage', 'tunic.historicalsociety.basement', 'tunic.historicalsociety.entry', 'tunic.library.microfiche', 'tunic.historicalsociety.frontdesk', 'tunic.historicalsociety.collection_flag', 'tunic.drycleaner.frontdesk', 'tunic.library.frontdesk', 'tunic.historicalsociety.collection', 'tunic.wildlife.center', 'tunic.flaghouse.entry', 'tunic.historicalsociety.closet_dirty', 'tunic.kohlcenter.halloffame']
        self.TEXT_FQID = ['tunic.library.frontdesk.worker.preflag', 'tunic.drycleaner.frontdesk.worker.done2', 'tunic.wildlife.center.wells.animals', 'tunic.historicalsociety.frontdesk.archivist_glasses.confrontation', 'tunic.historicalsociety.closet_dirty.photo', 'tunic.historicalsociety.cage.lockeddoor', 'tunic.library.frontdesk.worker.flag_recap', 'tunic.historicalsociety.cage.glasses.afterteddy', 'tunic.historicalsociety.stacks.journals_flag.pic_2.bingo', 'tunic.historicalsociety.stacks.outtolunch', 'tunic.historicalsociety.entry.boss.flag', 'tunic.historicalsociety.basement.seescratches', 'tunic.wildlife.center.wells.animals2', 'tunic.library.frontdesk.worker.flag', 'tunic.historicalsociety.basement.gramps.whatdo', 'tunic.flaghouse.entry.flag_girl.symbol', 'tunic.historicalsociety.stacks.journals.pic_2.bingo', 'tunic.wildlife.center.crane_ranger.crane', 'tunic.historicalsociety.collection_flag.gramps.recap', 'tunic.wildlife.center.fox.concern', 'tunic.wildlife.center.expert.removed_cup', 'tunic.wildlife.center.wells.nodeer_recap', 'tunic.historicalsociety.stacks.journals_flag.pic_0.bingo', 'tunic.historicalsociety.basement.savedteddy', 'tunic.capitol_2.hall.chap4_finale_c', 'tunic.historicalsociety.stacks.journals_flag.pic_1.bingo', 'tunic.historicalsociety.basement.ch3start', 'tunic.historicalsociety.cage.glasses.beforeteddy', 'tunic.historicalsociety.cage.confrontation', 'tunic.capitol_1.hall.boss.writeitup', 'tunic.historicalsociety.frontdesk.key', 'tunic.historicalsociety.cage.teddy.trapped', 'tunic.flaghouse.entry.flag_girl.hello_recap', 'tunic.library.frontdesk.worker.nelson_recap', 'tunic.historicalsociety.frontdesk.archivist_glasses.confrontation_recap', 'tunic.wildlife.center.wells.nodeer', 'tunic.historicalsociety.cage.unlockdoor', 'tunic.historicalsociety.cage.need_glasses', 'tunic.library.microfiche.reader_flag.paper2.bingo', 'tunic.wildlife.center.tracks.hub.deer', 'tunic.historicalsociety.basement.gramps.seeyalater', 
'tunic.historicalsociety.entry.wells.flag_recap', 'tunic.library.frontdesk.worker.nelson', 'tunic.flaghouse.entry.flag_girl.symbol_recap', 'tunic.library.frontdesk.block_nelson', 'tunic.historicalsociety.entry.groupconvo_flag', 'tunic.wildlife.center.expert.recap', 'tunic.flaghouse.entry.flag_girl.hello', 'tunic.historicalsociety.entry.wells.flag', 'text_fqid_None', 'tunic.wildlife.center.remove_cup', 'tunic.humanecology.frontdesk.worker.badger', 'tunic.flaghouse.entry.colorbook', 'tunic.wildlife.center.coffee', 'tunic.capitol_2.hall.boss.haveyougotit', 'tunic.historicalsociety.entry.boss.flag_recap', 'tunic.historicalsociety.entry.directory.closeup.archivist', 'tunic.historicalsociety.collection_flag.gramps.flag']
        self.LEVEL = [13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
        self.PAGE = [-1, 0, 1, 2, 3, 4, 5, 6]

In [6]:
def model_data_1(x:pl.DataFrame, group:str):
    """Aggregate event rows into one feature row per session.

    Parameters
    ----------
    x : pl.DataFrame
        Event rows that already carry the columns added by
        ``feature_engineer`` (flags in ``DUO``, deltas in ``NUMS``, filled
        categorical columns).
    group : str
        Level group of the rows: ``"0-4"``, ``"5-12"`` or ``"13-22"``. It
        selects which vocabulary class (``GRP_*``) drives the count features.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``session_id``, sorted by it. Column order is:
        ``session_number``, the per-value counts for every column in ``CATS``,
        ``is_fullscreen`` / ``is_hq`` / ``is_music``, ``click_counts`` /
        ``page_counts`` / ``hover_counts``, std/mean/sum per column in
        ``NUMS``, max/min for the non-delta columns, then ``year``, ``month``,
        ``day`` and ``hour`` parsed from ``session_id``.

    Raises
    ------
    ValueError
        If ``group`` is not one of the three known level groups.
    """
    vocabularies = {"0-4": GRP_0_4, "5-12": GRP_5_12, "13-22": GRP_13_22}
    if group not in vocabularies:
        raise ValueError(
            f"unknown level group {group!r}; expected one of {list(vocabularies)}"
        )
    grp = vocabularies[group]()

    aggs = [pl.col("index").count().alias("session_number")]

    # One count column per known value of every categorical column. The
    # vocabulary attribute is the upper-cased column name
    # (e.g. 'room_fqid' -> grp.ROOM_FQID).
    for cat in CATS:
        aggs.extend(
            pl.col(cat)
              .filter(pl.col(cat) == c)
              .count()
              .alias(f"{c}_{cat}_counts")
            for c in getattr(grp, cat.upper())
        )

    # Session-level settings: 1 if the setting was on for any event.
    for col in ('fullscreen', 'hq', 'music'):
        aggs.append(pl.col(col).max().alias(f"is_{col}"))

    # Number of rows on which each 0/1 flag is set, e.g. 'is_click' ->
    # 'click_counts'. The counts are built directly instead of from the values
    # present in `x`, so the columns exist (as 0) even when a batch has no
    # such event; the feature layout therefore never depends on the data.
    for bi in DUO:
        aggs.append(
            pl.col(bi)
            .filter(pl.col(bi) == 1)
            .count()
            .alias(f"{bi.replace('is_', '', 1)}_counts")
        )

    for num in NUMS:
        aggs.extend([
            pl.col(num).std().alias(f"{num}_std"),
            pl.col(num).mean().alias(f"{num}_mean"),
            pl.col(num).sum().alias(f"{num}_sum"),
        ])

    # Extremes are only computed for the raw measurements, not for the deltas.
    delta_columns = {'elapsed_time_diff', 'screen_corr_x_dff', 'screen_corr_y_dff',
                     'room_corr_x_diff', 'room_corr_y_diff'}
    for num in NUMS:
        if num in delta_columns:
            continue
        aggs.extend([
            pl.col(num).max().alias(f"{num}_max"),
            pl.col(num).min().alias(f"{num}_min"),
        ])

    x = x.groupby("session_id").agg(aggs).sort("session_id")

    # session_id is sliced by digit position into date parts. Minute and
    # second are intentionally not extracted.
    # NOTE(review): the month digits get +1 -- presumably they are zero-based
    # in the id; confirm against the data description.
    x = x.with_columns(
        pl.col(['is_hq']).cast(pl.UInt8),
        pl.col(['is_fullscreen']).cast(pl.UInt8),
        pl.col(['is_music']).cast(pl.UInt8),
        pl.col('session_id').apply(lambda sid: int(str(sid)[:2])).cast(pl.UInt8).alias('year'),
        pl.col('session_id').apply(lambda sid: int(str(sid)[2:4])+1).cast(pl.UInt8).alias('month'),
        pl.col('session_id').apply(lambda sid: int(str(sid)[4:6])).cast(pl.UInt8).alias('day'),
        pl.col('session_id').apply(lambda sid: int(str(sid)[6:8])).cast(pl.UInt8).alias('hour'),
    )

    return x.to_pandas().set_index("session_id")

## **Inference**

In [7]:
# Competition time-series API: make_env() creates the environment and
# iter_test() returns the iterator the inference loop consumes.
# NOTE(review): presumably this can only be created once per kernel session --
# restart the kernel before re-running this cell.
import jo_wilder_310
env = jo_wilder_310.make_env()
iter_test = env.iter_test()

In [8]:
# Half-open question ranges [first, last) that are answered for each level group.
limits = {'0-4':(1,4), '5-12':(4,14), '13-22':(14,19)}
t1 = datetime.datetime.now()
for (test, sample_submission) in iter_test:
    # Each batch is read as a single level group: the group is taken from the
    # first row and only feature row 0 is scored below.
    test = test.sort_values(by='index')
    grp = test.level_group.values[0]
    a, b = limits[grp]

    features = model_data_1(feature_engineer(pl.from_pandas(test)), grp)
    for q in range(a, b):
        # Probability of class 1 for the single feature row of this batch.
        pred = models[q-1].predict_proba(features)[0, 1]
        # Match on the id suffix: a plain substring test for 'q1' would also
        # select q10..q18 if they ever appeared in the same batch.
        # Assumes ids look like '<session>_q<N>' -- TODO confirm.
        mask = sample_submission.session_id.str.endswith(f'q{q}')
        sample_submission.loc[mask,'correct'] = int(pred > threshold[q-1])
    env.predict(sample_submission)

t2 = datetime.datetime.now()
print('Run in ', t2-t1)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
Run in  0:00:01.238377
