In [1]:
import pandas as pd, numpy as np, gc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold, GroupKFold
from xgboost import XGBClassifier
from sklearn.metrics import f1_score
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_columns', 50)
from sklearn.preprocessing import MinMaxScaler
import polars as pl

import gc
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Label table: each session_id string is split on '_' into a leading integer
# (stored as `session`) and a trailing token whose first character is dropped
# before parsing the rest as the question number (stored as `q`).
targets = pd.read_parquet('/kaggle/input/how-to-get-32gb-ram/train_labels.parquet')
targets['session'] = targets['session_id'].str.split('_').str[0].astype('int64')
targets['q'] = targets['session_id'].str.split('_').str[-1].str[1:].astype('int64')

In [3]:
def reduce_memory_usage_pl(df):
    """Downcast numeric columns of a polars DataFrame to narrower dtypes.

    Signed integer columns (Int8/Int16/Int32/Int64) are cast to the smallest
    signed integer type whose range contains the column's min and max
    (bounds inclusive). Float columns (Float32/Float64) are cast to Float32
    when their min and max lie strictly inside the float32 range. Every other
    column is left untouched, and numeric columns whose min/max is ``None``
    (empty or entirely null) are skipped instead of raising.

    The estimated size in MB is printed before and after the conversion.

    Parameters
    ----------
    df : pl.DataFrame
        Frame to shrink. It is not modified; a new frame is returned.

    Returns
    -------
    pl.DataFrame
        The frame with downcast columns.
    """
    start_mem = df.estimated_size("mb")
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    int_types = (pl.Int8, pl.Int16, pl.Int32, pl.Int64)
    float_types = (pl.Float32, pl.Float64)
    # Narrowest first. Int64 is omitted: casting to it can never save memory.
    int_ladder = ((pl.Int8, np.int8), (pl.Int16, np.int16), (pl.Int32, np.int32))
    f32 = np.finfo(np.float32)

    downcasts = []
    for col in df.columns:
        col_type = df[col].dtype
        is_int = col_type in int_types
        if not is_int and col_type not in float_types:
            # Only signed ints and floats are handled; skip before paying for
            # a min/max scan on columns that will never be cast.
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if c_min is None or c_max is None:
            # No non-null values to measure, so there is nothing to decide on.
            continue

        if is_int:
            for pl_type, np_type in int_ladder:
                bounds = np.iinfo(np_type)
                # Inclusive comparison: the extreme values themselves fit.
                if bounds.min <= c_min and c_max <= bounds.max:
                    downcasts.append(pl.col(col).cast(pl_type))
                    break
        elif c_min > f32.min and c_max < f32.max:
            downcasts.append(pl.col(col).cast(pl.Float32))

    if downcasts:
        df = df.with_columns(downcasts)

    mem_usg = df.estimated_size("mb")
    print("Memory usage became: ",mem_usg," MB")
    gc.collect()
    return df

In [4]:
columns = [
    # Experiment notes carried over from the original cell:
    #   feature interaction - did not work
    #   rolling             - did not work
    #   north / south       - did not work

    # Time since the previous event inside the same (session_id, level)
    # window; the first event of a window gets 0 and the value is clipped
    # to the range [0, 1e9].
    (pl.col("elapsed_time") - pl.col("elapsed_time").shift(1))
    .fill_null(0)
    .clip(0, 1e9)
    .over(["session_id", "level"])
    .alias("elapsed_time_diff"),

    # Absolute step-to-step change of each coordinate inside the same window.
    # These expressions carry no alias, so each result keeps the name of its
    # source column and replaces the raw coordinate.
    *[
        (pl.col(coord) - pl.col(coord).shift(1))
        .abs()
        .over(["session_id", "level"])
        for coord in ("screen_coor_x", "screen_coor_y", "room_coor_x", "room_coor_y")
    ],

    # Explicit placeholder strings instead of nulls for the two id columns.
    pl.col("fqid").fill_null("fqid_None"),
    pl.col("text_fqid").fill_null("text_fqid_None"),
]

In [5]:
print('Read')
print('train data ..')
# Load the raw event log, discard the fullscreen / hq / music columns, then
# attach the row-level expressions collected in `columns`.
df = pl.read_parquet("/kaggle/input/how-to-get-32gb-ram/train.parquet")
df = df.drop(["fullscreen", "hq", "music"])
df = df.with_columns(columns)

Read
train data ..


In [6]:
# # reduce memory usage
# print('Reduced Memory')
# df = reduce_memory_usage_pl(df)

In [7]:
# df.filter((pl.col('level_group')=="0-4"))

In [8]:
# df.filter((pl.col('level_group')=="0-4") & ((pl.col("event_name") == 'navigate_click')))['fqid']#.unique().to_numpy()

In [9]:
# df.filter((pl.col('level_group')=="5-12") & ((pl.col("event_name") == 'navigate_click')))['room_fqid'].unique().to_numpy()

In [10]:
# room_fqid values used for the navigate_click span features that
# feature_engineer builds for level group '0-4'.
room_fqid_0_4 = [
    "tunic.historicalsociety.closet",
    "tunic.kohlcenter.halloffame",
    "tunic.historicalsociety.entry",
    "tunic.historicalsociety.stacks",
    "tunic.historicalsociety.collection",
    "tunic.capitol_0.hall",
    "tunic.historicalsociety.basement",
]

In [11]:
# df.filter((pl.col('level_group')=="13-22") & ((pl.col("room_fqid") == 'tunic.historicalsociety.cage')))['fqid'].unique().to_numpy()

In [12]:
# target = pd.DataFrame()
# for i in range(1,19):
#     target[f'Quetions_{i}'] = targets.loc[targets.q==i].set_index('session')['correct']#.loc[train_users]
# target['session_id'] =target.index
# target.reset_index(drop=True,inplace=True)
# target= pl.from_pandas(target)

In [13]:
# Categorical columns: feature_engineer counts their distinct values per session.
CATS = ["event_name", "name", "fqid", "room_fqid", "text_fqid"]

# Numeric columns: feature_engineer takes mean / std / sum of each per session.
# The raw coordinates ('room_coor_x', 'room_coor_y', 'screen_coor_x',
# 'screen_coor_y') are currently excluded.
NUMS = [
    f"elapsed_time_bin_{width}"
    for width in (100, 200, 500, 1000, 1500, 2000, 5000, 10000)
]
NUMS += ["hover_duration", "elapsed_time_diff"]

# fqid values that receive a per-row indicator column (`<value>_fqid`) and
# per-session count / elapsed-time aggregates in feature_engineer.
# 'fqid_None' is the placeholder written in place of null fqid values when
# the data is loaded.
fqid_lists = ['fqid_None','worker', 'archivist', 'gramps', 'wells', 'toentry', 'confrontation', 'crane_ranger', 'groupconvo', 'flag_girl', 'tomap', 'tostacks', 'tobasement', 'archivist_glasses', 'boss', 'journals', 'seescratches', 'groupconvo_flag', 'cs', 'teddy', 'expert', 'businesscards', 'ch3start', 'tunic.historicalsociety', 'tofrontdesk', 'savedteddy', 'plaque', 'glasses', 'tunic.drycleaner', 'reader_flag', 'tunic.library', 'tracks', 'tunic.capitol_2', 'trigger_scarf', 'reader', 'directory', 'tunic.capitol_1', 'journals.pic_0.next', 'unlockdoor', 'tunic', 'what_happened', 'tunic.kohlcenter', 'tunic.humanecology', 'colorbook', 'logbook', 'businesscards.card_0.next', 'journals.hub.topics', 'logbook.page.bingo', 'journals.pic_1.next', 'journals_flag', 'reader.paper0.next', 'tracks.hub.deer', 'reader_flag.paper0.next', 'trigger_coffee', 'wellsbadge', 'journals.pic_2.next', 'tomicrofiche', 'journals_flag.pic_0.bingo', 'plaque.face.date', 'notebook', 'tocloset_dirty', 'businesscards.card_bingo.bingo', 'businesscards.card_1.next', 'tunic.wildlife', 'tunic.hub.slip', 'tocage', 'journals.pic_2.bingo', 'tocollectionflag', 'tocollection', 'chap4_finale_c', 'chap2_finale_c', 'lockeddoor', 'journals_flag.hub.topics', 'tunic.capitol_0', 'reader_flag.paper2.bingo', 'photo', 'tunic.flaghouse', 'reader.paper1.next', 'directory.closeup.archivist', 'intro', 'businesscards.card_bingo.next', 'reader.paper2.bingo', 'retirement_letter', 'remove_cup', 'journals_flag.pic_0.next', 'magnify', 'coffee', 'key', 'togrampa', 'reader_flag.paper1.next', 'janitor', 'tohallway', 'chap1_finale', 'report', 'outtolunch', 'journals_flag.hub.topics_old', 'journals_flag.pic_1.next', 'reader.paper2.next', 'chap1_finale_c', 'reader_flag.paper2.next', 'door_block_talk', 'journals_flag.pic_1.bingo', 'journals_flag.pic_2.next', 'journals_flag.pic_2.bingo', 'block_magnify', 'reader.paper0.prev', 'block', 'reader_flag.paper0.prev', 'block_0', 'door_block_clean', 'reader.paper2.prev', 'reader.paper1.prev', 
'doorblock', 'tocloset', 'reader_flag.paper2.prev', 'reader_flag.paper1.prev', 'block_tomap2', 'journals_flag.pic_0_old.next', 'journals_flag.pic_1_old.next', 'block_tocollection', 'block_nelson', 'journals_flag.pic_2_old.next', 'block_tomap1', 'block_badge', 'need_glasses', 'block_badge_2', 'fox', 'block_1']
# Patterns that feature_engineer looks for in the `text` column (through
# `str.contains`) to build the word_* count and elapsed-time features.
DIALOGS = (
    "that this it you find found Found notebook Wells wells help need "
    "Oh Ooh Jo flag can and is the to"
).split()

# Values of the `name` column that get an indicator column and aggregates.
name_feature = "basic undefined close open prev next".split()

# Values of the `event_name` column that get an indicator column and aggregates.
event_name_feature = [
    "cutscene_click",
    "person_click",
    "navigate_click",
    "observation_click",
    "notification_click",
    "object_click",
    "object_hover",
    "map_hover",
    "map_click",
    "checkpoint",
    "notebook_click",
]
# text_fqid values for which feature_engineer computes per-session counts and
# elapsed_time_diff statistics (std / mean / sum / median / max).
text_lists = ['tunic.historicalsociety.cage.confrontation', 'tunic.wildlife.center.crane_ranger.crane', 'tunic.historicalsociety.frontdesk.archivist.newspaper', 'tunic.historicalsociety.entry.groupconvo', 'tunic.wildlife.center.wells.nodeer', 'tunic.historicalsociety.frontdesk.archivist.have_glass', 'tunic.drycleaner.frontdesk.worker.hub', 'tunic.historicalsociety.closet_dirty.gramps.news', 'tunic.humanecology.frontdesk.worker.intro', 'tunic.historicalsociety.frontdesk.archivist_glasses.confrontation', 'tunic.historicalsociety.basement.seescratches', 'tunic.historicalsociety.collection.cs', 'tunic.flaghouse.entry.flag_girl.hello', 'tunic.historicalsociety.collection.gramps.found', 'tunic.historicalsociety.basement.ch3start', 'tunic.historicalsociety.entry.groupconvo_flag', 'tunic.library.frontdesk.worker.hello', 'tunic.library.frontdesk.worker.wells', 'tunic.historicalsociety.collection_flag.gramps.flag', 'tunic.historicalsociety.basement.savedteddy', 'tunic.library.frontdesk.worker.nelson', 'tunic.wildlife.center.expert.removed_cup', 'tunic.library.frontdesk.worker.flag', 'tunic.historicalsociety.frontdesk.archivist.hello', 'tunic.historicalsociety.closet.gramps.intro_0_cs_0', 'tunic.historicalsociety.entry.boss.flag', 'tunic.flaghouse.entry.flag_girl.symbol', 'tunic.historicalsociety.closet_dirty.trigger_scarf', 'tunic.drycleaner.frontdesk.worker.done', 'tunic.historicalsociety.closet_dirty.what_happened', 'tunic.wildlife.center.wells.animals', 'tunic.historicalsociety.closet.teddy.intro_0_cs_0', 'tunic.historicalsociety.cage.glasses.afterteddy', 'tunic.historicalsociety.cage.teddy.trapped', 'tunic.historicalsociety.cage.unlockdoor', 'tunic.historicalsociety.stacks.journals.pic_2.bingo', 'tunic.historicalsociety.entry.wells.flag', 'tunic.humanecology.frontdesk.worker.badger', 'tunic.historicalsociety.stacks.journals_flag.pic_0.bingo', 'tunic.historicalsociety.closet.intro', 'tunic.historicalsociety.closet.retirement_letter.hub', 
'tunic.historicalsociety.entry.directory.closeup.archivist', 'tunic.historicalsociety.collection.tunic.slip', 'tunic.kohlcenter.halloffame.plaque.face.date', 'tunic.historicalsociety.closet_dirty.trigger_coffee', 'tunic.drycleaner.frontdesk.logbook.page.bingo', 'tunic.library.microfiche.reader.paper2.bingo', 'tunic.kohlcenter.halloffame.togrampa', 'tunic.capitol_2.hall.boss.haveyougotit', 'tunic.wildlife.center.wells.nodeer_recap', 'tunic.historicalsociety.cage.glasses.beforeteddy', 'tunic.historicalsociety.closet_dirty.gramps.helpclean', 'tunic.wildlife.center.expert.recap', 'tunic.historicalsociety.frontdesk.archivist.have_glass_recap', 'tunic.historicalsociety.stacks.journals_flag.pic_1.bingo', 'tunic.historicalsociety.cage.lockeddoor', 'tunic.historicalsociety.stacks.journals_flag.pic_2.bingo', 'tunic.historicalsociety.collection.gramps.lost', 'tunic.historicalsociety.closet.notebook', 'tunic.historicalsociety.frontdesk.magnify', 'tunic.humanecology.frontdesk.businesscards.card_bingo.bingo', 'tunic.wildlife.center.remove_cup', 'tunic.library.frontdesk.wellsbadge.hub', 'tunic.wildlife.center.tracks.hub.deer', 'tunic.historicalsociety.frontdesk.key', 'tunic.library.microfiche.reader_flag.paper2.bingo', 'tunic.flaghouse.entry.colorbook', 'tunic.wildlife.center.coffee', 'tunic.capitol_1.hall.boss.haveyougotit', 'tunic.historicalsociety.basement.janitor', 'tunic.historicalsociety.collection_flag.gramps.recap', 'tunic.wildlife.center.wells.animals2', 'tunic.flaghouse.entry.flag_girl.symbol_recap', 'tunic.historicalsociety.closet_dirty.photo', 'tunic.historicalsociety.stacks.outtolunch', 'tunic.library.frontdesk.worker.wells_recap', 'tunic.historicalsociety.frontdesk.archivist_glasses.confrontation_recap', 'tunic.capitol_0.hall.boss.talktogramps', 'tunic.historicalsociety.closet.photo', 'tunic.historicalsociety.collection.tunic', 'tunic.historicalsociety.closet.teddy.intro_0_cs_5', 'tunic.historicalsociety.closet_dirty.gramps.archivist', 
'tunic.historicalsociety.closet_dirty.door_block_talk', 'tunic.historicalsociety.entry.boss.flag_recap', 'tunic.historicalsociety.frontdesk.archivist.need_glass_0', 'tunic.historicalsociety.entry.wells.talktogramps', 'tunic.historicalsociety.frontdesk.block_magnify', 'tunic.historicalsociety.frontdesk.archivist.foundtheodora', 'tunic.historicalsociety.closet_dirty.gramps.nothing', 'tunic.historicalsociety.closet_dirty.door_block_clean', 'tunic.capitol_1.hall.boss.writeitup', 'tunic.library.frontdesk.worker.nelson_recap', 'tunic.library.frontdesk.worker.hello_short', 'tunic.historicalsociety.stacks.block', 'tunic.historicalsociety.frontdesk.archivist.need_glass_1', 'tunic.historicalsociety.entry.boss.talktogramps', 'tunic.historicalsociety.frontdesk.archivist.newspaper_recap', 'tunic.historicalsociety.entry.wells.flag_recap', 'tunic.drycleaner.frontdesk.worker.done2', 'tunic.library.frontdesk.worker.flag_recap', 'tunic.humanecology.frontdesk.block_0', 'tunic.library.frontdesk.worker.preflag', 'tunic.historicalsociety.basement.gramps.seeyalater', 'tunic.flaghouse.entry.flag_girl.hello_recap', 'tunic.historicalsociety.closet.doorblock', 'tunic.drycleaner.frontdesk.worker.takealook', 'tunic.historicalsociety.basement.gramps.whatdo', 'tunic.library.frontdesk.worker.droppedbadge', 'tunic.historicalsociety.entry.block_tomap2', 'tunic.library.frontdesk.block_nelson', 'tunic.library.microfiche.block_0', 'tunic.historicalsociety.entry.block_tocollection', 'tunic.historicalsociety.entry.block_tomap1', 'tunic.historicalsociety.collection.gramps.look_0', 'tunic.library.frontdesk.block_badge', 'tunic.historicalsociety.cage.need_glasses', 'tunic.library.frontdesk.block_badge_2', 'tunic.kohlcenter.halloffame.block_0', 'tunic.capitol_0.hall.chap1_finale_c', 'tunic.capitol_1.hall.chap2_finale_c', 'tunic.capitol_2.hall.chap4_finale_c', 'tunic.wildlife.center.fox.concern', 'tunic.drycleaner.frontdesk.block_0', 'tunic.historicalsociety.entry.gramps.hub', 
'tunic.humanecology.frontdesk.block_1', 'tunic.drycleaner.frontdesk.block_1']
# room_fqid values that get an indicator column (`<value>_room`) and
# per-session aggregates.
room_lists = [
    "tunic.historicalsociety.entry",
    "tunic.wildlife.center",
    "tunic.historicalsociety.cage",
    "tunic.library.frontdesk",
    "tunic.historicalsociety.frontdesk",
    "tunic.historicalsociety.stacks",
    "tunic.historicalsociety.closet_dirty",
    "tunic.humanecology.frontdesk",
    "tunic.historicalsociety.basement",
    "tunic.kohlcenter.halloffame",
    "tunic.library.microfiche",
    "tunic.drycleaner.frontdesk",
    "tunic.historicalsociety.collection",
    "tunic.historicalsociety.closet",
    "tunic.flaghouse.entry",
    "tunic.historicalsociety.collection_flag",
    "tunic.capitol_1.hall",
    "tunic.capitol_0.hall",
    "tunic.capitol_2.hall",
]

# Levels with their own per-level aggregates: 1 through 22 inclusive.
# NOTE(review): level 0 is not listed although a "0-4" level group exists -
# confirm this is intended.
LEVELS = list(range(1, 23))
level_groups = ["0-4", "5-12", "13-22"]

In [14]:
 more_c = [
    # elapsed_time_diff snapped to the nearest multiple of each bin width.
    # The rounding is purely row-wise, so it is evaluated directly instead of
    # inside a (session_id, level) window: the values are the same and the
    # per-group partitioning cost is avoided.
    *[
        ((pl.col('elapsed_time_diff') / width).round() * width)
        .alias(f'elapsed_time_bin_{width}')
        for width in (100, 200, 500, 1000, 1500, 2000, 5000, 10000)
    ],

    # Experiment notes carried over from the original cell:
    #   rolling features - did not work
    #   feature interaction between elapsed_time and index - did not work

    # One Int8 indicator column per listed value (1 where the row's column
    # equals that value), suffixed by the column family it belongs to.
    *[(pl.col("name") == c).cast(pl.Int8).alias(f"{c}_name") for c in name_feature],
    *[(pl.col("room_fqid") == c).cast(pl.Int8).alias(f"{c}_room") for c in room_lists],
    *[(pl.col("fqid") == c).cast(pl.Int8).alias(f"{c}_fqid") for c in fqid_lists],
    *[(pl.col("event_name") == c).cast(pl.Int8).alias(f"{c}_EVENTS") for c in event_name_feature],
    ]

In [15]:
print('more feature engineering..')
df = df.with_columns(more_c)

more feature engineering..


In [16]:
# Split the engineered event table into one frame per level_group.
print('training df1 with level_group [0-4]')
df1 = df.filter(pl.col("level_group") == '0-4')
print('training df2 with level_group [5-12]')
df2 = df.filter(pl.col("level_group") == '5-12')
# The message previously read "[3-22]" while the filter selects '13-22'.
print('training df3 with level_group [13-22]')
df3 = df.filter(pl.col("level_group") == '13-22')

training df1 with level_group [0-4]
training df2 with level_group [5-12]
training df3 with level_group [3-22]


In [17]:
def time_feature(train):
    """Add year/month/day/hour/minute/second columns parsed from ``session_id``.

    Each field is a two-character slice of ``str(session_id)`` converted to an
    integer and stored as ``uint8``; the month slice is incremented by one.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame with a ``session_id`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``train`` object, with the six new columns.
    """
    # (new column, slice start, slice stop, offset added to the parsed value)
    layout = (
        ("year", 0, 2, 0),
        ("month", 2, 4, 1),
        ("day", 4, 6, 0),
        ("hour", 6, 8, 0),
        ("minute", 8, 10, 0),
        ("second", 10, 12, 0),
    )
    for column, start, stop, offset in layout:
        # Bind the loop values as lambda defaults so each apply uses its own slice.
        parsed = train["session_id"].apply(
            lambda sid, lo=start, hi=stop, extra=offset: int(str(sid)[lo:hi]) + extra
        )
        train[column] = parsed.astype(np.uint8)

    return train

In [18]:
def feature_engineer(x, grp, use_extra, feature_suffix):
    aggs = [
        
        *[pl.col(f"{c}_fqid").sum().alias(f"{c}_sum") for c in fqid_lists],
        *[pl.col(f"{c}_room").sum().alias(f"{c}_sum") for c in room_lists],
        *[pl.col(f"{c}_name").sum().alias(f"{c}_sum") for c in name_feature],
        *[pl.col(f"{c}_EVENTS").sum().alias(f"{c}_sum") for c in event_name_feature],
        
      
        pl.col('elapsed_time_diff').quantile(0.3).alias(f"d_time_30"),
        pl.col('elapsed_time_diff').quantile(0.8).alias(f"d_time_80"),
        pl.col('elapsed_time_diff').quantile(0.65).alias(f"d_time_65"),
        
        pl.col("index").count().alias(f"session_number"),

        *[pl.col('index').filter(pl.col('text').str.contains(c)).count().alias(f'word_{c}') for c in DIALOGS],
        *[pl.col("elapsed_time_diff").filter((pl.col('text').str.contains(c))).mean().alias(f'word_mean_{c}') for c in
          DIALOGS],
        *[pl.col("elapsed_time_diff").filter((pl.col('text').str.contains(c))).std().alias(f'word_std_{c}') for c in
          DIALOGS],
        *[pl.col("elapsed_time_diff").filter((pl.col('text').str.contains(c))).max().alias(f'word_max_{c}') for c in
          DIALOGS],
        *[pl.col("elapsed_time_diff").filter((pl.col('text').str.contains(c))).sum().alias(f'word_sum_{c}') for c in
          DIALOGS],
        *[pl.col("elapsed_time_diff").filter((pl.col('text').str.contains(c))).median().alias(f'word_median_{c}') for c
          in DIALOGS],

        *[pl.col(c).drop_nulls().n_unique().alias(f"{c}_unique_{feature_suffix}") for c in CATS],

        *[pl.col(c).mean().alias(f"{c}_mean_{feature_suffix}") for c in NUMS],
        *[pl.col(c).std().alias(f"{c}_std_{feature_suffix}") for c in NUMS],
        *[pl.col(c).sum().alias(f"{c}_sum_{feature_suffix}") for c in NUMS],

        *[pl.col("fqid").filter(pl.col("fqid") == c).count().alias(f"{c}_fqid_counts{feature_suffix}")
          for c in fqid_lists],
        
        *[pl.col("elapsed_time_diff").filter(pl.col("fqid") == c).std().alias(f"{c}_ET_std_{feature_suffix}") for
          c in fqid_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("fqid") == c).mean().alias(f"{c}_ET_mean_{feature_suffix}") for
          c in fqid_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("fqid") == c).sum().alias(f"{c}_ET_sum_{feature_suffix}") for
          c in fqid_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("fqid") == c).median().alias(f"{c}_ET_median_{feature_suffix}") for
          c in fqid_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("fqid") == c).max().alias(f"{c}_ET_max_{feature_suffix}") for
          c in fqid_lists],

        *[pl.col("text_fqid").filter(pl.col("text_fqid") == c).count().alias(f"{c}_text_fqid_counts{feature_suffix}")
          for
          c in text_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("text_fqid") == c).std().alias(f"{c}_ET_std_{feature_suffix}") for
          c in text_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("text_fqid") == c).mean().alias(f"{c}_ET_mean_{feature_suffix}") for
          c in text_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("text_fqid") == c).sum().alias(f"{c}_ET_sum_{feature_suffix}") for
          c in text_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("text_fqid") == c).median().alias(f"{c}_ET_median_{feature_suffix}")
          for
          c in text_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("text_fqid") == c).max().alias(f"{c}_ET_max_{feature_suffix}") for
          c in text_lists],

               
        *[pl.col("room_fqid").filter(pl.col("room_fqid") == c).count().alias(f"{c}_room_fqid_counts{feature_suffix}")
          for c in room_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("room_fqid") == c).std().alias(f"{c}_ET_std_{feature_suffix}") for
          c in room_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("room_fqid") == c).mean().alias(f"{c}_ET_mean_{feature_suffix}") for
          c in room_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("room_fqid") == c).sum().alias(f"{c}_ET_sum_{feature_suffix}") for
          c in room_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("room_fqid") == c).median().alias(f"{c}_ET_median_{feature_suffix}")
          for
          c in room_lists],
        *[pl.col("elapsed_time_diff").filter(pl.col("room_fqid") == c).max().alias(f"{c}_ET_max_{feature_suffix}") for
          c in room_lists],
        

        *[pl.col("event_name").filter(pl.col("event_name") == c).count().alias(f"{c}_event_name_counts{feature_suffix}")
          for c in event_name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("event_name") == c).std().alias(f"{c}_ET_std_{feature_suffix}") for
          c in event_name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("event_name") == c).mean().alias(f"{c}_ET_mean_{feature_suffix}")
          for
          c in event_name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("event_name") == c).sum().alias(f"{c}_ET_sum_{feature_suffix}") for
          c in event_name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("event_name") == c).median().alias(
            f"{c}_ET_median_{feature_suffix}") for
          c in event_name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("event_name") == c).max().alias(f"{c}_ET_max_{feature_suffix}") for
          c in event_name_feature],
        

        *[pl.col("name").filter(pl.col("name") == c).count().alias(f"{c}_name_counts{feature_suffix}") for c in
          name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("name") == c).std().alias(f"{c}_ET_std_{feature_suffix}") for c in
          name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("name") == c).mean().alias(f"{c}_ET_mean_{feature_suffix}") for c in
          name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("name") == c).sum().alias(f"{c}_ET_sum_{feature_suffix}") for c in
          name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("name") == c).median().alias(f"{c}_ET_median_{feature_suffix}") for
          c in
          name_feature],
        *[pl.col("elapsed_time_diff").filter(pl.col("name") == c).max().alias(f"{c}_ET_max_{feature_suffix}") for c in
          name_feature],
        

        *[pl.col("level").filter(pl.col("level") == c).count().alias(f"{c}_LEVEL_count{feature_suffix}") for c in
          LEVELS],
        *[pl.col("elapsed_time_diff").filter(pl.col("level") == c).std().alias(f"{c}_ET_std_{feature_suffix}") for c in
          LEVELS],
        *[pl.col("elapsed_time_diff").filter(pl.col("level") == c).mean().alias(f"{c}_ET_mean_{feature_suffix}") for c
          in
          LEVELS],
        *[pl.col("elapsed_time_diff").filter(pl.col("level") == c).sum().alias(f"{c}_ET_sum_{feature_suffix}") for c in
          LEVELS],
        *[pl.col("elapsed_time_diff").filter(pl.col("level") == c).median().alias(f"{c}_ET_median_{feature_suffix}") for
          c in
          LEVELS],
        *[pl.col("elapsed_time_diff").filter(pl.col("level") == c).max().alias(f"{c}_ET_max_{feature_suffix}") for c in
          LEVELS],
        
        *[pl.col("level_group").filter(pl.col("level_group") == c).count().alias(
            f"{c}_LEVEL_group_count{feature_suffix}") for c in
          level_groups],
        *[pl.col("elapsed_time_diff").filter(pl.col("level_group") == c).std().alias(f"{c}_ET_std_{feature_suffix}") for
          c in
          level_groups],
        *[pl.col("elapsed_time_diff").filter(pl.col("level_group") == c).mean().alias(f"{c}_ET_mean_{feature_suffix}")
          for c in
          level_groups],
        *[pl.col("elapsed_time_diff").filter(pl.col("level_group") == c).sum().alias(f"{c}_ET_sum_{feature_suffix}") for
          c in
          level_groups],
        *[pl.col("elapsed_time_diff").filter(pl.col("level_group") == c).median().alias(
            f"{c}_ET_median_{feature_suffix}") for c in
          level_groups],
        *[pl.col("elapsed_time_diff").filter(pl.col("level_group") == c).max().alias(f"{c}_ET_max_{feature_suffix}") for
          c in
          level_groups],
        
    ]

    df = x.groupby(['session_id'], maintain_order=True).agg(aggs).sort("session_id")

    if use_extra:
        if grp =='0-4':
            aggs = [
       
            *[pl.col("elapsed_time").filter((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == f'{i}')).apply(
                lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_fqid_0_4")  for i in fqid_0_4],
            *[pl.col("index").filter((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == f'{i}')).apply(
                lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_fqid_0_4")  for i in fqid_0_4],                     

            *[pl.col("elapsed_time").filter((pl.col("event_name") == 'navigate_click') & (pl.col("room_fqid") == f'{i}')).apply(
                lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_room_fqid_0_4")  for i in room_fqid_0_4],
            *[pl.col("index").filter((pl.col("event_name") == 'navigate_click') & (pl.col("room_fqid") == f'{i}')).apply(
                lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_room_fqid_0_4")  for i in room_fqid_0_4], 
            ]

            tmp = x.groupby(["session_id"], maintain_order=True).agg(aggs).sort("session_id")
            df = df.join(tmp, on="session_id", how='left')          
            
        if grp == '5-12':
            aggs = [
                pl.col("elapsed_time").filter((pl.col("text") == "Here's the log book.") | (pl.col("fqid") == 'logbook.page.bingo'))
                    .apply(lambda s: s.max() - s.min()).alias("logbook_bingo_duration"),
                pl.col("index").filter(
                    (pl.col("text") == "Here's the log book.") | (pl.col("fqid") == 'logbook.page.bingo')).apply(
                    lambda s: s.max() - s.min()).alias("logbook_bingo_indexCount"),
                
                pl.col("elapsed_time").filter(
                    ((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'reader')) | (
                            pl.col("fqid") == "reader.paper2.bingo")).apply(lambda s: s.max() - s.min()).alias(
                    "reader_bingo_duration"),
                pl.col("index").filter(((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'reader')) | (
                        pl.col("fqid") == "reader.paper2.bingo")).apply(lambda s: s.max() - s.min()).alias(
                    "reader_bingo_indexCount"),
                
                pl.col("elapsed_time").filter(
                    ((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'journals')) | (
                            pl.col("fqid") == "journals.pic_2.bingo")).apply(lambda s: s.max() - s.min()).alias(
                    "journals_bingo_duration"),
                pl.col("index").filter(((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'journals')) | (
                        pl.col("fqid") == "journals.pic_2.bingo")).apply(lambda s: s.max() - s.min()).alias(
                    "journals_bingo_indexCount"),
                
                #######################################################################################################
                *[pl.col("elapsed_time").filter((pl.col("event_name") == 'navigate_click') & (pl.col("room_fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_navigate_click_room_5_12")  for i in room_navigate_click_fqid_5_13],
                *[pl.col("index").filter((pl.col("event_name") == 'navigate_click') & (pl.col("room_fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_navigate_click_room_5_12")  for i in room_navigate_click_fqid_5_13],
                
                *[pl.col("elapsed_time").filter((pl.col("event_name") == 'person_click') & (pl.col("text") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_person_click_text__5_12")  for i in text_5_12],
                *[pl.col("index").filter((pl.col("event_name") == 'person_click') & (pl.col("text") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_person_click_text_5_12")  for i in text_5_12],
                
                *[pl.col("elapsed_time").filter((pl.col("event_name") == 'person_click') & (pl.col("text_fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_person_click_text_fqid_5_12")  for i in text_fqid_5_12],
                *[pl.col("index").filter((pl.col("event_name") == 'person_click') & (pl.col("text_fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_person_click_text_fqid_5_12")  for i in text_fqid_5_12],  
                
                *[pl.col("elapsed_time").filter((pl.col("name") == 'basic') & (pl.col("fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_basic_fqid_5_12")  for i in name_fqid_5_12],
                *[pl.col("index").filter((pl.col("name") == 'basic') & (pl.col("fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_basic_fqid_5_12")  for i in name_fqid_5_12], 
                
                *[pl.col("elapsed_time").filter((pl.col("name") == 'undefined') & (pl.col("fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_undefined_fqid_5_12")  for i in name_undef_5_12],
                *[pl.col("index").filter((pl.col("name") == 'undefined') & (pl.col("fqid") == f'{i}'))
                .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_undefined_fqid_5_12")  for i in name_undef_5_12], 
                
#                 *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.historicalsociety.entry') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_room_1_5_12_fqid_5_12")  for i in room_1_5_12],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.historicalsociety.entry') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_room_1_5_12_fqid_5_12")  for i in room_1_5_12], 
                
#                 *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.drycleaner.frontdesk') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_room_2_5_12_fqid_5_12")  for i in room_2_5_12],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.drycleaner.frontdesk') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_room_2_5_12_fqid_5_12")  for i in room_2_5_12], 
                
#                 *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.capitol_1.hall') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_room_3_5_12_fqid_5_12")  for i in room_3_5_12],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.capitol_1.hall') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_room_3_5_12_fqid_5_12")  for i in room_3_5_12], 
                
#                 *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.historicalsociety.collection') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"elapsed_time_{i}_room_4_5_12_fqid_5_12")  for i in room_4_5_12],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.historicalsociety.collection') & (pl.col("fqid") == f'{i}'))
#                 .apply(lambda s: s.max() - s.min()  if s.len() > 0 else 0).alias( f"index_{i}_room_4_5_12_fqid_5_12")  for i in room_4_5_12],              
                
            ]
            tmp = x.groupby(["session_id"], maintain_order=True).agg(aggs).sort("session_id")
            df = df.join(tmp, on="session_id", how='left')

        if grp == '13-22':
            aggs = [
                pl.col("elapsed_time").filter(
                    ((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'reader_flag')) | (
                            pl.col("fqid") == "tunic.library.microfiche.reader_flag.paper2.bingo")).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0).alias("reader_flag_duration"),
                pl.col("index").filter(
                    ((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'reader_flag')) | (
                            pl.col("fqid") == "tunic.library.microfiche.reader_flag.paper2.bingo")).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0).alias("reader_flag_indexCount"),
                
                pl.col("elapsed_time").filter(
                    ((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'journals_flag')) | (
                            pl.col("fqid") == "journals_flag.pic_0.bingo")).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0).alias("journalsFlag_bingo_duration"),
                pl.col("index").filter(
                    ((pl.col("event_name") == 'navigate_click') & (pl.col("fqid") == 'journals_flag')) | (
                            pl.col("fqid") == "journals_flag.pic_0.bingo")).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0).alias("journalsFlag_bingo_indexCount"),              
                        
                #############################################################################################################
                *[pl.col("elapsed_time").filter((pl.col("event_name") == 'navigate_click') & (pl.col("room_fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_room_navigate_click_fqid_13_18")  for i in room_navigate_click_fqid_13_18],
                *[pl.col("index").filter((pl.col("event_name") == 'navigate_click') & (pl.col("room_fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_room_navigate_click_fqid_13_18")  for i in room_navigate_click_fqid_13_18],
                
                *[pl.col("elapsed_time").filter((pl.col("event_name") == 'person_click') & (pl.col("text") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_person_click_text_13_18")  for i in text_13_18],
                *[pl.col("index").filter((pl.col("event_name") == 'person_click') & (pl.col("text") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_person_click_text_13_18")  for i in text_13_18],
 
                *[pl.col("elapsed_time").filter((pl.col("event_name") == 'person_click') & (pl.col("text_fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_person_click_text_fqid_13_18")  for i in text_fqid_12_18],
                *[pl.col("index").filter((pl.col("event_name") == 'person_click') & (pl.col("text_fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_person_click_text_fqid_13_18")  for i in text_fqid_12_18],                                           
                
                        
                *[pl.col("elapsed_time").filter((pl.col("name") == 'basic') & (pl.col("fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_basic_text_13_18")  for i in name_fqid_13_18],
                *[pl.col("index").filter((pl.col("name") == 'basic') & (pl.col("fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_basic_text_13_18")  for i in name_fqid_13_18],
                
                *[pl.col("elapsed_time").filter((pl.col("name") == 'undefined') & (pl.col("fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_undefined_text_13_18")  for i in name_undef_13_18],
                *[pl.col("index").filter((pl.col("name") == 'undefined') & (pl.col("fqid") == f'{i}')).apply(
                    lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_undefined_text_13_18")  for i in name_undef_13_18],
                
#                  *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.historicalsociety.entry') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_room_1_fqid_18_13_18")  for i in room_1_13_18],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.historicalsociety.entry') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_room_1_fqid_13_18")  for i in room_1_13_18],
                
#                  *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.drycleaner.frontdesk') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_room_2_13_18_fqid_18_13_18")  for i in room_2_13_18],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.drycleaner.frontdesk') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_room_2_13_18_fqid_13_18")  for i in room_2_13_18], 
                
#                  *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.capitol_1.hall') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_room_3_13_fqid_18_13_18")  for i in room_3_13_18],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.capitol_1.hall') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_room_3_13_18_fqid_13_18")  for i in room_3_13_18], 
                
#                  *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.historicalsociety.collection') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_room_4_13_fqid_18_13_18")  for i in room_4_13_18],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.historicalsociety.collection') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_room_4_13_18_fqid_13_18")  for i in room_4_13_18], 
                
#                  *[pl.col("elapsed_time").filter((pl.col("room_fqid") == 'tunic.historicalsociety.cage') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"elapsed_time_{i}_room_5_13_fqid_18_13_18")  for i in room_5_13_18],
#                 *[pl.col("index").filter((pl.col("room_fqid") == 'tunic.historicalsociety.cage') & (pl.col("fqid") == f'{i}')).apply(
#                     lambda s: s.max() - s.min() if s.len() > 0 else 0 ).alias( f"index_{i}_room_5_13_18_fqid_13_18")  for i in room_5_13_18], 

            ]
        
            tmp = x.groupby(["session_id"], maintain_order=True).agg(aggs).sort("session_id")
            df = df.join(tmp, on="session_id", how='left')

    return df.to_pandas()

In [19]:
# Room identifiers for the 0-4 level group (per the variable name).
# The consumer of this list is not visible next to this cell — presumably the
# values are compared against the `room_fqid` column; TODO confirm.
# Order is preserved because derived feature columns follow list order.
room_fqid_0_4 = [
    "tunic.historicalsociety.closet",
    "tunic.kohlcenter.halloffame",
    "tunic.historicalsociety.entry",
    "tunic.historicalsociety.stacks",
    "tunic.historicalsociety.collection",
    "tunic.capitol_0.hall",
    "tunic.historicalsociety.basement",
]

# `fqid` vocabulary for the 0-4 level group (per the variable name).
# 'fqid_None' looks like a placeholder for a missing fqid — presumably filled
# in upstream; verify against the preprocessing step.
fqid_0_4 = [
    "directory",
    "block_tomap2",
    "tostacks",
    "outtolunch",
    "plaque",
    "toentry",
    "janitor",
    "tunic",
    "tocloset",
    "retirement_letter",
    "gramps",
    "tomap",
    "photo",
    "wells",
    "tocollection",
    "block_tocollection",
    "boss",
    "chap1_finale",
    "notebook",
    "block_0",
    "doorblock",
    "block_tomap1",
    "fqid_None",
    "groupconvo",
    "teddy",
    "tobasement",
]

# Per-room `fqid` vocabularies.
# In the feature code visible alongside this cell these lists are referenced
# only from commented-out aggregations: room_5_* is paired with
# 'tunic.historicalsociety.cage' and room_4_* with
# 'tunic.historicalsociety.collection'.
room_5_13_18 = [
    "tobasement",
    "unlockdoor",
    "glasses",
    "fqid_None",
    "teddy",
    "lockeddoor",
    "confrontation",
    "need_glasses",
]

room_4_5_12 = [
    "tunic.hub.slip",
    "tunic",
    "fqid_None",
    "toentry",
]

room_4_13_18 = [
    "tunic.hub.slip",
    "toentry",
    "tunic",
    "fqid_None",
]

# `fqid` vocabularies for room 3. The only visible references are commented-out
# aggregations that filter on room_fqid == 'tunic.capitol_1.hall'.
room_3_5_12 = [
    "tunic.drycleaner",
    "tunic.library",
    "tunic.kohlcenter",
    "boss",
    "chap2_finale_c",
    "fqid_None",
    "tunic.capitol_1",
    "toentry",
    "tunic.humanecology",
    "tunic.historicalsociety",
    "chap2_finale",
]

room_3_13_18 = [
    "tunic.library",
    "tunic.historicalsociety",
    "boss",
    "tunic.drycleaner",
    "tunic.kohlcenter",
    "tunic.capitol_1",
    "toentry",
    "tunic.humanecology",
    "fqid_None",
]

# `fqid` vocabularies for room 2. The only visible references are commented-out
# aggregations that filter on room_fqid == 'tunic.drycleaner.frontdesk'.
room_2_5_12 = [
    "logbook",
    "tunic.kohlcenter",
    "fqid_None",
    "logbook.page.bingo",
    "worker",
    "tunic.drycleaner",
    "tunic.historicalsociety",
    "tunic.capitol_1",
    "tunic.library",
    "block_1",
    "block_0",
    "toentry",
    "tunic.humanecology",
]

room_2_13_18 = [
    "tunic.humanecology",
    "worker",
    "tunic.kohlcenter",
    "tunic.flaghouse",
    "tunic.library",
    "tunic.historicalsociety",
    "logbook",
    "tunic.drycleaner",
    "toentry",
    "tunic.capitol_1",
    "tunic.wildlife",
    "fqid_None",
    "tunic.capitol_2",
]


# `fqid` vocabularies for room 1. The only visible references are commented-out
# aggregations that filter on room_fqid == 'tunic.historicalsociety.entry'.
room_1_5_12 = [
    "tunic.capitol_1",
    "tomap",
    "directory",
    "tobasement",
    "tunic.kohlcenter",
    "tocollection",
    "tunic.drycleaner",
    "tunic.library",
    "fqid_None",
    "tunic.humanecology",
    "tunic.historicalsociety",
    "tostacks",
    "tunic.capitol_0",
]

room_1_13_18 = [
    "fqid_None",
    "groupconvo_flag",
    "tobasement",
    "boss",
    "tunic.drycleaner",
    "tunic.library",
    "tostacks",
    "directory",
    "tunic.kohlcenter",
    "tunic.capitol_2",
    "tomap",
    "tunic.historicalsociety",
    "directory.closeup.archivist",
    "tunic.flaghouse",
    "tunic.humanecology",
    "tocollectionflag",
    "tunic.capitol_1",
    "wells",
    "tocollection",
    "tunic.wildlife",
]


# `fqid` values paired with name == 'undefined' when building the
# "elapsed_time_{fqid}_undefined_fqid_5_12" / "index_{fqid}_undefined_fqid_5_12"
# feature columns. Each entry becomes part of a column alias, so the order
# here fixes the order of the generated columns.
name_undef_5_12 = [
    "tofrontdesk",
    "block_badge_2",
    "tunic.capitol_0",
    "reader.paper2.next",
    "businesscards.card_0.next",
    "tohallway",
    "tunic",
    "block",
    "tomicrofiche",
    "businesscards.card_1.next",
    "block_0",
    "tocloset_dirty",
    "tunic.kohlcenter",
    "magnify",
    "janitor",
    "journals.pic_0.next",
    "toentry",
    "tunic.capitol_1",
    "gramps",
    "outtolunch",
    "logbook.page.bingo",
    "tunic.drycleaner",
    "chap2_finale_c",
    "tunic.historicalsociety",
    "reader.paper0.next",
    "reader.paper1.prev",
    "reader.paper2.prev",
    "trigger_coffee",
    "archivist",
    "tunic.hub.slip",
    "tunic.library",
    "journals.pic_2.next",
    "reader.paper1.next",
    "logbook",
    "reader.paper0.prev",
    "block_badge",
    "block_magnify",
    "door_block_talk",
    "fqid_None",
    "businesscards.card_bingo.next",
    "boss",
    "journals.pic_2.bingo",
    "tocollection",
    "tunic.humanecology",
    "door_block_clean",
    "block_1",
    "plaque",
    "businesscards.card_bingo.bingo",
    "businesscards",
    "tobasement",
    "tostacks",
    "journals",
    "trigger_scarf",
    "directory",
    "photo",
    "journals.pic_1.next",
    "tomap",
    "reader.paper2.bingo",
    "wellsbadge",
    "reader",
    "chap2_finale",
    "journals.hub.topics",
    "worker",
]

# `fqid` values paired with name == 'undefined' when building the
# "elapsed_time_{fqid}_undefined_text_13_18" / "index_{fqid}_undefined_text_13_18"
# feature columns inside the `grp == '13-22'` branch. Order fixes the order of
# the generated columns.
name_undef_13_18 = [
    "tracks",
    "reader.paper1.next",
    "reader.paper0.prev",
    "colorbook",
    "tomap",
    "remove_cup",
    "reader.paper2.prev",
    "tunic.humanecology",
    "seescratches",
    "key",
    "tunic.library",
    "journals",
    "archivist_glasses",
    "fox",
    "reader_flag.paper1.prev",
    "directory",
    "plaque",
    "tracks.hub.deer",
    "tunic.wildlife",
    "journals.pic_1.next",
    "journals_flag.pic_0.next",
    "tohallway",
    "journals_flag.pic_1.next",
    "groupconvo_flag",
    "businesscards",
    "reader.paper1.prev",
    "unlockdoor",
    "worker",
    "journals_flag.pic_0.bingo",
    "reader.paper2.next",
    "tobasement",
    "businesscards.card_bingo.next",
    "journals_flag.hub.topics",
    "tofrontdesk",
    "journals_flag.pic_2.next",
    "journals_flag.pic_1_old.next",
    "reader_flag.paper1.next",
    "reader_flag.paper2.next",
    "tocloset_dirty",
    "expert",
    "tocollectionflag",
    "tunic.flaghouse",
    "journals.pic_2.next",
    "reader_flag",
    "glasses",
    "businesscards.card_0.next",
    "reader.paper0.next",
    "photo",
    "journals_flag.pic_1.bingo",
    "teddy",
    "journals.pic_2.bingo",
    "reader_flag.paper0.prev",
    "chap4_finale_c",
    "tunic.capitol_1",
    "tocage",
    "boss",
    "coffee",
    "wells",
    "confrontation",
    "tunic.drycleaner",
    "need_glasses",
    "outtolunch",
    "tocollection",
    "logbook",
    "businesscards.card_1.next",
    "block_nelson",
    "reader_flag.paper2.prev",
    "flag_girl",
    "toentry",
    "tunic.hub.slip",
    "tomicrofiche",
    "journals_flag.pic_2_old.next",
    "crane_ranger",
    "journals_flag.pic_0_old.next",
    "tunic.historicalsociety",
    "gramps",
    "journals_flag",
    "journals.pic_0.next",
    "journals_flag.pic_2.bingo",
    "directory.closeup.archivist",
    "reader",
    "lockeddoor",
    "tunic.capitol_2",
    "journals_flag.hub.topics_old",
    "reader_flag.paper0.next",
    "journals.hub.topics",
    "fqid_None",
    "tunic",
    "tunic.kohlcenter",
    "tostacks",
    "reader_flag.paper2.bingo",
]


# `fqid` values paired with name == 'basic' when building the
# "elapsed_time_{fqid}_basic_fqid_5_12" / "index_{fqid}_basic_fqid_5_12"
# feature columns. Order fixes the order of the generated columns.
name_fqid_5_12 = [
    "archivist",
    "photo",
    "tunic.library",
    "journals.pic_2.next",
    "reader.paper0.next",
    "logbook.page.bingo",
    "businesscards.card_bingo.next",
    "plaque",
    "block_magnify",
    "directory",
    "reader.paper1.prev",
    "block",
    "gramps",
    "journals",
    "businesscards.card_1.next",
    "tunic.hub.slip",
    "fqid_None",
    "journals.pic_2.bingo",
    "reader.paper1.next",
    "trigger_scarf",
    "tofrontdesk",
    "journals.hub.topics",
    "journals.pic_0.next",
    "outtolunch",
    "tobasement",
    "tunic.drycleaner",
    "logbook",
    "reader.paper2.bingo",
    "door_block_clean",
    "worker",
    "journals.pic_1.next",
    "tomap",
    "tunic.historicalsociety",
    "block_badge_2",
    "boss",
    "businesscards.card_0.next",
    "wellsbadge",
    "toentry",
    "reader.paper2.prev",
    "what_happened",
    "businesscards.card_bingo.bingo",
    "tomicrofiche",
    "tunic.capitol_0",
    "tunic",
    "chap2_finale_c",
    "tostacks",
    "reader.paper0.prev",
    "trigger_coffee",
    "janitor",
    "businesscards",
    "door_block_talk",
    "tunic.kohlcenter",
    "tunic.humanecology",
    "magnify",
    "block_1",
    "tunic.capitol_1",
    "block_0",
    "tohallway",
    "block_badge",
    "tocollection",
    "reader.paper2.next",
    "reader",
]

# `fqid` values paired with name == 'basic' when building the
# "elapsed_time_{fqid}_basic_text_13_18" / "index_{fqid}_basic_text_13_18"
# feature columns inside the `grp == '13-22'` branch. Order fixes the order of
# the generated columns.
name_fqid_13_18 = [
    "journals_flag.pic_2.next",
    "logbook",
    "journals_flag.hub.topics",
    "journals_flag.pic_0.next",
    "reader.paper2.prev",
    "businesscards.card_1.next",
    "reader.paper0.next",
    "reader_flag.paper1.prev",
    "journals_flag.pic_1.next",
    "reader_flag.paper2.next",
    "gramps",
    "reader_flag.paper0.prev",
    "tunic.wildlife",
    "reader",
    "savedteddy",
    "businesscards",
    "archivist_glasses",
    "fox",
    "businesscards.card_bingo.next",
    "photo",
    "reader.paper2.next",
    "tunic.capitol_1",
    "journals_flag",
    "flag_girl",
    "tobasement",
    "journals_flag.pic_2.bingo",
    "expert",
    "need_glasses",
    "reader.paper1.next",
    "worker",
    "remove_cup",
    "reader_flag.paper1.next",
    "journals_flag.pic_1.bingo",
    "tunic.library",
    "journals.pic_2.next",
    "tofrontdesk",
    "directory.closeup.archivist",
    "journals.pic_0.next",
    "colorbook",
    "unlockdoor",
    "tocollectionflag",
    "toentry",
    "tunic.hub.slip",
    "tohallway",
    "journals.pic_1.next",
    "reader.paper1.prev",
    "journals_flag.hub.topics_old",
    "journals_flag.pic_2_old.next",
    "lockeddoor",
    "tunic.kohlcenter",
    "journals_flag.pic_0.bingo",
    "tomap",
    "tunic.flaghouse",
    "block_nelson",
    "tunic.capitol_2",
    "directory",
    "confrontation",
    "journals.pic_2.bingo",
    "tracks.hub.deer",
    "reader_flag.paper0.next",
    "glasses",
    "crane_ranger",
    "businesscards.card_0.next",
    "reader_flag.paper2.bingo",
    "reader_flag.paper2.prev",
    "ch3start",
    "teddy",
    "journals_flag.pic_1_old.next",
    "tunic.historicalsociety",
    "groupconvo_flag",
    "coffee",
    "journals_flag.pic_0_old.next",
    "reader_flag",
    "journals.hub.topics",
    "chap4_finale_c",
    "reader.paper0.prev",
    "tunic.drycleaner",
    "outtolunch",
    "key",
    "tostacks",
    "tunic.humanecology",
    "seescratches",
    "wells",
    "fqid_None",
    "plaque",
    "boss",
    "tracks",
    "tunic",
    "journals",
]


# `text_fqid` values paired with event_name == 'person_click' when building the
# "elapsed_time_{id}_person_click_text_fqid_5_12" /
# "index_{id}_person_click_text_fqid_5_12" feature columns.
# Order fixes the order of the generated columns.
text_fqid_5_12 = [
    "tunic.historicalsociety.frontdesk.archivist.have_glass",
    "tunic.historicalsociety.frontdesk.archivist.hello",
    "tunic.drycleaner.frontdesk.worker.hub",
    "tunic.library.frontdesk.worker.hello_short",
    "tunic.historicalsociety.closet_dirty.gramps.archivist",
    "tunic.historicalsociety.frontdesk.archivist.need_glass_0",
    "tunic.historicalsociety.frontdesk.archivist.foundtheodora",
    "tunic.historicalsociety.frontdesk.archivist.newspaper_recap",
    "tunic.library.frontdesk.worker.wells",
    "tunic.drycleaner.frontdesk.worker.done",
    "tunic.capitol_1.hall.boss.haveyougotit",
    "tunic.drycleaner.frontdesk.worker.done2",
    "tunic.historicalsociety.frontdesk.archivist.newspaper",
    "tunic.humanecology.frontdesk.worker.intro",
    "tunic.historicalsociety.closet_dirty.gramps.nothing",
    "tunic.historicalsociety.frontdesk.archivist.need_glass_1",
    "tunic.library.frontdesk.worker.hello",
    "tunic.library.frontdesk.worker.droppedbadge",
    "tunic.historicalsociety.closet_dirty.gramps.news",
    "tunic.library.frontdesk.worker.wells_recap",
    "tunic.capitol_0.hall.boss.talktogramps",
    "tunic.drycleaner.frontdesk.worker.takealook",
    "tunic.historicalsociety.closet_dirty.gramps.helpclean",
    "tunic.humanecology.frontdesk.worker.badger",
    "tunic.library.frontdesk.worker.preflag",
    "tunic.historicalsociety.frontdesk.archivist.have_glass_recap",
]

# Vocabulary consumed by the `grp == '13-22'` branch for the
# "..._person_click_text_fqid_13_18" feature columns.
# NOTE(review): this list was previously a second literal that repeated
# `text_fqid_5_12` entry for entry, and its name says 12_18 while every sibling
# list says 13_18. It is now an explicit (independent) copy so the duplication
# is visible instead of hidden in 26 pasted lines. The values are unchanged.
# This looks like a copy-paste slip — the 13-22 branch is being fed the 5-12
# dialogue ids; confirm against the data and replace with the real 13-22
# vocabulary if so.
text_fqid_12_18 = list(text_fqid_5_12)

# Dialogue lines for the 5-12 level group (per the variable name).
# Presumably matched against the `text` column the same way `text_13_18` is in
# the '13-22' branch — the consuming code is not visible here; TODO confirm.
# Every string is kept exactly as written, including:
#   * literal backslash sequences such as "\\u00e2\\u20ac\\u00a6" and
#     "\\Taxidermy ...\\" (each "\\" is one real backslash character in the value),
#   * the double space in "What are you still doing here,  Jolie?".
# These look like artifacts of the raw data; do not "fix" them without
# checking that the data column changes too.
text_5_12 = [
    "I haven't seen him.",
    "I can't calm down. This is important!",
    "Can you help me tidy up?",
    "You haven't seen any badgers around here, have you?",
    "*cough cough*",
    "Why don't you take a look?",
    "Thanks to them, Wisconsin was the first state to approve votes for women!",
    "Where are the Stacks?",
    "Well? What are you still doing here?",
    "A little horse!",
    "Unless you're too busy horsing around.",
    "*grumble grumble*",
    "Badgers? No.",
    "And I'll figure out the shirt, too.",
    "Hold your horses, Jo.",
    "Did you drop something, Dear? There's a card on the floor.",
    "An old shirt? Try the university.",
    "What are you waiting for? The Stacks are right outside the door.",
    "And you are?",
    "Um, are you okay?",
    "Where did you get that coffee?",
    "Please let me know if you do. It's important!",
    "Hmmm... not sure. Why don't you try the library?",
    "Wow! What is all this stuff?",
    "Ooh, thanks!",
    "Slow down, Jo.",
    "Maybe I can help!",
    "Yep.",
    "Ha! What do you call a pony with a sore throat?",
    "Can you help me? I need to find the owner of this slip.",
    "Can you help me-",
    "Right outside the door.",
    "I used to have a magnifying glass around here\\u00e2\\u20ac\\u00a6",
    "Guess it couldn't hurt to let you take a look.",
    "Yup, that's him!",
    "I ran into Wells there this morning.",
    "Check out our microfiche. It's right through that door.",
    "Hi! *cough*",
    "Sorry, I'm in a hurry.",
    "Wait a minute!",
    "Ugh. Fine.",
    "He's always trying to get you in trouble, and he doesn't like animals!",
    "Yikes... this could take a while.",
    "Okay. I'll find Teddy!",
    "Who are you?",
    "Do you know what this slip is?",
    "Are you okay?",
    "I don't have time for kids.",
    "Hello there!",
    "*COUGH COUGH COUGH*",
    "Theodora Youmans? Is that who owned the shirt?",
    "I knew I could count on you, Jo!",
    "He was looking for a taxidermist.",
    "Two missions, actually!",
    "Thanks.",
    "Head upstairs and talk to the archivist. He might be able to help!",
    "Do you have any info on Theodora Youmans?",
    "Weren't you going to check out our microfiche?",
    "I need to find Wells!!!",
    "Wait a sec. Women couldn't vote?!",
    "Now I just need to find all the cleaners from wayyyy back in 1916.",
    "What should I do first?",
    "Could be. But we need evidence.",
    "I've got a stack of business cards from my favorite cleaners.",
    "What was Wells doing here?",
    "\\Taxidermy: the art of preparing, stuffing, and mounting the skins of animals.\\",
    "Where should I go again?",
    "You're right, Gramps. Let's investigate!",
    "Yeah. Thanks anyway.",
    "Who could've done this?",
    "Head over to the university.",
    "Have you seen a badger around here?",
    "Your gramps is awesome! Always full of stories.",
    "Go find your grampa and get to work!",
    "Please let me know if you do.",
    "Oh, hello there!",
    "They study clothes and fabric.",
    "I got that one from my Gramps!",
    "Sorry, can't help you.",
    "Theodora Youmans? Of course!",
    "Did you have a question?",
    "Can I give you the tour?",
    "Nope. But Youmans and other suffragists worked hard to change that.",
    "Looks like a dry cleaning receipt.",
    "I need to find the owner of this slip.",
    "Take a look!",
    "Now I Just need to find all the cleaners from way back in 1916.",
    "It's such a nice fall day.",
    "Please?",
    "Oh no... Teddy!",
    "Wells? I knew it!",
    "Jolie! I was hoping you'd stop by. Any news on the shirt artifact?",
    "Ha! Good one.",
    "Here's the log book.",
    "You can talk to a textile expert there.",
    "It must've been Wells.",
    "Great! Thanks for the help!",
    "It's for Grampa Leo. He's a historian!",
    "He's wrong about old shirts and his name rhymes with \\smells\\...",
    "What's a textile expert?",
    "Don't worry, Gramps. I'll find Teddy!",
    "Then we need evidence.",
    "Fine. Let's investigate!",
    "Did you have a question or not?",
    "Oh, that's from Bean Town.",
    "It's our Norwegian Craft exhibit!",
    "Can you help me? I need to find Wells!",
    "Wow!",
    "Calm down, kid. I haven't seen him.",
    "Why don't you go upstairs and see the archivist?",
    "But I hear the museum's got one on the loose!",
    "Not sure. Here, let me look it up.",
    "I'm also looking for Theodora Youmans. Have you heard of her?",
    "Nice seeing you, Jolie!",
    "I need your help!",
    "Do you know anything about this slip?",
    "Well, get on it. I'm counting on you and your gramps to figure this out!",
    "Ah, that's better!",
    "I found it!",
    "Please? This is really important.",
    "Can you help-",
    "So much cleaning to do...",
    "You'll have to get started without me.",
    "Yes! I was wondering-",
    "How can I help you?",
    "Can't believe I lost my reading glasses.",
    "Well, I can't show our log books to just anybody.",
    "I'm afraid not.",
    "She led marches and helped women get the right to vote!",
    "Now if only I could read this thing.",
    "Do you know who Theodora Youmans is?",
    "Theodora wearing the shirt!",
    "I haven't quite figured it out just yet...",
    "You look like you're on a mission.",
    "Leo... you mean Leopold?",
    "Knew what?",
    "You're still here? I'm trying to work!",
    "Huh?",
    "Oh my!",
    "You could try the archivist. Maybe he can help you find Wells!",
    "He's our expert record keeper.",
    "You could ask the archivist. He knows everybody!",
    "Yes!",
    "Now if only I could read this thing. Blasted tiny letters...",
    "I'm sure you'll find Theodora in there somewhere!",
    "What are you still doing here,  Jolie?",
    "I'm Leopold's grandkid!",
    "I have an idea.",
    "Hi! How can I help you?",
    "I'm afraid my papers have gone missing in this mess.",
    "Ha! You're funny.",
    "Sorry, I'm too busy for kids right now.",
    "Why didn't you say so?",
    "But what if Wells kidnapped Teddy?",
    "Can you help me find Wells?",
    "You better get to the capitol!",
    "Oh, I'm fine! Just a little hoarse.",
    "Thanks for the help!",
    "I need to find Wells right away!! Do you know where he is?",
    "Youmans was a suffragist here in Wisconsin.",
    "I need to find Wells right away! Do you know where he is?",
    "Here's a call number to find more info in the Stacks.",
    "I found it on an old shirt.",
    "What's a taxidermist?",
    "BUT WELLS STOLE TEDDY!",
    "Run along to the university.",
    "Okay. Thanks anyway.",
]

text_13_18 = ['Wells, meet Teddy.',
       'Oh! There was a staff directory in the entryway!',
       'Notice any clues about this flag?', 'Good luck!',
       'I wonder whose glasses these are.', 'Okay. Thanks!',
       "Yep. I'm a vexillophile!", "It's such a nice fall day.",
       'Welcome back, Dear! How can I help you?', "I'll ride with you!",
       "Oh, trust me. He'll make time.", 'Can you take a look?',
       'What kind of photos do you need?', "What's a vexillophile? ",
       "Fine. Then I guess you don't want a real, live badger for the exhibit.",
       'Ugh! Those cups are all over the place.',
       'There are some old newspapers loaded up in the microfiche.',
       "I'm investigating this flag.", "Actually, he's a badger.",
       'Are you sure? I know where you can find a real, live badger for the exhibit!',
       'Ugh...', '\\u00f0\\u0178\\u008d\\u00a9',
       "Maybe she'll let me take off the cup!",
       "Here's a call number for the Stacks. Go find some photos.",
       'Head back to the museum. Your gramps is waiting for you.',
       'I think I might be able to help you.', "I'm not sure.",
       'Oh yeah, cranes eat insects!',
       'Whoever lost these glasses probably took Teddy!',
       'Ugh. I have to head over there and check out the animals.',
       "Cranes don't eat donuts!", '\\u00f0\\u0178\\u02dc\\u00ad',
       'Nice seeing you, Jolie!', 'The archivist had him locked up!',
       'Do you know what this flag was used for?',
       'I think I can help with your animal problem.',
       "You're becoming quite the detective, Jo.",
       'She should be able to help you out.', 'Wait! What?! Really?',
       'I love it!',
       'I have to head over there and check out the animals.',
       "Come on, kid. You're slowing me down.",
       'Ugh... I think that lynx is looking at me funny.',
       "Hey, I've seen that symbol before! Check it out!",
       "Actually, badgers aren't rodents-",
       'It has something to do with ecology.',
       'Though the archivist might be too busy to help...',
       "Well, get on it. I'm counting on you to figure this out!",
       'Aha! Good catch, Jo.', 'What are you doing here?', 'Teddy!!!',
       'Not sure. Do I look like a deer expert to you?',
       'We need to calm her down, Teddy.',
       'Go on, tell the boss what you found!',
       "You again! Don't let him hurt me!", "So? What'd you find out?",
       "Why don't you go talk to the boss?",
       'Luckily there are tons of insects around here...',
       'Careful. That beak is sharp!', 'A real, live ferret!',
       'You could try the archives.',
       'Do you really think that symbol is a deer hoof?',
       'Does it look like a deer hoof?', 'Yoga does sound nice.',
       'Thanks for your help, kid!',
       "Actually, we're just here for some photos.", 'Good catch!',
       'YOU?!', 'Head over to the Wildlife Center!',
       'The boss is gonna love it!', "It's an ecology flag!",
       'We still need to figure out that flag. Do you know anyone who could help?',
       'Gadzooks! Poor critter.',
       "There's a diagram of animal tracks over there.",
       'Can I help you with anything?', 'Can Teddy and I help?',
       'Great. Just great. Could this day get any worse?!',
       "She's right outside.", 'Yes!!!',
       "Go check the microfiche. Maybe you'll find something!",
       "But cranes can't do yoga, Teddy!",
       'Hey, nice dog! What breed is he?',
       "Don't worry, Teddy won't eat your lunch anymore!",
       'Well... it looks hand-stitched.',
       'There should be some info about that symbol in my book.',
       'It just means flag expert. How can I help?', 'Whatever.',
       "Oh no. If I don't impress the boss soon,  I'm gonna get fired!",
       "I'll be in the collection room. Come find me when you're ready to check out the artifact.",
       'What is it, Teddy?', "Teddy! I'm sure glad to see you.",
       "It's lucky we found her.", "It's for the flag display!",
       "He's a badger!", "I'll go look at everyone's pictures!",
       'Wait a minute...', "Gah. I can't believe this.",
       "Jolie! I was hoping you'd stop by. Any news on the flag artifact?",
       "I can't believe this.", 'Good idea. Thanks!', 'A vexy-wha?',
       "Fine, fine. Let's see...",
       'FINE. That possum better not scratch my leather seats...',
       "Teddy's helping too.",
       'Your flag must have been part of a national movement!',
       'We just have to keep our eyes open!', "Hmm. Let's see...",
       'I need to learn more about this flag!',
       "Looks like it's not a deer hoof.",
       "We're just looking for photos for the flag display.",
       "YOU'RE the new history detective everybody's talking about?",
       'Can I ride with you?', 'What now, kid?', 'Got one!',
       'How can I find out whose glasses these are?',
       "I'm investigating this symbol.", 'Badgers? No.',
       '\\u00f0\\u0178\\u00a6\\u2014', 'Yeah. Thanks anyway.',
       "Teddy! I'm glad to see you.", 'Any ideas?',
       "Come on, kid. Let's go.",
       "You haven't seen any badgers around here, have you?",
       'Okay. Thanks anyway.', "We're looking for some photos.",
       'Something to do with ecology and Wisconsin.',
       "Ha! I don't need your help.",
       'Her beak is stuck in a coffee cup.',
       'My friend is a flag expert.',
       "Check out the archives. They've got tons of old photos!",
       "I haven't quite figured it out just yet...",
       '\\u00f0\\u0178\\u00a7\\u02dc',
       "I need to get her free. She won't hold still!",
       'And this place is dirty, and itchy, and-',
       'The Stacks are right outside the door. Go find some photos!',
       "Just please, don't let your badger eat them!",
       "I'm sure they'll be able to help.",
       "He says he'd be willing to help out.",
       'And we still need to figure out that flag!',
       "Of course you do. You've got a rodent following you around.",
       '\\u00f0\\u0178\\u02dc\\u0160',
       "Don't worry, he won't! (And he's a badger, by the way.)",
       'Not sure.', 'Wow! You figured it out!',
       "But I hear the museum's got one on the loose!", 'Poor foxes!',
       'Poor badger.', "We'll find Teddy.", 'Hey, Wells...',
       "He's. A. Badger.", 'The boss is gonna love it!!!',
       'Oh no... they got sick from polluted water?',
       '\\u00f0\\u0178\\u2122\\u201e',
       'Actually, I went to school with somebody who LOVES old flags.',
       'Besides, you just ate my last snack.', 'Sure! Give it a try.',
       'Hmm. You could try the Aldo Leopold Wildlife Center.',
       "You've got a million flags here!",
       'I found the flag! Governor Nelson used it on the first Earth Day!',
       'Guess so!',
       "Why don't you go talk to her? I'll let her know you're coming.",
       "Hang on. I'll get you out of there!",
       'Now I just need some old photos, like last time.',
       'Do you know where I can find a deer expert?',
       "If I were you, I'd go to the library and do some digging.",
       'Go take a look!',
       'Hmm... those stripes remind me of the American flag.',
       "I'm a historian, not a zookeeper!",
       "No thanks. I don't need help from kids.",
       'You did it! Thanks, kid.',
       "Oh, cool! I've never seen a badger in real life.",
       'Oh no! What happened to that crane?', "Yes!!! I'm saved!"]


# Room identifiers, one per line for easier diffing. Order is kept as-is.
# NOTE(review): judging by the name, these are the fqid values of
# navigate_click events for the middle level group -- confirm against the
# feature-engineering code that consumes this list.
room_navigate_click_fqid_5_13 = [
    'tunic.capitol_0.hall',
    'tunic.humanecology.frontdesk',
    'tunic.library.microfiche',
    'tunic.kohlcenter.halloffame',
    'tunic.historicalsociety.frontdesk',
    'tunic.historicalsociety.closet_dirty',
    'tunic.historicalsociety.collection',
    'tunic.capitol_1.hall',
    'tunic.historicalsociety.stacks',
    'tunic.historicalsociety.basement',
    'tunic.historicalsociety.entry',
    'tunic.library.frontdesk',
    'tunic.drycleaner.frontdesk',
]
# Room identifiers, one per line for easier diffing. Order is kept as-is.
# NOTE(review): judging by the name, these are the fqid values of
# navigate_click events for the last level group -- confirm against the
# feature-engineering code that consumes this list.
room_navigate_click_fqid_13_18 = [
    'tunic.flaghouse.entry',
    'tunic.humanecology.frontdesk',
    'tunic.historicalsociety.stacks',
    'tunic.drycleaner.frontdesk',
    'tunic.library.frontdesk',
    'tunic.wildlife.center',
    'tunic.historicalsociety.entry',
    'tunic.capitol_2.hall',
    'tunic.historicalsociety.frontdesk',
    'tunic.historicalsociety.collection',
    'tunic.library.microfiche',
    'tunic.historicalsociety.cage',
    'tunic.capitol_1.hall',
    'tunic.historicalsociety.closet_dirty',
    'tunic.kohlcenter.halloffame',
    'tunic.historicalsociety.basement',
    'tunic.historicalsociety.collection_flag',
]


In [20]:
# feature_engineer(df2, grp='5_12', use_extra=True, feature_suffix='')

In [21]:
# feature_engineer(df3, grp='13_18', use_extra=True, feature_suffix='')

In [22]:
# Build the three training feature tables, one per level-group tag
# ('0-4', '5-12', '13-22'). Each frame is passed through feature_engineer and
# then through time_feature (neither is defined in this cell), and the
# resulting shape is printed.
#
# NOTE: df1/df2/df3 are rebound to their processed versions, so this cell is
# not idempotent -- re-running it without first re-creating the input frames
# feeds already-engineered tables back into feature_engineer.
print('################# Feature Engineering ####################')
print()

# Level-group tag '0-4'.
df1 = feature_engineer(df1, grp='0-4', use_extra=True, feature_suffix='')
df1 = time_feature(df1)
print('Train DataFrame 1  ', df1.shape)
# Level-group tag '5-12'.
df2 = feature_engineer(df2, grp='5-12', use_extra=True, feature_suffix='')
df2 = time_feature(df2)
print('Train DataFrame 2  ', df2.shape)
# Level-group tag '13-22'.
df3 = feature_engineer(df3, grp='13-22', use_extra=True, feature_suffix='')
df3 = time_feature(df3)
print('Train DataFrame 3  ', df3.shape)

################# Feature Engineering ####################

Train DataFrame 1   (23562, 2292)
Train DataFrame 2   (23562, 2894)
Train DataFrame 3   (23562, 3022)


In [23]:
def _null_unique_drop_cols(df, null_thresh=0.9):
    """Compute per-column null ratios and the list of columns to discard.

    A column is listed when more than ``null_thresh`` of its rows are null, or
    when it holds exactly one distinct non-null value (``nunique`` ignores
    nulls by default, so a column that is "one value or null" is also listed).

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table to inspect; it is not modified here.
    null_thresh : float, default 0.9
        Columns whose null fraction is strictly greater than this are listed.

    Returns
    -------
    null_ratio : pandas.Series
        Fraction of null rows per column, sorted in descending order.
    drop_cols : list
        High-null columns (in ``null_ratio`` order) followed by single-valued
        columns (in ``df.columns`` order). A column that meets both criteria
        may appear twice.
    """
    null_ratio = df.isnull().sum().sort_values(ascending=False) / len(df)
    drop_cols = list(null_ratio[null_ratio > null_thresh].index)
    # One vectorized pass instead of calling nunique() column by column.
    distinct_counts = df.nunique()
    drop_cols.extend(distinct_counts[distinct_counts == 1].index)
    return null_ratio, drop_cols


# Remove mostly-null and single-valued feature columns from each training table.
print('Null/  Unique Drop')
print(df1.shape , df2.shape, df3.shape)

# null1..3 and drop1..3 stay available as notebook-level names after this cell.
null1, drop1 = _null_unique_drop_cols(df1)
null2, drop2 = _null_unique_drop_cols(df2)
null3, drop3 = _null_unique_drop_cols(df3)

# Drop in place so the existing df1/df2/df3 objects themselves are reduced.
df1.drop(drop1,axis=1,inplace=True)
df2.drop(drop2,axis=1,inplace=True)
df3.drop(drop3,axis=1,inplace=True)
print('--')
print(df1.shape , df2.shape, df3.shape)

Null/  Unique Drop
(23562, 2292) (23562, 2894) (23562, 3022)
--
(23562, 761) (23562, 1723) (23562, 2014)


In [24]:
# Write the three training feature tables to CSV files in the working
# directory and print each table's shape.
#
# The progress labels name the question range each table serves. Per the
# competition's level-group layout, level groups 0-4 / 5-12 / 13-22 correspond
# to questions 1-3 / 4-13 / 14-18, so the labels state those ranges.
print('############## Extract Data ##################')
print()

# Each label is emitted before the write and the shape after it, so the two
# pieces end up on one output line once the CSV has been produced.
print('Training Data 1 Questions from [1-3] shape ', end= '')
df1.to_csv('df1.csv',index=False)
print(df1.shape,'\n')

print('Training Data 2 Questions from [4-13] shape ', end= '')
df2.to_csv('df2.csv',index=False)
print(df2.shape,'\n')

print('Training Data 3 Questions from [14-18] shape ', end= '')
df3.to_csv('df3.csv',index=False)
print(df3.shape,'\n')

############## Extract Data ##################

Training Data 1 Quetions from [1-3] shape (23562, 761) 

Training Data 2 Quetions from [4-12] shape (23562, 1723) 

Training Data 3 Quetions from [13-18] shape (23562, 2014) 

