In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import os
import gc
import pickle

from IPython.core.debugger import set_trace

from tqdm import tqdm
from sklearn import preprocessing

import lightgbm
import xgboost
from sklearn.ensemble import RandomForestClassifier 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.linear_model import LogisticRegression


import catboost
import random
random.seed(20)

# Install tsflex and seglearn
!pip install tsflex --no-index --find-links=file:///kaggle/input/tsflex
!pip install seglearn --no-index --find-links=file:///kaggle/input/segalearn


from seglearn.feature_functions import base_features, emg_features

from tsflex.features import FeatureCollection, MultipleFeatureDescriptors
from tsflex.features.integrations import seglearn_feature_dict_wrapper

Looking in links: file:///kaggle/input/tsflex
Processing /kaggle/input/tsflex/tsflex-0.3.0-py3-none-any.whl
Installing collected packages: tsflex
Successfully installed tsflex-0.3.0
[0mLooking in links: file:///kaggle/input/segalearn
Processing /kaggle/input/segalearn/seglearn-1.2.5-py3-none-any.whl
Installing collected packages: seglearn
Successfully installed seglearn-1.2.5
[0m

In [2]:
# Gather every column name from the two train_td.pkl frames, keeping column
# order (first frame's columns, then the second frame's).
all_feats = list(pd.read_pickle("/kaggle/input/make-data-v9-tsflex-td/train_td.pkl").columns)
all_feats.extend(pd.read_pickle("/kaggle/input/make-data-v9-td/train_td.pkl").columns)

# Those columns must not be in the black list.
# A name that exists in both frames appears twice in all_feats, and
# list.remove() only deletes the first match, so filter out *every*
# occurrence instead of removing names one at a time.
non_feature_cols = {'target', 'file', 'id', 'Subject', 'Valid', 'Task'}
all_feats = [c for c in all_feats if c not in non_feature_cols]

In [3]:
def get_black_white_list(all_feats, models, always_keep=("Subject", "Visit", "id")):
    """Split candidate features into those used by any model and those unused.

    Parameters
    ----------
    all_feats : iterable of str
        Candidate feature names. Their order determines the order of the
        returned black list.
    models : dict
        Maps a model name to a dict holding the fitted estimator under the
        ``'model'`` key. The model name selects which attribute is read for
        the feature names:

        * contains ``"xgboost"``          -> ``model.get_booster().feature_names``
        * contains ``"lgbm"``             -> ``model.feature_name_``
        * contains ``"rf"``/``"adaboost"`` -> ``model.feature_names_in_``
        * anything else                   -> ``model.feature_names_``
    always_keep : iterable of str, optional
        Column names that are white-listed regardless of which models are
        supplied. Defaults to ``("Subject", "Visit", "id")``.

    Returns
    -------
    dict
        ``{"white_list": set, "black_list": list}`` where the white list is
        the union of ``always_keep`` and every model's feature names, and the
        black list contains the entries of ``all_feats`` not in the white list.
    """
    # Seed the white list up front so the protected columns are present even
    # when `models` is empty (previously they were only added inside the loop).
    white_list = set(always_keep)

    for model_name, entry in models.items():
        model = entry['model']
        # The checks are substring matches and their order matters: the first
        # matching branch wins.
        if "xgboost" in model_name:
            model_cols = model.get_booster().feature_names
        elif "lgbm" in model_name:
            model_cols = model.feature_name_
        elif "rf" in model_name or "adaboost" in model_name:
            model_cols = model.feature_names_in_
        else:
            model_cols = model.feature_names_
        white_list.update(model_cols)

    return {
        "white_list": white_list,
        "black_list": [c for c in all_feats if c not in white_list],
    }

In [4]:
def _load_model_dict(algo, tsflex=False):
    """Load the pickled model dict for `algo` from its Kaggle input dataset.

    The path follows the pattern
    ``/kaggle/input/make-final-model-full-<algo>-v9[-tsflex]/model_dict_<algo>_full.pkl``.
    The file is opened in a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    variant = "-tsflex" if tsflex else ""
    path = f"/kaggle/input/make-final-model-full-{algo}-v9{variant}/model_dict_{algo}_full.pkl"
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at model files you produced or otherwise trust.
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Final models from the "-tsflex" datasets.
catboost_tsflex_model = _load_model_dict("catboost", tsflex=True)
xgboost_tsflex_model = _load_model_dict("xgboost", tsflex=True)
lgbm_tsflex_model = _load_model_dict("lgbm", tsflex=True)
rf_tsflex_model = _load_model_dict("rf", tsflex=True)
adaboost_tsflex_model = _load_model_dict("adaboost", tsflex=True)

# Final models from the datasets without the "-tsflex" suffix.
catboost_model = _load_model_dict("catboost")
xgboost_model = _load_model_dict("xgboost")
lgbm_model = _load_model_dict("lgbm")
rf_model = _load_model_dict("rf")
adaboost_model = _load_model_dict("adaboost")

# Compute the white/black lists across all ten final models. The dictionary
# keys matter: get_black_white_list picks the feature-name attribute to read
# from substrings of each key ("xgboost", "lgbm", "rf", "adaboost").
lists = get_black_white_list(
    all_feats,
    {
        "xgboost_tsflex": xgboost_tsflex_model,
        "lgbm_tsflex": lgbm_tsflex_model,
        "rf_tsflex": rf_tsflex_model,
        "catboost_tsflex": catboost_tsflex_model,
        "adaboost_tsflex": adaboost_tsflex_model,
        "catboost": catboost_model,
        "xgboost": xgboost_model,
        "lgbm": lgbm_model,
        "rf": rf_model,
        "adaboost": adaboost_model,
    },
)

# Unpack both results for the cells that follow.
black_list, white_list = lists["black_list"], lists["white_list"]

In [5]:
# Write both lists to the working directory. The `with` blocks guarantee the
# files are flushed and closed as soon as each dump finishes, rather than
# whenever the anonymous file object happens to be garbage-collected.
with open("black_list.pkl", "wb") as fh:
    pickle.dump(black_list, fh)
with open("white_list.pkl", "wb") as fh:
    pickle.dump(white_list, fh)