### Setup

In [None]:
import numpy as np
import pandas as pd
import sklearn as sk
import seaborn as sns
import matplotlib.pyplot as plt

from catboost import CatBoostClassifier, Pool
from catboost.eval.catboost_evaluation import *

In [None]:
# Notebook-wide constants.
DROPPABLE_COLUMNS = ["Unnamed: 0",]  # columns removed from the frame after loading
TEST_SIZE = 0.20  # NOTE(review): not referenced in the cells visible here
RANDOM = 44       # NOTE(review): same value as `random_seed` in the evaluation config

# Load the raw table and keep it under its own name so this cell can be
# re-run without depending on an already-transformed `df`.
raw_df = pd.read_csv("./data/superset.csv")

# Use the "Unnamed: 0" column as the row index, then drop it from the columns.
# `set_index(..., drop=False)` yields a plain single-level Index; passing a
# list that wraps a Series to `set_axis` makes pandas build a one-level
# MultiIndex instead. `drop=False` leaves the column in place so the
# subsequent `drop(columns=DROPPABLE_COLUMNS)` still finds it.
df = (raw_df
    .set_index("Unnamed: 0", drop=False)
    .drop(columns=DROPPABLE_COLUMNS)
)

# Encode every distinct Subgroup value as an integer code, numbered in order
# of first appearance (the order returned by `unique()`).
df["Subgroup"] = df["Subgroup"].replace(
    {cat: num for num, cat in enumerate(df["Subgroup"].unique())})

In [None]:
# Persist the prepared frame as a bare CSV: neither the header row nor the
# index is written, so the file contains data values only.
df.to_csv(
    "./data/feature_eval.csv",
    index=False,
    header=False,
)

In [None]:
from catboost.utils import create_cd

In [None]:
# Number every column after the first one, starting at 0, and remember its name.
feature_names = dict(enumerate(df.columns[1:]))

# Write the column-description file: column 0 is declared as the label and
# every remaining column index is declared categorical.
create_cd(
    label=0,
    cat_features=[*range(1, df.shape[1])],
    feature_names=feature_names,
    output_path='./data/train.cd'
)

### Evaluation

In [None]:
# Settings handed to the feature evaluator further down.
fold_size = 50
fold_offset = 0
folds_count = 20
random_seed = 44

# Headerless CSV written by the export cell.
train_file = "./data/feature_eval.csv"

# Training configuration passed as `learn_config` to the evaluator.
learn_params = dict(
    iterations=2000,
    random_seed=random_seed,
    # logging_level='Silent',
    loss_function='MultiClass',
    # Optional: run the learning process on a GPU instead.
    # devices='1',
    # task_type='GPU',
    boosting_type='Plain',
    # Training time matters for feature evaluation and only the relative
    # quality is needed.
    max_ctr_complexity=4,
)

In [None]:
# Evaluate every key of `feature_names` except the first one.
# NOTE(review): the original author's remark was that CatBoost apparently does
# not exclude the label column from these indices, hence the skipped entry.
# Key 0 of `feature_names` already refers to the second DataFrame column, so
# confirm that dropping it here is intended.
features_to_evaluate = list(feature_names)[1:]

# Column-description file produced by `create_cd`.
description_file = "./data/train.cd"

In [None]:
# Build the evaluator over the exported comma-separated file, using the
# fold settings, column description and partition seed configured earlier.
evaluator = CatboostEvaluation(
    train_file,
    fold_size,
    folds_count,
    column_description=description_file,
    delimiter=',',
    partition_random_seed=random_seed,
)

In [None]:
# Run the feature evaluation for the selected features, recording both the
# MultiClass and Accuracy metrics.
# NOTE(review): with 'logging_level' commented out in `learn_params`, this
# cell prints a per-iteration training log for every fit.
result = evaluator.eval_features(
    features_to_eval=features_to_evaluate,
    learn_config=learn_params,
    eval_metrics=["MultiClass", "Accuracy"],
)

1878:	learn: 0.1450345	total: 3.86s	remaining: 248ms
1879:	learn: 0.1450296	total: 3.86s	remaining: 246ms
1880:	learn: 0.1450158	total: 3.86s	remaining: 244ms
1881:	learn: 0.1450129	total: 3.86s	remaining: 242ms
1882:	learn: 0.1450064	total: 3.86s	remaining: 240ms
1883:	learn: 0.1450000	total: 3.87s	remaining: 238ms
1884:	learn: 0.1449952	total: 3.87s	remaining: 236ms
1885:	learn: 0.1449883	total: 3.87s	remaining: 234ms
1886:	learn: 0.1449834	total: 3.87s	remaining: 232ms
1887:	learn: 0.1449781	total: 3.87s	remaining: 230ms
1888:	learn: 0.1449701	total: 3.87s	remaining: 228ms
1889:	learn: 0.1449679	total: 3.88s	remaining: 226ms
1890:	learn: 0.1449621	total: 3.88s	remaining: 223ms
1891:	learn: 0.1449570	total: 3.88s	remaining: 221ms
1892:	learn: 0.1449425	total: 3.88s	remaining: 219ms
1893:	learn: 0.1449375	total: 3.88s	remaining: 217ms
1894:	learn: 0.1449316	total: 3.88s	remaining: 215ms
1895:	learn: 0.1449248	total: 3.88s	remaining: 213ms
1896:	learn: 0.1449210	total: 3.91s	remaining:

1001:	learn: 0.0490460	total: 2.01s	remaining: 2s
1002:	learn: 0.0490324	total: 2.01s	remaining: 2s
1003:	learn: 0.0487330	total: 2.02s	remaining: 2s
1004:	learn: 0.0487197	total: 2.02s	remaining: 2s
1005:	learn: 0.0486930	total: 2.02s	remaining: 1.99s
1006:	learn: 0.0486783	total: 2.02s	remaining: 1.99s
1007:	learn: 0.0486705	total: 2.02s	remaining: 1.99s
1008:	learn: 0.0486450	total: 2.02s	remaining: 1.99s
1009:	learn: 0.0486080	total: 2.02s	remaining: 1.98s
1010:	learn: 0.0485899	total: 2.03s	remaining: 1.98s
1011:	learn: 0.0485658	total: 2.03s	remaining: 1.98s
1012:	learn: 0.0483130	total: 2.03s	remaining: 1.98s
1013:	learn: 0.0482688	total: 2.03s	remaining: 1.98s
1014:	learn: 0.0482512	total: 2.03s	remaining: 1.97s
1015:	learn: 0.0482361	total: 2.03s	remaining: 1.97s
1016:	learn: 0.0482122	total: 2.04s	remaining: 1.97s
1017:	learn: 0.0481769	total: 2.04s	remaining: 1.97s
1018:	learn: 0.0481568	total: 2.04s	remaining: 1.96s
1019:	learn: 0.0481391	total: 2.04s	remaining: 1.96s
1020:

KernelInterrupted: Execution interrupted by the Jupyter kernel.

KernelInterrupted: Execution interrupted by the Jupyter kernel.

In [None]:
# Pull the results recorded for the "MultiClass" metric out of the evaluation result.
metrics = result.get_metric_results("MultiClass")

In [None]:
# Show the baseline comparison for the selected metric (the cell's last
# expression, so its value is what the notebook displays).
metrics.get_baseline_comparison()

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c915e4f9-60c2-40b5-a522-8a90cb3fd50a' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>