-
Notifications
You must be signed in to change notification settings - Fork 397
/
Copy pathtest_feature_names.py
96 lines (87 loc) · 3.65 KB
/
test_feature_names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Tests for the feature names of the encoders."""
from unittest import TestCase
import category_encoders as encoders
import numpy as np
import pandas as pd
import sklearn
from numpy.testing import assert_array_equal
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import tests.helpers as th
__author__ = 'JaimeArboleda'
# Data definitions shared by the tests in this module.
# Column groups are declared first so the dataset can be narrowed in one step.
cat_columns = ['categorical', 'na_categorical']
num_columns = ['float']

# Keep only the categorical and numerical columns the tests select by name.
# NOTE(review): assumes th.create_dataset returns a pandas DataFrame that
# contains these columns -- confirm against tests/helpers.
X = th.create_dataset(n_rows=100)[cat_columns + num_columns]

# Random boolean target with one entry per row of X (unseeded, so values
# differ between runs).
np_y = np.random.randn(X.shape[0]) > 0.5
y = pd.DataFrame(np_y)
class TestEncodersFeaturesOut(TestCase):
    """Tests for the feature names of the encoders."""

    @staticmethod
    def _major_minor(version):
        """Return the leading (major, minor) numbers of a version string.

        Only the first two dot-separated components are used and any
        non-digit suffix (e.g. ``rc1`` or ``dev0``) is dropped, so
        ``'1.10.0'`` gives ``(1, 10)`` and ``'1.2rc1'`` gives ``(1, 2)``.
        A component without leading digits counts as 0.
        """
        release = []
        for component in version.split('.')[:2]:
            digits = ''
            for char in component:
                if not char.isdigit():
                    break
                digits += char
            release.append(int(digits) if digits else 0)
        return tuple(release)

    @staticmethod
    def _names_under_prefix(columns, transformer_name):
        """Return the columns prefixed with ``<transformer_name>__``, prefix removed.

        The result is a numpy array so it can be passed straight to
        ``assert_array_equal``.
        """
        prefix = transformer_name + '__'
        return np.array([c[len(prefix):] for c in columns if c.startswith(prefix)])

    def test_feature_names_out(self):
        """Test the feature names out of the encoders.

        For every encoder listed in ``encoders.__all__`` the test checks that:

        * the columns of the transformed frame equal ``get_feature_names_out()``;
        * the same holds for a ``ColumnTransformer`` that wraps the encoder;
        * the encoder's names are unchanged (apart from the transformer prefix)
          when it is the first step of a pipeline and when it follows an imputer.

        The test is skipped when scikit-learn is older than 1.2.
        """
        # Compare numerically: as plain strings '1.10.0' sorts before '1.2.0',
        # which would wrongly disable the test on newer releases.
        if self._major_minor(sklearn.__version__) < (1, 2):
            self.skipTest('transform_output="pandas" requires scikit-learn >= 1.2')

        for encoder_name in encoders.__all__:
            with self.subTest(encoder_name=encoder_name):
                # config_context restores the previous global configuration on
                # exit, even when an assertion inside the block fails.
                with sklearn.config_context(transform_output='pandas'):
                    encoder_cls = getattr(encoders, encoder_name)

                    # Reference: the encoder applied directly to the data.
                    encoder = encoder_cls()
                    X_t = encoder.fit_transform(X, y)

                    # Encoder as the first (and only) step of a pipeline.
                    categorical_preprocessor_start = Pipeline(
                        steps=[('encoder', encoder_cls())]
                    )
                    # Encoder placed after another transformer in a pipeline.
                    categorical_preprocessor_middle = Pipeline(
                        steps=[
                            (
                                'imputation_constant',
                                SimpleImputer(fill_value='missing', strategy='constant'),
                            ),
                            ('encoder', encoder_cls()),
                        ]
                    )
                    numerical_preprocessor = Pipeline(
                        steps=[
                            (
                                'imputation_constant',
                                SimpleImputer(fill_value=0, strategy='constant'),
                            )
                        ]
                    )
                    preprocessor = ColumnTransformer(
                        [
                            (
                                'categorical_prep_start',
                                categorical_preprocessor_start,
                                ['categorical', 'na_categorical'],
                            ),
                            (
                                'categorical_prep_middle',
                                categorical_preprocessor_middle,
                                ['categorical', 'na_categorical'],
                            ),
                            ('numerical_prep', numerical_preprocessor, ['float']),
                        ]
                    )
                    X_tt = preprocessor.fit_transform(X, y)

                    assert_array_equal(
                        np.array(X_t.columns), encoder.get_feature_names_out()
                    )
                    assert_array_equal(
                        np.array(X_tt.columns), preprocessor.get_feature_names_out()
                    )

                    # Names produced by the stand-alone encoder, minus the
                    # numerical columns (those go through a separate branch of
                    # the ColumnTransformer).
                    expected = np.array(
                        [c for c in X_t.columns if c not in num_columns]
                    )
                    for transformer_name in (
                        'categorical_prep_start',
                        'categorical_prep_middle',
                    ):
                        assert_array_equal(
                            expected,
                            self._names_under_prefix(X_tt.columns, transformer_name),
                        )