-
Notifications
You must be signed in to change notification settings - Fork 83
/
cat_imputer.py
121 lines (109 loc) · 5.18 KB
/
cat_imputer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Copyright 2020 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import autoai_libs.transformers.exportable
import lale.docstrings
import lale.operators
import numpy as np
class CatImputerImpl:
    """Thin wrapper that delegates to ``autoai_libs.transformers.exportable.CatImputer``.

    The constructor arguments are kept in ``self._hyperparams`` and forwarded
    unchanged, as keyword arguments, to the wrapped transformer, which is
    stored in ``self._wrapped_model``.
    """

    def __init__(self, strategy, missing_values, sklearn_version_family, activate_flag):
        """Record the hyperparameters and build the wrapped transformer."""
        self._hyperparams = dict(
            strategy=strategy,
            missing_values=missing_values,
            sklearn_version_family=sklearn_version_family,
            activate_flag=activate_flag,
        )
        imputer_cls = autoai_libs.transformers.exportable.CatImputer
        self._wrapped_model = imputer_cls(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped transformer on ``X`` and ``y``; return ``self``."""
        self._wrapped_model.fit(X, y)
        return self

    def transform(self, X):
        """Return the result of the wrapped transformer's ``transform(X)``."""
        transformed = self._wrapped_model.transform(X)
        return transformed
# JSON schema describing the constructor arguments of CatImputerImpl.
# All four arguments are required; only `strategy` is listed as relevant to
# the optimizer.
_hyperparams_schema = {
    "allOf": [
        {
            "description": "This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.",
            "type": "object",
            "additionalProperties": False,
            "required": [
                "strategy",
                "missing_values",
                "sklearn_version_family",
                "activate_flag",
            ],
            "relevantToOptimizer": ["strategy"],
            "properties": {
                "strategy": {
                    "description": "The imputation strategy.",
                    "anyOf": [
                        {
                            "enum": ["mean"],
                            "description": "Replace using the mean along each column. Can only be used with numeric data.",
                        },
                        {
                            "enum": ["median"],
                            "description": "Replace using the median along each column. Can only be used with numeric data.",
                        },
                        {
                            "enum": ["most_frequent"],
                            "description": "Replace using most frequent value each column. Used with strings or numeric data.",
                        },
                        {
                            "enum": ["constant"],
                            "description": "Replace with fill_value. Can be used with strings or numeric data.",
                        },
                    ],
                    "default": "mean",
                },
                "missing_values": {
                    "description": "The placeholder for the missing values. All occurrences of missing_values will be imputed.",
                    # Accepts a number, a string, NaN, or None.
                    "anyOf": [
                        {"type": "number"},
                        {"type": "string"},
                        {"enum": [np.nan]},
                        {"enum": [None]},
                    ],
                    "default": np.nan,
                },
                "sklearn_version_family": {
                    # NOTE(review): "compatibiity" is misspelled in this runtime
                    # description; it is left unchanged here because the string
                    # is emitted as-is.
                    "description": "The sklearn version for backward compatibiity with versions 019 and 020dev. Currently unused.",
                    "enum": ["20", None],
                    "default": None,
                },
                "activate_flag": {
                    "description": "If False, transform(X) outputs the input numpy array X unmodified.",
                    "type": "boolean",
                    "default": True,
                },
            },
        }
    ]
}
# JSON schema for the arguments of fit(): `X` is required, `y` is optional
# and unconstrained; no other arguments are allowed.
_input_fit_schema = {
    "type": "object",
    "required": ["X"],
    "additionalProperties": False,
    "properties": {
        "X": {
            # Either a flat (1-D) array or an array of arrays (2-D).
            "anyOf": [
                {"type": "array", "items": {"laleType": "Any"}},
                {
                    "type": "array",
                    "items": {"type": "array", "items": {"laleType": "Any"}},
                },
            ]
        },
        "y": {"laleType": "Any"},
    },
}
# JSON schema for the arguments of transform(): only `X` is accepted, and it
# is required.
_input_transform_schema = {
    "type": "object",
    "required": ["X"],
    "additionalProperties": False,
    "properties": {
        "X": {
            # Either a flat (1-D) array or an array of arrays (2-D).
            "anyOf": [
                {"type": "array", "items": {"laleType": "Any"}},
                {
                    "type": "array",
                    "items": {"type": "array", "items": {"laleType": "Any"}},
                },
            ]
        }
    },
}
# JSON schema for the value returned by transform(): a flat array or an
# array of arrays, with unconstrained element types.
_output_transform_schema = {
    "description": "Features; the outer array is over samples.",
    "anyOf": [
        {"type": "array", "items": {"laleType": "Any"}},
        {
            "type": "array",
            "items": {"type": "array", "items": {"laleType": "Any"}},
        },
    ],
}
# Schema bundle for the operator: general metadata (description,
# documentation URL, tags) plus the hyperparameter and input/output schemas
# defined earlier in this module.
_combined_schemas = {
    "$schema": "http://json-schema.org/draft-04/schema#",
    # The continuation lines of this string must stay at column 0 so that the
    # runtime text is unchanged.
    "description": """Operator from `autoai_libs`_. Missing value imputation for categorical features, currently internally uses the sklearn SimpleImputer_.
.. _`autoai_libs`: https://pypi.org/project/autoai-libs
.. _SimpleImputer: https://scikit-learn.org/0.20/modules/generated/sklearn.impute.SimpleImputer.html#sklearn-impute-simpleimputer""",
    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.autoai_libs.cat_imputer.html",
    "type": "object",
    "tags": {
        "pre": [],
        "op": ["transformer"],
        "post": [],
    },
    "properties": {
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_fit_schema,
        "input_transform": _input_transform_schema,
        "output_transform": _output_transform_schema,
    },
}
# Hand the implementation class and its schema bundle to lale's docstring
# helper; presumably this generates the class/method docstrings from the
# schemas -- TODO confirm against lale.docstrings.
lale.docstrings.set_docstrings(CatImputerImpl, _combined_schemas)
# Module-level operator built by lale from the implementation class and the
# same schema bundle. NOTE(review): kept after set_docstrings, as in the
# original; whether the order matters is not visible from this file.
CatImputer = lale.operators.make_operator(CatImputerImpl, _combined_schemas)