Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle return type when creating features from Id variables #318

Merged
merged 7 commits into from Nov 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 13 additions & 2 deletions featuretools/primitives/primitive_base.py
Expand Up @@ -12,8 +12,10 @@
_check_timedelta
)
from featuretools.variable_types import (
Categorical,
Datetime,
DatetimeTimeIndex,
Id,
Numeric,
NumericTimeIndex,
Variable
Expand Down Expand Up @@ -100,12 +102,14 @@ def entity(self):
# P TODO: this should get refactored to return_type
@property
def variable_type(self):
from . import direct_feature
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move this import up to the top?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

direct_feature imports this file, so we have to include this here to avoid the circular reference.

in #326, the directory structure changes things around and actually moves DirectFeature into this file, so we'll likely be able to update this when that PR gets merged in.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense

feature = self
return_type = self.return_type

while return_type is None:
feature = feature.base_features[0]
return_type = feature.return_type
# get return type of first base feature
base_feature = feature.base_features[0]
return_type = base_feature.return_type

# only the original time index should exist
# so make this feature's return type just a Datetime
Expand All @@ -114,6 +118,13 @@ def variable_type(self):
elif return_type == NumericTimeIndex:
return_type = Numeric

# direct features should keep the Id return type, but all other features should get
# converted to Categorical
if not isinstance(feature, direct_feature.DirectFeature) and return_type == Id:
return_type = Categorical

feature = base_feature

return return_type

@property
Expand Down
16 changes: 15 additions & 1 deletion featuretools/tests/primitive_tests/test_primitive_base.py
Expand Up @@ -4,7 +4,7 @@
from ..testing_utils import make_ecommerce_entityset

from featuretools.primitives import Feature, IdentityFeature, Last, Mode, Sum
from featuretools.variable_types import Datetime, Numeric
from featuretools.variable_types import Categorical, Datetime, Id, Numeric


@pytest.fixture(scope='module')
Expand Down Expand Up @@ -106,3 +106,17 @@ def test_return_type_inference_datetime_time_index(es):
def test_return_type_inference_numeric_time_index(es_numeric):
last = Last(es_numeric["log"]["datetime"], es_numeric["customers"])
assert last.variable_type == Numeric


def test_return_type_inference_id(es):
# direct features should keep Id variable type
direct_id_feature = Feature(es["sessions"]["customer_id"], es["log"])
assert direct_id_feature.variable_type == Id

# aggregations of Id variable types should get converted
mode = Mode(es["log"]["session_id"], es["customers"])
assert mode.variable_type == Categorical

# also test direct feature of aggregation
mode_direct = Feature(mode, es["sessions"])
assert mode_direct.variable_type == Categorical