Skip to content

Commit

Permalink
Removed EntitySet._import_from_dataframe
Browse files: browse the repository at this point in the history
  • Loading branch information
Jeff Hernandez committed Dec 12, 2018
1 parent 9ed8ca9 commit cd07481
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 63 deletions.
82 changes: 24 additions & 58 deletions featuretools/entityset/entityset.py
Expand Up @@ -636,6 +636,7 @@ def entity_from_dataframe(self,
make_index=False,
time_index=None,
secondary_time_index=None,
last_time_index=None,
already_sorted=False):
"""
Load the data for a specified entity from a Pandas DataFrame.
Expand Down Expand Up @@ -690,12 +691,21 @@ def entity_from_dataframe(self,
es["transactions"].df
"""
return self._import_from_dataframe(entity_id, dataframe.copy(), index=index,
make_index=make_index,
time_index=time_index,
secondary_time_index=secondary_time_index,
variable_types=variable_types,
already_sorted=already_sorted)
variable_types = variable_types or {}
entity = Entity(
entity_id,
dataframe,
self,
variable_types=variable_types,
index=index,
time_index=time_index,
secondary_time_index=secondary_time_index,
last_time_index=last_time_index,
already_sorted=already_sorted,
make_index=make_index)
self.entity_dict[entity.id] = entity
self.reset_metadata()
return self

def normalize_entity(self, base_entity_id, new_entity_id, index,
additional_variables=None, copy_variables=None,
Expand Down Expand Up @@ -824,12 +834,14 @@ def normalize_entity(self, base_entity_id, new_entity_id, index,
ti_cols = [c if c != old_ti_name else secondary_time_index for c in ti_cols]
make_secondary_time_index = {secondary_time_index: ti_cols}

self._import_from_dataframe(new_entity_id, new_entity_df,
index,
time_index=new_entity_time_index,
secondary_time_index=make_secondary_time_index,
last_time_index=None,
variable_types=transfer_types)
self.entity_from_dataframe(
new_entity_id,
new_entity_df,
index,
time_index=new_entity_time_index,
secondary_time_index=make_secondary_time_index,
last_time_index=None,
variable_types=transfer_types)

for v in additional_variables:
self.entity_dict[base_entity_id].delete_variable(v)
Expand Down Expand Up @@ -1083,52 +1095,6 @@ def related_instances(self, start_entity_id, final_entity_id,
# Private methods ######################################################
###########################################################################

def _import_from_dataframe(
        self,
        entity_id,
        dataframe,
        index=None,
        variable_types=None,
        make_index=False,
        time_index=None,
        secondary_time_index=None,
        last_time_index=None,
        already_sorted=False):
    """
    Build an Entity from a pandas dataframe and register it on this entity set.

    Every argument is handed to the ``Entity`` constructor; the resulting
    entity is stored in ``self.entity_dict`` under its own ``id`` and the
    entity set's metadata is reset afterwards.

    Args:
        entity_id (str) : Unique id to associate with this entity.
        dataframe (pd.DataFrame) : Pandas dataframe containing the data.
        index (str, optional) : Name of the variable used to index the entity.
            If None, take the first column.
        variable_types (dict, optional) : Variable types with which to
            initialize the entity. An empty dict is used when this is
            None or empty.
        make_index (bool, optional) : If True, assume index does not exist as
            a column in dataframe, and create a new column of that name using
            integers in the range (0, len(dataframe)). Otherwise, assume
            index exists in dataframe.
        time_index (str, optional) : Name of column to use as a time index
            for this entity. Must be a Datetime or Numeric dtype.
        secondary_time_index (optional) : Secondary time index information
            for the entity; forwarded unchanged to ``Entity``.
        last_time_index (optional) : Forwarded unchanged to ``Entity``.
        already_sorted (bool, optional) : If True, assumes that input
            dataframe is already sorted by time. Defaults to False.

    Returns:
        This entity set (``self``).
    """
    # Fall back to a fresh dict so Entity never receives None (or another
    # falsy value) for its variable types.
    if not variable_types:
        variable_types = {}

    new_entity = Entity(
        entity_id,
        dataframe,
        self,
        variable_types=variable_types,
        index=index,
        time_index=time_index,
        secondary_time_index=secondary_time_index,
        last_time_index=last_time_index,
        already_sorted=already_sorted,
        make_index=make_index,
    )

    # Register under the id reported by the entity itself, then reset the
    # entity set's metadata now that its entities have changed.
    self.entity_dict[new_entity.id] = new_entity
    self.reset_metadata()
    return self

def _add_multigenerational_link_vars(self, frames, start_entity_id,
end_entity_id=None, path=None):
"""
Expand Down
16 changes: 11 additions & 5 deletions featuretools/tests/entityset_tests/test_es.py
Expand Up @@ -322,9 +322,12 @@ def test_converts_datetime():
'time': variable_types.Datetime}

entityset = EntitySet(id='test')
entityset._import_from_dataframe(entity_id='test_entity', index='id',
time_index="time", variable_types=vtypes,
dataframe=df)
entityset.entity_from_dataframe(
entity_id='test_entity',
index='id',
time_index="time",
variable_types=vtypes,
dataframe=df)
pd_col = entityset['test_entity'].df['time']
# assert type(entityset['test_entity']['time']) == variable_types.Datetime
assert type(pd_col[0]) == pd.Timestamp
Expand All @@ -343,8 +346,11 @@ def test_handles_datetime_format():
'time_no_format': variable_types.Datetime}

entityset = EntitySet(id='test')
entityset._import_from_dataframe(entity_id='test_entity', index='id',
variable_types=vtypes, dataframe=df)
entityset.entity_from_dataframe(
entity_id='test_entity',
index='id',
variable_types=vtypes,
dataframe=df)

col_format = entityset['test_entity'].df['time_format']
col_no_format = entityset['test_entity'].df['time_no_format']
Expand Down

0 comments on commit cd07481

Please sign in to comment.