Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No EntitySet required in loading/saving features #141

Merged
merged 22 commits on May 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/guides/deployment.rst
Expand Up @@ -41,11 +41,11 @@ Now, we can use :meth:`featuretools.save_features` to save a list of features.
Calculating Feature Matrix for New Data
***************************************

We can use :meth:`featuretools.load_features` to read in a list of saved features for our new entity set.
We can use :meth:`featuretools.load_features` to read in a list of saved features to calculate for our new entity set.

.. ipython:: python

saved_features = ft.load_features('feature_definitions', es_test)
saved_features = ft.load_features('feature_definitions')

.. ipython:: python
:suppress:
Expand Down
18 changes: 7 additions & 11 deletions featuretools/computational_backends/calculate_feature_matrix.py
Expand Up @@ -28,8 +28,8 @@
logger = logging.getLogger('featuretools.computational_backend')


def calculate_feature_matrix(features, cutoff_time=None, instance_ids=None,
entities=None, relationships=None, entityset=None,
def calculate_feature_matrix(features, entityset=None, cutoff_time=None, instance_ids=None,
entities=None, relationships=None,
cutoff_time_in_index=False,
training_window=None, approximate=None,
save_progress=None, verbose=False,
Expand All @@ -40,6 +40,9 @@ def calculate_feature_matrix(features, cutoff_time=None, instance_ids=None,
Args:
features (list[PrimitiveBase]): Feature definitions to be calculated.

entityset (EntitySet): An already initialized entityset. Required if `entities` and `relationships`
not provided

cutoff_time (pd.DataFrame or Datetime): Specifies at which time to calculate
the features for each instance. Can either be a DataFrame with
'instance_id' and 'time' columns, DataFrame with the name of the
Expand All @@ -58,9 +61,6 @@ def calculate_feature_matrix(features, cutoff_time=None, instance_ids=None,
between entities. List items are a tuple with the format
(parent entity id, parent variable, child entity id, child variable).

entityset (EntitySet): An already initialized entityset. Required if
entities and relationships are not defined.

cutoff_time_in_index (bool): If True, return a DataFrame with a MultiIndex
where the second index is the cutoff time (first is instance id).
DataFrame will be sorted by (time, instance_id).
Expand Down Expand Up @@ -100,12 +100,7 @@ def calculate_feature_matrix(features, cutoff_time=None, instance_ids=None,
if entities is not None and relationships is not None:
entityset = EntitySet("entityset", entities, relationships)

if entityset is not None:
for f in features:
f.entityset = entityset

entityset = features[0].entityset
target_entity = features[0].entity
target_entity = entityset[features[0].entity.id]
pass_columns = []

if not isinstance(cutoff_time, pd.DataFrame):
Expand Down Expand Up @@ -462,6 +457,7 @@ def approximate_features(features, cutoff_time, window, entityset, backend,

cutoff_time_to_pass.drop_duplicates(inplace=True)
approx_fm = calculate_feature_matrix(approx_features,
entityset,
cutoff_time=cutoff_time_to_pass,
training_window=training_window,
approximate=None,
Expand Down
66 changes: 25 additions & 41 deletions featuretools/entityset/base_entity.py
@@ -1,12 +1,11 @@
from __future__ import print_function

import logging
from builtins import map

import pandas as pd
from past.builtins import basestring

from featuretools import variable_types as vtypes
from featuretools.utils.wrangle import _dataframes_equal

logger = logging.getLogger('featuretools.entityset')

Expand Down Expand Up @@ -105,24 +104,22 @@ def shape(self):
return self.get_shape()

def __eq__(self, other, deep=False):
if not deep:
if isinstance(other, self.__class__):
return self.id == other.id
if self.index != other.index:
return False
else:
if self.index != other.index:
return False
if self.time_index != other.time_index:
return False
if self.secondary_time_index != other.secondary_time_index:
return False
if len(self.variables) != len(other.variables):
if self.time_index != other.time_index:
return False
if self.secondary_time_index != other.secondary_time_index:
return False
if len(self.variables) != len(other.variables):
return False
for v in self.variables:
if v not in other.variables:
return False
for v in self.variables:
if v not in other.variables:
return False
if deep:
if self.indexed_by is None and other.indexed_by is not None:
return False
elif self.indexed_by is not None and other.indexed_by is None:
return False
else:
for v, index_map in self.indexed_by.items():
if v not in other.indexed_by:
Expand All @@ -136,7 +133,18 @@ def __eq__(self, other, deep=False):
# checked for equality, but don't care about the order.
if not set(related) == set(other.indexed_by[v][i]):
return False
return True
if self.last_time_index is None and other.last_time_index is not None:
return False
elif self.last_time_index is not None and other.last_time_index is None:
return False
elif self.last_time_index is not None and other.last_time_index is not None:
if not self.last_time_index.equals(other.last_time_index):
return False

if not _dataframes_equal(self.df, other.df):
return False

return True

def __hash__(self):
    """Hash consistent with ``__eq__``, which compares entities by ``id``.

    The previous implementation returned ``id(self.id)`` — the CPython
    object address of the id attribute — so two entities that compare
    equal could hash differently, breaking dict/set membership.  Hash
    the identifier's value instead.
    """
    return hash(self.id)
Expand Down Expand Up @@ -177,30 +185,6 @@ def show_instance(self, instance_ids):
def get_shape():
raise NotImplementedError()

def head(self, n=10, cutoff_time=None):
    """See first n instances in entity.

    Args:
        n (int) : Number of instances to return.
        cutoff_time (optional) : A single cutoff time applied to every
            instance when computing the row features.  When None, the
            raw head of the backing entityset is returned instead.

    Returns:
        :class:`pd.DataFrame` : Pandas DataFrame
    """
    if cutoff_time is None:
        df = self.entityset.head(self.id, n=n)
    else:
        # Imported lazily — presumably to avoid a circular import at
        # module load time; TODO confirm.
        from featuretools.computational_backends.calculate_feature_matrix import calculate_feature_matrix
        from featuretools.features import Feature

        row = list(map(Feature, self.variables))
        instance_ids = self.entityset.get_top_n_instances(self.id, n)
        # BUG FIX: the original rebound the `cutoff_time` parameter to
        # this DataFrame and then did `cutoff_time['time'] = cutoff_time`,
        # discarding the caller's cutoff value.  Build the cutoff frame
        # under a fresh name so the parameter survives.
        cutoff_df = pd.DataFrame({'instance_id': instance_ids})
        cutoff_df['time'] = cutoff_time
        df = calculate_feature_matrix(row, cutoff_time=cutoff_df)
    return df

@property
def variable_types(self):
return {v.id: type(v) for v in self.variables}
Expand Down
13 changes: 6 additions & 7 deletions featuretools/entityset/base_entityset.py
Expand Up @@ -54,22 +54,21 @@ def __init__(self, id, verbose):
self.time_type = None

def __eq__(self, other, deep=False):
if not deep:
if isinstance(other, type(self)):
return self.id == other.id
return False
if len(self.entity_stores) != len(other.entity_stores):
return False
for eid, e in self.entity_stores.items():
if eid not in other.entity_stores:
return False
if not e.__eq__(other[eid], deep=True):
if not e.__eq__(other[eid], deep=deep):
return False
for r in self.relationships:
for r in other.relationships:
if r not in other.relationships:
return False
return True

def __ne__(self, other, deep=False):
    """Inverse of ``__eq__``; the ``deep`` flag is forwarded unchanged."""
    is_equal = self.__eq__(other, deep=deep)
    return not is_equal

def __getitem__(self, entity_id):
"""Get entity instance from entityset

Expand Down Expand Up @@ -183,7 +182,6 @@ def add_relationship(self, relationship):
# _operations?

# this is a new pair of entities
self.relationships.append(relationship)
child_e = relationship.child_entity
child_v = relationship.child_variable.id
parent_e = relationship.parent_entity
Expand All @@ -197,6 +195,7 @@ def add_relationship(self, relationship):
new_type=vtypes.Index,
convert_data=False)

self.relationships.append(relationship)
self.index_data(relationship)
return self

Expand Down
73 changes: 32 additions & 41 deletions featuretools/entityset/entity.py
Expand Up @@ -57,17 +57,46 @@ def __init__(self, id, df, entityset, variable_types=None, name=None,

"""
assert len(df.columns) == len(set(df.columns)), "Duplicate column names"
self.df = df
self.data = {"df": df,
"last_time_index": last_time_index,
"indexed_by": {}
}
self.encoding = encoding
self.indexed_by = {}
self._verbose = verbose
self.created_index = created_index
self.convert_variable_types(variable_types)
self.attempt_cast_index_to_int(index)
self.last_time_index = last_time_index
super(Entity, self).__init__(id, entityset, variable_types, name, index,
time_index, secondary_time_index, relationships, already_sorted)

@property
def is_metadata(self):
# Read-only delegation to the owning entityset's is_metadata flag.
return self.entityset.is_metadata

@property
def df(self):
# The backing dataframe is kept in the shared self.data dict so that
# all entity state lives in one place.
return self.data["df"]

@df.setter
def df(self, _df):
self.data["df"] = _df

@property
def last_time_index(self):
# Stored in the self.data dict alongside the dataframe.
return self.data["last_time_index"]

@last_time_index.setter
def last_time_index(self, lti):
self.data["last_time_index"] = lti

@property
def indexed_by(self):
# Mapping of index data, kept in the self.data dict (initialized to {}).
return self.data["indexed_by"]

@indexed_by.setter
def indexed_by(self, idx):
self.data["indexed_by"] = idx

def attempt_cast_index_to_int(self, index_var):
dtype_name = self.df[index_var].dtype.name
if (dtype_name.find('int') == -1 and
Expand Down Expand Up @@ -118,41 +147,6 @@ def is_index_column(self, varname):

return False

def head(self, n=10, cutoff_time=None):
    """Return the first ``n`` rows of this entity's dataframe.

    Args:
        n (int) : Number of instances to return.
        cutoff_time (pd.Timestamp,pd.DataFrame) : Timestamp(s) to restrict rows.
            A DataFrame must hold an instance-id column followed by a
            time column, in that order.

    Returns:
        :class:`pd.DataFrame` : A Pandas DataFrame.
    """
    if cutoff_time is None:
        subset = self.df
    elif isinstance(cutoff_time, (pd.Timestamp, datetime)):
        # Single cutoff: keep only rows strictly before it.
        subset = self.df[self.df[self.time_index] < cutoff_time]
    elif isinstance(cutoff_time, pd.DataFrame):
        id_col, time_col = list(cutoff_time)
        # TODO filtering the top n during "isin" would be more efficient
        wanted = self.df[self.index].isin(cutoff_time[id_col])
        subset = self.df[wanted]
        subset = subset[subset[self.time_index] < cutoff_time[time_col]]
    else:
        raise ValueError(
            'cutoff_time must be None, a Datetime, a pd.Timestamp, or a pd.DataFrame')
    return subset.head(n)

def get_column_type(self, column_id):
    """Return the dtype name of ``column_id`` in the backing dataframe."""
    column = self.df[column_id]
    return column.dtype.name
Expand Down Expand Up @@ -573,9 +567,6 @@ def set_secondary_time_index(self, secondary_time_index):

super(Entity, self).set_secondary_time_index(secondary_time_index)

def set_last_time_index(self, last_time_index):
# Simple setter: assigns through the instance's last_time_index
# attribute (a property on Entity that stores into self.data).
self.last_time_index = last_time_index

def _vals_to_series(self, instance_vals, variable_id):
"""
instance_vals may be a pd.Dataframe, a pd.Series, a list, a single
Expand Down