Fixed #297: update tests to check error strings

alteryx · Nov 1, 2018 · fca7ada
1 parent 32cec5e
commit fca7ada
Show file tree

Hide file tree

Showing 12 changed files with 103 additions and 76 deletions.
diff --git a/.gitignore b/.gitignore
@@ -125,3 +125,6 @@ ENV/
 *.pickle
 
 .pytest_cache
+
+#IDE
+.vscode
diff --git a/featuretools/tests/computational_backend/test_calculate_feature_matrix.py b/featuretools/tests/computational_backend/test_calculate_feature_matrix.py
@@ -68,27 +68,30 @@ def test_calc_feature_matrix(entityset):
                                               verbose=True)
 
     assert (feature_matrix == labels).values.all()
-
-    with pytest.raises(AssertionError):
+    error_text = 'features must be a non-empty list of features'
+    with pytest.raises(AssertionError, match=error_text):
         feature_matrix = calculate_feature_matrix('features', entityset, cutoff_time=cutoff_time)
-    with pytest.raises(AssertionError):
+    with pytest.raises(AssertionError, match=error_text):
         feature_matrix = calculate_feature_matrix([], entityset, cutoff_time=cutoff_time)
-    with pytest.raises(AssertionError):
+    with pytest.raises(AssertionError, match=error_text):
         feature_matrix = calculate_feature_matrix([1, 2, 3], entityset, cutoff_time=cutoff_time)
-    with pytest.raises(TypeError):
+    error_text = ".*type object 17"
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  entityset,
                                  instance_ids=range(17),
                                  cutoff_time=17)
-    with pytest.raises(TypeError):
+    error_text = 'cutoff_time must be a single value or DataFrame'
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  entityset,
                                  instance_ids=range(17),
                                  cutoff_time=times)
     cutoff_times_dup = pd.DataFrame({'time': [pd.datetime(2018, 3, 1),
                                               pd.datetime(2018, 3, 1)],
                                     entityset['log'].index: [1, 1]})
-    with pytest.raises(AssertionError):
+    error_text = 'Duplicated rows in cutoff time dataframe.'
+    with pytest.raises(AssertionError, match=error_text):
         feature_matrix = calculate_feature_matrix([property_feature],
                                                   entityset=entityset,
                                                   cutoff_time=cutoff_times_dup)
@@ -279,8 +282,8 @@ def test_training_window(entityset):
                                               training_window='2 hours')
 
     entityset.add_last_time_indexes()
-
-    with pytest.raises(AssertionError):
+    error_text = 'training window must be an absolute Timedelta'
+    with pytest.raises(AssertionError, match=error_text):
         feature_matrix = calculate_feature_matrix([property_feature],
                                                   entityset,
                                                   cutoff_time=cutoff_time,
@@ -645,8 +648,8 @@ def test_cutoff_time_naming(entityset):
         fm2 = calculate_feature_matrix([dfeat], entityset, cutoff_time=test_cutoff)
 
         assert all((fm1 == fm2.values).values)
-
-    with pytest.raises(AttributeError):
+    error_text = 'Name of the index variable in the target entity or "instance_id" must be present in cutoff_time'
+    with pytest.raises(AttributeError, match=error_text):
         calculate_feature_matrix([dfeat], entityset, cutoff_time=cutoff_df_wrong_index_name)
 
 
@@ -742,13 +745,14 @@ def test_calculating_number_per_chunk():
     singleton = pd.DataFrame({'time': [pd.Timestamp('2011-04-08 10:30:00')],
                               'instance_id': [0]})
     shape = cutoff_df.shape
-    with pytest.raises(ValueError):
+    error_text = "chunk_size must be None, a float between 0 and 1,a positive integer, or the string 'cutoff time'"
+    with pytest.raises(ValueError, match=error_text):
         calc_num_per_chunk(-1, shape)
 
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match=error_text):
         calc_num_per_chunk("test", shape)
 
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match=error_text):
         calc_num_per_chunk(2.5, shape)
 
     with pytest.warns(UserWarning):
@@ -939,7 +943,7 @@ def test_not_enough_memory(self, entityset, monkeypatch):
                             'Client',
                             MockClient)
         # errors if not enough memory for each worker to store the entityset
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match=''):
             create_client_and_cluster(n_jobs=1,
                                       num_tasks=5,
                                       dask_kwargs={},
@@ -961,8 +965,8 @@ def test_parallel_failure_raises_correct_error(entityset):
                  [datetime(2011, 4, 10, 11, 10, i * 3) for i in range(2)])
     cutoff_time = pd.DataFrame({'time': times, 'instance_id': range(17)})
     property_feature = IdentityFeature(entityset['log']['value']) > 10
-
-    with pytest.raises(AssertionError):
+    error_text = 'Need at least one worker'
+    with pytest.raises(AssertionError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  entityset=entityset,
                                  cutoff_time=cutoff_time,
@@ -984,7 +988,8 @@ def test_n_jobs(entityset):
     assert n_jobs_to_workers((cpus + 1) * -1) == 1
     if cpus > 1:
         assert n_jobs_to_workers(-2) == cpus - 1
-    with pytest.raises(AssertionError):
+    error_text = 'Need at least one worker'
+    with pytest.raises(AssertionError, match=error_text):
         n_jobs_to_workers(0)
 
 
@@ -1009,8 +1014,8 @@ def test_integer_time_index_datetime_cutoffs(int_es):
     times = [datetime.now()] * 17
     cutoff_df = pd.DataFrame({'time': times, 'instance_id': range(17)})
     property_feature = IdentityFeature(int_es['log']['value']) > 10
-
-    with pytest.raises(TypeError):
+    error_text = "Cannot compare type.*"
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  int_es,
                                  cutoff_time=cutoff_df,
@@ -1044,22 +1049,22 @@ def test_integer_time_index_mixed_cutoff(int_es):
                               'labels': labels})
     cutoff_df = cutoff_df[['time', 'instance_id', 'labels']]
     property_feature = IdentityFeature(int_es['log']['value']) > 10
-
-    with pytest.raises(TypeError):
+    error_text = 'cutoff_time times must be.*try casting via.*'
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  int_es,
                                  cutoff_time=cutoff_df)
 
     times_str = list(range(8, 17)) + ["foobar", 19, 20, 21, 22, 25, 24, 23]
     cutoff_df['time'] = times_str
-    with pytest.raises(TypeError):
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  int_es,
                                  cutoff_time=cutoff_df)
 
     times_date_str = list(range(8, 17)) + ['2018-04-02', 19, 20, 21, 22, 25, 24, 23]
     cutoff_df['time'] = times_date_str
-    with pytest.raises(TypeError):
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  int_es,
                                  cutoff_time=cutoff_df)
@@ -1069,7 +1074,7 @@ def test_integer_time_index_mixed_cutoff(int_es):
     times_int_str = list(range(8, 17)) + ['17', 19, 20, 21, 22, 25, 24, 23]
     cutoff_df['time'] = times_int_str
     # calculate_feature_matrix should convert time column to ints successfully here
-    with pytest.raises(TypeError):
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  int_es,
                                  cutoff_time=cutoff_df)
@@ -1089,28 +1094,28 @@ def test_datetime_index_mixed_cutoff(entityset):
                               'labels': labels})
     cutoff_df = cutoff_df[['time', 'instance_id', 'labels']]
     property_feature = IdentityFeature(entityset['log']['value']) > 10
-
-    with pytest.raises(TypeError):
+    error_text = 'cutoff_time times must be.*try casting via.*'
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  entityset,
                                  cutoff_time=cutoff_df)
 
     times[9] = "foobar"
     cutoff_df['time'] = times
-    with pytest.raises(TypeError):
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  entityset,
                                  cutoff_time=cutoff_df)
 
     cutoff_df['time'].iloc[9] = '2018-04-02 18:50:45.453216'
-    with pytest.raises(TypeError):
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  entityset,
                                  cutoff_time=cutoff_df)
 
     times[9] = '17'
     cutoff_df['time'] = times
-    with pytest.raises(TypeError):
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([property_feature],
                                  entityset,
                                  cutoff_time=cutoff_df)
@@ -1120,8 +1125,8 @@ def test_string_time_values_in_cutoff_time(entityset):
     times = ['2011-04-09 10:31:27', '2011-04-09 10:30:18']
     cutoff_time = pd.DataFrame({'time': times, 'instance_id': [0, 0]})
     agg_feature = Sum(entityset['log']['value'], entityset['customers'])
-
-    with pytest.raises(TypeError):
+    error_text = 'cutoff_time times must be.*try casting via.*'
+    with pytest.raises(TypeError, match=error_text):
         calculate_feature_matrix([agg_feature], entityset, cutoff_time=cutoff_time)
 
 

diff --git a/featuretools/tests/computational_backend/test_encode_features.py b/featuretools/tests/computational_backend/test_encode_features.py
@@ -109,8 +109,8 @@ def test_encode_features_catches_features_mismatch(entityset):
     feature_matrix = calculate_feature_matrix(features, entityset, cutoff_time)
 
     assert 'label' in feature_matrix.columns
-
-    with pytest.raises(AssertionError):
+    error_text = 'Feature session_id not found in feature matrix'
+    with pytest.raises(AssertionError, match=error_text):
         encode_features(feature_matrix, [f1, f3])
 
 

diff --git a/featuretools/tests/dfs_tests/test_deep_feature_synthesis.py b/featuretools/tests/dfs_tests/test_deep_feature_synthesis.py
@@ -126,7 +126,8 @@ def find_other_agg_features(features):
 
 
 def test_ignores_entities(es):
-    with pytest.raises(TypeError):
+    error_text = 'ignore_entities must be a list'
+    with pytest.raises(TypeError, match=error_text):
         DeepFeatureSynthesis(target_entity_id='sessions',
                              entityset=es,
                              agg_primitives=[Last],

diff --git a/featuretools/tests/entityset_tests/test_entity.py b/featuretools/tests/entityset_tests/test_entity.py
@@ -16,7 +16,8 @@ def es():
 
 def test_enforces_variable_id_is_str(es):
     assert variable_types.Categorical("1", es["customers"])
-    with pytest.raises(AssertionError):
+    error_text = 'Variable id must be a string'
+    with pytest.raises(AssertionError, match=error_text):
         variable_types.Categorical(1, es["customers"])
 
 
@@ -66,11 +67,11 @@ def test_eq(es):
 def test_update_data(es):
     df = es['customers'].df.copy()
     df['new'] = [1, 2, 3]
-
-    with pytest.raises(ValueError) as excinfo:
+    error_text = 'Updated dataframe is missing new cohort column'
+    with pytest.raises(ValueError, match=error_text) as excinfo:
         es['customers'].update_data(df.drop(columns=['cohort']))
     assert 'Updated dataframe is missing new cohort column' in str(excinfo)
-
-    with pytest.raises(ValueError) as excinfo:
+    error_text = 'Updated dataframe contains 13 columns, expecting 12'
+    with pytest.raises(ValueError, match=error_text) as excinfo:
         es['customers'].update_data(df)
     assert 'Updated dataframe contains 13 columns, expecting 12' in str(excinfo)