Skip to content

Commit

Permalink
Fixed #297 update tests to check error strings
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jeff Hernandez committed Nov 1, 2018
1 parent 32cec5e commit fca7ada
Show file tree
Hide file tree
Showing 12 changed files with 103 additions and 76 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,6 @@ ENV/
*.pickle

.pytest_cache

#IDE
.vscode
Original file line number Diff line number Diff line change
Expand Up @@ -68,27 +68,30 @@ def test_calc_feature_matrix(entityset):
verbose=True)

assert (feature_matrix == labels).values.all()

with pytest.raises(AssertionError):
error_text = 'features must be a non-empty list of features'
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix('features', entityset, cutoff_time=cutoff_time)
with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([], entityset, cutoff_time=cutoff_time)
with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([1, 2, 3], entityset, cutoff_time=cutoff_time)
with pytest.raises(TypeError):
error_text = ".*type object 17"
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
instance_ids=range(17),
cutoff_time=17)
with pytest.raises(TypeError):
error_text = 'cutoff_time must be a single value or DataFrame'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
instance_ids=range(17),
cutoff_time=times)
cutoff_times_dup = pd.DataFrame({'time': [pd.datetime(2018, 3, 1),
pd.datetime(2018, 3, 1)],
entityset['log'].index: [1, 1]})
with pytest.raises(AssertionError):
error_text = 'Duplicated rows in cutoff time dataframe.'
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([property_feature],
entityset=entityset,
cutoff_time=cutoff_times_dup)
Expand Down Expand Up @@ -279,8 +282,8 @@ def test_training_window(entityset):
training_window='2 hours')

entityset.add_last_time_indexes()

with pytest.raises(AssertionError):
error_text = 'training window must be an absolute Timedelta'
with pytest.raises(AssertionError, match=error_text):
feature_matrix = calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_time,
Expand Down Expand Up @@ -645,8 +648,8 @@ def test_cutoff_time_naming(entityset):
fm2 = calculate_feature_matrix([dfeat], entityset, cutoff_time=test_cutoff)

assert all((fm1 == fm2.values).values)

with pytest.raises(AttributeError):
error_text = 'Name of the index variable in the target entity or "instance_id" must be present in cutoff_time'
with pytest.raises(AttributeError, match=error_text):
calculate_feature_matrix([dfeat], entityset, cutoff_time=cutoff_df_wrong_index_name)


Expand Down Expand Up @@ -742,13 +745,14 @@ def test_calculating_number_per_chunk():
singleton = pd.DataFrame({'time': [pd.Timestamp('2011-04-08 10:30:00')],
'instance_id': [0]})
shape = cutoff_df.shape
with pytest.raises(ValueError):
error_text = "chunk_size must be None, a float between 0 and 1,a positive integer, or the string 'cutoff time'"
with pytest.raises(ValueError, match=error_text):
calc_num_per_chunk(-1, shape)

with pytest.raises(ValueError):
with pytest.raises(ValueError, match=error_text):
calc_num_per_chunk("test", shape)

with pytest.raises(ValueError):
with pytest.raises(ValueError, match=error_text):
calc_num_per_chunk(2.5, shape)

with pytest.warns(UserWarning):
Expand Down Expand Up @@ -939,7 +943,7 @@ def test_not_enough_memory(self, entityset, monkeypatch):
'Client',
MockClient)
# errors if not enough memory for each worker to store the entityset
with pytest.raises(ValueError):
with pytest.raises(ValueError, match=''):
create_client_and_cluster(n_jobs=1,
num_tasks=5,
dask_kwargs={},
Expand All @@ -961,8 +965,8 @@ def test_parallel_failure_raises_correct_error(entityset):
[datetime(2011, 4, 10, 11, 10, i * 3) for i in range(2)])
cutoff_time = pd.DataFrame({'time': times, 'instance_id': range(17)})
property_feature = IdentityFeature(entityset['log']['value']) > 10

with pytest.raises(AssertionError):
error_text = 'Need at least one worker'
with pytest.raises(AssertionError, match=error_text):
calculate_feature_matrix([property_feature],
entityset=entityset,
cutoff_time=cutoff_time,
Expand All @@ -984,7 +988,8 @@ def test_n_jobs(entityset):
assert n_jobs_to_workers((cpus + 1) * -1) == 1
if cpus > 1:
assert n_jobs_to_workers(-2) == cpus - 1
with pytest.raises(AssertionError):
error_text = 'Need at least one worker'
with pytest.raises(AssertionError, match=error_text):
n_jobs_to_workers(0)


Expand All @@ -1009,8 +1014,8 @@ def test_integer_time_index_datetime_cutoffs(int_es):
times = [datetime.now()] * 17
cutoff_df = pd.DataFrame({'time': times, 'instance_id': range(17)})
property_feature = IdentityFeature(int_es['log']['value']) > 10

with pytest.raises(TypeError):
error_text = "Cannot compare type.*"
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df,
Expand Down Expand Up @@ -1044,22 +1049,22 @@ def test_integer_time_index_mixed_cutoff(int_es):
'labels': labels})
cutoff_df = cutoff_df[['time', 'instance_id', 'labels']]
property_feature = IdentityFeature(int_es['log']['value']) > 10

with pytest.raises(TypeError):
error_text = 'cutoff_time times must be.*try casting via.*'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)

times_str = list(range(8, 17)) + ["foobar", 19, 20, 21, 22, 25, 24, 23]
cutoff_df['time'] = times_str
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)

times_date_str = list(range(8, 17)) + ['2018-04-02', 19, 20, 21, 22, 25, 24, 23]
cutoff_df['time'] = times_date_str
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)
Expand All @@ -1069,7 +1074,7 @@ def test_integer_time_index_mixed_cutoff(int_es):
times_int_str = list(range(8, 17)) + ['17', 19, 20, 21, 22, 25, 24, 23]
cutoff_df['time'] = times_int_str
# calculate_feature_matrix should convert time column to ints successfully here
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
int_es,
cutoff_time=cutoff_df)
Expand All @@ -1089,28 +1094,28 @@ def test_datetime_index_mixed_cutoff(entityset):
'labels': labels})
cutoff_df = cutoff_df[['time', 'instance_id', 'labels']]
property_feature = IdentityFeature(entityset['log']['value']) > 10

with pytest.raises(TypeError):
error_text = 'cutoff_time times must be.*try casting via.*'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)

times[9] = "foobar"
cutoff_df['time'] = times
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)

cutoff_df['time'].iloc[9] = '2018-04-02 18:50:45.453216'
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)

times[9] = '17'
cutoff_df['time'] = times
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([property_feature],
entityset,
cutoff_time=cutoff_df)
Expand All @@ -1120,8 +1125,8 @@ def test_string_time_values_in_cutoff_time(entityset):
times = ['2011-04-09 10:31:27', '2011-04-09 10:30:18']
cutoff_time = pd.DataFrame({'time': times, 'instance_id': [0, 0]})
agg_feature = Sum(entityset['log']['value'], entityset['customers'])

with pytest.raises(TypeError):
error_text = 'cutoff_time times must be.*try casting via.*'
with pytest.raises(TypeError, match=error_text):
calculate_feature_matrix([agg_feature], entityset, cutoff_time=cutoff_time)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ def test_encode_features_catches_features_mismatch(entityset):
feature_matrix = calculate_feature_matrix(features, entityset, cutoff_time)

assert 'label' in feature_matrix.columns

with pytest.raises(AssertionError):
error_text = 'Feature session_id not found in feature matrix'
with pytest.raises(AssertionError, match=error_text):
encode_features(feature_matrix, [f1, f3])


Expand Down
3 changes: 2 additions & 1 deletion featuretools/tests/dfs_tests/test_deep_feature_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ def find_other_agg_features(features):


def test_ignores_entities(es):
with pytest.raises(TypeError):
error_text = 'ignore_entities must be a list'
with pytest.raises(TypeError, match=error_text):
DeepFeatureSynthesis(target_entity_id='sessions',
entityset=es,
agg_primitives=[Last],
Expand Down
11 changes: 6 additions & 5 deletions featuretools/tests/entityset_tests/test_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def es():

def test_enforces_variable_id_is_str(es):
assert variable_types.Categorical("1", es["customers"])
with pytest.raises(AssertionError):
error_text = 'Variable id must be a string'
with pytest.raises(AssertionError, match=error_text):
variable_types.Categorical(1, es["customers"])


Expand Down Expand Up @@ -66,11 +67,11 @@ def test_eq(es):
def test_update_data(es):
df = es['customers'].df.copy()
df['new'] = [1, 2, 3]

with pytest.raises(ValueError) as excinfo:
error_text = 'Updated dataframe is missing new cohort column'
with pytest.raises(ValueError, match=error_text) as excinfo:
es['customers'].update_data(df.drop(columns=['cohort']))
assert 'Updated dataframe is missing new cohort column' in str(excinfo)

with pytest.raises(ValueError) as excinfo:
error_text = 'Updated dataframe contains 13 columns, expecting 12'
with pytest.raises(ValueError, match=error_text) as excinfo:
es['customers'].update_data(df)
assert 'Updated dataframe contains 13 columns, expecting 12' in str(excinfo)
Loading

0 comments on commit fca7ada

Please sign in to comment.