Skip to content

Commit

Permalink
* merge conflict fallout with test helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
Taylor Miller committed Nov 10, 2017
1 parent f58edde commit 1b86e59
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 24 deletions.
6 changes: 4 additions & 2 deletions healthcareai/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@ def fixture(file):

def assertBetween(self, minimum, maximum, value):
    """Fail if value is not between minimum and maximum (inclusive).

    This is a module-level helper, so the checks are run on a throwaway
    ``unittest.TestCase`` instead of relying on ``self`` providing the
    assertion methods.

    Args:
        self: Not used by the body; retained so the signature is unchanged.
        minimum: Lower bound (inclusive).
        maximum: Upper bound (inclusive).
        value: The value to check against the bounds.

    Raises:
        AssertionError: If ``value`` is below ``minimum`` or above ``maximum``.
    """
    # A bare TestCase is only used as a carrier for the assert* methods.
    test_case = unittest.TestCase()

    test_case.assertGreaterEqual(value, minimum)
    test_case.assertLessEqual(value, maximum)


def generate_known_numeric(length):
Expand Down
46 changes: 24 additions & 22 deletions healthcareai/tests/test_dataframe_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import random

import healthcareai.common.transformers as transformers
import healthcareai.tests.helpers as hcaihelpers
from healthcareai.tests.helpers import generate_known_numeric, \
assert_dataframes_identical, convert_all_columns_to_uint8, \
assert_series_equal


class TestDataframeImputer(unittest.TestCase):
Expand All @@ -24,7 +26,7 @@ def setUp(self):
'binary': np.random.choice(['a', 'b'], row_count, p=[.90, .1]),
'alphabet': self.alphabet,
'numeric': random.sample(range(0, row_count), row_count),
'known_numeric': hcaihelpers.generate_known_numeric(row_count),
'known_numeric': generate_known_numeric(row_count),
'color': self.generate_known_color(row_count)
})
self.numeric_mean = self.train_df['numeric'].mean()
Expand Down Expand Up @@ -73,7 +75,7 @@ def test_filler_generator_numeric_mean_and_nan_categorical(self):
# Drop columns with unknown distributions
result.drop(['id', 'alphabet'], inplace=True)

hcaihelpers.assert_series_equal(expected, result)
assert_series_equal(expected, result)

def test_false_returns_unmodified(self):
"""Assure that no imputation occurs."""
Expand All @@ -94,7 +96,7 @@ def test_false_returns_unmodified(self):
imputer = transformers.DataFrameImputer(impute=False).fit(df)
result = imputer.transform(df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_converts_object_to_category(self):
df = pd.DataFrame({
Expand All @@ -108,7 +110,7 @@ def test_converts_object_to_category(self):

result = transformers.DataFrameImputer().fit_transform(df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_columns_to_impute(self):
result = self.imputer._columns_to_impute(self.train_df)
Expand Down Expand Up @@ -149,7 +151,7 @@ def test_single_exclusion_as_string(self):
result = imputer.fit_transform(df)

self.assertFalse(result['num1'].isnull().values.any())
_assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_single_exclusion_as_list(self):
df = pd.DataFrame({
Expand All @@ -170,7 +172,7 @@ def test_single_exclusion_as_list(self):
result = imputer.fit_transform(df)

self.assertFalse(result['num1'].isnull().values.any())
_assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_removes_nans(self):
"""This should remove numeric NaNs, and not categoricals ones."""
Expand All @@ -192,7 +194,7 @@ def test_removes_nans(self):
self.assertFalse(result['num1'].isnull().values.any())
self.assertFalse(result['num2'].isnull().values.any())

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_removes_nones(self):
"""This should remove numeric Nones, and not categorical ones."""
Expand All @@ -215,7 +217,7 @@ def test_removes_nones(self):
self.assertFalse(result['num1'].isnull().values.any())
self.assertFalse(result['num2'].isnull().values.any())

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_get_unseen_factors(self):
"""binary column is as expected and alphabet column has new levels."""
Expand Down Expand Up @@ -357,7 +359,7 @@ def test_remembers_all_unrepresented_categories(self):
# imputer = transformers.DataFrameImputer().fit(prediction_df)
result = self.imputer.transform(prediction_df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)


class TestDataFrameConvertTargetToBinary(unittest.TestCase):
Expand All @@ -371,7 +373,7 @@ def test_does_nothing_on_regression(self):

result = transformers.DataFrameConvertTargetToBinary('regression', 'string_outcome').fit_transform(expected)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_converts_y_n_for_classification(self):
df = pd.DataFrame({
Expand All @@ -390,7 +392,7 @@ def test_converts_y_n_for_classification(self):

result = transformers.DataFrameConvertTargetToBinary('classification', 'string_outcome').fit_transform(df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)


class TestDataFrameCreateDummyVariables(unittest.TestCase):
Expand Down Expand Up @@ -459,7 +461,7 @@ def test_binary_object_and_category(self):

result = dummifier.transform(df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_trinary_object_and_category(self):
df = pd.DataFrame({
Expand Down Expand Up @@ -491,7 +493,7 @@ def test_trinary_object_and_category(self):
result = transformers.DataFrameCreateDummyVariables(
'id').fit_transform(df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_remembers_unrepresented_categories(self):
# TODO this is broken due to a pandas bug
Expand Down Expand Up @@ -529,7 +531,7 @@ def test_remembers_unrepresented_categories(self):
# trained = transformers.DataFrameCreateDummyVariables('id').fit(self.train_df)
# result = trained.transform(prediction_df)
#
# hcaihelpers._assert_dataframes_identical(expected, result)
# assert_dataframes_identical(expected, result)

def test_none_represented(self):
prediction_df = pd.DataFrame({
Expand Down Expand Up @@ -572,10 +574,10 @@ def test_none_represented(self):
'numeric': [1, 2, 1],
})

expected = hcaihelpers.convert_all_columns_to_uint8(expected, ['id', 'numeric'])
expected = convert_all_columns_to_uint8(expected, ['id', 'numeric'])
result = self.dummifier.transform(prediction_df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)


class TestDataFrameConvertColumnToNumeric(unittest.TestCase):
Expand All @@ -592,7 +594,7 @@ def test_integer_strings(self):
})

result = transformers.DataFrameConvertColumnToNumeric('integer_strings').fit_transform(df)
hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)

def test_integer(self):
df = pd.DataFrame({
Expand All @@ -606,7 +608,7 @@ def test_integer(self):

result = transformers.DataFrameConvertColumnToNumeric('numeric').fit_transform(df)

hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)


class TestDataframeUnderSampler(unittest.TestCase):
Expand Down Expand Up @@ -695,7 +697,7 @@ def test_removes_nan_rows(self):
'c': [3, 4, 5, None, None],
'd': [None, 8, 1, 3, None],
'label': ['Y', 'N', 'Y', 'N', None]})
hcaihelpers.assert_dataframes_identical(expected, result)
assert_dataframes_identical(expected, result)


class TestFeatureScaling(unittest.TestCase):
Expand Down Expand Up @@ -725,10 +727,10 @@ def runTest(self):
feature_scaling = transformers.DataFrameFeatureScaling()
df_final = feature_scaling.fit_transform(self.df).round(5)

hcaihelpers.assert_dataframes_identical(expected.round(5), df_final)
assert_dataframes_identical(expected.round(5), df_final)

df_reused = transformers.DataFrameFeatureScaling(reuse=feature_scaling).fit_transform(self.df_repeat).round(5)
hcaihelpers.assert_dataframes_identical(expected.round(5), df_reused)
assert_dataframes_identical(expected.round(5), df_reused)


if __name__ == '__main__':
Expand Down

0 comments on commit 1b86e59

Please sign in to comment.