New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bug-1885978: remove raw crash from indexing code #6560
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,8 +73,6 @@ | |
}, | ||
} | ||
|
||
SAMPLE_RAW_CRASH = {"ProductName": "Firefox", "ReleaseChannel": "nightly"} | ||
|
||
REMOVED_VALUE = object() | ||
|
||
|
||
|
@@ -103,13 +101,12 @@ def build_crashstorage(self): | |
|
||
def test_index_crash(self, es_helper): | ||
"""Test indexing a crash document.""" | ||
raw_crash = deepcopy(SAMPLE_RAW_CRASH) | ||
processed_crash = deepcopy(SAMPLE_PROCESSED_CRASH) | ||
processed_crash["date_processed"] = date_to_string(utc_now()) | ||
|
||
crashstorage = self.build_crashstorage() | ||
crashstorage.save_processed_crash( | ||
raw_crash=raw_crash, | ||
raw_crash={}, | ||
processed_crash=processed_crash, | ||
) | ||
|
||
|
@@ -121,9 +118,6 @@ def test_index_crash(self, es_helper): | |
|
||
def test_index_crash_indexable_keys(self, es_helper): | ||
"""Test indexing ONLY indexes valid, known keys.""" | ||
raw_crash = { | ||
"InvalidKey": "alpha", | ||
} | ||
processed_crash = { | ||
"another_invalid_key": "alpha", | ||
"date_processed": date_to_string(utc_now()), | ||
|
@@ -133,7 +127,7 @@ def test_index_crash_indexable_keys(self, es_helper): | |
|
||
crashstorage = self.build_crashstorage() | ||
crashstorage.save_processed_crash( | ||
raw_crash=raw_crash, | ||
raw_crash={}, | ||
processed_crash=processed_crash, | ||
) | ||
|
||
|
@@ -143,11 +137,8 @@ def test_index_crash_indexable_keys(self, es_helper): | |
id=processed_crash["uuid"], | ||
) | ||
|
||
# Verify keys that aren't in super_search_fields aren't in the raw or processed | ||
# crash parts | ||
raw_crash = doc["_source"]["raw_crash"] | ||
assert list(sorted(raw_crash.keys())) == [] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One less "sort an empty list" in the codebase. 🎉 |
||
|
||
# Verify keys that aren't in super_search_fields aren't in the final | ||
# document | ||
processed_crash = doc["_source"]["processed_crash"] | ||
assert list(sorted(processed_crash.keys())) == [ | ||
"date_processed", | ||
|
@@ -188,33 +179,27 @@ def test_index_crash_mapping_keys(self, es_helper): | |
|
||
# Create a crash for this week and save it | ||
now_uuid = create_new_ooid(timestamp=now) | ||
raw_crash = { | ||
"BuildID": "20200506000000", | ||
} | ||
processed_crash = { | ||
field: "this week", | ||
"date_processed": date_to_string(now), | ||
"uuid": now_uuid, | ||
} | ||
|
||
crashstorage.save_processed_crash( | ||
raw_crash=raw_crash, | ||
raw_crash={}, | ||
processed_crash=processed_crash, | ||
) | ||
|
||
# Create a crash for four weeks ago with the bum mapping and save it | ||
old_uuid = create_new_ooid(timestamp=four_weeks_ago) | ||
raw_crash = { | ||
"BuildID": "20200506000000", | ||
} | ||
processed_crash = { | ||
field: "this week", | ||
"date_processed": date_to_string(now - timedelta(days=28)), | ||
"uuid": old_uuid, | ||
} | ||
|
||
crashstorage.save_processed_crash( | ||
raw_crash=raw_crash, | ||
raw_crash={}, | ||
processed_crash=processed_crash, | ||
) | ||
|
||
|
@@ -239,9 +224,8 @@ def test_index_crash_mapping_keys(self, es_helper): | |
assert field not in doc["_source"]["processed_crash"] | ||
|
||
def test_crash_size_capture(self): | ||
"""Verify we capture raw/processed crash sizes in ES crashstorage""" | ||
"""Verify saving a processed crash emits a metric for crash document size""" | ||
crash_id = create_new_ooid() | ||
raw_crash = {"ProductName": "Firefox", "ReleaseChannel": "nightly"} | ||
processed_crash = { | ||
"date_processed": "2012-04-08 10:56:41.558922", | ||
"uuid": crash_id, | ||
|
@@ -250,13 +234,11 @@ def test_crash_size_capture(self): | |
crashstorage = self.build_crashstorage() | ||
with MetricsMock() as mm: | ||
crashstorage.save_processed_crash( | ||
raw_crash=raw_crash, | ||
raw_crash={}, | ||
processed_crash=processed_crash, | ||
) | ||
|
||
mm.assert_histogram("processor.es.raw_crash_size", value=2) | ||
mm.assert_histogram("processor.es.processed_crash_size", value=96) | ||
mm.assert_histogram("processor.es.crash_document_size", value=186) | ||
mm.assert_histogram("processor.es.crash_document_size", value=169) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This value changes because the document no longer contains a raw_crash. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now that I'm thinking about this, we could stop emitting the processed crash size. The "Socorro prod app metrics" dashboard only has a panel for the crash document size. I'll fix this now--one less metric. |
||
|
||
def test_index_data_capture(self, es_helper): | ||
"""Verify we capture index data in ES crashstorage""" | ||
|
@@ -352,7 +334,6 @@ def test_delete_expired_indices(self, es_helper): | |
) | ||
def test_indexing_bad_data(self, key, value, expected_value, es_helper): | ||
crash_id = create_new_ooid() | ||
raw_crash = {"ProductName": "Firefox", "ReleaseChannel": "nightly"} | ||
processed_crash = { | ||
"date_processed": date_from_ooid(crash_id), | ||
"uuid": crash_id, | ||
|
@@ -368,7 +349,7 @@ def test_indexing_bad_data(self, key, value, expected_value, es_helper): | |
# Save the crash data and then fetch it and verify the value is as expected | ||
crashstorage = self.build_crashstorage() | ||
crashstorage.save_processed_crash( | ||
raw_crash=raw_crash, | ||
raw_crash={}, | ||
processed_crash=processed_crash, | ||
) | ||
es_helper.refresh() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,7 +97,6 @@ def test_get_mapping(self, es_helper): | |
properties = mapping[doctype]["properties"] | ||
|
||
print(json.dumps(properties, indent=4, sort_keys=True)) | ||
assert "raw_crash" not in properties | ||
assert "processed_crash" in properties | ||
|
||
processed_crash = properties["processed_crash"]["properties"] | ||
|
@@ -218,8 +217,7 @@ def test_validate_super_search_fields(name, properties): | |
if properties.get("destination_keys"): | ||
for key in properties["destination_keys"]: | ||
possible_keys = [ | ||
# Old keys we're probably migrating from | ||
f"raw_crash.{properties['in_database_name']}", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. None of the keys start with raw_crash. |
||
# Old key we're possibly migrating from | ||
f"processed_crash.{properties['in_database_name']}", | ||
# New key we're probably migrating to | ||
f"processed_crash.{properties['name']}", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One less deepcopy should make it a little faster, though I'm not sure we'd notice.