Skip to content

Commit

Permalink
bug-1579266: support storing crashes in gcs
Browse files Browse the repository at this point in the history
  • Loading branch information
relud committed Feb 20, 2024
1 parent db135b4 commit 3670126
Show file tree
Hide file tree
Showing 12 changed files with 413 additions and 88 deletions.
8 changes: 8 additions & 0 deletions .circleci/config.yml
Expand Up @@ -84,6 +84,14 @@ jobs:
docker compose up --detach --wait --wait-timeout=10 ci-web
docker compose run --rm ci shell bash -c 'cd systemtest && NGINX_TESTS=0 POST_CHECK=1 HOST=http://ci-web:8000 pytest -vv'
- run:
name: Run systemtest with gcs
command: |
echo 'CRASHMOVER_CRASHSTORAGE_CLASS=antenna.ext.gcs.crashstorage.GcsCrashStorage' >> my.env
docker compose run --rm ci shell ./bin/run_setup.sh
docker compose up --detach --wait --wait-timeout=10 ci-web
docker compose run --rm ci shell bash -c 'cd systemtest && NGINX_TESTS=0 POST_CHECK=1 HOST=http://ci-web:8000 pytest -vv'
- run:
name: Push to Dockerhub
command: |
Expand Down
21 changes: 21 additions & 0 deletions antenna/ext/crashstorage_base.py
Expand Up @@ -4,6 +4,8 @@

import logging

from antenna.util import get_date_from_crash_id


logger = logging.getLogger(__name__)

Expand All @@ -17,6 +19,25 @@ class Config:
def __init__(self, config):
self.config = config.with_options(self)

def _path_join(self, *paths):
return "/".join(paths)

def _get_raw_crash_path(self, crash_id):
    """Return the storage path for the raw crash of ``crash_id``.

    The path has the shape ``v1/raw_crash/<date>/<crash_id>`` where the date
    segment is whatever ``get_date_from_crash_id`` returns for the crash id.

    :arg str crash_id: the crash id

    :returns: path built by ``self._path_join``
    """
    segments = ("v1", "raw_crash", get_date_from_crash_id(crash_id), crash_id)
    return self._path_join(*segments)

def _get_dump_names_path(self, crash_id):
return self._path_join("v1", "dump_names", crash_id)

def _get_dump_name_path(self, crash_id, dump_name):
# NOTE(willkg): This is something that Socorro collector did. I'm not
# really sure why, but in order to maintain backwards compatability, we
# need to keep doing it.
if dump_name in (None, "", "upload_file_minidump"):
dump_name = "dump"

return self._path_join("v1", dump_name, crash_id)

def publish_crash(self, crash_report):
    """Save the crash report; this base implementation always raises.

    :arg crash_report: the crash report to save

    :raises NotImplementedError: always
    """
    # NOTE(review): the docstring talks about saving, and GcsCrashStorage in
    # this change implements ``save_crash`` rather than ``publish_crash`` --
    # confirm which hook name is intended.
    raise NotImplementedError
Expand Down
41 changes: 12 additions & 29 deletions antenna/ext/fs/crashstorage.py
Expand Up @@ -9,7 +9,7 @@
from everett.manager import Option

from antenna.ext.crashstorage_base import CrashStorageBase
from antenna.util import get_date_from_crash_id, json_ordered_dumps
from antenna.util import json_ordered_dumps


logger = logging.getLogger(__name__)
Expand All @@ -19,18 +19,19 @@ class FSCrashStorage(CrashStorageBase):
"""Save raw crash files to the file system.
This generates a tree something like this which mirrors what we do
on S3:
on S3 and GCS:
::
<FS_ROOT>/
<YYYYMMDD>/
raw_crash/
<CRASHID>.json
dump_names/
<CRASHID>.json
<DUMP_NAME>/
<CRASHID>
v1/
dump_names/
<CRASHID>
<DUMPNAME>/
<CRASHID>
raw_crash/
<YYYYMMDD>/
<CRASHID>
Couple of things to note:
Expand Down Expand Up @@ -63,26 +64,8 @@ def __init__(self, config):
if not os.path.isdir(self.root):
os.makedirs(self.root)

def _get_raw_crash_path(self, crash_id):
    """Build the file path where the raw crash JSON for ``crash_id`` goes.

    The layout is ``<root>/<date>/raw_crash/<crash_id>.json``.
    """
    root = self.root
    date_dir = get_date_from_crash_id(crash_id)
    filename = crash_id + ".json"
    return os.path.join(root, date_dir, "raw_crash", filename)

def _get_dump_names_path(self, crash_id):
    """Build the file path where the dump_names list for ``crash_id`` goes.

    The layout is ``<root>/<date>/dump_names/<crash_id>.json``.
    """
    root = self.root
    date_dir = get_date_from_crash_id(crash_id)
    filename = crash_id + ".json"
    return os.path.join(root, date_dir, "dump_names", filename)

def _get_dump_name_path(self, crash_id, dump_name):
    """Build the file path for the dump called ``dump_name`` of ``crash_id``.

    The layout is ``<root>/<date>/<dump_name>/<crash_id>``.
    """
    root = self.root
    date_dir = get_date_from_crash_id(crash_id)
    return os.path.join(root, date_dir, dump_name, crash_id)
def _path_join(self, *paths):
return os.path.join(self.root, *paths)

def _save_file(self, fn, contents):
logger.debug("Saving file %r", fn)
Expand Down
3 changes: 3 additions & 0 deletions antenna/ext/gcs/__init__.py
@@ -0,0 +1,3 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
132 changes: 132 additions & 0 deletions antenna/ext/gcs/crashstorage.py
@@ -0,0 +1,132 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

import logging
import os
import uuid

from everett.manager import Option
from google.auth.credentials import AnonymousCredentials
from google.cloud import storage

from antenna.app import register_for_verification
from antenna.ext.crashstorage_base import CrashStorageBase
from antenna.util import json_ordered_dumps

logger = logging.getLogger(__name__)


def generate_test_filepath():
    """Generate a unique-ish test filepath.

    :returns: a string of the form ``test/testfile-<uuid4>.txt``
    """
    token = uuid.uuid4()
    return f"test/testfile-{token}.txt"


class GcsCrashStorage(CrashStorageBase):
    """Save raw crash files to GCS.

    This will save raw crash files to GCS in a pseudo-tree something like this:

    ::

        <BUCKET>
           v1/
               dump_names/
                   <CRASHID>
               <DUMPNAME>/
                   <CRASHID>
               raw_crash/
                   <YYYYMMDD>/
                       <CRASHID>

    """

    class Config:
        gcs_bucket_name = Option(
            doc=(
                "Google Cloud Storage bucket to save to. Note that the bucket must "
                "already have been created."
            ),
        )

    def __init__(self, config):
        """Bind configuration, create the GCS client, and register the write check.

        :arg config: configuration object exposing ``with_options``
        """
        self.config = config.with_options(self)
        # Name of the bucket (a string), not a bucket object.
        self.bucket = self.config("gcs_bucket_name")

        if os.environ.get("STORAGE_EMULATOR_HOST"):
            # With STORAGE_EMULATOR_HOST set, connect with anonymous credentials
            # and a placeholder project (presumably a local emulator used for
            # development and tests).
            self.client = storage.Client(
                credentials=AnonymousCredentials(), project="test"
            )
        else:
            self.client = storage.Client()

        register_for_verification(self.verify_write_to_bucket)

    def _save_file(self, path, data):
        """Save a single file to GCS.

        :arg str path: the path to save to
        :arg bytes data: the data to save

        """
        # Client.bucket() builds a bucket handle locally without an API
        # request, whereas get_bucket() fetches the bucket metadata on every
        # call. A missing or inaccessible bucket still surfaces as an error
        # from the upload itself.
        bucket = self.client.bucket(self.bucket)
        blob = bucket.blob(path)
        blob.upload_from_string(data)

    def verify_write_to_bucket(self):
        """Verify GCS bucket exists and can be written to."""
        self._save_file(generate_test_filepath(), b"test")

    def check_health(self, state):
        """Check GCS connection health.

        :arg state: health state object; any failure is recorded on it with
            ``add_error`` instead of being raised

        """
        try:
            # get the bucket to verify GCS is up and we can connect to it.
            self.client.get_bucket(self.bucket)
        except Exception as exc:
            state.add_error("GcsCrashStorage", repr(exc))

    def save_raw_crash(self, crash_id, raw_crash):
        """Save the raw crash and related dumps.

        .. Note::

           If you're saving the raw crash and dumps, make sure to save the raw
           crash last.

        :arg str crash_id: The crash id as a string.
        :arg dict raw_crash: The raw crash as a dict.

        """
        self._save_file(
            self._get_raw_crash_path(crash_id),
            json_ordered_dumps(raw_crash).encode("utf-8"),
        )

    def save_dumps(self, crash_id, dumps):
        """Save dump data.

        :arg str crash_id: The crash id
        :arg dict dumps: dump name -> dump

        """
        # Save dump_names even if there are no dumps
        self._save_file(
            self._get_dump_names_path(crash_id),
            json_ordered_dumps(sorted(dumps)).encode("utf-8"),
        )

        # Save dumps
        for dump_name, dump in dumps.items():
            self._save_file(self._get_dump_name_path(crash_id, dump_name), dump)

    def save_crash(self, crash_report):
        """Save crash data.

        :arg crash_report: object providing ``crash_id``, ``raw_crash`` and
            ``dumps`` attributes

        """
        crash_id = crash_report.crash_id
        raw_crash = crash_report.raw_crash
        dumps = crash_report.dumps

        # Save dumps first
        self.save_dumps(crash_id, dumps)

        # Save raw crash
        self.save_raw_crash(crash_id, raw_crash)
22 changes: 1 addition & 21 deletions antenna/ext/s3/crashstorage.py
Expand Up @@ -8,7 +8,7 @@

from antenna.app import register_for_verification
from antenna.ext.crashstorage_base import CrashStorageBase
from antenna.util import get_date_from_crash_id, json_ordered_dumps
from antenna.util import json_ordered_dumps


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -63,26 +63,6 @@ def check_health(self, state):
"""Check connection health."""
self.connection.check_health(state)

def _get_raw_crash_path(self, crash_id):
    """Return the key for the raw crash: ``v1/raw_crash/<date>/<crash_id>``.

    The date segment is whatever ``get_date_from_crash_id`` returns.
    """
    date = get_date_from_crash_id(crash_id)
    return f"v1/raw_crash/{date}/{crash_id}"

def _get_dump_names_path(self, crash_id):
return f"v1/dump_names/{crash_id}"

def _get_dump_name_path(self, crash_id, dump_name):
# NOTE(willkg): This is something that Socorro collector did. I'm not
# really sure why, but in order to maintain backwards compatability, we
# need to keep doing it.
if dump_name in (None, "", "upload_file_minidump"):
dump_name = "dump"

return "v1/{dump_name}/{crash_id}".format(
dump_name=dump_name, crash_id=crash_id
)

def save_raw_crash(self, crash_id, raw_crash):
"""Save the raw crash and related dumps.
Expand Down
19 changes: 19 additions & 0 deletions docs/configuration.rst
Expand Up @@ -241,6 +241,25 @@ supported.
configuration here.


Google Cloud Storage
--------------------

The ``GcsCrashStorage`` class will save crash data to Google Cloud Storage.

.. autocomponentconfig:: antenna.ext.gcs.crashstorage.GcsCrashStorage
:show-docstring:
:case: upper
:namespace: crashmover_crashstorage
:show-table:

When set as the CrashMover crashstorage class, configuration
for this class is in the ``CRASHMOVER_CRASHSTORAGE`` namespace.

Example::

CRASHMOVER_CRASHSTORAGE_GCS_BUCKET_NAME=mybucket


Crash publish
=============

Expand Down

0 comments on commit 3670126

Please sign in to comment.