From b286c379f12a63fd67c8d438c0a639c6673ac741 Mon Sep 17 00:00:00 2001 From: Robert Marinov Date: Tue, 14 Jul 2020 12:37:07 +0300 Subject: [PATCH] Add runbook --- .github/runbooks.yml | 38 +++++++++++++ runbooks/runbook.md | 131 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 .github/runbooks.yml create mode 100644 runbooks/runbook.md diff --git a/.github/runbooks.yml b/.github/runbooks.yml new file mode 100644 index 0000000..abb471e --- /dev/null +++ b/.github/runbooks.yml @@ -0,0 +1,38 @@ +# These settings determine the behaviour of runbook.md's Runbook Validator bot +# https://github.com/financial-times/runbook.md + +# Everything below is optional + +runbooks: + # All available settings are listed below + + # Disable checks for this repo + # defaults to `false` + disabled: false + + ### SUPPORT FOR MULTIPLE RUNBOOKS IN A SINGLE REPOSITORY + + # Fail checks if `any` | `all` | `none` runbooks fail validation + # defaults to `any` + failOn: any + + ### SUPPORT FOR UPDATING BIZ-OPS + + # Update valid Biz-Ops runbooks on merge to a specific branch + #### IF YOUR DEPLOYMENTS ARE AUTOMATED (CircleCI, Heroku Pipelines) + #### PLEASE INTEGRATE WITH CHANGE-API INSTEAD + #### https://github.com/Financial-Times/change-api#change-api---v2 + # defaults to `false` + updateOnMerge: true + + # Merges to this branch trigger Biz-Ops updates when updateOnMerge is `true` + # defaults to `master` + updateBranch: master + + ### UPDATING BIZ-OPS REQUIRES EACH RUNBOOK TO BE TIED TO A VALID SYSTEM CODE + # Mappings between paths and system codes + # unless a custom mapping is specified here + # runbook.md tries to parse the system code from the runbook's filename (format: my-sys-code_runbook.md) + systemCodes: + # paths are relative to root, omitting ./ (case-insensitive) + synth-image-pub-monitor: runbooks/runbook.md diff --git a/runbooks/runbook.md b/runbooks/runbook.md new file mode 100644 index 0000000..5b7aa58 --- /dev/null +++ 
b/runbooks/runbook.md @@ -0,0 +1,131 @@ +# UPP - Synthetic image publication monitor + +This service performs test image publications and monitors for problems. + +## Code + + synth-image-pub-monitor + +## Primary URL + + + +## Service Tier + +Platinum + +## Lifecycle Stage + +Production + +## Delivered By + +content + +## Supported By + +content + +## Known About By + +- dimitar.terziev +- hristo.georgiev +- elitsa.pavlova +- elina.kaneva +- kalin.arsov +- ivan.nikolov +- miroslav.gatsanoga +- mihail.mihaylov +- tsvetan.dimitrov +- georgi.ivanov +- robert.marinov + +## Host Platform + +AWS + +## Architecture + +This service periodically hits the current stack's cms-notifier with a random image, which is then picked up by the kafka-bridge +and forwarded to the containerised stack's cms-notifier. The image is then stored in a specific S3 bucket. After +a given waiting time, the service tests whether the content in S3 matches the published image. If "Image not found" is reported, +the service will trigger a job from a cronjob that will trace the missing +transaction ID through the services in Kubernetes and will send an alert to a Slack channel. + +## Contains Personal Data + +No + +## Contains Sensitive Data + +No + +## Dependencies + +- upp-prod-delivery-eu +- upp-prod-delivery-us + +## Failover Architecture Type + +ActiveActive + +## Failover Process Type + +FullyAutomated + +## Failback Process Type + +FullyAutomated + +## Failover Details + +The service is deployed in both Delivery clusters. +The failover guide for the cluster is located here: + + +## Data Recovery Process Type + +NotApplicable + +## Data Recovery Details + +The service does not store data, so it does not require any data recovery steps. + +## Release Process Type + +PartiallyAutomated + +## Rollback Process Type + +Manual + +## Release Details + +Manual failover is needed when a new version of +the service is deployed to production. +Otherwise, an automated failover is going to take place when releasing. 
+For more details about the failover process please see: + +## Key Management Process Type + +Manual + +## Key Management Details + +To access the service, clients need to provide basic auth credentials. +To rotate credentials, you need to log in to a particular cluster and update the varnish-auth secrets. + +## Monitoring + +Service in UPP K8S delivery clusters: + +- Delivery-Prod-EU health: +- Delivery-Prod-US health: + +## First Line Troubleshooting + + + +## Second Line Troubleshooting + +Please refer to the GitHub repository README for troubleshooting information.