Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions azure/cleanup-ecs-pr-proxies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@ schedules:
# Runtime parameters for the ECS PR-proxy cleanup pipeline.
parameters:
  # List of retry identifiers; presumably iterated elsewhere in this pipeline
  # to produce the `retry` value handed to the cleanup job template
  # (TODO confirm against the collapsed part of the file).
  - name: retries
    displayName: Retries
    type: object
    default: ["0", "1", "2", "3"]

  # Forwarded as `retain_hours` to ./components/cleanup-ecs-pr-proxies-job.yml.
  - name: retain_hours
    displayName: Retain hours
    type: string
    default: "72"

jobs:
- job: build
Expand Down Expand Up @@ -63,6 +68,7 @@ jobs:
- template: ./components/cleanup-ecs-pr-proxies-job.yml
parameters:
retry: '${{ retry }}'
retain_hours: '${{ parameters.retain_hours }}'

- bash: |
echo "AWS role session has timed out after multiple retries"
Expand Down
40 changes: 22 additions & 18 deletions azure/components/cleanup-ecs-pr-proxies-job.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
# Step template: assume the AWS role, clear stale locks, then remove old ECS
# PR deployments. The cleanup step records whether it should be re-run in the
# pipeline variable `should_retry`.
parameters:
  # Identifier of the current attempt; the cleanup step always runs when this
  # is '0', later attempts only run when `should_retry` is 'true'.
  - name: retry
    type: string
  # Exported to the environment of the make target as `retain_hours`.
  # NOTE(review): presumably the age threshold used by the ansible play - confirm.
  - name: retain_hours
    type: string

steps:
  - template: ./aws-assume-role.yml
    parameters:
      role: "auto-ops"
      profile: "apm_ptl"

  - bash: make remove-stale-locks
    displayName: Remove stale locks

  - bash: |
      export retain_hours="${{ parameters.retain_hours }}"
      ANSIBLE_FORCE_COLOR=yes make -C ansible remove-old-ecs-pr-deploys
      ERROR_CODE=$?
      echo "ERROR_CODE - $ERROR_CODE"

      if [ "$ERROR_CODE" -ne 0 ] ; then
        # printf, not echo: bash's echo does not expand "\n" without -e.
        printf '\n\nansible has unhandled error, re-trying\n'
        echo "##vso[task.setvariable variable=should_retry;]true"

      else
        echo "##vso[task.setvariable variable=should_retry;]false"
      fi

    displayName: cleanup older pr deploys
    # The template expansion is quoted so the expression stays well-formed
    # whatever string is passed in as `retry`.
    condition: or(eq('${{ parameters.retry }}', '0'), eq(variables['should_retry'], 'true'))
49 changes: 39 additions & 10 deletions scripts/terraform_force_unlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,70 @@


@click.command()
@click.option("--min-age-hr", type=int, default=4)
@click.option(
    "--key-prefix",
    type=str,
    default="nhsd-apm-management-ptl-terraform/env:/api-deployment:ptl:",
)
@click.option("--table-name", type=str, default="terraform-state-lock")
@click.option("--profile", type=str, default="apm_ptl")
def main(min_age_hr, key_prefix, table_name, profile):
    """Delete Terraform state locks that are older than a minimum age.

    Scans the DynamoDB table ``table_name`` (via the AWS profile ``profile``)
    for items whose ``LockID`` begins with ``key_prefix`` and which carry an
    ``Info`` attribute. Each item's ``Info`` JSON is parsed and the lock is
    deleted when its ``Created`` timestamp is more than ``min_age_hr`` hours
    in the past.

    Raises:
        ValueError: if ``profile`` is not ``apm_ptl`` or ``apm_prod``.
    """
    accepted_envs = ["apm_ptl", "apm_prod"]

    if profile not in accepted_envs:
        raise ValueError("Profile must be apm_ptl or apm_prod")

    terraform_lock_table = (
        boto3.Session(profile_name=profile).resource("dynamodb").Table(table_name)
    )

    scan_kwargs = {
        "FilterExpression": "begins_with(#n0, :v0) AND attribute_exists(#n1)",
        "ExpressionAttributeNames": {"#n0": "LockID", "#n1": "Info"},
        "ExpressionAttributeValues": {":v0": key_prefix},
    }

    # A scan response may be paginated: keep requesting pages for as long as
    # the response carries a LastEvaluatedKey to resume from.
    total_items = []
    while True:
        page = terraform_lock_table.scan(**scan_kwargs)
        total_items.extend(page["Items"])
        if "LastEvaluatedKey" not in page:
            break
        scan_kwargs["ExclusiveStartKey"] = page["LastEvaluatedKey"]

    # Report the count across every page, not just the final one.
    print(
        f"Found {len(total_items)} locks which start with key prefix '{key_prefix}'"
    )

    # Locks created before this instant are considered stale.
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
        hours=min_age_hr
    )

    removed_count = 0
    for lock_item in total_items:
        lock_item_info = json.loads(lock_item["Info"])
        lock_id = lock_item["LockID"]
        created_at = dateutil.parser.parse(lock_item_info["Created"])

        if created_at < cutoff:
            print(
                f"{lock_id} {created_at=} is more than {min_age_hr} hours old, deleting lock..."
            )
            terraform_lock_table.delete_item(Key={"LockID": lock_id})
            removed_count += 1
        else:
            print(
                f"{lock_id} {created_at=} is not more than {min_age_hr} hours old, leaving it alone!"
            )

    print(f"Removed {removed_count} locks")

Expand Down