Skip to content

Commit

Permalink
fix: Failure to update runner if image hasn't changed (#103)
Browse files Browse the repository at this point in the history
This affected Lambda runners and Windows runners.
We create images as part of the stack, but we also update them on a schedule and delete the old ones. That means any stack update that touches just the runner and not the image will fail. It will fail because the Lambda function or CodeBuild project will be updated, but the stack will still try to update it with the old image as it's not aware of the new image. This code forces the stack to always get the latest image digest for Lambda, even if the image wasn't created by the stack. On the Windows side, we just tell it to use the 'latest' tag, so we don't have to worry about this issue. That means it's now just a Lambda runner problem and can be confined in the Lambda runner code. Lambda is the only runner provider that needs a digest instead of a tag.

BREAKING CHANGE: RunnerImage.imageDigest was removed
  • Loading branch information
kichik committed Sep 15, 2022
1 parent bf5983b commit 6d32001
Show file tree
Hide file tree
Showing 9 changed files with 947 additions and 223 deletions.
17 changes: 0 additions & 17 deletions API.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 0 additions & 9 deletions src/providers/common.ts
Expand Up @@ -99,15 +99,6 @@ export interface RunnerImage {
*/
readonly imageTag: string;

/**
* Image digest for providers that need to know the digest like Lambda.
*
* If the digest is not specified, imageTag must always point to a new tag on update. If not, the build may try to use the old image.
*
* WARNING: the digest might change when the builder automatically rebuilds the image on a schedule. Do not expect for this digest to stay the same between deploys.
*/
readonly imageDigest?: string;

/**
* Architecture of the image.
*/
Expand Down
16 changes: 9 additions & 7 deletions src/providers/image-builders/codebuild.ts
Expand Up @@ -147,7 +147,11 @@ export interface CodeBuildImageBuilderProps {
* ```
*/
export class CodeBuildImageBuilder extends Construct implements IImageBuilder {
private static BUILDSPEC_VERSION = 1;
/**
* Bump this number every time the buildspec or any important setting of the project changes. It will force a rebuild of the image.
* @private
*/
private static BUILDSPEC_VERSION = 2;

private readonly architecture: Architecture;
private readonly os: Os;
Expand Down Expand Up @@ -340,13 +344,12 @@ export class CodeBuildImageBuilder extends Construct implements IImageBuilder {

this.boundImage = {
imageRepository: ecr.Repository.fromRepositoryAttributes(this, 'Dependable Image', {
repositoryName: this.repository.repositoryName,
// There are simpler ways to get the ARN, but we want an image object that depends on the custom resource.
// There are simpler ways to get name and ARN, but we want an image object that depends on the custom resource.
// We want whoever is using this image to automatically wait for CodeBuild to start and finish through the custom resource.
repositoryName: cr.getAttString('Name'),
repositoryArn: cr.ref,
}),
imageTag: 'latest',
imageDigest: cr.getAtt('Digest').toString(),
architecture: this.architecture,
os: this.os,
logGroup,
Expand Down Expand Up @@ -406,8 +409,7 @@ export class CodeBuildImageBuilder extends Construct implements IImageBuilder {
post_build: {
commands: this.postBuild.concat([
'STATUS="SUCCESS"',
'DIGEST="UNKNOWN"',
'if [ $CODEBUILD_BUILD_SUCCEEDING -ne 1 ]; then STATUS="FAILED"; else DIGEST=`docker inspect "$REPO_URI" | jq -r \'.[0].RepoDigests[0] | split("@")[1] | split(":")[1]\'`; fi',
'if [ $CODEBUILD_BUILD_SUCCEEDING -ne 1 ]; then STATUS="FAILED"; fi',
'cat <<EOF > /tmp/payload.json\n' +
'{\n' +
' "StackId": "$STACK_ID",\n' +
Expand All @@ -416,7 +418,7 @@ export class CodeBuildImageBuilder extends Construct implements IImageBuilder {
' "PhysicalResourceId": "$REPO_ARN",\n' +
' "Status": "$STATUS",\n' +
` "Reason": "See logs in ${logGroup.logGroupName}/$CODEBUILD_LOG_PATH (deploy again with \'cdk deploy -R\' or logRemovalPolicy=RemovalPolicy.RETAIN if they are already deleted)",\n` +
' "Data": {"Digest": "$DIGEST"}\n' + // include the digest to mark the resource updated so the runner providers get updated with the latest digest too (specifically Lambda)
` "Data": {"Name": "${repository.repositoryName}"}\n` +
'}\n' +
'EOF',
'if [ "$RESPONSE_URL" != "unspecified" ]; then jq . /tmp/payload.json; curl -fsSL -X PUT -H "Content-Type:" -d "@/tmp/payload.json" "$RESPONSE_URL"; fi',
Expand Down
2 changes: 1 addition & 1 deletion src/providers/image-builders/container.ts
Expand Up @@ -669,7 +669,7 @@ export class ContainerImageBuilder extends Construct implements IImageBuilder {
// we can't use image.attrName because it comes up with upper case
cdk.Fn.split(':', cdk.Fn.split('/', image.attrImageUri, 2)[1], 2)[0],
),
imageTag: cdk.Fn.split(':', image.attrImageUri, 2)[1],
imageTag: 'latest',
os: this.os,
architecture: this.architecture,
logGroup: log,
Expand Down
87 changes: 75 additions & 12 deletions src/providers/lambda.ts
@@ -1,14 +1,14 @@
import * as path from 'path';
import * as cdk from 'aws-cdk-lib';
import {
Annotations,
aws_ec2 as ec2,
aws_events as events,
aws_events_targets as events_targets,
aws_iam as iam,
aws_lambda as lambda,
aws_stepfunctions as stepfunctions,
aws_stepfunctions_tasks as stepfunctions_tasks,
custom_resources as cr,
} from 'aws-cdk-lib';
import { RetentionDays } from 'aws-cdk-lib/aws-logs';
import { Construct } from 'constructs';
Expand Down Expand Up @@ -164,24 +164,30 @@ export class LambdaRunner extends Construct implements IRunnerProvider {
throw new Error(`Unable to find support Lambda architecture for ${image.os.name}/${image.architecture.name}`);
}

let code;
if (image.imageDigest) {
code = lambda.DockerImageCode.fromEcr(image.imageRepository, { tagOrDigest: `sha256:${image.imageDigest}` });
} else {
if (image.imageTag == 'latest') {
Annotations.of(this).addWarning('imageTag is `latest` even though imageDigest is not specified! This means any updates to the image by the' +
'stack will be used.');
}
code = lambda.DockerImageCode.fromEcr(image.imageRepository, { tagOrDigest: image.imageTag });
}
// get image digest and make sure to get it every time the lambda function might be updated
// pass all variables that may change and cause a function update
// if we don't get the latest digest, the update may fail as a new image was already built outside the stack on a schedule
// we automatically delete old images, so we must always get the latest digest
const imageDigest = this.imageDigest(image, {
version: 1, // bump this for any non-user changes like description or defaults
label: this.label,
architecture: architecture.name,
vpc: this.vpc?.vpcId,
securityGroups: this.securityGroup?.securityGroupId,
vpcSubnets: props.subnetSelection?.subnets?.map(s => s.subnetId),
timeout: props.timeout?.toSeconds(),
memorySize: props.memorySize,
ephemeralStorageSize: props.ephemeralStorageSize?.toKibibytes(),
logRetention: props.logRetention?.toFixed(),
});

this.function = new lambda.DockerImageFunction(
this,
'Function',
{
description: `GitHub Actions runner for "${this.label}" label`,
// CDK requires "sha256:" literal prefix -- https://github.com/aws/aws-cdk/blob/ba91ca45ad759ab5db6da17a62333e2bc11e1075/packages/%40aws-cdk/aws-ecr/lib/repository.ts#L184
code,
code: lambda.DockerImageCode.fromEcr(image.imageRepository, { tagOrDigest: `sha256:${imageDigest}` }),
architecture,
vpc: this.vpc,
securityGroups: this.securityGroup && [this.securityGroup],
Expand Down Expand Up @@ -282,4 +288,61 @@ export class LambdaRunner extends Construct implements IRunnerProvider {
// the event never triggers without this - not sure why
(rule.node.defaultChild as events.CfnRule).addDeletionOverride('Properties.EventPattern.resources');
}

private imageDigest(image: RunnerImage, variableSettings: any): string {
// describe ECR image to get its digest
// the physical id is random so the resource always runs and always gets the latest digest, even if a scheduled build replaced the stack image
const reader = new cr.AwsCustomResource(this, 'Image Digest Reader', {
onCreate: {
service: 'ECR',
action: 'describeImages',
parameters: {
repositoryName: image.imageRepository.repositoryName,
imageIds: [
{
imageTag: image.imageTag,
},
],
},
physicalResourceId: cr.PhysicalResourceId.of('ImageDigest'),
},
onUpdate: {
service: 'ECR',
action: 'describeImages',
parameters: {
repositoryName: image.imageRepository.repositoryName,
imageIds: [
{
imageTag: image.imageTag,
},
],
},
physicalResourceId: cr.PhysicalResourceId.of('ImageDigest'),
},
onDelete: {
// this will NOT be called thanks to RemovalPolicy.RETAIN below
// we only use this to force the custom resource to be called again and get a new digest
service: 'fake',
action: 'fake',
parameters: variableSettings,
},
policy: cr.AwsCustomResourcePolicy.fromSdkCalls({
resources: [image.imageRepository.repositoryArn],
}),
resourceType: 'Custom::EcrImageDigest',
installLatestAwsSdk: false, // no need and it takes 60 seconds
logRetention: RetentionDays.ONE_MONTH,
});

const res = reader.node.tryFindChild('Resource') as cdk.CustomResource | undefined;
if (res) {
// don't actually call the fake onDelete above
res.applyRemovalPolicy(cdk.RemovalPolicy.RETAIN);
} else {
throw new Error('Resource not found in AwsCustomResource. Report this bug at https://github.com/CloudSnorkel/cdk-github-runners/issues.');
}

// return only the digest because CDK expects 'sha256:' literal above
return cdk.Fn.split(':', reader.getResponseField('imageDetails.0.imageDigest'), 2)[1];
}
}
17 changes: 15 additions & 2 deletions test/default.integ.snapshot/github-runners-test.assets.json
Expand Up @@ -105,6 +105,19 @@
}
}
},
"864aa5eb2d6ca4e0d4d65c940bc9e4d5a29db1e4f3f3a098ddb56f76b2129ac4": {
"source": {
"path": "asset.864aa5eb2d6ca4e0d4d65c940bc9e4d5a29db1e4f3f3a098ddb56f76b2129ac4",
"packaging": "zip"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "864aa5eb2d6ca4e0d4d65c940bc9e4d5a29db1e4f3f3a098ddb56f76b2129ac4.zip",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
},
"35c99ca05f12b61868c715d657cd142b535de141a93e018fd30f8198753d147e": {
"source": {
"path": "asset.35c99ca05f12b61868c715d657cd142b535de141a93e018fd30f8198753d147e",
Expand Down Expand Up @@ -196,15 +209,15 @@
}
}
},
"eec7ddf5ddef416beed0b559678e3fc361291c42a70d00a7942df957d8d5f6fb": {
"281fea18609a1af0cc35f88caf7c5208faeffc506b1c05c52d8c4ebcd9c00b72": {
"source": {
"path": "github-runners-test.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "eec7ddf5ddef416beed0b559678e3fc361291c42a70d00a7942df957d8d5f6fb.json",
"objectKey": "281fea18609a1af0cc35f88caf7c5208faeffc506b1c05c52d8c4ebcd9c00b72.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down

0 comments on commit 6d32001

Please sign in to comment.