diff --git a/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts b/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts
index da537c149f013..d2c14e5a72cc7 100644
--- a/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts
+++ b/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts
@@ -46,8 +46,6 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
           throw new Error('Out of attempts to create a logGroup');
         }
       }
-      // Any other error
-      console.error(error);
       throw error;
     }
   } while (true); // exit happens on retry count check
@@ -62,12 +60,36 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
  * @param retentionInDays the number of days to retain the log events in the specified log group.
  */
 async function setRetentionPolicy(logGroupName: string, region?: string, options?: SdkRetryOptions, retentionInDays?: number) {
-  const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options });
-  if (!retentionInDays) {
-    await cloudwatchlogs.deleteRetentionPolicy({ logGroupName }).promise();
-  } else {
-    await cloudwatchlogs.putRetentionPolicy({ logGroupName, retentionInDays }).promise();
-  }
+  // As in createLogGroupSafe(), a concurrent operation on the same log group (it is still
+  // being created, or its retention policy is being updated) makes the call fail with an
+  // OperationAbortedException. Retry on that error code only, waiting `delay` ms between
+  // attempts, and give up with an Error once `retryCount` retries have been used.
+  let retryCount = options?.maxRetries == undefined ? 10 : options.maxRetries;
+  const delay = options?.retryOptions?.base == undefined ? 10 : options.retryOptions.base;
+  do {
+    try {
+      const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options });
+      if (!retentionInDays) {
+        await cloudwatchlogs.deleteRetentionPolicy({ logGroupName }).promise();
+      } else {
+        await cloudwatchlogs.putRetentionPolicy({ logGroupName, retentionInDays }).promise();
+      }
+      return;
+
+    } catch (error) {
+      if (error.code === 'OperationAbortedException') {
+        if (retryCount > 0) {
+          retryCount--;
+          await new Promise(resolve => setTimeout(resolve, delay));
+          continue;
+        } else {
+          // A conflicting operation is still in progress on the log group but we are out of retries
+          throw new Error('Out of attempts to set the retention policy of a logGroup');
+        }
+      }
+      throw error;
+    }
+  } while (true); // exit happens on retry count check
 }
 
 export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent, context: AWSLambda.Context) {
@@ -92,10 +114,10 @@ export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent
         // Set a retention policy of 1 day on the logs of this very function.
         // Due to the async nature of the log group creation, the log group for this function might
         // still be not created yet at this point. Therefore we attempt to create it.
-        // In case it is being created, createLogGroupSafe will handle the conflic.
+        // In case it is being created, createLogGroupSafe will handle the conflict.
         const region = process.env.AWS_REGION;
         await createLogGroupSafe(`/aws/lambda/${context.functionName}`, region, retryOptions);
-        // If createLogGroupSafe fails, the log group is not created even after multiple attempts
+        // If createLogGroupSafe fails, the log group is not created even after multiple attempts.
         // In this case we have nothing to set the retention policy on but an exception will skip
         // the next line.
         await setRetentionPolicy(`/aws/lambda/${context.functionName}`, region, retryOptions, 1);
diff --git a/packages/@aws-cdk/aws-logs/test/log-retention-provider.test.ts b/packages/@aws-cdk/aws-logs/test/log-retention-provider.test.ts
index c41a05e452fcf..d1f980ac60465 100644
--- a/packages/@aws-cdk/aws-logs/test/log-retention-provider.test.ts
+++ b/packages/@aws-cdk/aws-logs/test/log-retention-provider.test.ts
@@ -238,7 +238,7 @@ describe('log retention provider', () => {
 
   });
 
-  test('does not if when operations on provider log group fails', async () => {
+  test('succeeds when createLogGroup for provider log group returns OperationAbortedException twice', async () => {
     let attempt = 2;
     const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
       if (params.logGroupName === '/aws/lambda/provider') {
@@ -280,7 +280,7 @@ describe('log retention provider', () => {
 
   });
 
-  test('does not fail if operations on CDK lambda log group fails twice', async () => {
+  test('succeeds when createLogGroup for CDK lambda log group returns OperationAbortedException twice', async () => {
     let attempt = 2;
     const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
       if (params.logGroupName === 'group') {
@@ -322,7 +322,7 @@ describe('log retention provider', () => {
 
   });
 
-  test('does fail if operations on CDK lambda log group fails indefinitely', async () => {
+  test('fails when createLogGroup for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
     const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
       if (params.logGroupName === 'group') {
         return Promise.reject(new MyError(
@@ -356,6 +356,206 @@ describe('log retention provider', () => {
     expect(request.isDone()).toEqual(true);
 
 
+  });
+
+  test('succeeds when putRetentionPolicy for provider log group returns OperationAbortedException twice', async () => {
+    let attempt = 2;
+    const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.PutRetentionPolicyRequest) => {
+      if (params.logGroupName === '/aws/lambda/provider') {
+        if (attempt > 0) {
+          attempt--;
+          return Promise.reject(new MyError(
+            'A conflicting operation is currently in progress against this resource. Please try again.',
+            'OperationAbortedException'));
+        } else {
+          return Promise.resolve({});
+        }
+      }
+      return Promise.resolve({});
+    };
+
+    const createLogGroupFake = sinon.fake.resolves({});
+    const deleteRetentionPolicyFake = sinon.fake.resolves({});
+
+    AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
+    AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
+    AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
+
+    const event = {
+      ...eventCommon,
+      RequestType: 'Create',
+      ResourceProperties: {
+        ServiceToken: 'token',
+        RetentionInDays: '30',
+        LogGroupName: 'group',
+      },
+    };
+
+    const request = createRequest('SUCCESS');
+
+    await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
+
+    expect(request.isDone()).toEqual(true);
+
+
+  });
+
+  test('succeeds when putRetentionPolicy for CDK lambda log group returns OperationAbortedException twice', async () => {
+    let attempt = 2;
+    const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.PutRetentionPolicyRequest) => {
+      if (params.logGroupName === 'group') {
+        if (attempt > 0) {
+          attempt--;
+          return Promise.reject(new MyError(
+            'A conflicting operation is currently in progress against this resource. Please try again.',
+            'OperationAbortedException'));
+        } else {
+          return Promise.resolve({});
+        }
+      }
+      return Promise.resolve({});
+    };
+
+    const createLogGroupFake = sinon.fake.resolves({});
+    const deleteRetentionPolicyFake = sinon.fake.resolves({});
+
+    AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
+    AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
+    AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
+
+    const event = {
+      ...eventCommon,
+      RequestType: 'Create',
+      ResourceProperties: {
+        ServiceToken: 'token',
+        RetentionInDays: '30',
+        LogGroupName: 'group',
+      },
+    };
+
+    const request = createRequest('SUCCESS');
+
+    await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
+
+    expect(request.isDone()).toEqual(true);
+
+
+  });
+
+  test('fails when putRetentionPolicy for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
+    const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.PutRetentionPolicyRequest) => {
+      if (params.logGroupName === 'group') {
+        return Promise.reject(new MyError(
+          'A conflicting operation is currently in progress against this resource. Please try again.',
+          'OperationAbortedException'));
+      }
+      return Promise.resolve({});
+    };
+
+    const createLogGroupFake = sinon.fake.resolves({});
+    const deleteRetentionPolicyFake = sinon.fake.resolves({});
+
+    AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
+    AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
+    AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
+
+    const event = {
+      ...eventCommon,
+      RequestType: 'Create',
+      ResourceProperties: {
+        ServiceToken: 'token',
+        RetentionInDays: '30',
+        LogGroupName: 'group',
+      },
+    };
+
+    const request = createRequest('FAILED');
+
+    await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
+
+    expect(request.isDone()).toEqual(true);
+
+
+  });
+
+  test('succeeds when deleteRetentionPolicy for CDK lambda log group returns OperationAbortedException twice', async () => {
+    let attempt = 2;
+    const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.DeleteRetentionPolicyRequest) => {
+      // Only the target log group reaches deleteRetentionPolicy (RetentionInDays is 0);
+      // the provider's own log group always gets putRetentionPolicy with a 1 day retention.
+      if (params.logGroupName === 'group') {
+        if (attempt > 0) {
+          attempt--;
+          return Promise.reject(new MyError(
+            'A conflicting operation is currently in progress against this resource. Please try again.',
+            'OperationAbortedException'));
+        } else {
+          return Promise.resolve({});
+        }
+      }
+      return Promise.resolve({});
+    };
+
+    const createLogGroupFake = sinon.fake.resolves({});
+    const putRetentionPolicyFake = sinon.fake.resolves({});
+
+    AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
+    AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
+    AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
+
+    const event = {
+      ...eventCommon,
+      RequestType: 'Create',
+      ResourceProperties: {
+        ServiceToken: 'token',
+        RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
+        LogGroupName: 'group',
+      },
+    };
+
+    const request = createRequest('SUCCESS');
+
+    await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
+
+    expect(request.isDone()).toEqual(true);
+
+
+  });
+
+  test('fails when deleteRetentionPolicy for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
+    const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.DeleteRetentionPolicyRequest) => {
+      if (params.logGroupName === 'group') {
+        return Promise.reject(new MyError(
+          'A conflicting operation is currently in progress against this resource. Please try again.',
+          'OperationAbortedException'));
+      }
+      return Promise.resolve({});
+    };
+
+    const createLogGroupFake = sinon.fake.resolves({});
+    const putRetentionPolicyFake = sinon.fake.resolves({});
+
+    AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
+    AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
+    AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);
+
+    const event = {
+      ...eventCommon,
+      RequestType: 'Create',
+      ResourceProperties: {
+        ServiceToken: 'token',
+        RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
+        LogGroupName: 'group',
+      },
+    };
+
+    const request = createRequest('FAILED');
+
+    await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);
+
+    expect(request.isDone()).toEqual(true);
+
+
   });
 
   test('response data contains the log group name', async () => {