Skip to content

Commit

Permalink
fix(logs): log retention fails with OperationAbortedException (aws#17688
Browse files Browse the repository at this point in the history
)

Fixes: aws#17546

This adds to the fix in aws#16083 that was addressing the issue where the LogRetention Lambda can be executed concurrently and create a race condition where multiple invocations are trying to create or modify the same log group.

The previous fix addressed the issue if it occurred during log group creation, in the `createLogGroupSafe` method, but did not account for the same problem happening when modifying a log group's retention period in the `setRetentionPolicy` method. This fix applies the same logic from the last fix to the other method.

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
horsmand authored and TikiTDO committed Feb 21, 2022
1 parent 4ea9f33 commit 2c8a235
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 13 deletions.
42 changes: 32 additions & 10 deletions packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts
Expand Up @@ -46,8 +46,6 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
throw new Error('Out of attempts to create a logGroup');
}
}
// Any other error
console.error(error);
throw error;
}
} while (true); // exit happens on retry count check
Expand All @@ -62,12 +60,36 @@ async function createLogGroupSafe(logGroupName: string, region?: string, options
* @param retentionInDays the number of days to retain the log events in the specified log group.
*/
async function setRetentionPolicy(logGroupName: string, region?: string, options?: SdkRetryOptions, retentionInDays?: number) {
const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options });
if (!retentionInDays) {
await cloudwatchlogs.deleteRetentionPolicy({ logGroupName }).promise();
} else {
await cloudwatchlogs.putRetentionPolicy({ logGroupName, retentionInDays }).promise();
}
// The same as in createLogGroupSafe(), here we could end up with the race
// condition where a log group is either already being created or its retention
// policy is being updated. This would result in an OperationAbortedException,
// which we will try to catch and retry the command a number of times before failing
let retryCount = options?.maxRetries == undefined ? 10 : options.maxRetries;
const delay = options?.retryOptions?.base == undefined ? 10 : options.retryOptions.base;
do {
try {
const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options });
if (!retentionInDays) {
await cloudwatchlogs.deleteRetentionPolicy({ logGroupName }).promise();
} else {
await cloudwatchlogs.putRetentionPolicy({ logGroupName, retentionInDays }).promise();
}
return;

} catch (error) {
if (error.code === 'OperationAbortedException') {
if (retryCount > 0) {
retryCount--;
await new Promise(resolve => setTimeout(resolve, delay));
continue;
} else {
// The log group is still being created by another execution but we are out of retries
throw new Error('Out of attempts to create a logGroup');
}
}
throw error;
}
} while (true); // exit happens on retry count check
}

export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent, context: AWSLambda.Context) {
Expand All @@ -92,10 +114,10 @@ export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent
// Set a retention policy of 1 day on the logs of this very function.
// Due to the async nature of the log group creation, the log group for this function might
// still be not created yet at this point. Therefore we attempt to create it.
// In case it is being created, createLogGroupSafe will handle the conflic.
// In case it is being created, createLogGroupSafe will handle the conflict.
const region = process.env.AWS_REGION;
await createLogGroupSafe(`/aws/lambda/${context.functionName}`, region, retryOptions);
// If createLogGroupSafe fails, the log group is not created even after multiple attempts
// If createLogGroupSafe fails, the log group is not created even after multiple attempts.
// In this case we have nothing to set the retention policy on but an exception will skip
// the next line.
await setRetentionPolicy(`/aws/lambda/${context.functionName}`, region, retryOptions, 1);
Expand Down
204 changes: 201 additions & 3 deletions packages/@aws-cdk/aws-logs/test/log-retention-provider.test.ts
Expand Up @@ -238,7 +238,7 @@ describe('log retention provider', () => {

});

test('does not if when operations on provider log group fails', async () => {
test('succeeds when createLogGroup for provider log group returns OperationAbortedException twice', async () => {
let attempt = 2;
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === '/aws/lambda/provider') {
Expand Down Expand Up @@ -280,7 +280,7 @@ describe('log retention provider', () => {

});

test('does not fail if operations on CDK lambda log group fails twice', async () => {
test('succeeds when createLogGroup for CDK lambda log group returns OperationAbortedException twice', async () => {
let attempt = 2;
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
Expand Down Expand Up @@ -322,7 +322,7 @@ describe('log retention provider', () => {

});

test('does fail if operations on CDK lambda log group fails indefinitely', async () => {
test('fails when createLogGroup for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
return Promise.reject(new MyError(
Expand Down Expand Up @@ -356,6 +356,204 @@ describe('log retention provider', () => {
expect(request.isDone()).toEqual(true);


});

test('succeeds when putRetentionPolicy for provider log group returns OperationAbortedException twice', async () => {
let attempt = 2;
const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === '/aws/lambda/provider') {
if (attempt > 0) {
attempt--;
return Promise.reject(new MyError(
'A conflicting operation is currently in progress against this resource. Please try again.',
'OperationAbortedException'));
} else {
return Promise.resolve({});
}
}
return Promise.resolve({});
};

const createLogGroupFake = sinon.fake.resolves({});
const deleteRetentionPolicyFake = sinon.fake.resolves({});

AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

const event = {
...eventCommon,
RequestType: 'Create',
ResourceProperties: {
ServiceToken: 'token',
RetentionInDays: '30',
LogGroupName: 'group',
},
};

const request = createRequest('SUCCESS');

await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

expect(request.isDone()).toEqual(true);


});

test('succeeds when putRetentionPolicy for CDK lambda log group returns OperationAbortedException twice', async () => {
let attempt = 2;
const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
if (attempt > 0) {
attempt--;
return Promise.reject(new MyError(
'A conflicting operation is currently in progress against this resource. Please try again.',
'OperationAbortedException'));
} else {
return Promise.resolve({});
}
}
return Promise.resolve({});
};

const createLogGroupFake = sinon.fake.resolves({});
const deleteRetentionPolicyFake = sinon.fake.resolves({});

AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

const event = {
...eventCommon,
RequestType: 'Create',
ResourceProperties: {
ServiceToken: 'token',
RetentionInDays: '30',
LogGroupName: 'group',
},
};

const request = createRequest('SUCCESS');

await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

expect(request.isDone()).toEqual(true);


});

test('fails when putRetentionPolicy for CDK lambda log group fails with OperationAbortedException indefinitely', async () => {
const putRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
return Promise.reject(new MyError(
'A conflicting operation is currently in progress against this resource. Please try again.',
'OperationAbortedException'));
}
return Promise.resolve({});
};

const createLogGroupFake = sinon.fake.resolves({});
const deleteRetentionPolicyFake = sinon.fake.resolves({});

AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

const event = {
...eventCommon,
RequestType: 'Create',
ResourceProperties: {
ServiceToken: 'token',
RetentionInDays: '30',
LogGroupName: 'group',
},
};

const request = createRequest('FAILED');

await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

expect(request.isDone()).toEqual(true);


});

test('succeeds when deleteRetentionPolicy for provider log group returns OperationAbortedException twice', async () => {
let attempt = 2;
const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === '/aws/lambda/provider') {
if (attempt > 0) {
attempt--;
return Promise.reject(new MyError(
'A conflicting operation is currently in progress against this resource. Please try again.',
'OperationAbortedException'));
} else {
return Promise.resolve({});
}
}
return Promise.resolve({});
};

const createLogGroupFake = sinon.fake.resolves({});
const putRetentionPolicyFake = sinon.fake.resolves({});

AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

const event = {
...eventCommon,
RequestType: 'Create',
ResourceProperties: {
ServiceToken: 'token',
RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
LogGroupName: 'group',
},
};

const request = createRequest('SUCCESS');

await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

expect(request.isDone()).toEqual(true);


});

test('fails when deleteRetentionPolicy for provider log group fails with OperationAbortedException indefinitely', async () => {
const deleteRetentionPolicyFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => {
if (params.logGroupName === 'group') {
return Promise.reject(new MyError(
'A conflicting operation is currently in progress against this resource. Please try again.',
'OperationAbortedException'));
}
return Promise.resolve({});
};

const createLogGroupFake = sinon.fake.resolves({});
const putRetentionPolicyFake = sinon.fake.resolves({});

AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake);
AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake);
AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake);

const event = {
...eventCommon,
RequestType: 'Create',
ResourceProperties: {
ServiceToken: 'token',
RetentionInDays: '0', // Setting this to 0 triggers the call to deleteRetentionPolicy
LogGroupName: 'group',
},
};

const request = createRequest('FAILED');

await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context);

expect(request.isDone()).toEqual(true);


});

test('response data contains the log group name', async () => {
Expand Down

0 comments on commit 2c8a235

Please sign in to comment.