diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..e11e88a
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,13 @@
+Copyright 2015, Sumo Logic Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/s3/README.md b/s3/README.md
new file mode 100644
index 0000000..63730a7
--- /dev/null
+++ b/s3/README.md
@@ -0,0 +1,56 @@
+# S3 to SumoLogic
+This function reads files from an S3 bucket and sends their contents to a Sumo Logic hosted HTTP collector. Files in the source bucket can be gzipped or in cleartext, but should contain only text.
+
+## How it works
+The function receives S3 notifications when new files are uploaded to the source S3 bucket. It then reads these files, decompressing them first if their names end with `.gz`, and sends their contents to the target Sumo endpoint.
+
+## Lambda Setup
+For the Sumo collector configuration, do not enable multiline processing or
+one message per request -- the idea is to send as many messages in one request
+as possible to Sumo and let Sumo break them apart as needed.
+
+In the AWS console, use a code entry type of 'Edit code inline' and paste in the
+code (double-check the hostname and path as per your collector setup).
+
+In the configuration, specify `index.handler` as the Handler. Specify a Role that has
+sufficient privileges to read from the *source* bucket and to invoke a Lambda
+function. One can use the AWSLambdaBasicExecution and AWSS3ReadOnlyAccess policies, although it is *strongly* recommended to customize them to restrict access to the relevant resources in production (see the example after the two policies below):
+
+<pre>
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "logs:CreateLogGroup",
+        "logs:CreateLogStream",
+        "logs:PutLogEvents"
+      ],
+      "Resource": "arn:aws:logs:*:*:*"
+    }
+  ]
+}
+</pre>
+
+AND
+
+<pre>
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "s3:Get*",
+        "s3:List*"
+      ],
+      "Resource": "*"
+    }
+  ]
+}
+</pre>
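+
+For example, the S3 policy above could be scoped to a single source bucket instead of using `"Resource": "*"` (the bucket name below is a placeholder; substitute your own):
+
+<pre>
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "s3:GetObject",
+        "s3:ListBucket"
+      ],
+      "Resource": [
+        "arn:aws:s3:::my-log-bucket",
+        "arn:aws:s3:::my-log-bucket/*"
+      ]
+    }
+  ]
+}
+</pre>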
+
+Once the function is created, you can tie it to the source S3 bucket. From the S3 Management console, select the bucket, go to its Properties, select Events and add a Notification. From there, provide a name for the notification, select *ObjectCreated (All)* as the Events, and select *Lambda* as the *Send To* option. Finally, select the Lambda function created above and Save. (A sample notification payload and a minimal local test sketch follow the code below.)
+
+
diff --git a/s3/s3.js b/s3/s3.js
new file mode 100755
index 0000000..7fb9ba7
--- /dev/null
+++ b/s3/s3.js
@@ -0,0 +1,84 @@
+var AWS = require('aws-sdk');
+var s3 = new AWS.S3();
+var https = require('https');
+var zlib = require('zlib');
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Remember to change the hostname and path to match your collection API and specific HTTP-source endpoint
+// See more at: https://service.sumologic.com/help/Default.htm#Collector_Management_API.htm
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+var options = { 'hostname': 'endpoint1.collection.sumologic.com',
+                'path': 'https://endpoint1.collection.sumologic.com/receiver/v1/http/',
+                'method': 'POST'
+              };
+
+// Streams one S3 object to the Sumo HTTP source, gunzipping it on the fly if needed.
+function s3LogsToSumo(bucket, objKey, context) {
+    var req = https.request(options, function(res) {
+        var body = '';
+        console.log('Status:', res.statusCode);
+        res.setEncoding('utf8');
+        res.on('data', function(chunk) { body += chunk; });
+        res.on('end', function() {
+            console.log('Successfully processed HTTPS response');
+            context.succeed();
+        });
+    });
+
+    req.on('error', function(e) {
+        console.log('HTTPS request error: ' + e.message);
+        context.fail(e);
+    });
+
+    var totalBytes = 0;
+    var isCompressed = false;
+    if (objKey.match(/\.gz$/)) {
+        isCompressed = true;
+    }
+
+    // Finish the request once the object has been fully read; the Lambda itself is
+    // completed in the response 'end' handler above, after Sumo has replied.
+    var finishFnc = function() {
+        console.log("End of stream");
+        console.log("Final total bytes read: " + totalBytes);
+        req.end();
+    };
+
+    var s3Stream = s3.getObject({Bucket: bucket, Key: objKey}).createReadStream();
+    s3Stream.on('error', function() {
+        console.log(
+            'Error getting object "' + objKey + '" from bucket "' + bucket + '". ' +
+            'Make sure they exist and your bucket is in the same region as this function.');
+        context.fail();
+    });
+
+    req.write('Bucket: ' + bucket + ' ObjectKey: ' + objKey + '\n');
+
+    if (!isCompressed) {
+        // Plain-text object: forward the bytes as they arrive.
+        s3Stream.on('data', function(data) {
+            req.write(data.toString() + '\n');
+            totalBytes += data.length;
+        });
+        s3Stream.on('end', finishFnc);
+    } else {
+        // Gzipped object: decompress through zlib before forwarding.
+        var gunzip = zlib.createGunzip();
+        s3Stream.pipe(gunzip);
+
+        gunzip.on('data', function(data) {
+            totalBytes += data.length;
+            req.write(data.toString() + '\n');
+        }).on('end', finishFnc)
+          .on('error', function(error) {
+            context.fail(error);
+        });
+    }
+}
+
+exports.handler = function(event, context) {
+    options.agent = new https.Agent(options);
+    // An S3 notification can carry multiple records; process each uploaded object.
+    event.Records.forEach(function(record) {
+        var bucket = record.s3.bucket.name;
+        var objKey = decodeURIComponent(record.s3.object.key.replace(/\+/g, ' '));
+        console.log('Bucket: ' + bucket + ' ObjectKey: ' + objKey);
+        s3LogsToSumo(bucket, objKey, context);
+    });
+};
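
Below is a minimal sketch of how the handler could be exercised locally once the hostname and path in `s3.js` point at a real HTTP source. The file name `test-event.js`, the bucket name, and the object key are hypothetical; it assumes `aws-sdk` is installed (`npm install aws-sdk`) and that AWS credentials with read access to the bucket are available in the environment. The shape of the `Records` array mirrors the fields the handler actually reads (`record.s3.bucket.name` and `record.s3.object.key`).

<pre>
// test-event.js -- hypothetical local harness for s3.js (illustrative sketch)
var lambda = require('./s3.js');

// Minimal stand-in for an S3 ObjectCreated notification; only the fields the
// handler reads are included. Bucket and key are placeholders.
var sampleEvent = {
  Records: [
    {
      s3: {
        bucket: { name: 'my-log-bucket' },
        object: { key: 'logs/2015-09-01.log.gz' }
      }
    }
  ]
};

// Stub of the Lambda context object exposing the two callbacks the function uses.
var fakeContext = {
  succeed: function() { console.log('done'); },
  fail: function(err) { console.error('failed:', err); }
};

lambda.handler(sampleEvent, fakeContext);
</pre>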