getredash · rameshramachandran2 · Feb 6, 2024 · Feb 7, 2024 · Feb 9, 2024
diff --git a/client/app/assets/images/db-logos/dynamodb.png b/client/app/assets/images/db-logos/dynamodb.png
diff --git a/client/app/assets/images/db-logos/s3.png b/client/app/assets/images/db-logos/s3.png
diff --git a/redash/query_runner/dynamodb.py b/redash/query_runner/dynamodb.py
@@ -0,0 +1,182 @@
+import logging
+import sys
+from decimal import Decimal
+
+import boto3
+import pandas as pd
+from boto3.dynamodb.types import Binary, TypeDeserializer
+
+from redash.query_runner import *
+from redash.utils import json_dumps, json_loads
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+# Optional dependencies. `enabled` records whether all three packages could be
+# imported; when the import fails, Engine, FragmentEngine, DynamoDBError and
+# ParseException are left undefined in this module.
+try:
+    from dql import Engine, FragmentEngine
+    from dynamo3 import DynamoDBError
+    from pyparsing import ParseException
+    enabled = True
+except ImportError as e:
+    enabled = False
+
+# Maps the string form of a pandas column dtype to the Redash column type
+# reported for that column in query results.
+TYPES_MAP = {
+    "bool": TYPE_BOOLEAN,
+    "datetime64[ns]": TYPE_DATETIME,
+    "datetime64[s]": TYPE_DATETIME,
+    "float64": TYPE_FLOAT,
+    "int64": TYPE_INTEGER,
+    "object": TYPE_STRING
+}
+
+
+class DynamoDB(BaseSQLQueryRunner):
+    """Redash query runner for Amazon DynamoDB.
+
+    Statements are sent to DynamoDB through the boto3 ``execute_statement``
+    call, using temporary credentials obtained by assuming the configured
+    IAM role through STS.
+    """
+
+    should_annotate_query = False
+
+    @classmethod
+    def configuration_schema(cls):
+        """Return the JSON schema describing the data source settings."""
+        return {
+            "type": "object",
+            "properties": {
+                "region": {
+                    "type": "string",
+                    "default": "us-east-1"
+                },
+                "aws_iam_role_arn": {"type": "string", "title": "IAM Role ARN"},
+            },
+            "required": ["aws_iam_role_arn"],
+        }
+
+    @classmethod
+    def type(cls):
+        """Return the runner's type identifier."""
+        return "dynamodb"
+
+    @classmethod
+    def name(cls):
+        """Return the display name of the data source."""
+        return "DynamoDB"
+
+    def _get_client(self):
+        """Assume the configured IAM role and build a DynamoDB client.
+
+        Returns:
+            A boto3 DynamoDB client using the temporary role credentials.
+            The caller is responsible for calling ``close()`` on it.
+        """
+        sts = boto3.client("sts")
+        try:
+            response = sts.assume_role(
+                RoleArn=self.configuration.get("aws_iam_role_arn"),
+                RoleSessionName="redash-session",
+            )
+        finally:
+            # The STS client is only needed for this single call.
+            sts.close()
+
+        credentials = response["Credentials"]
+        dynamodb = boto3.client(
+            "dynamodb",
+            # "region" is optional in the schema; fall back to the schema
+            # default instead of passing None when it was never saved.
+            region_name=self.configuration.get("region") or "us-east-1",
+            aws_access_key_id=credentials["AccessKeyId"],
+            aws_secret_access_key=credentials["SecretAccessKey"],
+            aws_session_token=credentials["SessionToken"],
+        )
+        logger.info("-----------DynamoDB client created------------")
+        return dynamodb
+
+    def test_connection(self):
+        """Check that the IAM role can be assumed and a client created."""
+        dynamodb_client = self._get_client()
+        dynamodb_client.close()
+
+    @classmethod
+    def _to_plain(cls, value):
+        """Convert a deserialized DynamoDB value into JSON-friendly types.
+
+        ``TypeDeserializer`` yields ``Decimal`` for numbers, ``set`` for the
+        set types and ``Binary`` for binary data; none of those map onto the
+        dtypes in ``TYPES_MAP`` or serialize cleanly, so they are normalized.
+        """
+        if isinstance(value, Decimal):
+            # Keep whole numbers as int so pandas infers an integer column.
+            return int(value) if value % 1 == 0 else float(value)
+        if isinstance(value, Binary):
+            # Rendered as hex text so the value can be JSON-encoded.
+            return value.value.hex()
+        if isinstance(value, (set, frozenset)):
+            # DynamoDB sets are homogeneous, so the members are sortable.
+            return sorted(cls._to_plain(member) for member in value)
+        if isinstance(value, list):
+            return [cls._to_plain(member) for member in value]
+        if isinstance(value, dict):
+            return {key: cls._to_plain(member) for key, member in value.items()}
+        return value
+
+    def run_query(self, query, user):
+        """Execute ``query`` against DynamoDB and return Redash results.
+
+        Args:
+            query: Statement text passed to ``execute_statement``.
+            user: The requesting user (unused).
+
+        Returns:
+            A ``(json_data, error)`` tuple. ``json_data`` is the serialized
+            ``{"columns": [...], "rows": [...]}`` payload, or ``None`` when
+            ``error`` carries a message.
+        """
+        # Created outside the try block so that a failure here is reported
+        # as-is rather than being masked by closing a client that is None.
+        dynamodb_client = self._get_client()
+        json_data = None
+        error = None
+        try:
+            deserializer = TypeDeserializer()
+            items = []
+            request = {"Statement": query}
+            # A single response is size-limited; follow NextToken so the
+            # result set is not silently truncated.
+            while True:
+                response = dynamodb_client.execute_statement(**request)
+                for item in response.get("Items", []):
+                    items.append(
+                        {
+                            attr: self._to_plain(deserializer.deserialize(raw))
+                            for attr, raw in item.items()
+                        }
+                    )
+                next_token = response.get("NextToken")
+                if not next_token:
+                    break
+                request["NextToken"] = next_token
+            logger.info("----------------Query has been executed!-----------------")
+            logger.debug("DynamoDB query returned %d item(s)", len(items))
+
+            df = pd.DataFrame(items)
+            columns = [
+                {
+                    "name": col,
+                    "friendly_name": col,
+                    # Unmapped dtypes are reported as strings instead of
+                    # failing the whole query with a KeyError.
+                    "type": TYPES_MAP.get(str(df[col].dtype), TYPE_STRING),
+                }
+                for col in df.columns
+            ]
+
+            # Returning the query results in Redash format
+            data = {"columns": columns, "rows": df.to_dict("records")}
+            json_data = json_dumps(data)
+        except dynamodb_client.exceptions.ClientError as e:
+            # Service-side failures (bad statement, missing permissions, ...).
+            error = str(e)
+        except (SyntaxError, RuntimeError) as e:
+            error = str(e)
+        except KeyboardInterrupt:
+            error = "Query cancelled by user."
+        finally:
+            dynamodb_client.close()
+        return json_data, error
+
+
+# Register the DynamoDB query runner (`register` comes from the
+# redash.query_runner star import at the top of this module).
+register(DynamoDB)
diff --git a/redash/query_runner/s3.py b/redash/query_runner/s3.py
@@ -0,0 +1,114 @@
+import boto3
+import pandas as pd
+from redash.query_runner import BaseQueryRunner, register
+from redash.query_runner import TYPE_STRING, TYPE_INTEGER, TYPE_BOOLEAN, TYPE_FLOAT, TYPE_DATE, TYPE_DATETIME
+from redash.utils import json_dumps, json_loads
+import logging
+
+# Maps the string form of a pandas column dtype to the Redash column type
+# reported for that column in query results.
+TYPES_MAP = {
+    "bool": TYPE_BOOLEAN,
+    "datetime64[ns]": TYPE_DATETIME,
+    "datetime64[s]": TYPE_DATETIME,
+    "float64": TYPE_FLOAT,
+    "int64": TYPE_INTEGER,
+    "object": TYPE_STRING
+}
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+class S3(BaseQueryRunner):
+    """Redash query runner that queries a single CSV object with S3 Select.
+
+    The bucket and object key are part of the data source configuration, so
+    every query runs against that one object.
+    """
+
+    @classmethod
+    def name(cls):
+        """Return the display name of the data source."""
+        return "Amazon S3"
+
+    @classmethod
+    def configuration_schema(cls):
+        """Return the JSON schema describing the data source settings."""
+        return {
+            "type": "object",
+            "properties": {
+                "region": {"type": "string", "title": "AWS Region"},
+                "bucket_name": {"type": "string", "title": "Bucket Name"},
+                "object_key": {"type": "string", "title": "Object Key"}
+            },
+            "required": ["region", "bucket_name", "object_key"],
+            "order": ["region", "bucket_name", "object_key"],
+        }
+
+    def _get_client(self):
+        """Build an S3 client for the configured region."""
+        return boto3.client("s3", region_name=self.configuration["region"])
+
+    @staticmethod
+    def _strip_carriage_returns(record):
+        """Drop carriage returns from the keys and string values of a record.
+
+        CSV files with CRLF line endings otherwise leak a trailing "\\r" into
+        the last column's name and values.
+        """
+        return {
+            key.replace("\r", ""): value.replace("\r", "") if isinstance(value, str) else value
+            for key, value in record.items()
+        }
+
+    def _select_records(self, s3_client, query):
+        """Run ``query`` with S3 Select and return the records as dicts.
+
+        Args:
+            s3_client: boto3 S3 client used for the request.
+            query: S3 Select SQL expression.
+
+        Returns:
+            A list with one dict per JSON record in the response.
+        """
+        # As of now the data source is tied to one particular S3 object.
+        resp = s3_client.select_object_content(
+            Bucket=self.configuration["bucket_name"],
+            Key=self.configuration["object_key"],  # The CSV file (Object Key)
+            ExpressionType="SQL",
+            Expression=query,
+            InputSerialization={"CSV": {"FileHeaderInfo": "Use"}, "CompressionType": "NONE"},
+            OutputSerialization={"JSON": {}},
+        )
+
+        # The payload arrives as a stream of events and a record may be split
+        # across two of them, so join all chunks before decoding.
+        payload = b"".join(
+            event["Records"]["Payload"] for event in resp["Payload"] if "Records" in event
+        )
+
+        # Records are newline-delimited JSON. Split on "\n" only: splitlines()
+        # would also break on separators that may appear raw inside strings.
+        return [
+            self._strip_carriage_returns(json_loads(line))
+            for line in payload.decode("utf8").split("\n")
+            if line.strip()
+        ]
+
+    def test_connection(self):
+        """Check that the configured object can be queried with S3 Select."""
+        s3_client = self._get_client()
+        try:
+            # LIMIT 1 keeps the check from scanning the whole object.
+            records = self._select_records(s3_client, "SELECT * FROM S3Object LIMIT 1")
+        finally:
+            s3_client.close()
+        logger.info("Records: %s", records)
+
+    def run_query(self, query, user):
+        """Execute ``query`` against the configured object.
+
+        Args:
+            query: S3 Select SQL expression.
+            user: The requesting user (unused).
+
+        Returns:
+            A ``(json_data, error)`` tuple. ``json_data`` is the serialized
+            ``{"columns": [...], "rows": [...]}`` payload, or ``None`` when
+            ``error`` carries a message.
+        """
+        s3_client = self._get_client()
+        try:
+            records = self._select_records(s3_client, query)
+        except s3_client.exceptions.ClientError as e:
+            # Service-side failures (bad expression, missing object, ...).
+            return None, str(e)
+        finally:
+            s3_client.close()
+        logger.debug("S3 Select returned %d record(s)", len(records))
+
+        df = pd.DataFrame(records)
+        columns = [
+            {
+                "name": col,
+                "friendly_name": col,
+                # Unmapped dtypes are reported as strings instead of failing
+                # the whole query with a KeyError.
+                "type": TYPES_MAP.get(str(df[col].dtype), TYPE_STRING),
+            }
+            for col in df.columns
+        ]
+
+        # Returning the query results in Redash format
+        data = {"columns": columns, "rows": df.to_dict("records")}
+        error = None
+        json_data = json_dumps(data)
+        return json_data, error
+
+# Register the custom S3 query runner using `register` from
+# redash.query_runner.
+register(S3)
diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
@@ -280,6 +280,7 @@ def email_server_is_configured():
     "redash.query_runner.google_spreadsheets",
     "redash.query_runner.graphite",
     "redash.query_runner.mongodb",
+    "redash.query_runner.s3",
     "redash.query_runner.couchbase",
     "redash.query_runner.mysql",
     "redash.query_runner.pg",
@@ -298,7 +299,7 @@ def email_server_is_configured():
     "redash.query_runner.vertica",
     "redash.query_runner.clickhouse",
     "redash.query_runner.tinybird",
-    "redash.query_runner.yandex_metrica",
+    #"redash.query_runner.yandex_metrica",
     "redash.query_runner.yandex_disk",
     "redash.query_runner.rockset",
     "redash.query_runner.treasuredata",
@@ -339,6 +340,7 @@ def email_server_is_configured():
     "redash.query_runner.ignite",
     "redash.query_runner.oracle",
     "redash.query_runner.e6data",
+    "redash.query_runner.dynamodb"
 ]
 
 enabled_query_runners = array_from_string(