Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix columns duplication on MongoDB Query Runner #6640 #6641

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 8 additions & 7 deletions redash/query_runner/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,14 @@ def parse_results(results):

parsed_row = _parse_dict(row)
for column_name, value in parsed_row.items():
columns.append(
{
"name": column_name,
"friendly_name": column_name,
"type": TYPES_MAP.get(type(value), TYPE_STRING),
}
)
if _get_column_by_name(columns, column_name) is None:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First of all thanks for the fix on this, I was having this problem recently : D

In here, to make this more efficient, can't we just get the first row, and then generate the columns array from that outside for this for loop?

In this case every cell in the table requires this comparison which is in-efficient and whether we check row1 or rowN the columns will be same.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@clearnote01 I understand your comment. I also think that if it is normal RDB data, I should do that.

I thought that MongoDB can have a different column for each row, so I implemented it like this. Does such a case not exist?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@masayuki038 You are right, I don't think it's a problem when projection is specifically configured via $project, but there could be other cases that would break it.

Maybe it's safer to keep it as it is, and have a note that this could be optimized in the future if someone can rule out the edge cases.

columns.append(
{
"name": column_name,
"friendly_name": column_name,
"type": TYPES_MAP.get(type(value), TYPE_STRING),
}
)

rows.append(parsed_row)

Expand Down
66 changes: 65 additions & 1 deletion tests/query_runner/test_mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from mock import patch
from pytz import utc

from redash.query_runner import TYPE_INTEGER, TYPE_STRING
from redash.query_runner.mongodb import (
MongoDB,
_get_column_by_name,
Expand All @@ -15,7 +16,7 @@


@patch("redash.query_runner.mongodb.pymongo.MongoClient")
class TestUserPassOverride(TestCase):
class TestMongoDB(TestCase):
def test_username_password_present_overrides_username_from_uri(self, mongo_client):
config = {
"connectionString": "mongodb://localhost:27017/test",
Expand All @@ -37,6 +38,66 @@ def test_username_password_absent_does_not_pass_args(self, mongo_client):
self.assertNotIn("username", mongo_client.call_args.kwargs)
self.assertNotIn("password", mongo_client.call_args.kwargs)

def test_run_query_with_fields(self, mongo_client):
config = {
"connectionString": "mongodb://localhost:27017/test",
"username": "test_user",
"password": "test_pass",
"dbName": "test",
}
mongo_qr = MongoDB(config)

query = {"collection": "test", "query": {"age": 10}, "fields": {"_id": 1, "name": 2}}

return_value = [{"_id": "6569ee53d53db7930aaa0cc0", "name": "test2"}]

expected = {
"columns": [
{"name": "_id", "friendly_name": "_id", "type": TYPE_STRING},
{"name": "name", "friendly_name": "name", "type": TYPE_STRING},
],
"rows": return_value,
}

mongo_client().__getitem__().__getitem__().find.return_value = return_value
result, err = mongo_qr.run_query(json_dumps(query), None)

self.assertIsNone(err)
self.assertEqual(expected, result)

def test_run_query_with_aggregate(self, mongo_client):
config = {
"connectionString": "mongodb://localhost:27017/test",
"username": "test_user",
"password": "test_pass",
"dbName": "test",
}
mongo_qr = MongoDB(config)

query = {
"collection": "test",
"aggregate": [
{"$unwind": "$tags"},
{"$group": {"_id": "$tags", "count": {"$sum": 1}}},
{"$sort": [{"name": "count", "direction": -1}, {"name": "_id", "direction": -1}]},
],
}

return_value = [{"_id": "foo", "count": 10}, {"_id": "bar", "count": 9}]

expected = {
"columns": [
{"name": "_id", "friendly_name": "_id", "type": TYPE_STRING},
{"name": "count", "friendly_name": "count", "type": TYPE_INTEGER},
],
"rows": return_value,
}

mongo_client().__getitem__().__getitem__().aggregate.return_value = return_value
result, err = mongo_qr.run_query(json_dumps(query), None)
self.assertIsNone(err)
self.assertEqual(expected, result)


class TestParseQueryJson(TestCase):
def test_ignores_non_isodate_fields(self):
Expand Down Expand Up @@ -130,6 +191,7 @@ def test_parses_regular_results(self):
for i, row in enumerate(rows):
self.assertDictEqual(row, raw_results[i])

self.assertEqual(3, len(columns))
self.assertIsNotNone(_get_column_by_name(columns, "column"))
self.assertIsNotNone(_get_column_by_name(columns, "column2"))
self.assertIsNotNone(_get_column_by_name(columns, "column3"))
Expand Down Expand Up @@ -161,9 +223,11 @@ def test_parses_nested_results(self):
},
)

self.assertEqual(7, len(columns))
self.assertIsNotNone(_get_column_by_name(columns, "column"))
self.assertIsNotNone(_get_column_by_name(columns, "column2"))
self.assertIsNotNone(_get_column_by_name(columns, "column3"))
self.assertIsNotNone(_get_column_by_name(columns, "nested.a"))
self.assertIsNotNone(_get_column_by_name(columns, "nested.b"))
self.assertIsNotNone(_get_column_by_name(columns, "nested.c"))
self.assertIsNotNone(_get_column_by_name(columns, "nested.d.e"))