Skip to content

Commit

Permalink
get_duplicates is more utilitarian
Browse files Browse the repository at this point in the history
  • Loading branch information
lzy7071 committed May 7, 2020
1 parent 3c4676e commit ccf5030
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions datanator_query_python/util/mongo_util.py
Expand Up @@ -69,10 +69,9 @@ def get_duplicates(self, collection_str, _key, **kwargs):
"""
collection = self.db_obj[collection_str]
_id = "$"+_key
pipeline = [{"$group": {"_id": _id, "count": {"$sum": 1}}},
pipeline = [{"$group": {"_id": {_key: _id}, "count": {"$sum": 1}, "uniqueIds": {"$addToSet": "$_id"}}},
{"$match": {"count": {"$gt": 1}}},
{"$sort": {"count": -1}},
{"$project": {"name": "$_id", "_id": 0, "count": 1}}]
{"$sort": {"count": -1}}]
count_pipeline = copy.deepcopy(pipeline)
count_pipeline[-1] = {"$count": "total_return"}
counts = collection.aggregate(count_pipeline, **kwargs)
Expand Down

0 comments on commit ccf5030

Please sign in to comment.