# Aggregation Mini Challenge 1

Apply what you have learned so far about the aggregation framework to try to solve this challenge.

In [13]:
# Import pymongo dependencies
from  pymongo import MongoClient

In [14]:
# Connection settings: a MongoDB server on localhost and the database to query
mongodb_url = "mongodb://localhost:27017/"
db_name = "aggregation_test"

# Create the client, then take a handle to the database named above
client = MongoClient(mongodb_url)
db = client.get_database(db_name)

In [15]:
# Helper that prints every document produced by a cursor
def print_cursor(cursor):
    """Print each document from ``cursor`` followed by one blank line.

    Args:
        cursor: Any iterable of documents.

    Returns:
        None. Each document is written to standard output.
    """
    for doc in cursor:
        print(doc)  # the document itself, ending its line
        print()     # the empty separator line

In [98]:
### Challenge Part 1:
###   Return entries of each unique 'seller_id' associated with the  
###   'products' collection that have a "Beauty" tag. These entries should
###   have an attribute called 'beauty_product_seller_id' which is set 
###   equal to the related 'seller_id' of the product.
###
### Example entry from cursor: 
###  {'beauty_product_seller_id': ObjectId(...)}

# Official solution / my solution
beauty_cursor = db.products.aggregate([
    # Keep only the products tagged "Beauty"
    {
        "$match": {"tags": "Beauty"},
    },
    # Group on seller_id so that each distinct seller id appears once
    {
        "$group": {"_id": "$seller_id"},
    },
    # Expose the grouped _id under the requested attribute name
    {
        "$project": {
            "_id": 0,
            "beauty_product_seller_id": "$_id",
        },
    },
])
print_cursor(beauty_cursor)

print("=================================================================================================")

# My solution
# Wrong: duplicate seller ids are not resolved (there is no $group stage)
beauty_cursor = db.products.aggregate([
    # One document per (product, tag) pair
    {
        "$unwind": "$tags",
    },
    {
        "$match": {"tags": "Beauty"},
    },
    # Emits one entry per matching document, so a seller can appear repeatedly
    {
        "$project": {
            "_id": 0,
            "beauty_product_seller_id": "$seller_id",
        },
    },
])
print_cursor(beauty_cursor)


{'beauty_product_seller_id': ObjectId('640b4ff43c493ec195566197')}

{'beauty_product_seller_id': ObjectId('640b4ff43c493ec195566193')}

{'beauty_product_seller_id': ObjectId('640b4ff43c493ec195566196')}

{'beauty_product_seller_id': ObjectId('640b4ff43c493ec195566193')}

{'beauty_product_seller_id': ObjectId('640b4ff43c493ec195566193')}

{'beauty_product_seller_id': ObjectId('640b4ff43c493ec195566196')}

{'beauty_product_seller_id': ObjectId('640b4ff43c493ec195566197')}



In [None]:
# Print out your results


In [102]:
### Challenge Part 2:
###   Get an array of 'seller_id' attributes from products for each available
###   'tag' entry. Result entries should include a 'tag' attribute and a
###   'seller_ids' attribute which is an array with all the 'seller_id'.
###  
### Example entry from cursor: 
### {'tag': 'Kitchen',"seller_ids": [ObjectId(...)]}

# Official solution
beauty_cursor = db.products.aggregate([
    # One document per (product, tag) pair
    {
        "$unwind": "$tags",
    },
    # Collect the seller ids of every tag; $addToSet keeps each id only once
    {
        "$group": {
            "_id": "$tags",
            "seller_ids": {"$addToSet": "$seller_id"},
        },
    },
    # Expose the grouped _id as 'tag' and keep the collected array
    {
        "$project": {
            "_id": 0,
            "tag": "$_id",
            "seller_ids": "$seller_ids",
        },
    },
])
print_cursor(beauty_cursor)
print("=================================================================================================")

# My solution
# It uses $push, which does not take duplicates into account: a seller id is
# appended once for every product of that seller carrying the tag
beauty_cursor = db.products.aggregate([
    {
        "$unwind": "$tags",
    },
    {
        "$group": {
            "_id": "$tags",
            "seller_ids": {"$push": "$seller_id"},
        },
    },
    {
        "$project": {
            "_id": 0,
            "tag": "$_id",
            "seller_ids": "$seller_ids",
        },
    },
])
print_cursor(beauty_cursor)

{'tag': 'Office', 'seller_ids': [ObjectId('640b4ff43c493ec195566194')]}

{'tag': 'Beauty', 'seller_ids': [ObjectId('640b4ff43c493ec195566197'), ObjectId('640b4ff43c493ec195566193'), ObjectId('640b4ff43c493ec195566196')]}

{'tag': 'Kitchen', 'seller_ids': [ObjectId('640b4ff43c493ec195566193')]}

{'tag': 'Home', 'seller_ids': [ObjectId('640b4ff43c493ec195566193')]}

{'tag': 'School', 'seller_ids': [ObjectId('640b4ff43c493ec195566194')]}

{'tag': 'Office', 'seller_ids': [ObjectId('640b4ff43c493ec195566194'), ObjectId('640b4ff43c493ec195566194')]}

{'tag': 'Beauty', 'seller_ids': [ObjectId('640b4ff43c493ec195566193'), ObjectId('640b4ff43c493ec195566193'), ObjectId('640b4ff43c493ec195566196'), ObjectId('640b4ff43c493ec195566197')]}

{'tag': 'Kitchen', 'seller_ids': [ObjectId('640b4ff43c493ec195566193')]}

{'tag': 'Home', 'seller_ids': [ObjectId('640b4ff43c493ec195566193')]}

{'tag': 'School', 'seller_ids': [ObjectId('640b4ff43c493ec195566194'), ObjectId('640b4ff43c493ec195566194')]}



In [None]:
# Print out your results


In [107]:
### Challenge Part 3:
###   Display the number of unique seller ids there are for each available 
###   'tag' for 'products'. Result entries should include a 'tag' attribute
###   and a 'num_sellers' attribute which is an integer. Sort by 'tag' name.
###  
### Example entry from cursor: 
### {'tag': 'Kitchen',"num_sellers": 1}



# Official solution
beauty_cursor = db.products.aggregate([
    # One document per (product, tag) pair
    {
        "$unwind": "$tags",
    },
    # Distinct seller ids per tag
    {
        "$group": {
            "_id": "$tags",
            "seller_ids": {"$addToSet": "$seller_id"},
        },
    },
    # Report the tag name and the size of its seller-id set
    {
        "$project": {
            "_id": 0,
            "tag": "$_id",
            "num_sellers": {"$size": "$seller_ids"},
        },
    },
    # Ascending order by tag name, as the challenge requests
    {
        "$sort": {"tag": 1},
    },
])
print_cursor(beauty_cursor)
print("=================================================================================================")


{'tag': 'Beauty', 'num_sellers': 3}

{'tag': 'Home', 'num_sellers': 1}

{'tag': 'Kitchen', 'num_sellers': 1}

{'tag': 'Office', 'num_sellers': 1}

{'tag': 'School', 'num_sellers': 1}



In [105]:
# My solution
# NOTE(review): the challenge text asks for sorting by 'tag'; this variant
# orders by seller count (descending) instead, so tags with equal counts are
# not ordered by name.
sortByCount_cursor = db.products.aggregate([
    # One document per (product, tag) pair
    {
        "$unwind": "$tags",
    },
    # Distinct seller ids per tag
    {
        "$group": {
            "_id": "$tags",
            "seller_ids": {"$addToSet": "$seller_id"},
        },
    },
    # Report the tag name and the size of its seller-id set
    {
        "$project": {
            "_id": 0,
            "tag": "$_id",
            "num_sellers": {"$size": "$seller_ids"},
        },
    },
    # Largest seller counts first
    {
        "$sort": {"num_sellers": -1},
    },
])
print_cursor(sortByCount_cursor)

{'tag': 'Beauty', 'num_sellers': 3}

{'tag': 'Office', 'num_sellers': 1}

{'tag': 'Kitchen', 'num_sellers': 1}

{'tag': 'Home', 'num_sellers': 1}

{'tag': 'School', 'num_sellers': 1}



In [None]:
# Print out your results
