In [None]:
import pymongo
from pymongo import MongoClient
import pprint
from IPython.display import clear_output

# Read the Atlas connection URI from the MONGODB_URI environment variable
# (copy it from the Atlas UI and export it before starting Jupyter) so that
# credentials are never saved inside the notebook.
import os

MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to your Atlas connection URI"
    )
client = MongoClient(MONGODB_URI)


# Count how many movies share each distinct `countries` value, rank those
# combinations by frequency, then summarise both ends of the ranking at once.
pipeline = [
    {
        # $group collapses all documents with the same `countries` value into
        # one output document. {'$sum': 1} adds 1 for every input document, so
        # `count` ends up being the number of movies with that combination.
        '$group': {
            '_id': {'countries': '$countries'},
            'count': {'$sum': 1},
        }
    },
    {
        # Sort on `count`; -1 means descending (most frequent first),
        # 1 would mean ascending.
        '$sort': {'count': -1}
    },
    {
        # $facet runs several independent sub-pipelines inside a single stage.
        # Every sub-pipeline receives the same input documents (here: the
        # sorted groups) and its result is stored under the given key.
        '$facet': {
            # The 100 most frequent country combinations.
            'top country combinations are : ': [{'$limit': 100}],

            # Everything after the top 100 (those are already reported above),
            # summarised with $bucketAuto: the remaining combinations are
            # spread over up to 5 buckets according to their `count`.
            # Each bucket's _id is a {min, max} range over `count`, with min
            # inclusive and max exclusive (the last bucket's max is inclusive).
            # Read the output as: "N country combinations are shared by
            # between min and max movies". For example min=1, max=2 describes
            # combinations that appear in exactly one movie.
            'unusual combinations shared by': [
                {'$skip': 100},
                {
                    '$bucketAuto': {
                        'groupBy': '$count',
                        'buckets': 5,
                        'output': {
                            # Number of combinations that fell in the bucket.
                            'country combinations': {'$sum': 1}
                        }
                    }
                }
            ]
        }
    }
]


clear_output()
pprint.pprint(list(client.sample_mflix.movies.aggregate(pipeline)))

In [None]:
import pymongo
from pymongo import MongoClient
import pprint
from IPython.display import clear_output

# Read the Atlas connection URI from the MONGODB_URI environment variable
# (copy it from the Atlas UI and export it before starting Jupyter) so that
# credentials are never saved inside the notebook.
import os

MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to your Atlas connection URI"
    )
client = MongoClient(MONGODB_URI)


pipeline = [
    {
        # $sortByCount is shorthand for a $group that counts the documents per
        # distinct value of the expression, followed by a descending $sort on
        # that count. The field is `countries` (the same field the $group
        # version of this query uses); a field name that does not exist on the
        # documents would put every movie into a single null group.
        '$sortByCount': '$countries'
    }
]


clear_output()
pprint.pprint(list(client.sample_mflix.movies.aggregate(pipeline)))

In [11]:
import pymongo
from pymongo import MongoClient
import pprint
from IPython.display import clear_output

# Read the Atlas connection URI from the MONGODB_URI environment variable
# (copy it from the Atlas UI and export it before starting Jupyter) so that
# credentials are never saved inside the notebook.
import os

MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to your Atlas connection URI"
    )
client = MongoClient(MONGODB_URI)

# Option 1: the aggregation framework. $match is a filtering stage that keeps
# only the documents satisfying the given condition (here: year == 2016).
pipeline = [
    {
        '$match': {'year': 2016}
    }
]


# Option 2: the same condition passed directly to find() as a query filter.
# Named `year_filter` so that it does not shadow Python's builtin `filter`.
year_filter = {'year': 2016}

clear_output()
pprint.pprint(list(client.sample_mflix.movies.find(year_filter)))


# Equivalent call using the aggregation pipeline defined above:
# pprint.pprint(list(client.sample_mflix.movies.aggregate(pipeline)))