# Mongo Queries

In [1]:
import pymongo
import numpy

mongoclient = pymongo.MongoClient("mongodb://localhost:27017/")
dblp = mongoclient["testdb"]
articles_collection = dblp["articles"]
proceeding_collection = dblp["proceedings"]
inproceeding_collection = dblp["inproceedings"]

#### E1: Who is the publisher of the PODS conference proceedings?

In [2]:
#Test comment (Nico)

#Query E1
doc = proceeding_collection.find({"booktitle": "PODS"}, {"publisher" : 1})

print(doc[0].get('publisher'))

ACM


#### E2: What are the titles of the articles that Martin Grohe wrote in the Theory of Computing Systems journal? (Sort in alphabetic order)


In [3]:
docs = articles_collection.find({"author" : "Martin Grohe", "journal": "Theory Comput. Syst."}, {"title": 1}).sort("title")
for doc in docs:
    print(doc.get('title'))


Database Query Processing Using Finite Cursor Machines.
Learnability and Definability in Trees and Similar Structures.
Tight Lower and Upper Bounds for the Complexity of Canonical Colour Refinement.


#### M1: How many articles were published in the SIGMOD conference proceedings this year?

In [4]:
count = articles_collection.count_documents({"journal": "SIGMOD Rec.", "year": "2022"})
print(count)

32


#### M2: How many articles were published in the oldest journal, and what is its title?

In [30]:
query = articles_collection.aggregate([
    {
        "$set":
        {
            "year":
            {
                "$cond": [{"$eq": ['$year', numpy.NaN]}, "MAXVAL", '$year']
            }
        }
    },
    {
        "$group": 
        {
            "_id": "$journal",
            "count": {"$sum": 1},
            "year": {"$min": "$year"}
        }
    },
    {
        "$sort": {"year": 1}
    },
    {
        "$limit": 1
    }
])
for doc in query:
    print(doc)




{'_id': 'J. Symb. Log.', 'count': 4864, 'year': '1936'}


In [17]:
q2 = articles_collection.aggregate([
    {
        "$match": {
            "journal": { "$ne": numpy.NaN },
            "journal": articles_collection.find({ "journal": { "$ne": numpy.NaN } }).sort("year", 1).limit(1)[0]["journal"]
        }
    },
    {
        "$group": {
            "_id": "$journal",
            "count": { "$sum": 1 }
        }
    }
])
for doc in q2:
    print(doc)

{'_id': 'J. Symb. Log.', 'count': 4864}


#### M3: What was the median amount of articles published for each year of the CIDR conference?

In [29]:
m3 = inproceeding_collection.aggregate([
    {
        "$match": {
            "booktitle": "CIDR"
        }
    },
    {
        "$group": {
            "_id": "$year",
            "num_records": { "$sum": 1 }
        }
    },
    {
        "$group": {
            "_id": "CIDR",
            "median_num_articles_per_year": {
                "$avg": {
                    "$avg": "$num_records"
                }
            }
        }
    }  
])
for doc in m3:
    print(doc)

{'_id': 'CIDR', 'median_num_articles_per_year': 46.75}
