In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

In [3]:
# Uncomment to install the dependencies into the kernel's own environment:
# %pip install pymongo
# %pip install mongoengine
# %pip install Faker

<IPython.core.display.Javascript object>

In [4]:
# Prepare environment for importing from src.
# The guard keeps this cell idempotent: re-running it does not pile up
# duplicate ".." entries at the front of sys.path.
import sys

if sys.path[:1] != [".."]:
    sys.path.insert(0, "..")

<IPython.core.display.Javascript object>

## Establishing a Connection 

In [5]:
import pymongo

from pymongo import MongoClient


# Connect to the MongoDB server on the local machine; the URI gets its own
# name so the connection target is easy to spot.
mongo_uri = "mongodb://127.0.0.1:27017"
client = MongoClient(mongo_uri)

<IPython.core.display.Javascript object>

In [6]:
# from mongoengine import connect, get_connection
# You can also use mongomock, but it doesn't support all types of queries.
# https://docs.mongoengine.org/guide/mongomock.html
# connect("assignment", host="mongomock://localhost")
# client = get_connection()

<IPython.core.display.Javascript object>

## Accessing Databases 

In [7]:
# Start from a clean slate: remove every database this notebook creates.
for stale_db_name in ("test_database", "assignment", "health", "examples"):
    client.drop_database(stale_db_name)

<IPython.core.display.Javascript object>

In [8]:
# The notebook's own databases were just dropped, so the listing below shows
# only the databases that remain on the server (admin, config, local).
print(client.list_database_names())

['admin', 'config', 'local']


<IPython.core.display.Javascript object>

In [9]:
db = client.test_database
# If your database name is such that using attribute style access won’t work (like test-database),
# you can use dictionary style access instead:
# db = client['test-database']

<IPython.core.display.Javascript object>

In [10]:
# "test_database" database not created yet:
print(client.list_database_names())

['admin', 'config', 'local']


<IPython.core.display.Javascript object>

## Getting a Collection

In [11]:
collection = db.test_collection
# collection = db['test-collection']

<IPython.core.display.Javascript object>

In [12]:
# "test_database" database not created yet
print(client.list_database_names())

['admin', 'config', 'local']


<IPython.core.display.Javascript object>

In [13]:
# "test_collection" collection not created yet
print(db.list_collection_names())

[]


<IPython.core.display.Javascript object>

An important note about collections (and databases) in MongoDB is that they are created lazily: none of the above commands has actually performed any operation on the MongoDB server. Collections and databases are created when the first document is inserted into them.

## Inserting Documents 

Data in MongoDB is represented (and stored) using JSON-style documents. In PyMongo we use dictionaries to represent documents.

In [14]:
import datetime

# A blog post represented as a plain dictionary.
# datetime.datetime.utcnow() is deprecated and returns a naive value; an
# explicitly UTC-aware timestamp denotes the same instant unambiguously.
post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.datetime.now(datetime.timezone.utc),
}

<IPython.core.display.Javascript object>

Note that documents can contain native Python types (like datetime.datetime instances) which will be automatically converted to and from the appropriate BSON types.

In [15]:
# Get posts collection
posts = db.posts

<IPython.core.display.Javascript object>

In [16]:
result = posts.insert_one(post)

<IPython.core.display.Javascript object>

In [17]:
# "test_database" database is finally created!
print(client.list_database_names())

['admin', 'config', 'local', 'test_database']


<IPython.core.display.Javascript object>

In [18]:
# "posts" collection is finally created!
print(db.list_collection_names())

['posts']


<IPython.core.display.Javascript object>

In [19]:
result.inserted_id

ObjectId('6002b003370550a4f2c3633f')

<IPython.core.display.Javascript object>

In [20]:
post_id = result.inserted_id

<IPython.core.display.Javascript object>

When a document is inserted, a special key, "_id", is automatically added if the document doesn’t already contain an "_id" key. The value of "_id" must be unique across the collection. insert_one() returns an instance of InsertOneResult.

## Retrieving Documents 

In [21]:
posts.find_one()

{'_id': ObjectId('6002b003370550a4f2c3633f'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 16, 9, 21, 7, 637000)}

<IPython.core.display.Javascript object>

In [22]:
posts.find_one({"author": "Mike"})

{'_id': ObjectId('6002b003370550a4f2c3633f'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 16, 9, 21, 7, 637000)}

<IPython.core.display.Javascript object>

In [23]:
# No stored post has author "Ali". find_one() returns None when nothing
# matches, so this cell displays no value.
posts.find_one({"author": "Ali"})

<IPython.core.display.Javascript object>

In [24]:
posts.find_one({"_id": post_id})

{'_id': ObjectId('6002b003370550a4f2c3633f'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 16, 9, 21, 7, 637000)}

<IPython.core.display.Javascript object>

In [25]:
# An ObjectId is not interchangeable with its string form: querying "_id"
# with the string finds nothing, so this cell displays no value.
post_id_str = str(post_id)
posts.find_one({"_id": post_id_str})

<IPython.core.display.Javascript object>

In [26]:
from bson.objectid import ObjectId

# Converting the string back into an ObjectId makes the "_id" lookup succeed.
posts.find_one({"_id": ObjectId(post_id_str)})

{'_id': ObjectId('6002b003370550a4f2c3633f'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 16, 9, 21, 7, 637000)}

<IPython.core.display.Javascript object>

## Inserting Multiple Documents 

In [27]:
# Two more documents to insert with a single insert_many() call.
# They do not share one shape: the first has "tags" but no "title", the
# second has "title" but no "tags".
new_posts = [
    {
        "author": "Mike",
        "text": "Another post!",
        "tags": ["bulk", "insert"],
        "date": datetime.datetime(2009, 11, 12, 11, 14),
    },
    {
        "author": "Eliot",
        "title": "MongoDB is fun",
        "text": "and pretty easy too!",
        "date": datetime.datetime(2009, 11, 10, 10, 45),
    },
]
# `result` is rebound here; it previously held the insert_one() result.
result = posts.insert_many(new_posts)

<IPython.core.display.Javascript object>

In [28]:
result.inserted_ids

[ObjectId('6002b004370550a4f2c36340'), ObjectId('6002b004370550a4f2c36341')]

<IPython.core.display.Javascript object>

In [29]:
list(posts.find())

[{'_id': ObjectId('6002b003370550a4f2c3633f'),
  'author': 'Mike',
  'text': 'My first blog post!',
  'tags': ['mongodb', 'python', 'pymongo'],
  'date': datetime.datetime(2021, 1, 16, 9, 21, 7, 637000)},
 {'_id': ObjectId('6002b004370550a4f2c36340'),
  'author': 'Mike',
  'text': 'Another post!',
  'tags': ['bulk', 'insert'],
  'date': datetime.datetime(2009, 11, 12, 11, 14)},
 {'_id': ObjectId('6002b004370550a4f2c36341'),
  'author': 'Eliot',
  'title': 'MongoDB is fun',
  'text': 'and pretty easy too!',
  'date': datetime.datetime(2009, 11, 10, 10, 45)}]

<IPython.core.display.Javascript object>

In [30]:
# Iterate over the cursor and print every stored post.
# A dedicated loop name is used so the notebook-level `post` dictionary
# defined earlier is not silently rebound to the last document read.
for stored_post in posts.find():
    print(stored_post)

{'_id': ObjectId('6002b003370550a4f2c3633f'), 'author': 'Mike', 'text': 'My first blog post!', 'tags': ['mongodb', 'python', 'pymongo'], 'date': datetime.datetime(2021, 1, 16, 9, 21, 7, 637000)}
{'_id': ObjectId('6002b004370550a4f2c36340'), 'author': 'Mike', 'text': 'Another post!', 'tags': ['bulk', 'insert'], 'date': datetime.datetime(2009, 11, 12, 11, 14)}
{'_id': ObjectId('6002b004370550a4f2c36341'), 'author': 'Eliot', 'title': 'MongoDB is fun', 'text': 'and pretty easy too!', 'date': datetime.datetime(2009, 11, 10, 10, 45)}


<IPython.core.display.Javascript object>

## Counting 

In [31]:
posts.count_documents({})

3

<IPython.core.display.Javascript object>

In [32]:
posts.count_documents({"author": "Mike"})

2

<IPython.core.display.Javascript object>

## Range Queries 

In [33]:
# Posts dated strictly before the cutoff, ordered by author.
# Descriptive names replace the single-letter `d`, and the loop no longer
# rebinds the notebook-level `post` dictionary.
cutoff = datetime.datetime(2009, 11, 12, 12)
for older_post in posts.find({"date": {"$lt": cutoff}}).sort("author"):
    print(older_post)

{'_id': ObjectId('6002b004370550a4f2c36341'), 'author': 'Eliot', 'title': 'MongoDB is fun', 'text': 'and pretty easy too!', 'date': datetime.datetime(2009, 11, 10, 10, 45)}
{'_id': ObjectId('6002b004370550a4f2c36340'), 'author': 'Mike', 'text': 'Another post!', 'tags': ['bulk', 'insert'], 'date': datetime.datetime(2009, 11, 12, 11, 14)}


<IPython.core.display.Javascript object>

## Indexing 

In [34]:
result = db.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True)

<IPython.core.display.Javascript object>

In [35]:
# sorted() accepts the mapping directly and yields its keys (the index names).
sorted(db.profiles.index_information())

['_id_', 'user_id_1']

<IPython.core.display.Javascript object>

Notice that we have two indexes now: one is the index on _id that MongoDB creates automatically, and the other is the index on user_id we just created.

In [36]:
user_profiles = [{"user_id": 211, "name": "Luke"}, {"user_id": 212, "name": "Ziltoid"}]
result = db.profiles.insert_many(user_profiles)

<IPython.core.display.Javascript object>

In [37]:
duplicate_profile = {"user_id": 212, "name": "Tommy"}

<IPython.core.display.Javascript object>

In [38]:
# user_id 212 already exists, so the unique index rejects this insert.
# The expected error is caught and shown so that "Restart & Run All" can
# continue past this cell instead of stopping here.
try:
    result = db.profiles.insert_one(duplicate_profile)
except pymongo.errors.DuplicateKeyError as exc:
    print(f"DuplicateKeyError: {exc}")

DuplicateKeyError: E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }, full error: {'index': 0, 'code': 11000, 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 212}, 'errmsg': 'E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }'}

<IPython.core.display.Javascript object>

## MongoEngine 

In [39]:
import datetime
from mongoengine import *

<IPython.core.display.Javascript object>

In [40]:
# Register MongoEngine's connection to the "health" database.
# The host is spelled out so this connection targets the same server as the
# other connections in this notebook instead of relying on the default.
client = connect(db="health", host="mongodb://127.0.0.1:27017")

<IPython.core.display.Javascript object>

In [41]:
from src.data.health_schema import *

<IPython.core.display.Javascript object>

The schema defined in `src.data.health_schema` looks similar to how the structure of a table would be defined in a regular ORM. The key difference is that this schema will never be passed on to MongoDB — it is only enforced at the application level, making future changes easy to manage.

In [42]:
client.list_database_names()

['admin', 'config', 'local', 'test_database']

<IPython.core.display.Javascript object>

In [43]:
company = Company(name="pfizer")

<IPython.core.display.Javascript object>

In [44]:
company.save()

<Company: Company object>

<IPython.core.display.Javascript object>

In [45]:
client.list_database_names()

['admin', 'config', 'health', 'local', 'test_database']

<IPython.core.display.Javascript object>

In [46]:
for comp in Company.objects():
    print(comp.name)

pfizer


<IPython.core.display.Javascript object>

## Examples 

In [47]:
import datetime

from mongoengine import connect, get_connection, disconnect_all

from src.data import initialize_db
from src.utils import drop_db

<IPython.core.display.Javascript object>

In [48]:
disconnect_all()

<IPython.core.display.Javascript object>

In [49]:
client = connect("examples", host="mongodb://127.0.0.1:27017")

<IPython.core.display.Javascript object>

In [50]:
drop_db(client, "examples")

<IPython.core.display.Javascript object>

In [51]:
initialize_db()

<IPython.core.display.Javascript object>

<div style="direction:rtl">
دکتر هایی که تخصص آنها پاتولوژی است.
</div>

In [52]:
list(client.examples.doctor.find({"specialty": "Pathology"}))

[{'_id': ObjectId('6002b013370550a4f2c363de'),
  'national_id': '6845909022',
  'first_name': 'Tiffany',
  'last_name': 'Gamble',
  'specialty': 'Pathology',
  'background': 55},
 {'_id': ObjectId('6002b014370550a4f2c364b0'),
  'national_id': '6684482373',
  'first_name': 'Courtney',
  'last_name': 'Hayes',
  'specialty': 'Pathology',
  'background': 55},
 {'_id': ObjectId('6002b014370550a4f2c364d3'),
  'national_id': '2817751239',
  'first_name': 'Nathan',
  'last_name': 'Hart',
  'specialty': 'Pathology',
  'background': 33},
 {'_id': ObjectId('6002b014370550a4f2c364f6'),
  'national_id': '0127622398',
  'first_name': 'Jacqueline',
  'last_name': 'Torres',
  'specialty': 'Pathology',
  'background': 65}]

<IPython.core.display.Javascript object>

<div style="direction:rtl">
دکتر هایی که بالای 70 سال سابقه کار دارند.
</div>

In [53]:
list(client.examples.doctor.find({"background": {"$gt": 70}}))

[{'_id': ObjectId('6002b013370550a4f2c36352'),
  'national_id': '7346706562',
  'first_name': 'Mary',
  'last_name': 'Calhoun',
  'specialty': 'Otolaryngologic',
  'background': 77},
 {'_id': ObjectId('6002b013370550a4f2c3637f'),
  'national_id': '1383670241',
  'first_name': 'Bradley',
  'last_name': 'Hall',
  'specialty': 'Radiography',
  'background': 77},
 {'_id': ObjectId('6002b013370550a4f2c363a2'),
  'national_id': '4335048889',
  'first_name': 'Sara',
  'last_name': 'Moore',
  'specialty': 'Dentistry',
  'background': 74},
 {'_id': ObjectId('6002b013370550a4f2c363e8'),
  'national_id': '9915668633',
  'first_name': 'Jonathon',
  'last_name': 'Carter',
  'specialty': 'SpeechPathology',
  'background': 77},
 {'_id': ObjectId('6002b013370550a4f2c363f2'),
  'national_id': '3820236848',
  'first_name': 'Theresa',
  'last_name': 'Jacobson',
  'specialty': 'Urologic',
  'background': 73},
 {'_id': ObjectId('6002b014370550a4f2c3643d'),
  'national_id': '4346066643',
  'first_name': 'Li

<IPython.core.display.Javascript object>

<div style="direction:rtl">
قیمت فروش و نام دارو هایی که دارای 6 کربن و 4 هیدروژن هستند.
</div>

In [54]:
# Sale price and drug name for every sale of a drug whose formula contains
# exactly 6 carbon atoms and exactly 4 hydrogen atoms.
# NOTE(review): assumes drug.formula is a molecular-formula string such as
# "C6H4..." — confirm against the schema.
list(
    client.examples.sale.aggregate(
        [
            # Attach the matching drug document(s) to each sale.
            {
                "$lookup": {
                    "from": "drug",
                    "localField": "drug_id",
                    "foreignField": "_id",
                    "as": "drug",
                }
            },
            # Attach the matching pharmacy document(s) to each sale.
            {
                "$lookup": {
                    "from": "pharmacy",
                    "localField": "pharmacy_id",
                    "foreignField": "_id",
                    "as": "pharmacy",
                }
            },
            # $unwind replaces each lookup array by its element and drops
            # sales whose lookup found nothing.
            # NOTE(review): pharmacy is not projected below; confirm that
            # dropping sales without a pharmacy is intended.
            {"$unwind": "$drug"},
            {"$unwind": "$pharmacy"},
            {
                "$match": {
                    "$and": [
                        # The lookahead rejects longer counts such as C60 or
                        # H45, which a bare "C6" / "H4" would also match.
                        {"drug.formula": {"$regex": r"C6(?!\d)"}},
                        {"drug.formula": {"$regex": r"H4(?!\d)"}},
                    ]
                }
            },
            {"$project": {"_id": 0, "price": 1, "drug.name": 1}},
        ]
    )
)

[{'price': 389.07453649748, 'drug': {'name': 'Botulinum Toxin Type A'}},
 {'price': 230.48871395916348, 'drug': {'name': 'Botulinum Toxin Type A'}}]

<IPython.core.display.Javascript object>

<div style="direction:rtl">
نام دارو هایی که دارای 6 کربن و 4 هیدروژن هستند و فروخته شده اند.
</div>

In [55]:
# Names of sold drugs whose formula contains exactly 6 carbon atoms and
# exactly 4 hydrogen atoms (one entry per matching sale).
# NOTE(review): assumes drug.formula is a molecular-formula string such as
# "C6H4..." — confirm against the schema.
list(
    client.examples.sale.aggregate(
        [
            # Attach the matching drug document(s) to each sale.
            {
                "$lookup": {
                    "from": "drug",
                    "localField": "drug_id",
                    "foreignField": "_id",
                    "as": "drug",
                }
            },
            # Attach the matching pharmacy document(s) to each sale.
            {
                "$lookup": {
                    "from": "pharmacy",
                    "localField": "pharmacy_id",
                    "foreignField": "_id",
                    "as": "pharmacy",
                }
            },
            # $unwind replaces each lookup array by its element and drops
            # sales whose lookup found nothing.
            # NOTE(review): pharmacy is not used in the output; confirm that
            # dropping sales without a pharmacy is intended.
            {"$unwind": "$drug"},
            {"$unwind": "$pharmacy"},
            {
                "$match": {
                    "$and": [
                        # The lookahead rejects longer counts such as C60 or
                        # H45, which a bare "C6" / "H4" would also match.
                        {"drug.formula": {"$regex": r"C6(?!\d)"}},
                        {"drug.formula": {"$regex": r"H4(?!\d)"}},
                    ]
                }
            },
            # Keep only the drug name, then promote the embedded drug
            # document to the top level of each result.
            {"$project": {"_id": 0, "drug.name": 1}},
            {"$replaceRoot": {"newRoot": "$drug"}},
        ]
    )
)

[{'name': 'Botulinum Toxin Type A'}, {'name': 'Botulinum Toxin Type A'}]

<IPython.core.display.Javascript object>

<div style="direction:rtl">
تعداد داروهایی که بعد از تاریخ datetime.datetime(2020, 12, 21, 0, 0) تجویز شده اند
</div>

In [56]:
# Total quantity over all items of the prescriptions dated after 2020-12-21.
# $group emits no document at all when no prescription matches, so take the
# first result with next() and fall back to a zero total; indexing an empty
# list with [0] would raise IndexError instead.
next(
    client.examples.prescription.aggregate(
        [
            {"$match": {"date": {"$gt": datetime.datetime(2020, 12, 21, 0, 0)}}},
            # One document per prescription item, so quantities can be summed.
            {"$unwind": "$items"},
            {"$group": {"_id": None, "sumdaroo": {"$sum": "$items.quantity"}}},
        ]
    ),
    {"_id": None, "sumdaroo": 0},
)

{'_id': None, 'sumdaroo': 378}

<IPython.core.display.Javascript object>