In [1]:
# nb_black: formats each code cell with Black when the cell is run
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# Mode 2 reloads imported modules before each cell executes, so edits to the
# project's own modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

In [3]:
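# Dependencies of this notebook. Uncomment to install them;
# %pip installs into the environment of the running kernel.
# %pip install pymongo
# %pip install mongoengine
# %pip install Faker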

<IPython.core.display.Javascript object>

In [4]:
# Put the parent directory first on the import path so that the `src`
# package can be imported from this notebook.
import sys

PARENT_DIR = ".."
sys.path.insert(0, PARENT_DIR)

<IPython.core.display.Javascript object>

## Establishing a Connection 

In [40]:
import pymongo
from pymongo import MongoClient

# Connection string of the MongoDB instance used throughout this notebook
MONGO_URI = "mongodb://127.0.0.1:27017"

client = MongoClient(MONGO_URI)

<IPython.core.display.Javascript object>

## Accessing Databases 

In [13]:
# Start from a clean slate: remove the databases this notebook creates
for stale_db_name in ("test_database", "health"):
    client.drop_database(stale_db_name)

<IPython.core.display.Javascript object>

In [14]:
# Baseline: names of the databases present on the server after the cleanup above
print(client.list_database_names())

['admin', 'assignment', 'config', 'local']


<IPython.core.display.Javascript object>

In [15]:
# Dictionary-style access works for any database name, including names such as
# "test-database" that are not valid Python attribute names.
# Attribute-style access is equivalent here: db = client.test_database
db = client["test_database"]

<IPython.core.display.Javascript object>

In [16]:
# Getting a database handle is lazy: "test_database" is still absent from the server's list
print(client.list_database_names())

['admin', 'assignment', 'config', 'local']


<IPython.core.display.Javascript object>

## Getting a Collection

In [17]:
# Dictionary-style access also handles names like "test-collection" that are
# not valid attribute names. Attribute form: collection = db.test_collection
collection = db["test_collection"]

<IPython.core.display.Javascript object>

In [18]:
# Getting a collection handle is lazy too: "test_database" still does not exist on the server
print(client.list_database_names())

['admin', 'assignment', 'config', 'local']


<IPython.core.display.Javascript object>

In [19]:
# Likewise "test_collection" has not been created: the database reports no collections
print(db.list_collection_names())

[]


<IPython.core.display.Javascript object>

An important note about collections (and databases) in MongoDB is that they are created lazily: getting the database and collection handles above did not create anything on the MongoDB server, which is why the listings still do not show them. Collections and databases are created when the first document is inserted into them.

## Inserting Documents 

Data in MongoDB is represented (and stored) using JSON-style documents. In PyMongo we use dictionaries to represent documents.

In [20]:
import datetime

# A blog post modelled as a plain dict; this is the document inserted below.
post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    # Timezone-aware "now" in UTC. datetime.utcnow() is deprecated since
    # Python 3.12 and returns a naive value that is easily mistaken for local
    # time; an aware UTC datetime denotes the same instant unambiguously.
    "date": datetime.datetime.now(datetime.timezone.utc),
}

<IPython.core.display.Javascript object>

Note that documents can contain native Python types (like datetime.datetime instances) which will be automatically converted to and from the appropriate BSON types.

In [21]:
# Handle to the "posts" collection (same as the attribute form db.posts)
posts = db["posts"]

<IPython.core.display.Javascript object>

In [22]:
# First write to the server; `result` describes the insert (see inserted_id below)
result = posts.insert_one(post)

<IPython.core.display.Javascript object>

In [23]:
# The first insert is what created "test_database": it now appears in the listing
print(client.list_database_names())

['admin', 'assignment', 'config', 'local', 'test_database']


<IPython.core.display.Javascript object>

In [24]:
# The same insert created the "posts" collection
print(db.list_collection_names())

['posts']


<IPython.core.display.Javascript object>

In [25]:
# _id of the document just inserted: an ObjectId, since `post` carried no "_id" key of its own
result.inserted_id

ObjectId('5ff49c498f64d81a9581670d')

<IPython.core.display.Javascript object>

In [26]:
# Keep the ObjectId around for the lookups by _id further down
post_id = result.inserted_id

<IPython.core.display.Javascript object>

When a document is inserted, a special key, "_id", is automatically added if the document doesn’t already contain an "_id" key. The value of "_id" must be unique across the collection. insert_one() returns an instance of InsertOneResult, whose inserted_id attribute holds that "_id".

## Retrieving Documents 

In [27]:
# With no filter, find_one() returns a single document from the collection
posts.find_one()

{'_id': ObjectId('5ff49c498f64d81a9581670d'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 5, 17, 5, 11, 548000)}

<IPython.core.display.Javascript object>

In [28]:
# Filter on a field value: returns a document whose "author" equals "Mike"
posts.find_one({"author": "Mike"})

{'_id': ObjectId('5ff49c498f64d81a9581670d'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 5, 17, 5, 11, 548000)}

<IPython.core.display.Javascript object>

In [29]:
# No stored post has this author, so find_one() returns None (hence no output)
posts.find_one({"author": "Ali"})

<IPython.core.display.Javascript object>

In [30]:
# Look the document up by its _id, using the ObjectId returned by insert_one()
posts.find_one({"_id": post_id})

{'_id': ObjectId('5ff49c498f64d81a9581670d'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 5, 17, 5, 11, 548000)}

<IPython.core.display.Javascript object>

In [31]:
# Gotcha: the string form of an ObjectId is not the ObjectId itself, so
# querying _id with a str matches nothing (None, hence no output).
post_id_str = str(post_id)
posts.find_one({"_id": post_id_str})

<IPython.core.display.Javascript object>

In [32]:
from bson.objectid import ObjectId

# Convert the string back into an ObjectId before querying by _id
posts.find_one({"_id": ObjectId(post_id_str)})

{'_id': ObjectId('5ff49c498f64d81a9581670d'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2021, 1, 5, 17, 5, 11, 548000)}

<IPython.core.display.Javascript object>

## Inserting Multiple Documents 

In [33]:
# Two more posts with different shapes: the first has "tags" but no "title",
# the second has "title" but no "tags".
mike_second_post = {
    "author": "Mike",
    "text": "Another post!",
    "tags": ["bulk", "insert"],
    "date": datetime.datetime(2009, 11, 12, 11, 14),
}
eliot_post = {
    "author": "Eliot",
    "title": "MongoDB is fun",
    "text": "and pretty easy too!",
    "date": datetime.datetime(2009, 11, 10, 10, 45),
}

# Insert both documents with a single insert_many() call
new_posts = [mike_second_post, eliot_post]
result = posts.insert_many(new_posts)

<IPython.core.display.Javascript object>

In [34]:
# One generated _id per inserted document, in the order the documents were passed in
result.inserted_ids

[ObjectId('5ff49c5f8f64d81a9581670e'), ObjectId('5ff49c5f8f64d81a9581670f')]

<IPython.core.display.Javascript object>

In [35]:
# Iterate over every document in the collection.
# The loop variable is deliberately not called `post`: that name holds the
# document built earlier in the notebook and should not be overwritten here.
for stored_post in posts.find():
    print(stored_post)

{'_id': ObjectId('5ff49c498f64d81a9581670d'), 'author': 'Mike', 'text': 'My first blog post!', 'tags': ['mongodb', 'python', 'pymongo'], 'date': datetime.datetime(2021, 1, 5, 17, 5, 11, 548000)}
{'_id': ObjectId('5ff49c5f8f64d81a9581670e'), 'author': 'Mike', 'text': 'Another post!', 'tags': ['bulk', 'insert'], 'date': datetime.datetime(2009, 11, 12, 11, 14)}
{'_id': ObjectId('5ff49c5f8f64d81a9581670f'), 'author': 'Eliot', 'title': 'MongoDB is fun', 'text': 'and pretty easy too!', 'date': datetime.datetime(2009, 11, 10, 10, 45)}


<IPython.core.display.Javascript object>

## Counting 

In [36]:
# An empty filter counts every document in the collection
posts.count_documents({})

3

<IPython.core.display.Javascript object>

In [37]:
# Count only the documents that match the filter
posts.count_documents({"author": "Mike"})

2

<IPython.core.display.Javascript object>

## Range Queries 

In [38]:
# Posts dated before noon on 12 Nov 2009, sorted by author.
# A descriptive name replaces the single-letter global, and the loop variable
# no longer overwrites the `post` document built earlier in the notebook.
cutoff = datetime.datetime(2009, 11, 12, 12)
for older_post in posts.find({"date": {"$lt": cutoff}}).sort("author"):
    print(older_post)

{'_id': ObjectId('5ff49c5f8f64d81a9581670f'), 'author': 'Eliot', 'title': 'MongoDB is fun', 'text': 'and pretty easy too!', 'date': datetime.datetime(2009, 11, 10, 10, 45)}
{'_id': ObjectId('5ff49c5f8f64d81a9581670e'), 'author': 'Mike', 'text': 'Another post!', 'tags': ['bulk', 'insert'], 'date': datetime.datetime(2009, 11, 12, 11, 14)}


<IPython.core.display.Javascript object>

## Indexing 

In [41]:
# Unique ascending index on user_id in the "profiles" collection
user_id_index_keys = [("user_id", pymongo.ASCENDING)]
result = db.profiles.create_index(user_id_index_keys, unique=True)

<IPython.core.display.Javascript object>

In [42]:
# Index names on "profiles", sorted; sorted() accepts the mapping directly and iterates its keys
sorted(db.profiles.index_information())

['_id_', 'user_id_1']

<IPython.core.display.Javascript object>

Notice that we have two indexes now: one is the index on _id that MongoDB creates automatically, and the other is the index on user_id we just created.

In [43]:
# Two profiles with distinct user_id values, as the unique index requires
user_profiles = [
    {"user_id": user_id, "name": name}
    for user_id, name in ((211, "Luke"), (212, "Ziltoid"))
]
result = db.profiles.insert_many(user_profiles)

<IPython.core.display.Javascript object>

In [44]:
# Reuses user_id 212, which the "Ziltoid" profile already holds
duplicate_profile = dict(user_id=212, name="Tommy")

<IPython.core.display.Javascript object>

In [45]:
from pymongo.errors import DuplicateKeyError

# user_id 212 is already stored and the index on user_id is unique, so the
# server rejects this insert. The error is the point of the demonstration:
# catch and display it so a full "Restart & Run All" is not aborted here.
try:
    result = db.profiles.insert_one(duplicate_profile)
except DuplicateKeyError as exc:
    print(f"DuplicateKeyError: {exc}")
else:
    print("Unexpected: the duplicate profile was inserted")

DuplicateKeyError: E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }, full error: {'index': 0, 'code': 11000, 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 212}, 'errmsg': 'E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }'}

<IPython.core.display.Javascript object>

## MongoEngine 

In [46]:
import datetime
from mongoengine import *

<IPython.core.display.Javascript object>

In [47]:
# Open the MongoEngine connection for the "health" database.
# NOTE: this rebinds `client`, which until now was the MongoClient created at
# the top of the notebook; the object returned here also offers
# list_database_names(), which the cells below call.
# No host is given: presumably the default local server is used, TODO confirm.
client = connect(db="health")

<IPython.core.display.Javascript object>

In [48]:
from src.data.health_schema import *

<IPython.core.display.Javascript object>

The schema imported above (defined in `src/data/health_schema.py`) looks similar to how the structure of a table would be defined in a regular ORM. The key difference is that this schema will never be passed on to MongoDB — it is only enforced at the application level, making future changes easy to manage.

In [49]:
# "health" is not listed yet: connecting alone did not create it on the server
client.list_database_names()

['admin', 'assignment', 'config', 'local', 'test_database']

<IPython.core.display.Javascript object>

In [50]:
# Build a Company document from the imported schema; it is persisted by save() in the next cell
company = Company(name="pfizer")

<IPython.core.display.Javascript object>

In [51]:
# Write the document to MongoDB; save() returns the document object itself
company.save()

<Company: Company object>

<IPython.core.display.Javascript object>

In [52]:
# After the save, "health" appears in the list of databases
client.list_database_names()

['admin', 'assignment', 'config', 'health', 'local', 'test_database']

<IPython.core.display.Javascript object>

In [53]:
# Query the stored Company documents through the model's `objects` manager
# and print the name of each one.
for comp in Company.objects():
    print(comp.name)

pfizer


<IPython.core.display.Javascript object>