In [33]:
%load_ext nb_black

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [2]:
%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

In [4]:
# One-time setup: uncomment to install the dependencies into this kernel's environment.
# %pip install pymongo
# %pip install mongoengine
# %pip install Faker

<IPython.core.display.Javascript object>

In [17]:
# Prepare environment for importing from src
import sys

# Put the parent directory at the front of the module search path. The guard
# keeps the cell idempotent: re-running it no longer stacks duplicate ".."
# entries onto sys.path.
if ".." not in sys.path:
    sys.path.insert(0, "..")

<IPython.core.display.Javascript object>

## Establishing a Connection 

In [5]:
from pymongo import MongoClient

client = MongoClient("mongodb://127.0.0.1:27017")

<IPython.core.display.Javascript object>

## Accessing Databases 

In [12]:
print(client.list_database_names())

['admin', 'config', 'local']


In [20]:
# Dictionary-style access works for any database name, including ones such as
# "test-database" that are not valid Python attribute names.
# For simple names, attribute-style access is equivalent:
# db = client.test_database
db = client["test_database"]

In [24]:
# "test_database" database not created yet: getting a handle to it did not
# add it to the server's list of databases.
print(client.list_database_names())

['admin', 'config', 'local']


## Getting a Collection

In [25]:
# Dictionary-style lookup of the collection; this form also handles names such
# as "test-collection". Attribute-style equivalent:
# collection = db.test_collection
collection = db["test_collection"]

In [26]:
# "test_database" database not created yet: getting a collection handle did
# not create it either.
print(client.list_database_names())

['admin', 'config', 'local']


In [None]:
# "test_collection" collection not created yet
# NOTE(review): this cell has no recorded output; presumably it prints an
# empty list at this point - confirm by re-running.
print(db.list_collection_names())

An important note about collections (and databases) in MongoDB is that they are created lazily - none of the above commands have actually performed any operations on the MongoDB server. Collections and databases are created when the first document is inserted into them.

## Inserting Documents 

Data in MongoDB is represented (and stored) using JSON-style documents. In PyMongo we use dictionaries to represent documents.

In [27]:
import datetime

# Sample blog post as a plain dict. The timestamp is a timezone-aware UTC
# datetime: datetime.utcnow() is deprecated since Python 3.12 and returns a
# naive value that is easy to mistake for local time.
post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.datetime.now(datetime.timezone.utc),
}

Note that documents can contain native Python types (like datetime.datetime instances) which will be automatically converted to and from the appropriate BSON types.

In [28]:
# Handle to the "posts" collection of the database selected above.
posts = db["posts"]

In [29]:
result = posts.insert_one(post)

In [31]:
# "test_database" database is finally created: inserting the first document
# is what made it appear in the list.
print(client.list_database_names())

['admin', 'config', 'local', 'test_database']


In [32]:
# "posts" collection is finally created by that same first insert.
print(db.list_collection_names())

['posts']


In [38]:
result.inserted_id

ObjectId('5fe1b02c1085889e2e19e467')

In [43]:
post_id = result.inserted_id

When a document is inserted, a special key, "_id", is automatically added if the document doesn’t already contain an "_id" key. The value of "_id" must be unique across the collection. insert_one() returns an instance of InsertOneResult.

## Retrieving Documents 

In [39]:
posts.find_one()

{'_id': ObjectId('5fe1b02c1085889e2e19e467'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2020, 12, 22, 8, 34, 21, 162000)}

In [40]:
posts.find_one({"author": "Mike"})

{'_id': ObjectId('5fe1b02c1085889e2e19e467'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2020, 12, 22, 8, 34, 21, 162000)}

In [42]:
posts.find_one({"author": "Ali"})

In [44]:
posts.find_one({"_id": post_id})

{'_id': ObjectId('5fe1b02c1085889e2e19e467'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2020, 12, 22, 8, 34, 21, 162000)}

In [47]:
# An ObjectId is not interchangeable with its string form: querying "_id"
# with the string matches nothing, so this cell shows no result.
post_id_str = str(post_id)
posts.find_one({"_id": post_id_str})

In [48]:
from bson.objectid import ObjectId
# Convert the string back into an ObjectId before querying; with the proper
# type the document is found again.
posts.find_one({"_id": ObjectId(post_id_str)})

{'_id': ObjectId('5fe1b02c1085889e2e19e467'),
 'author': 'Mike',
 'text': 'My first blog post!',
 'tags': ['mongodb', 'python', 'pymongo'],
 'date': datetime.datetime(2020, 12, 22, 8, 34, 21, 162000)}

## Inserting Multiple Documents 

In [49]:
# Two more posts for the bulk insert. They differ in shape (one has "tags",
# the other has "title"), yet both go into the same collection.
new_posts = [
    {
        "author": "Mike",
        "text": "Another post!",
        "tags": ["bulk", "insert"],
        "date": datetime.datetime(2009, 11, 12, 11, 14),
    },
    {
        "author": "Eliot",
        "title": "MongoDB is fun",
        "text": "and pretty easy too!",
        "date": datetime.datetime(2009, 11, 10, 10, 45),
    },
]
# Hand the whole list to the collection in a single call.
result = posts.insert_many(new_posts)

In [51]:
result.inserted_ids

[ObjectId('5fe1b78e1085889e2e19e468'), ObjectId('5fe1b78e1085889e2e19e469')]

In [55]:
# Print every document in the collection. The loop variable is deliberately
# not called `post`, so it does not overwrite the `post` dict built earlier
# in the notebook.
for stored_post in posts.find():
    print(stored_post)

{'_id': ObjectId('5fe1b02c1085889e2e19e467'), 'author': 'Mike', 'text': 'My first blog post!', 'tags': ['mongodb', 'python', 'pymongo'], 'date': datetime.datetime(2020, 12, 22, 8, 34, 21, 162000)}
{'_id': ObjectId('5fe1b78e1085889e2e19e468'), 'author': 'Mike', 'text': 'Another post!', 'tags': ['bulk', 'insert'], 'date': datetime.datetime(2009, 11, 12, 11, 14)}
{'_id': ObjectId('5fe1b78e1085889e2e19e469'), 'author': 'Eliot', 'title': 'MongoDB is fun', 'text': 'and pretty easy too!', 'date': datetime.datetime(2009, 11, 10, 10, 45)}


## Counting 

In [57]:
posts.count_documents({})

3

In [58]:
posts.count_documents({"author": "Mike"})

2

## Range Queries 

In [59]:
# Posts dated strictly before 2009-11-12 12:00 ("$lt"), ordered by author.
# The loop variable is deliberately not called `post`, so it does not
# overwrite the `post` dict built earlier in the notebook.
d = datetime.datetime(2009, 11, 12, 12)
for older_post in posts.find({"date": {"$lt": d}}).sort("author"):
    print(older_post)

{'_id': ObjectId('5fe1b78e1085889e2e19e469'), 'author': 'Eliot', 'title': 'MongoDB is fun', 'text': 'and pretty easy too!', 'date': datetime.datetime(2009, 11, 10, 10, 45)}
{'_id': ObjectId('5fe1b78e1085889e2e19e468'), 'author': 'Mike', 'text': 'Another post!', 'tags': ['bulk', 'insert'], 'date': datetime.datetime(2009, 11, 12, 11, 14)}


## Indexing 

In [62]:
result = db.profiles.create_index([('user_id', pymongo.ASCENDING)], unique=True)

In [63]:
sorted(list(db.profiles.index_information()))

['_id_', 'user_id_1']

Notice that we have two indexes now: one is the index on _id that MongoDB creates automatically, and the other is the index on user_id we just created.

In [64]:
# Two profiles with distinct user_id values, so the unique index accepts both.
user_profiles = [
    {"user_id": 211, "name": "Luke"},
    {"user_id": 212, "name": "Ziltoid"},
]
result = db.profiles.insert_many(user_profiles)

In [65]:
duplicate_profile = {'user_id': 212, 'name': 'Tommy'}

In [66]:
result = db.profiles.insert_one(duplicate_profile)

DuplicateKeyError: E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }, full error: {'index': 0, 'code': 11000, 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 212}, 'errmsg': 'E11000 duplicate key error collection: test_database.profiles index: user_id_1 dup key: { user_id: 212 }'}

## MongoEngine 

In [74]:
import datetime
from mongoengine import *

<IPython.core.display.Javascript object>

In [7]:
client = connect("health")

<IPython.core.display.Javascript object>

In [13]:
class Doctor(Document):
    """A doctor, identified by a 10-character national ID (the primary key)."""

    # Exactly 10 characters: min_length and max_length are both 10.
    national_id = StringField(
        primary_key=True, required=True, min_length=10, max_length=10
    )
    first_name = StringField(required=True, max_length=50)
    last_name = StringField(required=True, max_length=50)
    specialty = StringField(required=True, max_length=50)
    # Integer in [0, 100]; presumably years of experience - confirm.
    background = IntField(required=True, min_value=0, max_value=100)


class Patient(Document):
    """A patient, identified by a 10-character national ID (the primary key)."""

    # Exactly 10 characters: min_length and max_length are both 10.
    national_id = StringField(
        primary_key=True, required=True, min_length=10, max_length=10
    )
    first_name = StringField(required=True, max_length=50)
    last_name = StringField(required=True, max_length=50)
    # Optional free-text address.
    address = StringField(max_length=256)
    birthdate = DateField(required=True)
    # NOTE(review): declared as a plain string field; make sure callers store
    # a password hash here rather than the raw password - confirm.
    password = StringField(required=True, max_length=256)
    # Every patient must reference a Doctor.
    main_doctor = ReferenceField(Doctor, required=True)


class Pharmacy(Document):
    """A pharmacy; only the name is mandatory."""

    name = StringField(required=True, max_length=256)
    # Optional contact details.
    address = StringField(max_length=256)
    telephone = StringField(max_length=256)


class Company(Document):
    """A company, identified by its name (the primary key)."""

    name = StringField(primary_key=True, required=True, max_length=50)
    # Optional; limited to 15 characters.
    telephone = StringField(max_length=15)


class Contract(Document):
    """A contract linking one Pharmacy and one Company over a date range."""

    # Optional contract text with no length limit declared.
    text = StringField()
    # Both dates are mandatory; no field-level rule orders them.
    start_date = DateField(required=True)
    end_date = DateField(required=True)
    # The two parties to the contract.
    pharmacy = ReferenceField(Pharmacy, required=True)
    company = ReferenceField(Company, required=True)


class Drug(Document):
    """A drug produced by a Company."""

    # unique_with="company": the name must be unique in combination with
    # the company.
    name = StringField(required=True, max_length=50, unique_with="company")
    company = ReferenceField(Company, required=True)
    formula = StringField(required=True, max_length=100)


class Sale(Document):
    """The price at which a Pharmacy (the store) sells a Drug."""

    # unique_with="store": the drug must be unique in combination with
    # the store.
    drug = ReferenceField(Drug, unique_with="store", required=True)
    store = ReferenceField(Pharmacy, required=True)
    price = FloatField(required=True)


class PrescriptionItem(EmbeddedDocument):
    """One line of a prescription: a drug reference plus a quantity."""

    # Despite the plural name, this field references a single Drug.
    drugs = ReferenceField(Drug, required=True)
    # Must be zero or greater.
    quantity = IntField(required=True, min_value=0)


class Prescription(Document):
    """A dated prescription with a doctor, a patient and embedded items."""

    date = DateField(required=True)
    # Embedded list of PrescriptionItem documents; the field is required.
    items = EmbeddedDocumentListField(PrescriptionItem, required=True)
    doctor = ReferenceField(Doctor, required=True)
    patient = ReferenceField(Patient, required=True)

<IPython.core.display.Javascript object>

This looks similar to how the structure of a table would be defined in a regular ORM. The key difference is that this schema will never be passed on to MongoDB — this will only be enforced at the application level, making future changes easy to manage.

In [9]:
client.list_database_names()

['admin', 'config', 'local', 'test_database']

<IPython.core.display.Javascript object>

In [14]:
company = Company(name="pfizer")

<IPython.core.display.Javascript object>

In [15]:
company.save()

<Company: Company object>

<IPython.core.display.Javascript object>

In [16]:
client.list_database_names()

['admin', 'config', 'health', 'local', 'test_database']

<IPython.core.display.Javascript object>

In [25]:
# Iterate over the Company documents returned by the queryset and print each name.
for comp in Company.objects():
    print(comp.name)

pfizer


<IPython.core.display.Javascript object>