In [None]:
# https://realpython.com/introduction-to-mongodb-and-python/

In [1]:
import pymongo

In [2]:
# Establish a connection. Called with no arguments, MongoClient() uses the
# default host (localhost) and port (27017), as the displayed repr confirms.
from pymongo import MongoClient
client = MongoClient()
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [3]:
# Alternative: pass the host and port explicitly as keyword arguments.
client = MongoClient(host="localhost", port=27017)

In [4]:
# Alternative: describe the same host and port with a MongoDB URI string.
client = MongoClient("mongodb://localhost:27017")

In [5]:
# Select the database to work with using attribute (dot) access on the client.
db = client.rptutorials

If the database named above doesn't exist yet, MongoDB creates it for you (lazily, the first time you store data in it).

In [6]:
# Alternative: dictionary-style access, using the database name as the key.
db = client["rptutorials"]

Create documents and insert them

In [7]:
# A document is described with a plain dictionary; nested lists are allowed.
tutorial1 = {
    "title": "Working With JSON Data in Python",
    "author": "Lucas",
    "contributors": ["Aldren", "Dan", "Joanna"],
    "url": "https://realpython.com/python-json/",
}

In [8]:
# Pick the collection to work with using dot notation on the database object,
# then display the handle.
tutorial = db.tutorial
tutorial

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'rptutorials'), 'tutorial')

In this case tutorial is an instance of Collection and represents a physical collection of documents in your database. You can insert documents into tutorial by calling .insert_one() on it with a document as an argument:

In [9]:
# Insert the tutorial1 document. The returned result object is displayed here
# and exposes the new document's id as .inserted_id.
result = tutorial.insert_one(tutorial1)
result

<pymongo.results.InsertOneResult at 0x7faf2a4c3300>

In [10]:
# Show the id recorded on the insert result.
print(f"One tutorial: {result.inserted_id}")

One tutorial: 61787f0fd505378347f2bc95


If you have many documents to add to the database, then you can use .insert_many() to insert them in one go:

In [11]:
# Second document to insert as part of a batch.
tutorial2 = {
    "title": "Python's Requests Library (Guide)",
    "author": "Alex",
    "contributors": ["Aldren", "Brad", "Joanna"],
    "url": "https://realpython.com/python-requests/",
}

# Third document to insert as part of a batch.
tutorial3 = {
    "title": "Object-Oriented Programming (OOP) in Python 3",
    "author": "David",
    "contributors": ["Aldren", "Joanna", "Jacob"],
    "url": "https://realpython.com/python3-object-oriented-programming/",
}

# Insert both documents with a single call; .inserted_ids on the result holds
# one id per inserted document.
new_result = tutorial.insert_many([tutorial2, tutorial3])
print(f"Multiple tutorials: {new_result.inserted_ids}")

Multiple tutorials: [ObjectId('61787fbed505378347f2bc96'), ObjectId('61787fbed505378347f2bc97')]


#### Retrieve Documents

In [12]:
# Without arguments, .find() returns a Cursor object that yields the
# documents in the collection on demand.

import pprint

# Iterate the cursor and pretty-print each document so nested fields stay readable.
for doc in tutorial.find():
    pprint.pprint(doc)

{'_id': ObjectId('617873e96b58f3ff3bbdf292'),
 'author': 'Jon',
 'contributors': ['Aldren', 'Geir Arne', 'Joanna', 'Jason'],
 'title': 'Reading and Writing CSV Files in Python',
 'url': 'https://realpython.com/python-csv/'}
{'_id': ObjectId('61787f0fd505378347f2bc95'),
 'author': 'Lucas',
 'contributors': ['Aldren', 'Dan', 'Joanna'],
 'title': 'Working With JSON Data in Python',
 'url': 'https://realpython.com/python-json/'}
{'_id': ObjectId('61787fbed505378347f2bc96'),
 'author': 'Alex',
 'contributors': ['Aldren', 'Brad', 'Joanna'],
 'title': "Python's Requests Library (Guide)",
 'url': 'https://realpython.com/python-requests/'}
{'_id': ObjectId('61787fbed505378347f2bc97'),
 'author': 'David',
 'contributors': ['Aldren', 'Joanna', 'Jacob'],
 'title': 'Object-Oriented Programming (OOP) in Python 3',
 'url': 'https://realpython.com/python3-object-oriented-programming/'}


In [13]:
# .find_one() retrieves a single document. Pass a dictionary of fields to
# match; here the filter is author == "Jon".
# NOTE(review): none of the cells shown inserts a document with author "Jon",
# so this lookup relies on data that was already in the collection — confirm
# or add the insert so the notebook reproduces from a clean database.

import pprint

jon_tutorial = tutorial.find_one({"author": "Jon"})

pprint.pprint(jon_tutorial)

{'_id': ObjectId('617873e96b58f3ff3bbdf292'),
 'author': 'Jon',
 'contributors': ['Aldren', 'Geir Arne', 'Joanna', 'Jason'],
 'title': 'Reading and Writing CSV Files in Python',
 'url': 'https://realpython.com/python-csv/'}


#### Close Connections 
Establishing a connection to a MongoDB database is typically an expensive operation. If you have an application that constantly retrieves and manipulates data in a MongoDB database, then you probably don’t want to be opening and closing the connection all the time since this might affect your application’s performance.

In this kind of situation, you should keep your connection alive and only close it before exiting the application to clear all the acquired resources. You can close the connection by calling .close() on the MongoClient instance:

In [14]:
# Close the client explicitly; the cells that follow open their own connections.
client.close()

Another situation is when you have an application that only occasionally uses a MongoDB database. In this case, you might want to open the connection when needed and close it immediately after use to free the acquired resources. A consistent approach to this problem is to use the with statement, because MongoClient implements the context manager protocol:

In [15]:
import pprint
from pymongo import MongoClient

# Open a client only for the duration of the block; leaving the block closes it.
with MongoClient() as client:
    db = client.rptutorials
    # Pretty-print every document in the tutorial collection.
    for doc in db.tutorial.find():
        pprint.pprint(doc)

{'_id': ObjectId('617873e96b58f3ff3bbdf292'),
 'author': 'Jon',
 'contributors': ['Aldren', 'Geir Arne', 'Joanna', 'Jason'],
 'title': 'Reading and Writing CSV Files in Python',
 'url': 'https://realpython.com/python-csv/'}
{'_id': ObjectId('61787f0fd505378347f2bc95'),
 'author': 'Lucas',
 'contributors': ['Aldren', 'Dan', 'Joanna'],
 'title': 'Working With JSON Data in Python',
 'url': 'https://realpython.com/python-json/'}
{'_id': ObjectId('61787fbed505378347f2bc96'),
 'author': 'Alex',
 'contributors': ['Aldren', 'Brad', 'Joanna'],
 'title': "Python's Requests Library (Guide)",
 'url': 'https://realpython.com/python-requests/'}
{'_id': ObjectId('61787fbed505378347f2bc97'),
 'author': 'David',
 'contributors': ['Aldren', 'Joanna', 'Jacob'],
 'title': 'Object-Oriented Programming (OOP) in Python 3',
 'url': 'https://realpython.com/python3-object-oriented-programming/'}


If you use the with statement to handle your MongoDB client, then at the end of the with code block, the client’s .__exit__() method gets called, which in turn closes the connection by calling .close().

## MongoEngine (ODM - object-document-mapper)

In [16]:
from mongoengine import connect
# db names the database to work in ("rptutorials"); host and port point at the
# MongoDB server on localhost:27017.
connect(db="rptutorials", host="localhost", port=27017)

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary())

Similar to ORMs, ODMs like MongoEngine provide a base or model class for you to define a document schema. In ORMs, that class is equivalent to a table, and its instances are equivalent to rows. In MongoEngine, the class is equivalent to a collection, and its instances are equivalent to documents.

To create a model, you need to subclass Document and provide the required fields as class attributes.

In [17]:
from mongoengine import Document, ListField, StringField, URLField

class Tutorial(Document):
    """Schema for a tutorial document.

    title and author are required strings limited to 70 and 20 characters
    respectively; contributors is a list of strings of at most 20 characters
    each; url is a required URL.
    """
    title = StringField(required=True, max_length=70)
    author = StringField(required=True, max_length=20)
    contributors = ListField(StringField(max_length=20))
    url = URLField(required=True)
    

With this model, you tell MongoEngine that you expect a Tutorial document to have a .title, an .author, a list of .contributors, and a .url. The base class, Document, uses that information along with the field types to validate the input data for you.

There are a few general parameters that you can use to validate fields. Here are some of the more commonly used parameters:

* **db_field** specifies a different field name.
* **required** ensures that the field is provided.
* **default** provides a default value for a given field if no value is given.
* **unique** ensures that no other document in the collection has the same value for this field.

To save a document to your database, you need to call .save() on a document object. If the document already exists, then all the changes will be applied to the existing document. If the document doesn’t exist, then it’ll be created.

In [18]:
# Build a Tutorial instance with every required field (title, author, url) set.
tutorial1 = Tutorial(
    title="Beautiful Soup: Build a Web Scraper With Python",
    author="Martin",
    contributors=["Aldren", "Geir Arne", "Jaya", "Joanna", "Mike"],
    url="https://realpython.com/beautiful-soup-web-scraper-python/"
)

tutorial1.save()  # Insert the new tutorial

<Tutorial: Tutorial object>

In [20]:
# .save() inserts a new document into the collection by default. MongoEngine
# (not PyMongo) validates the document against the Tutorial schema when you
# call it.
from mongoengine.errors import ValidationError

# Deliberate mistake: the required title is never set.
tutorial2 = Tutorial()
tutorial2.author = "Alex"
tutorial2.contributors = ["Aldren", "Jon", "Joanna"]
tutorial2.url = "https://realpython.com/convert-python-string-to-int/"

try:
    tutorial2.save()
except ValidationError as error:
    # The validation failure is what this cell demonstrates. Catch and show it
    # so that Restart & Run All can continue past this cell.
    print(f"Validation failed: {error}")

Each Document subclass has an .objects attribute that you can use to access the documents in the associated collection. For example, here’s how you can print the .title of all your current tutorials:

In [21]:
# Iterate over every document in the collection associated with Tutorial and
# print only its title.
for doc in Tutorial.objects:
    print(doc.title)

Reading and Writing CSV Files in Python
Working With JSON Data in Python
Python's Requests Library (Guide)
Object-Oriented Programming (OOP) in Python 3
Beautiful Soup: Build a Web Scraper With Python


In [22]:
# Calling .objects with a keyword argument narrows the results; here only
# documents whose author is "Alex" are printed.
for doc in Tutorial.objects(author="Alex"):
    print(doc.title)

Python's Requests Library (Guide)
