In [19]:
import pymongo

In [21]:
from pymongo import MongoClient
# No arguments: connect with the driver defaults (the repr below reports localhost:27017).
client = MongoClient()

# The same target spelled out explicitly, kept for reference:
# client = MongoClient("localhost", 27017)
# client = MongoClient("mongodb://localhost:27017/")

# Bare expression so the notebook displays the client's repr.
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [22]:
# Subscript access is used because the database name contains a hyphen,
# which cannot appear in Python attribute syntax.
db = client["bigdata-database"]
# Bare expression so the notebook displays the Database repr.
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'bigdata-database')

In [23]:
# Subscript access again: "test-collection" is not a valid Python attribute name.
# NOTE(review): none of the later cells shown here use this handle; they work with db.posts instead.
collection = db["test-collection"]
collection

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'bigdata-database'), 'test-collection')

In [24]:
import datetime

# Handle to the "posts" collection that the following cells query.
posts = db.posts

# A sample blog post; "date" is a timezone-aware UTC timestamp taken when the cell runs.
post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.datetime.now(datetime.timezone.utc),
}

# Store the single document and keep the _id reported by the insert result.
post_id = posts.insert_one(post).inserted_id
post_id

ObjectId('6819f621ecf57e05415f6ae1')

In [25]:
# Show the names of the collections that currently exist in this database.
db.list_collection_names()

['posts']

In [26]:
import pprint

# find_one() with no filter fetches a single document; pprint lays it out one field per line.
pprint.pprint(posts.find_one())

{'_id': ObjectId('6819f621ecf57e05415f6ae1'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 5, 6, 11, 44, 33, 533000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [27]:
# Same lookup, restricted to documents whose "author" field equals "Mike".
pprint.pprint(posts.find_one({"author": "Mike"}))

{'_id': ObjectId('6819f621ecf57e05415f6ae1'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 5, 6, 11, 44, 33, 533000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [29]:
# print() shows the id as a bare hex string, but post_id itself is still an ObjectId,
# so it can be used directly as the _id filter.
print(post_id)
pprint.pprint(posts.find_one({"_id": post_id}))

6819f621ecf57e05415f6ae1
{'_id': ObjectId('6819f621ecf57e05415f6ae1'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 5, 6, 11, 44, 33, 533000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [34]:
from bson.objectid import ObjectId

# String form of the id, used to show that it is not interchangeable with the ObjectId.
post_id_as_str = str(post_id)

# The stored _id is an ObjectId, so filtering by the plain string finds nothing (prints None).
print(posts.find_one({"_id": post_id_as_str}))  # No result

# Converting the string back to an ObjectId makes the lookup succeed.
print(posts.find_one({'_id': ObjectId(post_id_as_str)}))

None
{'_id': ObjectId('6819f621ecf57e05415f6ae1'), 'author': 'Mike', 'text': 'My first blog post!', 'tags': ['mongodb', 'python', 'pymongo'], 'date': datetime.datetime(2025, 5, 6, 11, 44, 33, 533000)}


In [35]:
# Two more posts for the bulk-insert example. The documents do not share the same
# set of fields: the first has "tags", the second has "title".
# The dates carry an explicit UTC tzinfo so they are the same kind of value as the
# timezone-aware "date" of the first post, instead of mixing naive and aware datetimes.
new_posts = [
    {
        "author": "Mike",
        "text": "Another post!",
        "tags": ["bulk", "insert"],
        "date": datetime.datetime(2009, 11, 12, 11, 14, tzinfo=datetime.timezone.utc),
    },
    {
        "author": "Eliot",
        "title": "MongoDB is fun",
        "text": "and pretty easy too!",
        "date": datetime.datetime(2009, 11, 10, 10, 45, tzinfo=datetime.timezone.utc),
    },
]

# Insert both documents in one call; the result lists the generated _id values
# in the same order as the input documents.
result = posts.insert_many(new_posts)
result.inserted_ids

[ObjectId('6819f802ecf57e05415f6ae2'), ObjectId('6819f802ecf57e05415f6ae3')]

In [37]:
# find() with no filter iterates over every document in the collection.
# Note: the loop variable rebinds `post`, replacing the dict defined in the insert cell above.
for post in posts.find():
    pprint.pprint(post)

{'_id': ObjectId('6819f621ecf57e05415f6ae1'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 5, 6, 11, 44, 33, 533000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('6819f802ecf57e05415f6ae2'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}
{'_id': ObjectId('6819f802ecf57e05415f6ae3'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}


In [38]:
# Iterate only over the documents whose "author" field equals "Mike".
for post in posts.find({"author": "Mike"}):
    pprint.pprint(post)

{'_id': ObjectId('6819f621ecf57e05415f6ae1'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 5, 6, 11, 44, 33, 533000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('6819f802ecf57e05415f6ae2'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}


In [39]:
# Count the matching documents without fetching them.
posts.count_documents({"author": "Mike"})

2

In [41]:
# Upper bound for the range query, given an explicit UTC tzinfo so it is the same kind
# of value as the timezone-aware "date" written by the first insert cell.
d = datetime.datetime(2009, 11, 12, 12, tzinfo=datetime.timezone.utc)

# Posts dated strictly before the bound, ordered by date.
for post in posts.find({"date": {"$lt": d}}).sort("date"):
    pprint.pprint(post)

{'_id': ObjectId('6819f802ecf57e05415f6ae3'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}
{'_id': ObjectId('6819f802ecf57e05415f6ae2'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}


# Indexing

In [42]:
# Create an ascending index on user_id; unique=True makes the server reject any
# document whose user_id duplicates one already in the collection.
result = db.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True)

# Iterating the index-information mapping yields the index names, so sorted() can
# consume it directly without building an intermediate list first.
sorted(db.profiles.index_information())

['_id_', 'user_id_1']

In [43]:
# Seed profiles; the two user_id values differ, so the unique index accepts both.
user_profiles = [
    {"user_id": 211, "name": "Luke"},
    {"user_id": 212, "name": "Ziltoid"},
]

result = db.profiles.insert_many(user_profiles)

In [None]:
new_profile = {"user_id": 213, "name": "Drew"}
duplicate_profile = {"user_id": 212, "name": "Tommy"}

# user_id 213 is not in the collection yet, so this insert is fine.
result = db.profiles.insert_one(new_profile)

# user_id 212 was inserted earlier, so the unique index on user_id rejects this
# document. The error is the point of the demonstration: catch it and print it,
# so the cell shows the failure without aborting a Restart & Run All.
try:
    result = db.profiles.insert_one(duplicate_profile)
except pymongo.errors.DuplicateKeyError as exc:
    print(f"{type(exc).__name__}: {exc}")

DuplicateKeyError: E11000 duplicate key error collection: bigdata-database.profiles index: user_id_1 dup key: { user_id: 212 }, full error: {'index': 0, 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: bigdata-database.profiles index: user_id_1 dup key: { user_id: 212 }', 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 212}}