### <span style=color:blue> Some examples of pymongo "CRUD" - inserts (create), find (read), updates, deletes  </span>

<span style=color:blue>These are all illustrated using pymongo.  A useful exercise would be to understand how to express all of these in mongosh.</span>

In [1]:
# my usual collection of package imports; not really using them in this notebook

import sys
import json
import csv
import yaml

import importlib

import pandas as pd
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv

from datetime import time
from datetime import date
from datetime import datetime
# with the above choices, the imported datetime.time(2023,07,01) is recognized
# from datetime import date
# from datetime import datetime

import pprint

import psycopg2
from sqlalchemy import create_engine, text as sql_text

# Create a utilities file util.py in a folder benchmarking and import it
# Note: I moved my util.py file into the directory "helper_functions", which 
#    seems like a better name
# sys.path.append('helper_functions/')
# import util as util
# import util

<span style=color:blue>Getting mongodb connection set up</span>

In [2]:
from pymongo import MongoClient

# Connect to the MongoDB server on this machine.  "localhost" and 27017 (the
# default MongoDB port) are spelled out here; MongoClient() with no arguments,
# or MongoClient("mongodb://localhost:27017/"), would open the same connection.
client = MongoClient("localhost", 27017)

<span style=color:blue>Creating a small database with 2 collections</span>

In [3]:
# db_test refers to the "test" database in MongoDB
#    (it is created if it doesn't already exist)
db_test = client["test"]

# start from a clean slate: drop both collections if a previous run left them
db_test["inventory"].drop()
db_test["prices"].drop()

print(db_test.list_collection_names())

# handles for the two collections used in this notebook: "inventory" and "prices"
inv = db_test["inventory"]
pr = db_test["prices"]

# Holding a handle does not create the collection; it only comes into
#    existence once it contains at least one document
print()
print(db_test.list_collection_names())


# documents for the "inventory" collection: one per (item, location) pair
inv_list = [
    {"item": "journal", "qty": 25, "size": {"h": 14, "w": 21, "uom": "cm"}, "loc": "NY"},
    {"item": "journal", "qty": 50, "size": {"h": 14, "w": 21, "uom": "cm"}, "loc": "LA"},
    {"item": "notebook", "qty": 80, "size": {"h": 8.5, "w": 11, "uom": "in"}, "loc": "NY"},
    {"item": "notebook", "qty": 20, "size": {"h": 8.5, "w": 11, "uom": "in"}, "loc": "LA"},
    {"item": "notebook", "qty": 30, "size": {"h": 8.5, "w": 11, "uom": "in"}, "loc": "SF"},
    {"item": "bottle", "qty": 30, "size": {"h": 4, "w": 10, "uom": "in"}, "loc": "NY"},
    {"item": "bottle", "qty": 40, "size": {"h": 4, "w": 10, "uom": "in"}, "loc": "SF"},
    {"item": "paper", "qty": 100, "size": {"h": 8.5, "w": 11, "uom": "in"}, "loc": "NY"},
    {"item": "paper", "qty": 120, "size": {"h": 8.5, "w": 11, "uom": "in"}, "loc": "SF"},
    {"item": "planner", "qty": 75, "size": {"h": 22.85, "w": 30, "uom": "cm"}, "loc": "LA"},
]

# documents for the "prices" collection
price_list = [
    {"descrip": "journal", "price": 9.50},
    {"descrip": "notebook", "price": 7.44},
    {"descrip": "envelopes", "price": 6.75},
]

# bulk inserts in pymongo: one call per collection
inv.insert_many(inv_list)
pr.insert_many(price_list)

# both collections now show up in the database
print()
print(db_test.list_collection_names())

# find() with no condition returns every document; dump each collection in
# turn, with a blank line in front of each dump
for collection in (inv, pr):
    print()
    for doc in collection.find():
        pprint.pp(doc)

[]

[]

['prices', 'inventory']

{'_id': ObjectId('6654bf84978a669a87e16f1d'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 14, 'w': 21, 'uom': 'cm'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f1e'),
 'item': 'journal',
 'qty': 50,
 'size': {'h': 14, 'w': 21, 'uom': 'cm'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 80,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f20'),
 'item': 'notebook',
 'qty': 20,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f21'),
 'item': 'notebook',
 'qty': 30,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f22'),
 'item': 'bottle',
 'qty': 30,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f23'),
 'item': 'bottle',
 'qty': 40,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf849

<span style=color:blue>A few example queries</span>

In [4]:
# equality filter: keep the price documents whose "descrip" is "journal"
journal_filter = {"descrip": "journal"}
for doc in pr.find(journal_filter):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f27'),
 'descrip': 'journal',
 'price': 9.5}


### <span style=color:blue>As part of Problem Set 4, please write the mongosh command that performs the operation in the next cell.  This is "Question 1."</span>

In [5]:
# comparison filter: "$lt" keeps the documents whose price is below 7.50
low_price_filter = {"price": {"$lt": 7.50}}
for doc in pr.find(low_price_filter):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f28'),
 'descrip': 'notebook',
 'price': 7.44}
{'_id': ObjectId('6654bf84978a669a87e16f29'),
 'descrip': 'envelopes',
 'price': 6.75}


<span style=color:blue>Digression: what is the output of collection.find()?  It is a cursor</span>

In [6]:
# find() does not hand back the documents themselves, but a cursor over them
cursor = inv.find({"qty": {"$gte": 40}})
print(type(cursor))

# pull the first two results off the cursor, one at a time
for _ in range(2):
    print()
    pprint.pp(cursor.next())

print()
# list() reads every document still left in the cursor (the two fetched
# above are not included) and leaves the cursor exhausted
remaining_docs = list(cursor)
print(len(remaining_docs))
# print the rest of the contents of cursor
print()
pprint.pp(remaining_docs)

# The cursor has now traversed the whole query output, so asking for one more
# document raises StopIteration.  Catch only that exception, so that a real
# problem (e.g. a lost connection) is not reported as "end of results".
print()
try:
    pprint.pp(cursor.next())
except StopIteration:
    print('The cursor has reached the end of the query results')

<class 'pymongo.cursor.Cursor'>

{'_id': ObjectId('6654bf84978a669a87e16f1e'),
 'item': 'journal',
 'qty': 50,
 'size': {'h': 14, 'w': 21, 'uom': 'cm'},
 'loc': 'LA'}

{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 80,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}

4

[{'_id': ObjectId('6654bf84978a669a87e16f23'),
  'item': 'bottle',
  'qty': 40,
  'size': {'h': 4, 'w': 10, 'uom': 'in'},
  'loc': 'SF'},
 {'_id': ObjectId('6654bf84978a669a87e16f24'),
  'item': 'paper',
  'qty': 100,
  'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
  'loc': 'NY'},
 {'_id': ObjectId('6654bf84978a669a87e16f25'),
  'item': 'paper',
  'qty': 120,
  'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
  'loc': 'SF'},
 {'_id': ObjectId('6654bf84978a669a87e16f26'),
  'item': 'planner',
  'qty': 75,
  'size': {'h': 22.85, 'w': 30, 'uom': 'cm'},
  'loc': 'LA'}]

The cursor has reached the end of the query results


<span style=color:blue>More query examples</span>

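<span style=color:blue>Example of "projection", similar to projection in the relational model: only the listed fields are returned (plus "_id", which is returned unless it is explicitly excluded).  A value of 1 includes a field.  To limit how many elements of an array field are returned, use the $slice projection operator instead of 1.</span>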

In [7]:
# filter on qty, then use the projection to choose which fields come back
# (as the output shows, "_id" is returned even though it is not listed)
qty_filter = {"qty": {"$gte": 70}}
wanted_fields = {'item': 1, 'qty': 1, 'loc': 1}
for doc in inv.find(qty_filter, projection=wanted_fields):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 80,
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f24'),
 'item': 'paper',
 'qty': 100,
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f25'),
 'item': 'paper',
 'qty': 120,
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f26'),
 'item': 'planner',
 'qty': 75,
 'loc': 'LA'}


<span style=color:blue>More query examples: nested fields (dot notation), regular expressions, and boolean combinations</span>

In [8]:
# dot notation ('size.h') reaches the "h" field inside the embedded "size" document
height_filter = {'size.h': {'$gt': 9}}
for doc in inv.find(height_filter):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f1d'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 14, 'w': 21, 'uom': 'cm'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f1e'),
 'item': 'journal',
 'qty': 50,
 'size': {'h': 14, 'w': 21, 'uom': 'cm'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f26'),
 'item': 'planner',
 'qty': 75,
 'size': {'h': 22.85, 'w': 30, 'uom': 'cm'},
 'loc': 'LA'}


In [9]:
# String matching with regular expressions
#    see https://www.geeksforgeeks.org/how-to-query-mongodb-documents-with-regex-in-python/

# "^" anchors the pattern at the beginning of the string ("$" can be used to
# anchor it at the end), so '^n' selects the items whose name starts with "n"
starts_with_n = {'item': {'$regex': '^n'}}
for doc in inv.find(starts_with_n):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 80,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f20'),
 'item': 'notebook',
 'qty': 20,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f21'),
 'item': 'notebook',
 'qty': 30,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}


In [10]:
# "$" anchors the pattern at the end of the string: items whose name ends in "r"
ends_with_r = {'item': {'$regex': 'r$'}}
for doc in inv.find(ends_with_r):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f24'),
 'item': 'paper',
 'qty': 100,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f25'),
 'item': 'paper',
 'qty': 120,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f26'),
 'item': 'planner',
 'qty': 75,
 'size': {'h': 22.85, 'w': 30, 'uom': 'cm'},
 'loc': 'LA'}


In [11]:
# Items whose name contains the substring "bo"
# NOTE(review): the '^n' and 'r$' queries in this notebook match on only part
#    of the name, so a plain 'bo' pattern presumably selects the same items;
#    confirm before simplifying
contains_bo = {'item': {'$regex': '^.*bo.*$'}}
for doc in inv.find(contains_bo):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 80,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f20'),
 'item': 'notebook',
 'qty': 20,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f21'),
 'item': 'notebook',
 'qty': 30,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f22'),
 'item': 'bottle',
 'qty': 30,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f23'),
 'item': 'bottle',
 'qty': 40,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'SF'}


In [12]:
# Richer patterns can be built with the python regex package (re); the
# compiled pattern is passed to find() directly as the value to match
#    e.g., see https://www.w3schools.com/python/python_regex.asp
import re

# '|' is "or": names that start with "p", or names that contain "bo"
pattern = re.compile('^p|^.*bo.*$')
p_or_bo = {'item': pattern}
for doc in inv.find(p_or_bo):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 80,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f20'),
 'item': 'notebook',
 'qty': 20,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f21'),
 'item': 'notebook',
 'qty': 30,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f22'),
 'item': 'bottle',
 'qty': 30,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f23'),
 'item': 'bottle',
 'qty': 40,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f24'),
 'item': 'paper',
 'qty': 100,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f25'),
 'item': 'paper',
 'qty': 120,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f26'),
 'item': 'plan

### <span style=color:blue>As part of Problem Set 4, please write the mongosh command that performs the operation in the next cell.  This is "Question 2."</span>

In [13]:
# Boolean combinations of conditions: '$or' takes a list of conditions, and
# the output contains the documents that satisfy at least one of them
taller_than_9 = {'size.h': {'$gt': 9}}
name_contains_bo = {'item': {'$regex': '^.*bo.*$'}}
for doc in inv.find({'$or': [taller_than_9, name_contains_bo]}):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f1d'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 14, 'w': 21, 'uom': 'cm'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f1e'),
 'item': 'journal',
 'qty': 50,
 'size': {'h': 14, 'w': 21, 'uom': 'cm'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 80,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f20'),
 'item': 'notebook',
 'qty': 20,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f21'),
 'item': 'notebook',
 'qty': 30,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f22'),
 'item': 'bottle',
 'qty': 30,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f23'),
 'item': 'bottle',
 'qty': 40,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f26'),
 'item': 'plan

<span style=color:blue>A small example of left join using aggregation and $lookup.  Note that the info coming from the "right" collection is incorporated into docs from the "left" collection as an array (i.e., list). </span>

<span style=color:blue>E.g., see https://www.mongodb.com/docs/v6.2/reference/operator/aggregation/lookup/ and https://www.w3schools.com/mongodb/mongodb_aggregations_lookup.php</span>

### <span style=color:blue>As part of Problem Set 4, please write the mongosh command that performs the operation in the next cell.  This is "Question 3."</span>

In [14]:
# Single-stage aggregation pipeline run on "prices": every price document
# gains an 'invinfo' array holding the "inventory" documents whose 'item'
# equals its 'descrip'
lookup_stage = {
    '$lookup': {
        # 'from' needs the name of the collection in the database; do not
        # use "inv", which is the Python variable holding the collection
        'from': 'inventory',
        'localField': 'descrip',
        'foreignField': 'item',
        'as': 'invinfo',
    }
}
pipeline = [lookup_stage]
for doc in pr.aggregate(pipeline):
    pprint.pp(doc)

{'_id': ObjectId('6654bf84978a669a87e16f27'),
 'descrip': 'journal',
 'price': 9.5,
 'invinfo': [{'_id': ObjectId('6654bf84978a669a87e16f1d'),
              'item': 'journal',
              'qty': 25,
              'size': {'h': 14, 'w': 21, 'uom': 'cm'},
              'loc': 'NY'},
             {'_id': ObjectId('6654bf84978a669a87e16f1e'),
              'item': 'journal',
              'qty': 50,
              'size': {'h': 14, 'w': 21, 'uom': 'cm'},
              'loc': 'LA'}]}
{'_id': ObjectId('6654bf84978a669a87e16f28'),
 'descrip': 'notebook',
 'price': 7.44,
 'invinfo': [{'_id': ObjectId('6654bf84978a669a87e16f1f'),
              'item': 'notebook',
              'qty': 80,
              'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
              'loc': 'NY'},
             {'_id': ObjectId('6654bf84978a669a87e16f20'),
              'item': 'notebook',
              'qty': 20,
              'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
              'loc': 'LA'},
             {'_id': Obje

<span style=color:blue>Simple update example: setting a value</span>

In [15]:
# which documents to update: items whose name starts with "p"
selection_condition = {'item': {'$regex': '^p'}}
# what to change: '$set' overwrites qty with 100
new_values = {'$set': {'qty': 100}}

upd = inv.update_many(filter=selection_condition, update=new_values)

# modified_count can be smaller than matched_count: in the output below, one
# of the three matched documents already had qty 100 and so was not modified
print('The number of documents matched is:', upd.matched_count)
print('The number of documents modified is:', upd.modified_count)

print()
print('The type of variable upd is:', type(upd))

# re-run the same filter to show the documents after the update
print()
for doc in inv.find(selection_condition):
    pprint.pp(doc)


The number of documents matched is: 3
The number of documents modified is: 2

The type of variable upd is: <class 'pymongo.results.UpdateResult'>

{'_id': ObjectId('6654bf84978a669a87e16f24'),
 'item': 'paper',
 'qty': 100,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f25'),
 'item': 'paper',
 'qty': 100,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f26'),
 'item': 'planner',
 'qty': 100,
 'size': {'h': 22.85, 'w': 30, 'uom': 'cm'},
 'loc': 'LA'}


<span style=color:blue>Updating with simple arithmetic </span>

In [16]:
# which documents to update: items whose name contains "bo"
selection_condition = {'item': {'$regex': '^.*bo.*$'}}
# what to change: '$inc' adds 25 to the current qty of each matched document
new_values = {'$inc': {'qty': 25}}

upd = inv.update_many(filter=selection_condition, update=new_values)

print('The number of documents matched is:', upd.matched_count)
print('The number of documents modified is:', upd.modified_count)

# re-run the same filter to show the documents after the update
print()
for doc in inv.find(selection_condition):
    pprint.pp(doc)


The number of documents matched is: 5
The number of documents modified is: 5

{'_id': ObjectId('6654bf84978a669a87e16f1f'),
 'item': 'notebook',
 'qty': 105,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f20'),
 'item': 'notebook',
 'qty': 45,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'LA'}
{'_id': ObjectId('6654bf84978a669a87e16f21'),
 'item': 'notebook',
 'qty': 55,
 'size': {'h': 8.5, 'w': 11, 'uom': 'in'},
 'loc': 'SF'}
{'_id': ObjectId('6654bf84978a669a87e16f22'),
 'item': 'bottle',
 'qty': 55,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'NY'}
{'_id': ObjectId('6654bf84978a669a87e16f23'),
 'item': 'bottle',
 'qty': 65,
 'size': {'h': 4, 'w': 10, 'uom': 'in'},
 'loc': 'SF'}


<span style=color:blue>Changing a value, and adding a field (the document-model counterpart of adding a column)</span>

In [17]:
# which documents to update: '$and' requires both conditions to hold, i.e.
# the name contains "o" AND the size is measured in cm
name_contains_o = {'item': {'$regex': '^.*o.*$'}}
measured_in_cm = {'size.uom': 'cm'}
selection_condition = {'$and': [name_contains_o, measured_in_cm]}

# what to change: '$set' overwrites the nested field size.h and also sets
# 'color', a field that the documents inserted earlier do not have
new_values = {'$set': {'size.h': 10, 'color': 'red'}}

upd = inv.update_many(filter=selection_condition, update=new_values)

print('The number of documents matched is:', upd.matched_count)
print('The number of documents modified is:', upd.modified_count)

# re-run the same filter to show the documents after the update
print()
for doc in inv.find(selection_condition):
    pprint.pp(doc)


The number of documents matched is: 2
The number of documents modified is: 2

{'_id': ObjectId('6654bf84978a669a87e16f1d'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 10, 'w': 21, 'uom': 'cm'},
 'loc': 'NY',
 'color': 'red'}
{'_id': ObjectId('6654bf84978a669a87e16f1e'),
 'item': 'journal',
 'qty': 50,
 'size': {'h': 10, 'w': 21, 'uom': 'cm'},
 'loc': 'LA',
 'color': 'red'}


<span style=color:blue>Simple delete example</span>

In [18]:
# delete every document whose item name starts with "p"
selection_condition = {'item': {'$regex': '^p'}}

dlt = inv.delete_many(filter=selection_condition)

# the result object reports how many documents were removed
print('The number of documents deleted is:', dlt.deleted_count)


The number of documents deleted is: 3


<span style=color:blue>Working with object identifiers</span>

<span style=color:blue>See https://pymongo.readthedocs.io/en/stable/api/bson/objectid.html</span>

In [21]:
# fetch a single document and keep hold of its "_id"
doc = inv.find_one()
pprint.pp(doc)

obj_pointer = doc['_id']
# string form of the id, used in the two string-based lookups below
obj_pointer_text = str(obj_pointer)

# show, each after a blank line: the type of the id, the id itself as
# printed, and (for comparison) the type of an ordinary field
for shown in (type(obj_pointer), obj_pointer, type(doc['qty'])):
    print()
    print(shown)

print('\nYou can retrieve an object based on its object id')
pprint.pp(inv.find_one({'_id': obj_pointer}))

# a string holding the same characters is not an ObjectId, so nothing matches
print('\nYou cannot match the "_id" with a string type!' )
pprint.pp(inv.find_one({'_id': obj_pointer_text}))

import bson
# the class is spelled ObjectId (lowercase "d"); it is referenced here by its
# fully qualified name, bson.objectid.ObjectId
print('\nYou can convert the string of an objectid back into an objectid')
pprint.pp(inv.find_one({'_id': bson.objectid.ObjectId(obj_pointer_text)}))

{'_id': ObjectId('6654bf84978a669a87e16f1d'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 10, 'w': 21, 'uom': 'cm'},
 'loc': 'NY',
 'color': 'red'}

<class 'bson.objectid.ObjectId'>

6654bf84978a669a87e16f1d

<class 'int'>

You can retrieve an object based on its object id
{'_id': ObjectId('6654bf84978a669a87e16f1d'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 10, 'w': 21, 'uom': 'cm'},
 'loc': 'NY',
 'color': 'red'}

You cannot match the "_id" with a string type!
None

You can convert the string of an objectid back into an objectid
{'_id': ObjectId('6654bf84978a669a87e16f1d'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 10, 'w': 21, 'uom': 'cm'},
 'loc': 'NY',
 'color': 'red'}
