# NoSQL DB With Python

## 1. Setup & Connection

In [None]:
import os
import pprint
from typing import List, Dict, Any, Optional, TypedDict
from urllib.parse import quote_plus

from bson import ObjectId
from dotenv import load_dotenv, find_dotenv
from pymongo import MongoClient
from pymongo.collection import Collection

In [None]:
# Load environment variables from the nearest .env file and build the
# MongoDB connection string.

load_dotenv(find_dotenv())

# NOTE: the fallbacks are placeholder credentials for a local sandbox only;
# set MONGO_USERNAME / MONGO_PASSWD in the environment for anything else.
USERNAME = os.getenv("MONGO_USERNAME", "admin")
PASSWORD = os.getenv("MONGO_PASSWD", "password")

# Ensure you are running MongoDB locally on port 27017

# Percent-escape the credentials: reserved characters such as "@", ":" or "/"
# inside a username or password would otherwise produce an invalid URI.
connection_string = (
    f"mongodb://{quote_plus(USERNAME)}:{quote_plus(PASSWORD)}@localhost:27017"
)

In [None]:
# Open a client for the server described by `connection_string`.
client = MongoClient(host=connection_string)

# ---------------------------------------------------------
# Reset step: remove the "production" database so that each
# run of this notebook begins from an empty state and earlier
# runs cannot leave duplicate documents behind.
# ---------------------------------------------------------

client.drop_database("production")

In [None]:
# Handle to the "production" database (lazy creation: it only comes into
# existence once data is inserted).
prod_db = client["production"]

# Handle to the collection that will hold the person documents.
person_collection = prod_db["person_collection"]

In [None]:
class PersonDoc(TypedDict):
    """Typed shape of a person document: first name, last name and age."""

    first_name: str
    last_name: str
    age: int

## 2. Insert Data & Capture IDs

*We must capture the generated `_id`s so we can reference them in later cells.*

In [None]:
def create_documents(collection: Collection,
                     first_names: List[str],
                     last_names: List[str],
                     ages: List[int]) -> List[ObjectId]:
    """Insert one person document per (first name, last name, age) triple.

    The three lists are combined position by position with ``zip``, so
    surplus entries in a longer list are ignored.

    Args:
        collection: Collection that receives the documents.
        first_names: First names, one per person.
        last_names: Last names, aligned with ``first_names``.
        ages: Ages, aligned with ``first_names``.

    Returns:
        The ``_id`` values of the inserted documents, or an empty list when
        there was nothing to insert.
    """
    docs: List[PersonDoc] = [
        {"first_name": first_name, "last_name": last_name, "age": age}
        for first_name, last_name, age in zip(first_names, last_names, ages)
    ]

    # insert_many() rejects an empty batch, so return early instead of
    # letting empty input crash the call.
    if not docs:
        return []

    result = collection.insert_many(docs)
    return result.inserted_ids

In [None]:
# Sample people. The three lists are parallel: position i in each list
# describes the same person.
first_names = [
    "Rahul",
    "Ananya",
    "Vikram",
    "Priya",
    "Arjun",
]
last_names = [
    "Sharma",
    "Gupta",
    "Singh",
    "Mehta",
    "Verma",
]
ages = [
    28,
    24,
    32,
    27,
    35,
]

# ---------------------------------------------------------
# [REPRODUCIBILITY KEY]
# Keep the generated IDs in `inserted_ids`. Later cells pick
# their target documents from this list by position rather
# than relying on hardcoded strings like "66d5a..."
# ---------------------------------------------------------

inserted_ids = create_documents(
    collection=person_collection,
    first_names=first_names,
    last_names=last_names,
    ages=ages,
)
print(f"Inserted {len(inserted_ids)} documents.")
print(inserted_ids)

## 3. Read Operations

### Get all Data

In [None]:
def find_all(collection: Collection):
    """Pretty-print every document stored in ``collection``."""
    # find() without a filter returns a cursor over the whole collection.
    for document in collection.find():
        pprint.pprint(document)


find_all(person_collection)

### Search by ID

In [None]:
def get_doc_by_id(collection: Collection, doc_id: ObjectId):
    """Pretty-print the result of looking up the document with ``_id == doc_id``."""
    # SQL analogy: SELECT * FROM table WHERE _id = <doc_id>
    pprint.pprint(collection.find_one({"_id": doc_id}))

In [None]:
# Use the first ID from our inserted list (index 0 of `inserted_ids`),
# print it, then fetch and pretty-print the matching document.
target_id = inserted_ids[0]
print(f"Fetching ID: {target_id}")
get_doc_by_id(person_collection, target_id)

### Search in Range

In [None]:
def get_in_range(collection: Collection, min_age: int, max_age: int):
    """Pretty-print documents whose ``age`` lies in ``[min_age, max_age]``.

    Both bounds are inclusive and results are sorted by ascending age.

    Args:
        collection: Collection to search.
        min_age: Lower bound of the age range (inclusive).
        max_age: Upper bound of the age range (inclusive).
    """
    # Several operators on the same field are ANDed implicitly, so an
    # explicit "$and" wrapper is unnecessary for a simple range.
    query = {"age": {"$gte": min_age, "$lte": max_age}}
    # The projection {"_id": 0} hides _id for cleaner output.
    result = collection.find(query, {"_id": 0}).sort("age")
    for res in result:
        pprint.pprint(res)


get_in_range(person_collection, 25, 35)

## 4. Update Operations

In [None]:
def update_by_id(collection: Collection, doc_id: ObjectId):
    """Apply a fixed demo update to the document whose ``_id`` is ``doc_id``.

    The update sets ``married`` to ``False``, increments ``age`` by one and
    renames ``first_name``/``last_name`` to ``fname``/``lname``.

    Args:
        collection: Collection containing the document.
        doc_id: ``_id`` of the document to update.
    """
    # This is the update document (what to change), not the filter.
    update = {
        "$set": {"married": False},
        "$inc": {"age": 1},
        "$rename": {"first_name": "fname", "last_name": "lname"},
    }

    result = collection.update_one({"_id": doc_id}, update)
    # Only report success when a document was actually matched.
    if result.matched_count:
        print(f"Updated document {doc_id}")
    else:
        print(f"No document found with _id {doc_id}")

In [None]:
# We will update the SECOND person (Ananya), i.e. index 1 of `inserted_ids`
target_id = inserted_ids[1]
update_by_id(person_collection, target_id)

# Verify update: print the same document again to inspect the changed fields
get_doc_by_id(person_collection, target_id)

## 5. Replace Operations

In [None]:
def replace_by_id(collection: Collection, doc_id: ObjectId):
    """Replace the document whose ``_id`` is ``doc_id`` with a fixed demo body.

    The replacement body contains only ``age`` and ``status``.

    Args:
        collection: Collection containing the document.
        doc_id: ``_id`` of the document to replace.
    """
    replacement = {
        "age": 25,
        "status": "Replaced",
    }
    # Replaces the entire document structure with `replacement`
    result = collection.replace_one({"_id": doc_id}, replacement)
    # Only report success when a document was actually matched.
    if result.matched_count:
        print(f"Replaced document {doc_id}")
    else:
        print(f"No document found with _id {doc_id}")

In [None]:
# We will replace the THIRD person (Vikram), i.e. index 2 of `inserted_ids`
target_id = inserted_ids[2]
replace_by_id(person_collection, target_id)

# Verify replace: print the document again to inspect its new contents
get_doc_by_id(person_collection, target_id)

## 6. Delete Operations

In [None]:
def delete_by_id(collection: Collection, doc_id: ObjectId):
    """Delete the document whose ``_id`` is ``doc_id`` and report the outcome.

    Args:
        collection: Collection containing the document.
        doc_id: ``_id`` of the document to delete.
    """
    result = collection.delete_one({"_id": doc_id})
    # Only report success when a document was actually removed.
    if result.deleted_count:
        print(f"Deleted document {doc_id}")
    else:
        print(f"No document found with _id {doc_id}")

In [None]:
# We will delete the same person we just replaced (Vikram), i.e. index 2
# of `inserted_ids`
target_id = inserted_ids[2]
delete_by_id(person_collection, target_id)

# Verify deletion: the lookup should find nothing (Should print None)
print("Verifying deletion:")
get_doc_by_id(person_collection, target_id)

## 7. Relationships

### Embedding (One-to-Few)

*Adding an address inside the Person document.*

In [None]:
def add_address_embed(collection: Collection, doc_id: ObjectId, address: dict):
    """Embed ``address`` in the ``addresses`` array of the matching document.

    Args:
        collection: Collection containing the owner document.
        doc_id: ``_id`` of the document that receives the address.
        address: Address sub-document to embed.
    """
    # $addToSet appends the value only if an equal element is not already in
    # the array, so repeating the call does not create duplicates.
    result = collection.update_one(
        {"_id": doc_id}, {"$addToSet": {"addresses": address}}
    )
    # Distinguish "no such document" and "already embedded" from a real add.
    if not result.matched_count:
        print(f"No document found with _id {doc_id}")
    elif not result.modified_count:
        print(f"Address already present on {doc_id}")
    else:
        print(f"Added address to {doc_id}")

In [None]:
# Add to the SECOND person (Ananya), whom we updated earlier
target_id = inserted_ids[1]
# Sample address; this module-level `address` dict is reused by the
# referencing example later in the notebook.
address = {
    "street": "Bay Street",
    "number": 2706,
    "city": "San Francisco",
    "country": "United States",
    "zip": "94107"
}
add_address_embed(person_collection, target_id, address)

# Check the result: print the document again to see the embedded address
get_doc_by_id(person_collection, target_id)

### Referencing (One-to-Many / Many-to-Many)

*Storing the address in a separate collection and linking it to its owner via ID.*

In [None]:
# Separate collection that holds the address documents
address_collection = prod_db["address"]


def add_address_relationship(collection: Collection, owner_id: ObjectId, address: dict):
    """Insert a copy of ``address`` that carries ``owner_id`` as a manual reference.

    The caller's ``address`` dict is left unmodified; only the copy is
    inserted into ``collection``.
    """
    # Build the linked copy in one step; "owner_id" acts as the foreign key.
    linked_address = {**address, "owner_id": owner_id}

    collection.insert_one(linked_address)
    print(f"Created address linked to owner {owner_id}")

In [None]:
# Link to the FOURTH person (Priya), i.e. index 3 of `inserted_ids`
target_id = inserted_ids[3]

# Reuses the `address` dict defined in the embedding example above
add_address_relationship(address_collection, target_id, address)

# Verify: Find all addresses belonging to Priya by filtering on `owner_id`
print(f"Finding addresses for Owner ID: {target_id}")
results = address_collection.find({"owner_id": target_id})
for res in results:
    pprint.pprint(res)