# MongoDB CRUD and Query Practice Project

This project demonstrates MongoDB fundamentals using Python and the Faker library. It covers:
- CRUD operations
- Querying using logical and comparison operators
- Filtering and sorting
- Working with fake data


## 🛠️ Setup and Dependencies

In [1]:
from pymongo import MongoClient
from faker import Faker
import random
from datetime import datetime as dt
from bson.objectid import ObjectId


## 🌍 Connect to MongoDB

In [None]:
client = MongoClient("mongodb://localhost:27017/")
db = client["book_review_app"]

users_collection = db["users"]
reviews_collection = db["reviews"]


## 🎲 Generate and Insert Fake Data

In [20]:

fake = Faker()

users = []
for i in range(50):
    user = {
        "_id": ObjectId(),
        "name": fake.name(),
        "email": fake.email(),
        "age": random.randint(18, 70),
        "isPremiumMember": random.choice([True, True, False])
    }
    users.append(user)



In [21]:
def populate_collection(collection, data, is_many = False):
    if is_many:
        collection.insert_many(data)
    else:
        collection.insert_one(data)
    print(f"Inserted {len(data)} documents into {collection.name}")

populate_collection(users_collection, users, True)


Inserted 50 documents into users


In [22]:
user_ids = [doc["_id"] for doc in users_collection.find({}, {"_id": 1})]
print("User IDs:", user_ids)

User IDs: [ObjectId('682f653154aa6fe490ec7d9e'), ObjectId('682f653154aa6fe490ec7d9f'), ObjectId('682f653154aa6fe490ec7da0'), ObjectId('682f653154aa6fe490ec7da1'), ObjectId('682f653154aa6fe490ec7da2'), ObjectId('682f653154aa6fe490ec7da3'), ObjectId('682f653154aa6fe490ec7da4'), ObjectId('682f653154aa6fe490ec7da5'), ObjectId('682f653154aa6fe490ec7da6'), ObjectId('682f653154aa6fe490ec7da7'), ObjectId('682f653154aa6fe490ec7da8'), ObjectId('682f653154aa6fe490ec7da9'), ObjectId('682f653154aa6fe490ec7daa'), ObjectId('682f653154aa6fe490ec7dab'), ObjectId('682f653154aa6fe490ec7dac'), ObjectId('682f653154aa6fe490ec7dad'), ObjectId('682f653154aa6fe490ec7dae'), ObjectId('682f653154aa6fe490ec7daf'), ObjectId('682f653154aa6fe490ec7db0'), ObjectId('682f653154aa6fe490ec7db1'), ObjectId('682f653154aa6fe490ec7db2'), ObjectId('682f653154aa6fe490ec7db3'), ObjectId('682f653154aa6fe490ec7db4'), ObjectId('682f653154aa6fe490ec7db5'), ObjectId('682f653154aa6fe490ec7db6'), ObjectId('682f653154aa6fe490ec7db7'), O

In [23]:
reviews = []

for i in range(200):
    review = {
        "userId": random.choice(user_ids),
        "bookTitle": fake.sentence(nb_words=3),
        "reviewText": fake.paragraph(nb_sentences=5),
        "rating": random.randint(1, 5),
        "createdAt": fake.date_time_between_dates(datetime_start=dt(1950, 1, 1), datetime_end=dt(2025, 1, 1)).strftime("%Y-%m-%d")
    }

    reviews.append(review)

populate_collection(reviews_collection, reviews, True)




Inserted 200 documents into reviews


# CRUD Operations

## 🔍 Read (Query) Operations

In [24]:
# Find all users older than 30.
users_older_than_30 = users_collection.find({"age": {"$gte": 30}}, {"name": 1, "_id": 0})

list(users_older_than_30)


[{'name': 'Carla Barry'},
 {'name': 'Victoria Thornton'},
 {'name': 'Cathy Romero'},
 {'name': 'Kenneth Anderson'},
 {'name': 'Taylor Haley'},
 {'name': 'Patricia Crawford'},
 {'name': 'Dustin Mcgee'},
 {'name': 'Mr. Joseph Gentry MD'},
 {'name': 'Justin Pham'},
 {'name': 'Kelly Burns PhD'},
 {'name': 'Adrian Garrison'},
 {'name': 'Ronnie Perry'},
 {'name': 'Kimberly Burgess'},
 {'name': 'Pamela Clark'},
 {'name': 'William Thompson'},
 {'name': 'Erica Johnson'},
 {'name': 'Chloe Morris'},
 {'name': 'Taylor Ford'},
 {'name': 'Victoria Mckinney'},
 {'name': 'David White'},
 {'name': 'Melody Avery'},
 {'name': 'Matthew Blair'},
 {'name': 'Joshua Franklin'},
 {'name': 'Ashley Taylor'},
 {'name': 'Lisa Powell'},
 {'name': 'Timothy Huffman III'},
 {'name': 'Gwendolyn Holder'},
 {'name': 'Andrew Davidson'},
 {'name': 'Ronald Johnson'},
 {'name': 'Robert Lee'},
 {'name': 'Kathryn Taylor'},
 {'name': 'Erica Butler'},
 {'name': 'Andrea Reyes'},
 {'name': 'Mary Anderson'},
 {'name': 'Jeffrey Mars

In [25]:
# Find premium members who are below 25 years old.
list(users_collection.find({"isPremiumMember": True, "age":{"$lt":25}}))

[{'_id': ObjectId('682f653154aa6fe490ec7dac'),
  'name': 'Alexandria Reynolds',
  'email': 'jessicapope@example.org',
  'age': 22,
  'isPremiumMember': True},
 {'_id': ObjectId('682f653154aa6fe490ec7dba'),
  'name': 'Monica Le',
  'email': 'nhill@example.com',
  'age': 24,
  'isPremiumMember': True},
 {'_id': ObjectId('682f653154aa6fe490ec7dc4'),
  'name': 'Craig Quinn',
  'email': 'pwillis@example.com',
  'age': 22,
  'isPremiumMember': True}]

In [26]:
# Get all reviews with a rating greater than 3.

list(reviews_collection.find({
    "rating": {"$gt":3}
}, {
    
    "rating":1
}))

[{'_id': ObjectId('682f653654aa6fe490ec7dd0'), 'rating': 5},
 {'_id': ObjectId('682f653654aa6fe490ec7dd1'), 'rating': 5},
 {'_id': ObjectId('682f653654aa6fe490ec7dd3'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7dd4'), 'rating': 5},
 {'_id': ObjectId('682f653654aa6fe490ec7dd8'), 'rating': 5},
 {'_id': ObjectId('682f653654aa6fe490ec7dd9'), 'rating': 5},
 {'_id': ObjectId('682f653654aa6fe490ec7dda'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7ddb'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7ddc'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7ddd'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7de5'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7de9'), 'rating': 5},
 {'_id': ObjectId('682f653654aa6fe490ec7deb'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7ded'), 'rating': 4},
 {'_id': ObjectId('682f653654aa6fe490ec7df2'), 'rating': 5},
 {'_id': ObjectId('682f653654aa6fe490ec7df5'), 'rating': 4},
 {'_id': ObjectId('682f6

In [27]:
# Find reviews where the book title starts with 'The'.


list(reviews_collection.find({"bookTitle": {"$regex": "^The"}}))

[{'_id': ObjectId('682f653654aa6fe490ec7ddd'),
  'userId': ObjectId('682f653154aa6fe490ec7db7'),
  'bookTitle': 'Their until thought institution.',
  'reviewText': 'Member size heavy soldier he story national. These near stay trouble. Beyond all home offer a order major. Above hit school economic south style.',
  'rating': 4,
  'createdAt': '2008-01-16'},
 {'_id': ObjectId('682f653654aa6fe490ec7de8'),
  'userId': ObjectId('682f653154aa6fe490ec7da7'),
  'bookTitle': 'Their mouth partner.',
  'reviewText': 'Admit strategy growth believe force. Offer manager to film think. Join baby south identify bring community reveal six. Whole value sense.',
  'rating': 1,
  'createdAt': '1963-08-21'},
 {'_id': ObjectId('682f653654aa6fe490ec7e22'),
  'userId': ObjectId('682f653154aa6fe490ec7dc5'),
  'bookTitle': 'Themselves strategy common.',
  'reviewText': 'Respond vote white strong close new source particular. By race you instead lead science approach. Future own yourself class family occur story p

In [28]:
# Retrieve reviews made after the january 1st 2023

list(reviews_collection.find({"createdAt":{"$gt":'2023-01-01'}}))

[{'_id': ObjectId('682f653654aa6fe490ec7dd4'),
  'userId': ObjectId('682f653154aa6fe490ec7db5'),
  'bookTitle': 'Would mention.',
  'reviewText': 'Listen several me material. By story among day cover box four. Then pattern matter back exist. Doctor maybe myself walk finally individual. Whole deep wonder any up government. Culture on program.',
  'rating': 5,
  'createdAt': '2023-10-28'},
 {'_id': ObjectId('682f653654aa6fe490ec7e30'),
  'userId': ObjectId('682f653154aa6fe490ec7dc0'),
  'bookTitle': 'Different include fine.',
  'reviewText': 'Indicate carry purpose the. On television finish guy. Pretty process administration add few. Before white system picture claim. Offer drug sit throughout bed.',
  'rating': 1,
  'createdAt': '2023-11-14'}]

# ✏️ Update Operations

In [29]:
# Mark all users older than 65 as isPremiumMember: false.

users_collection.update_many(
    {"age": {"$gt": 65}},
    {"$set": {"isPremiumMember": False} }
)

UpdateResult({'n': 3, 'nModified': 3, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

In [30]:
# Increase the rating of reviews made before 2023 by 1 point (max 5).

reviews_collection.update_many(
    {"createdAt": {"$lt": '2023-01-01'}},
    [
    {"$set":{"rating": {
        "$min": [ {"$add": ["$rating", 1]}, 5]
    }}}]
    )

UpdateResult({'n': 198, 'nModified': 157, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

# ❌ Delete Operations

In [31]:
# Delete all users younger than 20 and are premium members.

users_collection.delete_many({
    "age": {"$lt": 20}, 
    "isPremiumMember": True
})

DeleteResult({'n': 0, 'ok': 1.0}, acknowledged=True)

In [32]:
# Delete all reviews with rating less than or equal to 2.

reviews_collection.delete_many(
    {
        "rating": {"$lte": 2}
    }
)

DeleteResult({'n': 39, 'ok': 1.0}, acknowledged=True)

## 🧠 Advanced Queries

In [33]:
# Group reviews by bookTitle and count how many reviews each book has.
# Find the average rating per book.
# Get the top 5 books with the highest average rating.

pipeline = [
    {"$group": {
        "_id": "$bookTitle",
        "no_of_reviews": {"$sum":1},
        "avg_rating":{"$avg":"$rating"}
    }},

    {
        "$sort": {"no_of_reviews": -1, "avg_rating": -1}
    },

    {"$limit": 5}
]

resp = reviews_collection.aggregate(pipeline)

In [34]:
for doc in resp:
    print(doc)

{'_id': 'Hit.', 'no_of_reviews': 2, 'avg_rating': 4.5}
{'_id': 'Natural fear.', 'no_of_reviews': 1, 'avg_rating': 5.0}
{'_id': 'Crime.', 'no_of_reviews': 1, 'avg_rating': 5.0}
{'_id': 'Focus at station field.', 'no_of_reviews': 1, 'avg_rating': 5.0}
{'_id': 'Something under.', 'no_of_reviews': 1, 'avg_rating': 5.0}


## 📌 Summary & Next Steps

This project practiced CRUD operations in MongoDB using realistic data.
In the future, I will explore:
- Aggregation pipelines
- Indexes and performance
- MongoDB with Flask or FastAPI

