1.	Project Setup & Connection

In [6]:
import uuid
import random
from datetime import datetime, timedelta
from pymongo import MongoClient
from random import choice, randint, uniform
from pprint import pprint


#Part 1
# Establishing connection
client = MongoClient('mongodb://localhost:27017/')
db = client['eduhub_db']


print("MongoDB connection successful. Database:", db.name)

# --- DATABASE RESET ---

#--- Checks if the database exists and drop it to ensure a clean run. I implemented this to avoid duplicate key errors during repeated runs
#and to ensure that the database starts fresh each time the script is executed.

if 'eduhub_db' in client.list_database_names():
    client.drop_database('eduhub_db')
    print("Database 'eduhub_db' successfully dropped for clean re-run.")
else:
    print("Database 'eduhub_db' does not exist yet. Proceeding with creation.")


MongoDB connection successful. Database: eduhub_db
Database 'eduhub_db' successfully dropped for clean re-run.


1.2 Creating Collections


In [7]:
#schema validation. refers to 6-1
user_validator = {
    "$jsonSchema": {
        "bsonType": "object",
        "required": ["userId", "email", "firstName", "lastName", "role", "dateJoined", "isActive"],
        "properties": {
            "email": {"bsonType": "string", "pattern": "^.+@.+\\..+$"},
            "role": {"enum": ["student", "instructor"]},
            "dateJoined": {"bsonType": "date"},
            "isActive": {"bsonType": "bool"}
        }
    }
}

# --- CREATE COLLECTION ---
# using a to Try/except block to catch error. it runs if the collection already exists
try:
    db.create_collection('users', validator=user_validator)
    print("Collection 'users' created with validation.")
except Exception as e:
    # If the collection exists, MongoDB raises an error on re-creation.
    print(f"Collection 'users' already exists or validation error: {e}")

# Create other  5 collections 
for col_name in ['courses', 'enrollments', 'lessons', 'assignments', 'submissions']:
    if col_name not in db.list_collection_names():
        db.create_collection(col_name)

Collection 'users' created with validation.


Part 2: Data Population

Task 2.1: Insert Sample Data
 generate and insert at least 20 users (students/instructors),
   8 courses, 15 enrollments, 25 lessons, 10 assignments, and 12 submissions.

In [8]:


#Part 2




#Step 1: Users Collection
#mix of students and instructors via genereating unique IDs

# Helper function to generate unique IDs
def generate_id():
    return str(uuid.uuid4())[:8] 

users_data = []
instructor_ids = []
student_ids = []
user_count = 20

# Create Instructor Users (4 users)
for i in range(4):
    instructor_id = generate_id()
    instructor_ids.append(instructor_id)
    users_data.append({
        "userId": instructor_id,
        "email": f"instructor{i+1}@eduhub.com",
        "firstName": f"InstName{i+1}",
        "lastName": f"InstLast{i+1}",
        "role": "instructor",
        "dateJoined": datetime.now() - timedelta(days=randint(365, 730)), 
        "profile": {"bio": f"Experienced instructor in Subject {i+1}.", "skills": ["Python", "SQL", "Data Analysis"]},
        "isActive": True
    })

# Create Student Users (16 users)
for i in range(16):
    student_id = generate_id()
    student_ids.append(student_id)
    users_data.append({
        "userId": student_id,
        "email": f"student{i+1}@eduhub.com",
        "firstName": f"StudName{i+1}",
        "lastName": f"StudLast{i+1}",
        "role": "student",
        "dateJoined": datetime.now() - timedelta(days=randint(30, 365)), # Joined 1 month - 1 year ago
        "profile": {"bio": f"Eager to learn Subject {randint(1,4)}.", "skills": ["Javascript", "HTML", "CSS"]},
        "isActive": True
    })

# Insert into collection
db.users.insert_many(users_data)
print(f"Inserted {len(users_data)} users.")








Inserted 20 users.


Task 2, Step 2 to 6: VALIDATIONS

In [9]:
#Step 2: Courses Collection
#Courses need an instructorId from the list generated.

course_data = []
course_ids = []
categories = ["Programming", "Design", "Business", "Marketing"]

for i in range(8): #looping through 8 courses
    course_id = generate_id()
    course_ids.append(course_id)
    instructor_id = choice(instructor_ids) # Assigning a random instructor
    
    course_data.append({
        "courseId": course_id,
        "title": f"The Ultimate Course in {choice(categories)} {i+1}",
        "description": f"Learn everything about {categories[i % len(categories)]}!",
        "instructorId": instructor_id, # <--- Reference to users.userId
        "category": categories[i % len(categories)],
        "level": choice(["beginner", "intermediate", "advanced"]),
        "duration": randint(5, 50),
        "price": round(uniform(49.99, 199.99), 2),
        "tags": [categories[i % len(categories)].lower(), "2024", "online"],
        "createdAt": datetime.now() - timedelta(days=randint(30, 300)),
        "updatedAt": datetime.now(),
        "isPublished": choice([True, True, False]) 
    })

db.courses.insert_many(course_data)
print(f"Inserted {len(course_data)} courses.")


#Step 3: Enrollments Collection
#This links students to courses. At least 15 enrollments.


enrollment_data = []
course_student_pairs = set()
enrollment_count = 0


while enrollment_count < 15: # Ensure at least 15 unique enrollments
    student_id = choice(student_ids) # Random student
    course_id = choice(course_ids) # Random course
    
    # Ensures a student doesn't enroll in the same course twice
    if (student_id, course_id) not in course_student_pairs: #loop checks for unique pairs
        course_student_pairs.add((student_id, course_id)) #then adds it to the set
        enrollment_count += 1 # Increment counts only on unique enrollment
        
        enrollment_data.append({ #enrollment data to be looped through
            "enrollmentId": generate_id(),
            "studentId": student_id, # <--- Reference to users.userId
            "courseId": course_id,   # <--- Reference to courses.courseId
            "enrollmentDate": datetime.now() - timedelta(days=randint(7, 90)),
            "completionStatus": choice(["in_progress", "completed", "in_progress"]),
            "lastAccessed": datetime.now() - timedelta(hours=randint(1, 48)),
            "progressPercentage": randint(0, 100)
        })

db.enrollments.insert_many(enrollment_data)
print(f"Inserted {len(enrollment_data)} enrollments.")


#Step 4: Lessons Collection
#Lessons belong to courses. We need at least 25 lessons total.


lesson_data = [] #empty list to hold lesson data
lesson_map = {} # Map courseId to a list of lessonIds

for course_id in course_ids: #loop to go through each course

    # Each course gets 3-5 lessons
    num_lessons = randint(3, 5)
    lesson_map[course_id] = []
    for i in range(num_lessons): #loop to create lessons for each course
        lesson_id = generate_id() #generate unique lesson ID
        lesson_map[course_id].append(lesson_id) # Add to map for reference
        
        lesson_data.append({
            "lessonId": lesson_id,
            "courseId": course_id, # <--- Reference to courses.courseId
            "title": f"Lesson {i+1}: Introduction to MongoDB and PyMongo",
            "contentLink": f"/content/{course_id}/{lesson_id}.mp4",
            "order": i + 1,
            "createdAt": datetime.now() - timedelta(days=randint(50, 200)),
            "isPublished": True
        })

db.lessons.insert_many(lesson_data)
print(f"Inserted {len(lesson_data)} lessons.")


#Step 5: Assignments Collection
#Assignments belong to courses (and optionally lessons). We need at least 10 assignments.

# Each assignment is linked to a course and optionally to a lesson within that course.
#empty list to hold assignment data and IDs
assignment_data = [] 
assignment_ids = []

for i in range(10): #loop to create 10 assignments
    course_id = choice(course_ids) # Random course for each assignment
    assignment_id = generate_id() #generate unique assignment ID
    assignment_ids.append(assignment_id) # Store for reference
    
    # Link to a specific lesson in the course (if any exist)
    lesson_id = choice(lesson_map.get(course_id, [None])) 
    
    assignment_data.append({
        "assignmentId": assignment_id,
        "courseId": course_id, # <--- Reference to courses.courseId
        "lessonId": lesson_id, # <--- Reference to lessons.lessonId (optional)
        "title": f"Project {i+1} - SQL Data Warehousing",
        "description": "Data Modeling and Medallion Architecture.",
        "maxGrade": 100, 
        "dueDate": datetime.now() + timedelta(days=randint(1, 14)), # Due in the next 1 to 14 days
        "createdAt": datetime.now() - timedelta(days=randint(10, 50)) # Created 10-50 days ago
    })

db.assignments.insert_many(assignment_data)
print(f"Inserted {len(assignment_data)} assignments.")


#Step 6: Submissions Collection
#Submissions link students to assignments. We need at least 12 submissions.

#empoty lists to hold submission data. Count starts from 0 because we will increment it
submission_data = []
submission_count = 0

# Use a subset of students and assignments for submissions
for assignment_id in random.sample(assignment_ids, k=6): # Select 6 random assignments. k here is a keyword argument
    for student_id in random.sample(student_ids, k=2):     # Assign 2 random students to each
        
        # We need the courseId from the assignment to ensure the student is enrolled 
        
    
        submission_count += 1
        if submission_count > 12:
            break # Break inner loop if we exceed 12 submissions. 

        submission_data.append({
            "submissionId": generate_id(),
            "assignmentId": assignment_id, # <--- Reference to assignments.assignmentId
            "studentId": student_id,       # <--- Reference to users.userId
            "submissionDate": datetime.now() - timedelta(days=randint(1, 7)),
            "submissionContent": f"Submitted file link for {assignment_id}",
            "grade": randint(60, 100) if choice([True, False]) else None, # Some graded, some not
            "feedback": "Great work on the aggregation!" if choice([True, False]) else None,
            "isGraded": True if choice([True, False]) else False
        })
    if submission_count > 12: 
        break # Break outer loop if we exceed 12 submissions. 

db.submissions.insert_many(submission_data)
print(f"Inserted {len(submission_data)} submissions.")


#Task 2.2: Data Relationships
#Verification


print("\n--- Verification Counts ---")
print(f"Total Users: {db.users.count_documents({})}")
print(f"Total Courses: {db.courses.count_documents({})}")
print(f"Total Enrollments: {db.enrollments.count_documents({})}")
print(f"Total Lessons: {db.lessons.count_documents({})}")
print(f"Total Assignments: {db.assignments.count_documents({})}")
print(f"Total Submissions: {db.submissions.count_documents({})}")


Inserted 8 courses.
Inserted 15 enrollments.
Inserted 38 lessons.
Inserted 10 assignments.
Inserted 12 submissions.

--- Verification Counts ---
Total Users: 20
Total Courses: 8
Total Enrollments: 15
Total Lessons: 38
Total Assignments: 10
Total Submissions: 12


Part 3: CRUD Operations

Task 3.1: Create Operations (Insert)

This involves inserting new documents into the collections.

In [10]:


#Task 3.1: Create Operations (Insert)
#
#a. Add a new student user

# Generate a new unique ID for the student
new_student_id = str(uuid.uuid4())[:8]

new_student = {
    "userId": new_student_id,
    "email": "new.student@example.com",
    "firstName": "Chiamaka",
    "lastName": "Adams",
    "role": "student",
    "dateJoined": datetime.now(),
    "profile": {"bio": "Just joined to learn Financial Analysis.", "skills": ["Finance"]},
    "isActive": True
}

# Insert the document
result = db.users.insert_one(new_student)
print(f"New student added with _id: {result.inserted_id}")
student_ids.append(new_student_id) # Add to tracking list

#b. Create a new course

# Generate a new unique ID for the course
new_course_id = str(uuid.uuid4())[:8]
instructor_for_new_course = choice(instructor_ids) # Picks a random instructor

new_course = {
    "courseId": new_course_id,
    "title": "Introduction to Financial Engineering with Python",
    "description": "A deep dive into Financial Data Modelling and Python.",
    "instructorId": instructor_for_new_course,
    "category": "Programming",
    "level": "intermediate",
    "duration": 40,
    "price": 149.99,
    "tags": ["Finance", "data-analysis", "python"],
    "createdAt": datetime.now(),
    "updatedAt": datetime.now(),
    "isPublished": False # Not published yet
}

result = db.courses.insert_one(new_course)
print(f"New course created with _id: {result.inserted_id}")
course_ids.append(new_course_id) # Appended to  tracking list


#c. Enroll a student in a course
#enrolling the new student just created (new_student_id) into the new course (new_course_id).


new_enrollment = {
    "enrollmentId": generate_id(),
    "studentId": new_student_id,      # Reference:  new student
    "courseId": new_course_id,        # Reference:  new course
    "enrollmentDate": datetime.now(),
    "completionStatus": "in_progress",
    "lastAccessed": datetime.now(),
    "progressPercentage": 0
}

result = db.enrollments.insert_one(new_enrollment)
print(f"New enrollment created with _id: {result.inserted_id}")

#d. Add a new lesson to an existing course
# adding a lesson to the new course created (new_course_id).


new_lesson = {
    "lessonId": generate_id(),
    "courseId": new_course_id, # Reference: The new course
    "title": "Lesson 1: PyMongo Connection and CRUD",
    "contentLink": f"/content/{new_course_id}/lesson_3.mp4",
    "order": 1,
    "createdAt": datetime.now(),
    "isPublished": True
}

result = db.lessons.insert_one(new_lesson)
print(f"New lesson added with _id: {result.inserted_id}")




New student added with _id: 68dfbfe315db193e039e42a7
New course created with _id: 68dfbfe315db193e039e42a8
New enrollment created with _id: 68dfbfe315db193e039e42a9
New lesson added with _id: 68dfbfe315db193e039e42aa


Task 3.2: Read Operations

---Querying uses the find() and find_one() methods with various query operators.---

In [11]:


#a. Finding all active students
#This query uses a simple equality match on the role and isActive fields.


active_students = db.users.find({
    "role": "student", 
    "isActive": True
}, {"firstName": 1, "lastName": 1, "email": 1, "_id": 0}) # Projection to select specific fields

print("\n--- Active Students (5 samples) ---")
pprint(list(active_students.limit(5)))

#b. Retrieve course details with instructor information
#an aggregation operation

#using new course we created for a specific example
target_course_id = new_course_id

pipeline = [
    # 1. Match operator for the specific course
    {"$match": {"courseId": target_course_id}},
    # 2. Join (lookup) the instructor details from the 'users' collection
    {"$lookup": {
        "from": "users",          # The collection to join
        "localField": "instructorId", # Field from the 'courses' collection
        "foreignField": "userId",     # Field from the 'users' collection
        "as": "instructor_details"    # The name of the new array field
    }},

    # 3. Unwind the instructor_details array 
    {"$unwind": "$instructor_details"},

    # 4. select the desired output fields, with project operator
    {"$project": {
        "_id": 0,
        "courseTitle": "$title",
        "category": 1,
        "price": 1,
        "instructorName": {"$concat": ["$instructor_details.firstName", " ", "$instructor_details.lastName"]},
        "instructorEmail": "$instructor_details.email"
    }}
]

course_with_instructor = db.courses.aggregate(pipeline)

print("\n--- Course Details with Instructor ---")
pprint(list(course_with_instructor))

#c. Get all courses in a specific category

target_category = "Programming"

programming_courses = db.courses.find({
    "category": target_category
}, {"courseId": 1, "title": 1, "price": 1, "_id": 0})

print(f"\n--- Courses in the '{target_category}' Category ---")
pprint(list(programming_courses))

#d. Find students enrolled in a particular course
#We need to query the collection for enrollments(gives us stidents details) and then use the resulting studentIds to look up the user details.

# Pick a popular course ID from the list
popular_course_id = choice(course_ids[:-1]) 
course_title = db.courses.find_one({"courseId": popular_course_id})['title']

# Aggregation Pipeline to find enrolled students
pipeline = [
    {"$match": {"courseId": popular_course_id}},
    {"$lookup": {
        "from": "users",
        "localField": "studentId",
        "foreignField": "userId",
        "as": "student_info"
    }},
    {"$unwind": "$student_info"},
    {"$project": {
        "_id": 0,
        "studentName": {"$concat": ["$student_info.firstName", " ", "$student_info.lastName"]},
        "enrollmentDate": 1,
        "progressPercentage": 1
    }}
]

enrolled_students = db.enrollments.aggregate(pipeline)

print(f"\n--- Students Enrolled in '{course_title}' ---")
pprint(list(enrolled_students))

#e. Search courses by title (case-insensitive, partial match)
#This uses, $regex, and the options flag, $options: 'i' (for case-insensitive).

search_term = "ultimate"

search_query = {
    "title": {
        "$regex": search_term, 
        "$options": "i" # 'i' stands for case-insensitive
    }
}

matching_courses = db.courses.find(search_query, {"title": 1, "category": 1, "_id": 0})

print(f"\n--- Courses Matching '{search_term}' (case-insensitive) ---")
pprint(list(matching_courses))


--- Active Students (5 samples) ---
[{'email': 'student1@eduhub.com',
  'firstName': 'StudName1',
  'lastName': 'StudLast1'},
 {'email': 'student2@eduhub.com',
  'firstName': 'StudName2',
  'lastName': 'StudLast2'},
 {'email': 'student3@eduhub.com',
  'firstName': 'StudName3',
  'lastName': 'StudLast3'},
 {'email': 'student4@eduhub.com',
  'firstName': 'StudName4',
  'lastName': 'StudLast4'},
 {'email': 'student5@eduhub.com',
  'firstName': 'StudName5',
  'lastName': 'StudLast5'}]

--- Course Details with Instructor ---
[{'category': 'Programming',
  'courseTitle': 'Introduction to Financial Engineering with Python',
  'instructorEmail': 'instructor1@eduhub.com',
  'instructorName': 'InstName1 InstLast1',
  'price': 149.99}]

--- Courses in the 'Programming' Category ---
[{'courseId': 'a9fa29fb',
  'price': 121.79,
  'title': 'The Ultimate Course in Programming 1'},
 {'courseId': 'ff5e0f2c',
  'price': 195.54,
  'title': 'The Ultimate Course in Design 5'},
 {'courseId': '30dd26a8',
  

Task 3.3: Update Operations

This uses the update_one() or update_many() methods along with $set (to replace a field's value) and $push (to add an element to an array).

In [12]:


#a. Update a user’s profile information. update the new student created.


# Use $set to update multiple fields
update_result = db.users.update_one(
    {"userId": new_student_id},
    {"$set": {
        "profile.bio": "Advanced MongoDB user now!",
        "profile.avatar": "new_avatar.jpg",
        "updatedAt": datetime.now() #datetime.now returns real-time date& time
    }}
)
print(f"\nUpdated User Profile: Matched {update_result.matched_count}, Modified {update_result.modified_count}")

#b. Mark a course as published

#updating the new course created.

update_result = db.courses.update_one(
    {"courseId": new_course_id},
    {"$set": {
        "isPublished": True,
        "updatedAt": datetime.now()
    }}
)
print(f"Marked Course as Published: Matched {update_result.matched_count}, Modified {update_result.modified_count}")

#c. Update assignment grades
#finding an 'un-graded submission' and update its grade.

# Find the first submission that is not yet graded
submission_to_grade = db.submissions.find_one({"isGraded": False})

if submission_to_grade:
    submission_id = submission_to_grade['submissionId']
    
    update_result = db.submissions.update_one(
        {"submissionId": submission_id},
        {"$set": {
            "grade": 88,
            "feedback": "Good attempt!",
            "isGraded": True
        }}
    )
    print(f"Updated Submission Grade for ID {submission_id}: Modified {update_result.modified_count}")
else:
    print("Could not find an un-graded submission to update.")

#d. Add tags to an existing course
#This uses the $push operator to add an item to an array field.

course_to_tag = choice(course_ids) # this picks ick a random course
update_result = db.courses.update_one(
    {"courseId": course_to_tag},
    {"$push": {
        "tags": "new-curriculum"
    }}
)
print(f"Added tag to course {course_to_tag}: Modified {update_result.modified_count}")


Updated User Profile: Matched 1, Modified 1
Marked Course as Published: Matched 1, Modified 1
Updated Submission Grade for ID f0f3f2bf: Modified 1
Added tag to course 30dd26a8: Modified 1


Task 3.4: Delete Operations

#Deletion uses update_one() for soft deletes and delete_one() for hard deletes.

In [13]:


#a. Remove a user (soft delete by setting isActive to false)
#softly "delete" the student just created.


update_result = db.users.update_one(
    {"userId": new_student_id},
    {"$set": {
        "isActive": False,
        "deactivatedAt": datetime.now()
    }}
)
print(f"\nSoft Deleted User: Matched {update_result.matched_count}, Modified {update_result.modified_count}")

#b. Delete an enrollment (Hard Delete)


delete_result = db.enrollments.delete_one(
    {"studentId": new_student_id, "courseId": new_course_id}
)
print(f"Deleted Enrollment: {delete_result.deleted_count} document(s) deleted.")

#c. Remove a lesson from a course (Hard Delete)


delete_result = db.lessons.delete_one(
    {"lessonId": new_lesson['lessonId']}
)
print(f"Deleted Lesson: {delete_result.deleted_count} document(s) deleted.")



Soft Deleted User: Matched 1, Modified 1
Deleted Enrollment: 1 document(s) deleted.
Deleted Lesson: 1 document(s) deleted.
