In [None]:
from pymongo import MongoClient
from datetime import datetime


#Part 1
# Establishing connection
client = MongoClient('mongodb://localhost:27017/')
db = client['eduhub_db']

#schema validation. refers to 6-1
user_validator = {
    "$jsonSchema": {
        "bsonType": "object",
        "required": ["userId", "email", "firstName", "lastName", "role", "dateJoined", "isActive"],
        "properties": {
            "email": {"bsonType": "string", "pattern": "^.+@.+\\..+$"},
            "role": {"enum": ["student", "instructor"]},
            "dateJoined": {"bsonType": "date"},
            "isActive": {"bsonType": "bool"}
        }
    }
}

# --- CREATE COLLECTION ---
# Try/except block runs if the collection already exists
try:
    db.create_collection('users', validator=user_validator)
    print("Collection 'users' created with validation.")
except Exception as e:
    # If the collection exists, MongoDB often raises an error on re-creation.
    print(f"Collection 'users' already exists or validation error: {e}")

# Create other collections 
for col_name in ['courses', 'enrollments', 'lessons', 'assignments', 'submissions']:
    if col_name not in db.list_collection_names():
        db.create_collection(col_name)

Collection 'users' created with validation.


In [6]:
import uuid
import random
from datetime import datetime, timedelta
from pymongo import MongoClient
from random import choice, randint, uniform
from pprint import pprint

#Part 2
# Establish connection 
client = MongoClient('mongodb://localhost:27017/')
db = client['eduhub_db']
print("Connected to eduhub_db")

#Task 2.1: Insert Sample Data
# generate and insert at least 20 users (students/instructors),
#  8 courses, 15 enrollments, 25 lessons, 10 assignments, and 12 submissions.

#Step 1: Users Collection
#mix of students and instructors via genereating unique IDs

# Helper function to generate unique IDs
def generate_id():
    return str(uuid.uuid4())[:8] 

users_data = []
instructor_ids = []
student_ids = []
user_count = 20

# Create Instructor Users (4 users)
for i in range(4):
    instructor_id = generate_id()
    instructor_ids.append(instructor_id)
    users_data.append({
        "userId": instructor_id,
        "email": f"instructor{i+1}@eduhub.com",
        "firstName": f"InstName{i+1}",
        "lastName": f"InstLast{i+1}",
        "role": "instructor",
        "dateJoined": datetime.now() - timedelta(days=randint(365, 730)), 
        "profile": {"bio": f"Experienced instructor in Subject {i+1}.", "skills": ["Python", "SQL", "Data Analysis"]},
        "isActive": True
    })

# Create Student Users (16 users)
for i in range(16):
    student_id = generate_id()
    student_ids.append(student_id)
    users_data.append({
        "userId": student_id,
        "email": f"student{i+1}@eduhub.com",
        "firstName": f"StudName{i+1}",
        "lastName": f"StudLast{i+1}",
        "role": "student",
        "dateJoined": datetime.now() - timedelta(days=randint(30, 365)), # Joined 1 month - 1 year ago
        "profile": {"bio": f"Eager to learn Subject {randint(1,4)}.", "skills": ["Javascript", "HTML", "CSS"]},
        "isActive": True
    })

# Insert into collection
db.users.insert_many(users_data)
print(f"Inserted {len(users_data)} users.")

#Step 2: Courses Collection
#Courses need an instructorId from the list generated.

course_data = []
course_ids = []
categories = ["Programming", "Design", "Business", "Marketing"]

for i in range(8): #looping through 8 courses
    course_id = generate_id()
    course_ids.append(course_id)
    instructor_id = choice(instructor_ids) # Assigning a random instructor
    
    course_data.append({
        "courseId": course_id,
        "title": f"The Ultimate Course in {choice(categories)} {i+1}",
        "description": f"Learn everything about {categories[i % len(categories)]}!",
        "instructorId": instructor_id, # <--- Reference to users.userId
        "category": categories[i % len(categories)],
        "level": choice(["beginner", "intermediate", "advanced"]),
        "duration": randint(5, 50),
        "price": round(uniform(49.99, 199.99), 2),
        "tags": [categories[i % len(categories)].lower(), "2024", "online"],
        "createdAt": datetime.now() - timedelta(days=randint(30, 300)),
        "updatedAt": datetime.now(),
        "isPublished": choice([True, True, False]) 
    })

db.courses.insert_many(course_data)
print(f"Inserted {len(course_data)} courses.")


#Step 3: Enrollments Collection
#This links students to courses. At least 15 enrollments.


enrollment_data = []
course_student_pairs = set()
enrollment_count = 0


while enrollment_count < 15: # Ensure at least 15 unique enrollments
    student_id = choice(student_ids) # Random student
    course_id = choice(course_ids) # Random course
    
    # Ensures a student doesn't enroll in the same course twice
    if (student_id, course_id) not in course_student_pairs: #loop checks for unique pairs
        course_student_pairs.add((student_id, course_id)) #then adds it to the set
        enrollment_count += 1 # Increment counts only on unique enrollment
        
        enrollment_data.append({ #enrollment data to be looped through
            "enrollmentId": generate_id(),
            "studentId": student_id, # <--- Reference to users.userId
            "courseId": course_id,   # <--- Reference to courses.courseId
            "enrollmentDate": datetime.now() - timedelta(days=randint(7, 90)),
            "completionStatus": choice(["in_progress", "completed", "in_progress"]),
            "lastAccessed": datetime.now() - timedelta(hours=randint(1, 48)),
            "progressPercentage": randint(0, 100)
        })

db.enrollments.insert_many(enrollment_data)
print(f"Inserted {len(enrollment_data)} enrollments.")


#Step 4: Lessons Collection
#Lessons belong to courses. We need at least 25 lessons total.


lesson_data = [] #empty list to hold lesson data
lesson_map = {} # Map courseId to a list of lessonIds

for course_id in course_ids: #loop to go through each course

    # Each course gets 3-5 lessons
    num_lessons = randint(3, 5)
    lesson_map[course_id] = []
    for i in range(num_lessons): #loop to create lessons for each course
        lesson_id = generate_id() #generate unique lesson ID
        lesson_map[course_id].append(lesson_id) # Add to map for reference
        
        lesson_data.append({
            "lessonId": lesson_id,
            "courseId": course_id, # <--- Reference to courses.courseId
            "title": f"Lesson {i+1}: Introduction to MongoDB and PyMongo",
            "contentLink": f"/content/{course_id}/{lesson_id}.mp4",
            "order": i + 1,
            "createdAt": datetime.now() - timedelta(days=randint(50, 200)),
            "isPublished": True
        })

db.lessons.insert_many(lesson_data)
print(f"Inserted {len(lesson_data)} lessons.")


#Step 5: Assignments Collection
#Assignments belong to courses (and optionally lessons). We need at least 10 assignments.

# Each assignment is linked to a course and optionally to a lesson within that course.
#empty list to hold assignment data and IDs
assignment_data = [] 
assignment_ids = []

for i in range(10): #loop to create 10 assignments
    course_id = choice(course_ids) # Random course for each assignment
    assignment_id = generate_id() #generate unique assignment ID
    assignment_ids.append(assignment_id) # Store for reference
    
    # Link to a specific lesson in the course (if any exist)
    lesson_id = choice(lesson_map.get(course_id, [None])) 
    
    assignment_data.append({
        "assignmentId": assignment_id,
        "courseId": course_id, # <--- Reference to courses.courseId
        "lessonId": lesson_id, # <--- Reference to lessons.lessonId (optional)
        "title": f"Project {i+1} - SQL Data Warehousing",
        "description": "Data Modeling and Medallion Architecture.",
        "maxGrade": 100, 
        "dueDate": datetime.now() + timedelta(days=randint(1, 14)), # Due in the next 1 to 14 days
        "createdAt": datetime.now() - timedelta(days=randint(10, 50)) # Created 10-50 days ago
    })

db.assignments.insert_many(assignment_data)
print(f"Inserted {len(assignment_data)} assignments.")


#Step 6: Submissions Collection
#Submissions link students to assignments. We need at least 12 submissions.

#empoty lists to hold submission data. Count starts from 0 because we will increment it
submission_data = []
submission_count = 0

# Use a subset of students and assignments for submissions
for assignment_id in random.sample(assignment_ids, k=6): # Select 6 random assignments. k here is a keyword argument
    for student_id in random.sample(student_ids, k=2):     # Assign 2 random students to each
        
        # We need the courseId from the assignment to ensure the student is enrolled 
        
    
        submission_count += 1
        if submission_count > 12:
            break # Break inner loop if we exceed 12 submissions. 

        submission_data.append({
            "submissionId": generate_id(),
            "assignmentId": assignment_id, # <--- Reference to assignments.assignmentId
            "studentId": student_id,       # <--- Reference to users.userId
            "submissionDate": datetime.now() - timedelta(days=randint(1, 7)),
            "submissionContent": f"Submitted file link for {assignment_id}",
            "grade": randint(60, 100) if choice([True, False]) else None, # Some graded, some not
            "feedback": "Great work on the aggregation!" if choice([True, False]) else None,
            "isGraded": True if choice([True, False]) else False
        })
    if submission_count > 12: 
        break # Break outer loop if we exceed 12 submissions. 

db.submissions.insert_many(submission_data)
print(f"Inserted {len(submission_data)} submissions.")


#Task 2.2: Data Relationships
#Verification


print("\n--- Verification Counts ---")
print(f"Total Users: {db.users.count_documents({})}")
print(f"Total Courses: {db.courses.count_documents({})}")
print(f"Total Enrollments: {db.enrollments.count_documents({})}")
print(f"Total Lessons: {db.lessons.count_documents({})}")
print(f"Total Assignments: {db.assignments.count_documents({})}")
print(f"Total Submissions: {db.submissions.count_documents({})}")




Connected to eduhub_db
Inserted 20 users.
Inserted 8 courses.
Inserted 15 enrollments.
Inserted 35 lessons.
Inserted 10 assignments.
Inserted 12 submissions.

--- Verification Counts ---
Total Users: 40
Total Courses: 16
Total Enrollments: 30
Total Lessons: 65
Total Assignments: 20
Total Submissions: 12
