# E-LEARNING PLATFORM DATABASE


## CREATING DATABASE


In [15]:
!pip install Faker



In [16]:
from pymongo import MongoClient
from datetime import datetime, timedelta
import pandas as pd
from faker import Faker 
fake = Faker()
import os
import json
import random
import time

In [17]:
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')

# Constructing MongoClient does not wait for the server, so send a cheap
# "ping" command first: it raises if no server answers, which keeps the
# success message below from being printed for an unreachable database.
client.admin.command('ping')

db = client['eduhub_db']
print("Connected to MongoDB successfully!")
print(f"Database: {db.name}")


Connected to MongoDB successfully!
Database: eduhub_db


## CREATING COLLECTIONS WITH VALIDATIONS

In [18]:
# Drop existing collections (if any) so every run starts from a clean slate
for collection in ['users', 'courses', 'enrollments', 'lessons', 'assignments', 'submissions']:
    db[collection].drop()

# Users collection schema: identity fields are required, the nested
# "profile" document (bio / avatar / skills) is optional.
users_validator = {
    '$jsonSchema': {
        'bsonType': 'object',
        'required': ['userId', 'email', 'firstName', 'lastName', 'role', 'dateJoined', 'isActive'],
        'properties': {
            'userId': {'bsonType': 'string'},
            'email': {
                'bsonType': 'string',
                'pattern': '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'
            },
            'firstName': {'bsonType': 'string', 'minLength': 1},
            'lastName': {'bsonType': 'string', 'minLength': 1},
            'role': {'enum': ['student', 'instructor']},
            'dateJoined': {'bsonType': 'date'},
            'isActive': {'bsonType': 'bool'},
            'profile': {
                'bsonType': 'object',
                'properties': {
                    'bio': {'bsonType': 'string'},
                    'avatar': {'bsonType': 'string'},
                    'skills': {
                        'bsonType': 'array',
                        'items': {'bsonType': 'string'}
                    }
                }
            }
        }
    }
}

# Courses collection schema: "instructorId" holds a users.userId value
courses_validator = {
    '$jsonSchema': {
        'bsonType': 'object',
        'required': [
            'courseId', 'title', 'instructorId',
            'level', 'isPublished', 'createdAt'
        ],
        'properties': {
            '_id': {'bsonType': 'objectId'},
            'courseId': {'bsonType': 'string'},
            'title': {'bsonType': 'string', 'minLength': 1},
            'description': {'bsonType': 'string'},
            'instructorId': {'bsonType': 'string'},
            'category': {'bsonType': 'string'},
            'level': {'enum': ['beginner', 'intermediate', 'advanced']},
            'duration': {'bsonType': 'number', 'minimum': 0},
            'price': {'bsonType': 'number', 'minimum': 0},
            'tags': {
                'bsonType': 'array',
                'items': {'bsonType': 'string'}
            },
            'createdAt': {'bsonType': 'date'},
            'updatedAt': {'bsonType': 'date'},
            'isPublished': {'bsonType': 'bool'}
        }
    }
}

# Enrollments collection schema: "progress" is bounded to 0-100 and
# "completedAt" may be null while the enrollment is not finished.
enrollments_validator = {
    '$jsonSchema': {
        'bsonType': 'object',
        'required': ['enrollmentId', 'studentId', 'courseId', 'enrolledAt', 'status'],
        'properties': {
            '_id': {'bsonType': 'objectId'},
            'enrollmentId': {'bsonType': 'string'},
            'studentId': {'bsonType': 'string'},
            'courseId': {'bsonType': 'string'},
            'enrolledAt': {'bsonType': 'date'},
            'status': {'enum': ['active', 'completed', 'dropped']},
            'progress': {'bsonType': 'number', 'minimum': 0, 'maximum': 100},
            'completedAt': {'bsonType': ['date', 'null']},
            'lastAccessedAt': {'bsonType': 'date'}
        }
    }
}

# Lessons collection schema
lessons_validator = {
    '$jsonSchema': {
        'bsonType': 'object',
        'required': ['lessonId', 'courseId', 'title', 'content', 'order'],
        'properties': {
            '_id': {'bsonType': 'objectId'},
            'lessonId': {'bsonType': 'string'},
            'courseId': {'bsonType': 'string'},  # reference to courses
            'title': {'bsonType': 'string', 'minLength': 1},
            'content': {'bsonType': 'string'},
            'order': {'bsonType': 'number', 'minimum': 1},  # lesson sequence
            'resources': {
                'bsonType': 'array',
                'items': {'bsonType': 'string'}  # URLs or file links
            },
            'createdAt': {'bsonType': 'date'},
            'updatedAt': {'bsonType': 'date'}
        }
    }
}

# Assignments collection schema
assignments_validator = {
    '$jsonSchema': {
        'bsonType': 'object',
        'required': ['assignmentId', 'courseId', 'title', 'description', 'dueDate'],
        'properties': {
            '_id': {'bsonType': 'objectId'},
            'assignmentId': {'bsonType': 'string'},
            'courseId': {'bsonType': 'string'},  # reference to course
            'title': {'bsonType': 'string', 'minLength': 1},
            'description': {'bsonType': 'string'},
            'dueDate': {'bsonType': 'date'},
            'createdAt': {'bsonType': 'date'},
            'updatedAt': {'bsonType': 'date'},
            'maxScore': {'bsonType': 'number', 'minimum': 0}
        }
    }
}

# Submissions collection schema: "grade" and "gradedAt" may be null until
# the submission has been graded.
submissions_validator = {
    '$jsonSchema': {
        'bsonType': 'object',
        'required': ['submissionId', 'assignmentId', 'studentId', 'submittedAt'],
        'properties': {
            '_id': {'bsonType': 'objectId'},
            'submissionId': {'bsonType': 'string'},
            'assignmentId': {'bsonType': 'string'},
            'studentId': {'bsonType': 'string'},
            'courseId': {'bsonType': 'string'},
            'submittedAt': {'bsonType': 'date'},
            'content': {'bsonType': 'string'},
            'fileUrl': {'bsonType': 'string'},
            'grade': {
                'bsonType': ['number', 'null'],
                'minimum': 0
            },
            'feedback': {'bsonType': 'string'},
            'gradedAt': {'bsonType': ['date', 'null']},
            'gradedBy': {'bsonType': 'string'},
            'status': {
                'enum': ['submitted', 'graded', 'returned']
            }
        }
    }
}

# Create every collection with its validator. Failures are reported per
# collection and remembered, so the summary below only claims success when
# all six collections were really created.
collection_validators = {
    'users': users_validator,
    'courses': courses_validator,
    'enrollments': enrollments_validator,
    'lessons': lessons_validator,
    'assignments': assignments_validator,
    'submissions': submissions_validator,
}

failed_collections = []
for collection, validator in collection_validators.items():
    try:
        db.create_collection(collection, validator=validator)
        print(f"{collection.capitalize()} collection created")
    except Exception as e:
        failed_collections.append(collection)
        print(f"Error creating '{collection}' collection: {e}")

if failed_collections:
    print(f"\nFailed to create collections: {failed_collections}")
else:
    print("\nAll collections created successfully!")
print(f"Collections: {db.list_collection_names()}")


Users collection created
Courses collection created
Enrollments collection created
Lessons collection created
Assignments collection created
Submissions collection created

All collections created successfully!
Collections: ['users', 'assignments', 'enrollments', 'courses', 'submissions', 'lessons']


## UNIQUE INDEXES FOR COLLECTIONS

In [19]:
# Enforce uniqueness on the identifier fields of each collection
# (collection name -> fields that each get their own unique index)
collections_indexes = {
    "users": ["userId", "email"],
    "courses": ["courseId"],
    "enrollments": ["enrollmentId"],
    "lessons": ["lessonId"],
    "assignments": ["assignmentId"],
    "submissions": ["submissionId"]
}

# Ask the server for the collection list once instead of once per loop
# iteration; building indexes on existing collections does not change it.
existing_collections = set(db.list_collection_names())

for collection_name, fields in collections_indexes.items():
    if collection_name not in existing_collections:
        print(f"Collection {collection_name} does not exist")
        continue

    collection = db[collection_name]
    for field in fields:
        try:
            collection.create_index(field, unique=True)
            print(f"Unique index created on {collection_name}.{field}")
        except Exception as e:
            # Report and keep going so one bad index does not block the rest
            print(f"Could not create index on {collection_name}.{field}: {e}")

Unique index created on users.userId
Unique index created on users.email
Unique index created on courses.courseId
Unique index created on enrollments.enrollmentId
Unique index created on lessons.lessonId
Unique index created on assignments.assignmentId
Unique index created on submissions.submissionId


## INSERTING DATA INTO COLLECTIONS

### Clearing Collection Before Inserting Data to Avoid Duplicates 

In [20]:
# Empty every collection (documents only; the collections themselves and
# their validators stay in place) so the inserts below start from scratch.
for collection_name in ('users', 'courses', 'enrollments',
                        'lessons', 'assignments', 'submissions'):
    db[collection_name].delete_many({})

print("All collections cleared!")


All collections cleared!


### Inserting Users Data

In [21]:
# Inserting 20 users (5 instructors + 15 students)
users_data = []

# users.email carries a unique index, so every generated address must be
# distinct. fake.unique remembers the values it has handed out; clear that
# memory first so re-running this cell starts fresh.
fake.unique.clear()

# Creating 5 instructors
instructors = []  # userIds of the instructors, reused when creating courses
instructor_skills = [
    'Python', 'JavaScript', 'Data Science', 'AI', 'Cloud Computing',
    'Web Development', 'Machine Learning', 'DevOps', 'React', 'Node.js'
]

instructor_bios = [
    "Passionate about teaching Python and building scalable systems.",
    "Expert in AI and Data Science with 10+ years of experience.",
    "Focused on modern web development with React and Node.js.",
    "Loves mentoring students in cloud technologies and DevOps.",
    "Dedicated to simplifying Machine Learning concepts for everyone."
]

for i in range(1, 6):
    first_name = fake.first_name()
    last_name = fake.last_name()

    instructor = {
        'userId': f'INST{i:03d}',
        'email': fake.unique.email(),
        'firstName': first_name,
        'lastName': last_name,
        'role': 'instructor',
        # Instructors joined between two years and one year ago
        'dateJoined': fake.date_time_between(start_date='-2y', end_date='-1y'),
        'profile': {
            'bio': random.choice(instructor_bios),
            'avatar': fake.image_url(),
            'skills': random.sample(instructor_skills, k=random.randint(3, 5))
        },
        'isActive': random.choice([True, False])
    }
    instructors.append(instructor['userId'])
    users_data.append(instructor)

print(f"Created {len(instructors)} instructors: {instructors}")


# Creating 15 students
students = []  # userIds of the students
student_bios = [
    "Aspiring data analyst passionate about turning numbers into insights.",
    "Interested in web development and improving coding skills.",
    "Motivated learner exploring AI and machine learning concepts.",
    "Excited about solving problems with Python and SQL.",
    "Building strong foundations in statistics and data visualization.",
    "Curious about cloud computing and modern tech tools.",
    "Focused on improving programming and problem-solving skills.",
    "Passionate about mathematics and applying it in real-world projects.",
    "Exploring career opportunities in technology and analytics.",
    "Eager to learn new skills in data science and business intelligence."
]

for i in range(1, 16):
    first_name = fake.first_name()
    last_name = fake.last_name()

    student = {
        'userId': f'STU{i:03d}',
        'email': fake.unique.email(),
        'firstName': first_name,
        'lastName': last_name,
        'role': 'student',
        # Students joined within the last year
        'dateJoined': fake.date_time_between(start_date='-1y', end_date='now'),
        'profile': {
            'bio': random.choice(student_bios),
            'avatar': fake.image_url(),
            # Students may have no skills yet, hence the lower bound of 0
            'skills': random.sample(instructor_skills, k=random.randint(0, 3))
        },
        'isActive': random.choice([True, False])
    }
    students.append(student['userId'])
    users_data.append(student)
print(f"Created {len(students)} students")


# Insert all 20 users with error handling
try:
    result = db.users.insert_many(users_data)
    print(f"\nInserted {len(result.inserted_ids)} users into database")
    print(f"   - Instructors: {len(instructors)}")
    print(f"   - Students: {len(students)}")
except Exception as e:
    print(f"An error occurred during insertion: {e}")

# Save to JSON file with error handling
output_folder = "../data"
os.makedirs(output_folder, exist_ok=True)

try:
    with open(os.path.join(output_folder, "users.json"), "w") as f:
        # default=str turns datetimes (and the _id values added by
        # insert_many) into strings so the documents are JSON-serializable
        json.dump(users_data, f, indent=4, default=str)
    print("Users saved to JSON file successfully")
except Exception as e:
    print(f"An error occurred while saving users to JSON: {e}")


Created 5 instructors: ['INST001', 'INST002', 'INST003', 'INST004', 'INST005']
Created 15 students

Inserted 20 users into database
   - Instructors: 5
   - Students: 15
Users saved to JSON file successfully


### Inserting Courses Data

In [22]:
# Course catalogue per category. Within a category, "titles" and
# "descriptions" are parallel lists: descriptions[k] describes titles[k].
category_content = {
    "Data Science": {
        "titles": [
            "Introduction to Data Science",
            "Python for Data Analysis",
            "Machine Learning for Beginners"
        ],
        "descriptions": [
            "Learn the fundamentals of data science, from cleaning datasets to building predictive models.",
            "Master Python libraries such as Pandas, NumPy, and Matplotlib for real-world data tasks.",
            "Understand supervised and unsupervised learning with practical machine learning examples."
        ],
        "tags": [["data", "python", "analytics"], ["machinelearning", "ai", "statistics"], ["visualization", "modeling", "pandas"]]
    },
    "Web Development": {
        "titles": [
            "Frontend Development with React",
            "Backend Development with Node.js",
            "Full-Stack Web Development"
        ],
        "descriptions": [
            "Build responsive user interfaces using React and modern JavaScript tools.",
            "Learn backend principles, APIs, and server management with Node.js and Express.",
            "Master full-stack skills by building complete, scalable web applications."
        ],
        "tags": [["react", "javascript", "frontend"], ["nodejs", "express", "backend"], ["fullstack", "webdev", "coding"]]
    },
    "Cloud Computing": {
        "titles": [
            "Cloud Computing with AWS",
            "DevOps Fundamentals",
            "Serverless Applications on Cloud"
        ],
        "descriptions": [
            "Understand cloud architecture and work hands-on with AWS services.",
            "Learn automation, CI/CD pipelines, and containerization for efficient deployments.",
            "Explore serverless computing and how to build applications without managing servers."
        ],
        "tags": [["cloud", "aws", "infrastructure"], ["devops", "automation", "ci/cd"], ["serverless", "lambda", "scalable"]]
    }
}

levels = ["beginner", "intermediate", "advanced"]

# Generate 8 courses
courses = []
for i in range(8):
    category = random.choice(list(category_content.keys()))
    content = category_content[category]

    # Draw the title together with its own description; choosing them
    # independently would pair a title with another course's description.
    title, description = random.choice(
        list(zip(content["titles"], content["descriptions"]))
    )
    # Tags are drawn from the same category's tag sets
    tags = random.choice(content["tags"])

    courses.append({
        "courseId": f"CRS{i+1:03d}",
        "title": title,
        "description": description,
        "instructorId": random.choice(instructors),
        "category": category,
        "level": random.choice(levels),
        "duration": random.randint(5, 100),  # in hours
        "price": round(random.uniform(10, 200), 2),
        "tags": tags,
        "createdAt": datetime.now() - timedelta(days=random.randint(1, 365)),
        "updatedAt": datetime.now(),
        "isPublished": random.choice([True, False])
    })



# Insert courses with error handling
try:
    result = db.courses.insert_many(courses)
    print(f"Inserted {len(result.inserted_ids)} courses")
except Exception as e:
    print(f"An error occurred while inserting courses: {e}")



# Save courses to JSON file with error handling
try:
    with open(os.path.join(output_folder, "courses.json"), "w") as f:
        # default=str stringifies datetimes and ObjectIds for JSON output
        json.dump(courses, f, indent=4, default=str)
    print("Courses saved to JSON file successfully")
except Exception as e:
    print(f"An error occurred while saving courses to JSON: {e}")



# Collect the student and course IDs used by the enrollment, lesson,
# assignment and submission cells
try:
    students_ids = [u["userId"] for u in users_data if u["role"] == "student"]
    course_ids = [c["courseId"] for c in courses]
    print("Collected student and course IDs successfully")
except Exception as e:
    print(f"An error occurred while collecting IDs: {e}")

Inserted 8 courses
Courses saved to JSON file successfully
Collected student and course IDs successfully


### Inserting Enrollment Data

In [23]:
# Inserting 15 enrollments
statuses = ["active", "completed", "dropped"]
enrollments = []

# Sample from every possible (student, course) combination without
# replacement, so no student is enrolled in the same course twice.
student_course_pairs = [(s, c) for s in students_ids for c in course_ids]
selected_pairs = random.sample(
    student_course_pairs, k=min(15, len(student_course_pairs))
)

for i, (student_id, course_id) in enumerate(selected_pairs):
    status_value = random.choice(statuses)
    enrolled_at = fake.date_time_between(start_date="-1y", end_date="now")

    if status_value == "completed":
        # A completed enrollment is 100% done, and its completion time must
        # fall between the enrollment time and now. The max() guards against
        # a negative span should enrolled_at land marginally after now.
        elapsed = max(datetime.now() - enrolled_at, timedelta(0))
        completed_at_value = enrolled_at + elapsed * random.random()
        progress_value = 100
    else:
        # Active or dropped enrollments are unfinished: no completion time
        # and less than 100% progress.
        completed_at_value = None
        progress_value = random.randint(0, 99)

    enrollment = {
        "enrollmentId": f"ENR{i+1:03d}",
        "studentId": student_id,
        "courseId": course_id,
        "enrolledAt": enrolled_at,
        "status": status_value,
        "progress": progress_value,
        "completedAt": completed_at_value,
        "lastAccessedAt": datetime.now()
    }

    enrollments.append(enrollment)


# Insert enrollments with error handling
try:
    result = db.enrollments.insert_many(enrollments)
    print(f"Inserted {len(result.inserted_ids)} enrollments")
except Exception as e:
    print(f"An error occurred while inserting enrollments: {e}")



# Save enrollments to JSON file with error handling
try:
    with open(os.path.join(output_folder, "enrollments.json"), "w") as f:
        # default=str stringifies datetimes and ObjectIds for JSON output
        json.dump(enrollments, f, indent=4, default=str)
    print("Enrollments saved to JSON file successfully")
except Exception as e:
    print(f"An error occurred while saving enrollments to JSON: {e}")


Inserted 15 enrollments
Enrollments saved to JSON file successfully


### Inserting Lessons Data

In [24]:
# Inserting 25 lessons
# lesson_titles and lesson_contents are parallel lists: lesson_contents[k]
# is the body text for lesson_titles[k].
lesson_titles = [
    "Introduction to Programming Concepts",
    "Understanding Data Structures in Python",
    "Building Your First Web Application",
    "Exploring Machine Learning Basics",
    "Working with SQL Databases",
    "Version Control with Git and GitHub",
    "Advanced Functions and Modules in Python",
    "Creating Interactive Dashboards in Power BI",
    "Object-Oriented Programming Explained",
    "Data Cleaning and Preparation Techniques",
    "Visualization with Matplotlib and Seaborn",
    "APIs and Data Integration Essentials",
    "Deploying Machine Learning Models",
    "Time Series Forecasting Fundamentals",
    "Building RESTful APIs with Flask",
    "Error Handling and Debugging Strategies",
    "Introduction to Cloud Computing",
    "Basics of Frontend Development with React",
    "Introduction to Data Analytics Workflow",
    "Final Project and Capstone Overview"
]

lesson_contents = [
    "This lesson provides an overview of key concepts and sets the foundation for the course.",
    "Students will learn how to structure data efficiently using arrays, lists, and dictionaries.",
    "We will build a simple web application step by step to demonstrate practical implementation.",
    "This session introduces supervised and unsupervised machine learning techniques.",
    "Learn how to design, query, and manage SQL databases for analytics.",
    "Version control is essential for collaboration; this lesson covers Git basics and workflows.",
    "Deep dive into functions, modules, and best practices for writing clean code.",
    "Gain practical experience creating dashboards and reports in Power BI.",
    "Understand the core principles of object-oriented programming with examples.",
    "Learn techniques to clean, prepare, and validate raw data before analysis.",
    "This lesson teaches visualization best practices with real datasets.",
    "Learn how to work with APIs and integrate external data into projects.",
    "Explore techniques for deploying ML models to production environments.",
    "An introduction to time series data and forecasting methods.",
    "Learn to design and build REST APIs using Flask in Python.",
    "Techniques for handling errors, debugging, and writing robust applications.",
    "Introduction to cloud platforms and their use in modern applications.",
    "Learn the basics of frontend web development with React.",
    "This session provides a complete walkthrough of the analytics process.",
    "Overview of final project requirements and capstone preparation."
]

# Zip titles and contents into one list of (title, content) pairs
lesson_data = list(zip(lesson_titles, lesson_contents))

# Generate 25 lessons randomly. "order" is the lesson's position within its
# own course, so keep one running counter per course instead of numbering
# all lessons 1..25 across every course.
lessons = []
lessons_in_course = {}  # courseId -> number of lessons generated so far
for i in range(25):
    title, content = random.choice(lesson_data)  # pick random pair
    lesson_course_id = random.choice(course_ids)
    lesson_order = lessons_in_course.get(lesson_course_id, 0) + 1
    lessons_in_course[lesson_course_id] = lesson_order

    lessons.append({
        "lessonId": f"LES{i+1:03d}",
        "courseId": lesson_course_id,
        "title": title,
        "content": content,
        "order": lesson_order,
        # Zero to two resource links per lesson
        "resources": [fake.url() for _ in range(random.randint(0, 2))],
        "createdAt": datetime.now() - timedelta(days=random.randint(1, 100)),
        "updatedAt": datetime.now()
    })



# Insert lessons with error handling
try:
    result = db.lessons.insert_many(lessons)
    print(f"Inserted {len(result.inserted_ids)} lessons")
except Exception as e:
    print(f"An error occurred while inserting lessons: {e}")



# Save lessons to JSON file with error handling
try:
    with open(os.path.join(output_folder, "lessons.json"), "w") as f:
        # default=str stringifies datetimes and ObjectIds for JSON output
        json.dump(lessons, f, indent=4, default=str)
    print("Lessons saved to JSON file successfully")
except Exception as e:
    print(f"An error occurred while saving lessons to JSON: {e}")

Inserted 25 lessons
Lessons saved to JSON file successfully


### Inserting Assignment data

In [25]:
# Seed the assignments collection with 10 generated documents
descriptions = [
    "Analyze the given dataset and provide insights.",
    "Create a Python script to automate the task.",
    "Build a dashboard using Power BI to visualize sales data.",
    "Perform data cleaning and preprocessing on the dataset.",
    "Write a report on the findings of the market analysis.",
    "Develop a predictive model using machine learning techniques.",
    "Prepare a presentation summarizing the key insights.",
    "Conduct statistical analysis and interpret the results.",
    "Explore and visualize trends in the provided dataset.",
    "Implement an algorithm to solve the given problem."
]

# One document per assignment number 1..10: a random course and description,
# a due date 5-30 days ahead and a creation date 1-30 days back.
assignments = [
    {
        "assignmentId": f"ASM{number:03d}",
        "courseId": random.choice(course_ids),
        "title": f"Assignment {number}",
        "description": random.choice(descriptions),
        "dueDate": datetime.now() + timedelta(days=random.randint(5, 30)),
        "createdAt": datetime.now() - timedelta(days=random.randint(1, 30)),
        "updatedAt": datetime.now(),
        "maxScore": 100
    }
    for number in range(1, 11)
]


# Write the documents to MongoDB; report rather than raise on failure
try:
    result = db.assignments.insert_many(assignments)
    print(f"Inserted {len(result.inserted_ids)} assignments")
except Exception as e:
    print(f"An error occurred while inserting assignments: {e}")


# Mirror the same documents to assignments.json in the output folder
try:
    with open(os.path.join(output_folder, "assignments.json"), "w") as f:
        # default=str stringifies values json cannot encode natively
        json.dump(assignments, f, indent=4, default=str)
    print("Assignments saved to JSON file successfully")
except Exception as e:
    print(f"An error occurred while saving assignments to JSON: {e}")


# Assignment IDs, needed when generating submissions
assignment_ids = [a["assignmentId"] for a in assignments]

Inserted 10 assignments
Assignments saved to JSON file successfully


### Inserting Submission Data

In [None]:
# Inserting 12 submissions
submissions = []
submission_contents = [
    "Completed assignment with detailed explanations.",
    "Attached my solution file with step-by-step answers.",
    "Submitted the project report for review.",
    "Here is my code implementation and documentation.",
    "Final version of my essay submission.",
    "Uploaded analysis with supporting charts and graphs.",
    "Included all calculations and reasoning in the file.",
    "Attached the completed case study as requested.",
    "My solution with additional references included.",
    "Report and summary of my findings are attached."
]

# Each assignment belongs to exactly one course; look the course up from the
# assignment so a submission never points at an unrelated course.
assignment_courses = {a["assignmentId"]: a["courseId"] for a in assignments}

# Force 8 graded, 2 submitted, 2 returned

statuses = ["graded"] * 8 + ["submitted"] * 2 + ["returned"] * 2
random.shuffle(statuses)

for i in range(12):
    status = statuses[i]
    # Defaults describe an ungraded submission; the schema requires feedback
    # and gradedBy to be strings, so they default to "" rather than None.
    grade, gradedAt, feedback, gradedBy = None, None, "", ""

    submitted_at = datetime.now() - timedelta(days=random.randint(1, 10))

    if status == "graded":
        grade = random.randint(50, 100)
        feedback = random.choice([
            "Great effort!",
            "Needs improvement in clarity.",
            "Excellent work.",
            "Check your calculations again.",
            "Well structured."
        ])
        # Grading happens at some point between submission and now
        gradedAt = submitted_at + (datetime.now() - submitted_at) * random.random()
        gradedBy = random.choice(instructors)

    elif status == "returned":
        feedback = random.choice([
            "Please revise and resubmit.",
            "Missing important details.",
            "Recheck formatting requirements.",
            "Work is incomplete, try again."
        ])
        # Returned work was reviewed after it was submitted, but has no grade
        gradedAt = submitted_at + (datetime.now() - submitted_at) * random.random()
        gradedBy = random.choice(instructors)

    elif status == "submitted":
        feedback = random.choice([
            "Submission received, pending grading.",
            "Your work has been submitted successfully.",
            "Assignment received, awaiting review."
        ])

    submission_assignment_id = random.choice(assignment_ids)

    submissions.append({
        "submissionId": f"SUB{i+1:03d}",
        "assignmentId": submission_assignment_id,
        "studentId": random.choice(students_ids),
        "courseId": assignment_courses[submission_assignment_id],
        "submittedAt": submitted_at,
        "content": random.choice(submission_contents),
        "fileUrl": fake.url(),
        "grade": grade,
        "feedback": feedback,
        "gradedAt": gradedAt,
        "gradedBy": gradedBy,
        "status": status
    })



# Insert submissions with error handling
try:
    result = db.submissions.insert_many(submissions)
    print(f"Inserted {len(result.inserted_ids)} submissions")
except Exception as e:
    print(f"An error occurred while inserting submissions: {e}")



# Save submissions to JSON file with error handling
try:
    with open(os.path.join(output_folder, "submissions.json"), "w") as f:
        # default=str stringifies datetimes and ObjectIds for JSON output
        json.dump(submissions, f, indent=4, default=str)
    print("Submissions saved to JSON file successfully")
except Exception as e:
    print(f"An error occurred while saving submissions to JSON: {e}")

Inserted 12 submissions
Submissions saved to JSON file successfully
