# ADVANCED QUERIES AND AGGREGATION

In [1]:
import pandas as pd
from datetime import datetime, timedelta
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['eduhub_db']


In [2]:
print("Collections in eduhub:")
print(db.list_collection_names())

Collections in eduhub:
['assignments', 'enrollments', 'submissions', 'users', 'lessons', 'courses']


## COMPLEX QUERIES

### Courses with Price between $50 & $200

In [3]:
# Finding Courses with price btw $50 & &200
try:
    courses_in_price_range = db.courses.find(
        {"price": {"$gte": 50, "$lte": 200}},
        {"_id": 0, "courseId": 1, "title": 1, "price": 1}
    )
    print("Courses priced between $50 and $200:")
    for course in courses_in_price_range:
        print(course)

except Exception as e:
    print(f"An error occurred while fetching courses by price: {e}")

Courses priced between $50 and $200:
{'courseId': 'CRS001', 'title': 'Cloud Computing with AWS', 'price': 156.47}
{'courseId': 'CRS003', 'title': 'Frontend Development with React', 'price': 195.84}
{'courseId': 'CRS004', 'title': 'Full-Stack Web Development', 'price': 93.67}
{'courseId': 'CRS006', 'title': 'Serverless Applications on Cloud', 'price': 142.25}
{'courseId': 'CRS007', 'title': 'Python for Data Analysis', 'price': 68.43}
{'courseId': 'CRS008', 'title': 'Frontend Development with React', 'price': 183.76}
{'courseId': 'CRS012', 'title': 'Data Visualization with Power BI', 'price': 99.99}


### Users Who Joined In the Last 6 Month

In [4]:
# Finding users who join in thelast 6 month 
try:
    six_months_ago = datetime.now() - timedelta(days=6*30)  # approximate 6 months
    recent_users = db.users.find(
        {"dateJoined": {"$gte": six_months_ago}},
        {"_id": 0, "userId": 1, "firstName": 1, "lastName": 1, "dateJoined": 1}
    )
    print("\nUsers who joined in the last 6 months:")
    for user in recent_users:
        print(user)

except Exception as e:
    print(f"An error occurred while fetching recent users: {e}")


Users who joined in the last 6 months:
{'userId': 'STU002', 'firstName': 'Carrie', 'lastName': 'Williams', 'dateJoined': datetime.datetime(2025, 5, 10, 9, 13, 33, 974000)}
{'userId': 'STU004', 'firstName': 'Shane', 'lastName': 'Scott', 'dateJoined': datetime.datetime(2025, 6, 29, 23, 1, 46, 176000)}
{'userId': 'STU005', 'firstName': 'Elaine', 'lastName': 'Maldonado', 'dateJoined': datetime.datetime(2025, 9, 10, 8, 11, 11, 77000)}
{'userId': 'STU006', 'firstName': 'Mackenzie', 'lastName': 'Richardson', 'dateJoined': datetime.datetime(2025, 9, 6, 22, 5, 43, 652000)}
{'userId': 'STU007', 'firstName': 'Vickie', 'lastName': 'Wiley', 'dateJoined': datetime.datetime(2025, 4, 8, 22, 5, 43, 88000)}
{'userId': 'STU008', 'firstName': 'Amy', 'lastName': 'Alvarez', 'dateJoined': datetime.datetime(2025, 7, 10, 3, 2, 41, 773000)}
{'userId': 'STU010', 'firstName': 'Robin', 'lastName': 'Barnett', 'dateJoined': datetime.datetime(2025, 5, 16, 16, 14, 55, 319000)}
{'userId': 'STU015', 'firstName': 'Micha

### Courses With Specific Tags

In [5]:
# Finding Courses that Have Specific Tags using $in Operator
try:
    tags_to_find = ["Python", "Data Science", "AI"]
    courses_with_tags = db.courses.find(
        {"tags": {"$in": tags_to_find}},
        {"_id": 0, "courseId": 1, "title": 1, "tags": 1}
    )
    print("\nCourses with specific tags:")
    for course in courses_with_tags:
        print(course)

except Exception as e:
    print(f"An error occurred while fetching courses with tags: {e}")


Courses with specific tags:
{'courseId': 'CRS005', 'title': 'DevOps Fundamentals', 'tags': ['cloud', 'aws', 'infrastructure', 'AI', 'Data Science', 'Python']}


### Assignments Due Next Week

In [6]:
# Finding Assignmenst due next week
try:
    one_week_from_now = datetime.now() + timedelta(days=7)
    upcoming_assignments = db.assignments.find(
        {"dueDate": {"$lte": one_week_from_now, "$gte": datetime.now()}},
        {"_id": 0, "assignmentId": 1, "title": 1, "dueDate": 1}
    )
    print("\nAssignments due in the next week:")
    for assignment in upcoming_assignments:
        print(assignment)

except Exception as e:
    print(f"An error occurred while fetching upcoming assignments: {e}")


Assignments due in the next week:
{'assignmentId': 'ASM006', 'title': 'Assignment 6', 'dueDate': datetime.datetime(2025, 10, 8, 3, 51, 48, 504000)}
{'assignmentId': 'ASM007', 'title': 'Assignment 7', 'dueDate': datetime.datetime(2025, 10, 9, 3, 51, 48, 504000)}
{'assignmentId': 'ASM010', 'title': 'Assignment 10', 'dueDate': datetime.datetime(2025, 10, 8, 3, 51, 48, 504000)}


## AGGREGATION PIPELINE

### COURSE ENROLLMENT STATISTICS

### a) Total Enrollment Per Course

In [7]:
# Counting total enrollments per course
try:
    enrollments_per_course = db.enrollments.aggregate([
        {
            "$group": {
                "_id": "$courseId",
                "totalEnrollments": {"$sum": 1}
            }
        },
        {"$sort": {"totalEnrollments": -1}}  # sort by most enrollments
    ])

    print("Total enrollments per course:")
    for item in enrollments_per_course:
        print(item)

except Exception as e:
    print(f"Error fetching enrollments per course: {e}")

Total enrollments per course:
{'_id': 'CRS002', 'totalEnrollments': 6}
{'_id': 'CRS006', 'totalEnrollments': 2}
{'_id': 'CRS004', 'totalEnrollments': 2}
{'_id': 'CRS007', 'totalEnrollments': 2}
{'_id': 'CRS008', 'totalEnrollments': 1}
{'_id': 'CRS009', 'totalEnrollments': 1}
{'_id': 'CRS003', 'totalEnrollments': 1}


### b) Average Course Rating

In [8]:
#  Calculating average course rating
try:
    avg_course_rating = db.submissions.aggregate([
        {
            "$group": {
                "_id": "$courseId",
                "averageRating": {"$avg": "$grade"}
            }
        },
        {"$sort": {"averageRating": -1}}  # sort by highest average rating
    ])

    print("Average course rating:")
    for item in avg_course_rating:
        print(item)

except Exception as e:
    print(f"Error fetching average course rating: {e}")

Average course rating:
{'_id': 'CRS004', 'averageRating': 100.0}
{'_id': 'CRS005', 'averageRating': 98.0}
{'_id': 'CRS008', 'averageRating': 92.0}
{'_id': 'CRS002', 'averageRating': 76.0}
{'_id': 'CRS007', 'averageRating': 67.66666666666667}
{'_id': 'CRS001', 'averageRating': 60.0}
{'_id': 'CRS006', 'averageRating': None}


### c)  Group by Course Category 

In [9]:
#  Group by course category (average rating per category)
try:
    avg_rating_by_category = db.courses.aggregate([
        {
            "$lookup": {
                "from": "submissions",
                "localField": "courseId",
                "foreignField": "courseId",
                "as": "grades"
            }
        },
        {
            "$unwind": "$grades"
        },
        {
            "$group": {
                "_id": "$category",
                "averageRating": {"$avg": "$grades.grade"},
                "totalCourses": {"$sum": 1}
            }
        }
    ])
    print("Average rating by category:")
    for item in avg_rating_by_category:
        print(item)
except Exception as e:
    print(f"Error fetching rating by category: {e}")

Average rating by category:
{'_id': 'Cloud Computing', 'averageRating': 79.0, 'totalCourses': 3}
{'_id': 'Data Science', 'averageRating': 69.75, 'totalCourses': 7}
{'_id': 'Web Development', 'averageRating': 96.0, 'totalCourses': 2}


### STUDENT PERFORMANCE ANALYSIS

### a) Average Grade per Student

In [10]:
# Calculating Average grade per student
try:
    avg_grade_per_student = db.submissions.aggregate([
        {
            "$group": {
                "_id": "$studentId",
                "averageGrade": {"$avg": "$grade"}
            }
        },
        {"$sort": {"averageGrade": -1}}  # sort by highest average grade
    ])

    print("Average grade per student:")
    for item in avg_grade_per_student:
        print(item)

except Exception as e:
    print(f"Error fetching average grade per student: {e}")

Average grade per student:
{'_id': 'STU004', 'averageGrade': 98.0}
{'_id': 'STU003', 'averageGrade': 92.0}
{'_id': 'STU011', 'averageGrade': 82.0}
{'_id': 'STU009', 'averageGrade': 80.0}
{'_id': 'STU010', 'averageGrade': 76.0}
{'_id': 'STU012', 'averageGrade': 60.0}
{'_id': 'STU005', 'averageGrade': 59.0}
{'_id': 'STU014', 'averageGrade': None}
{'_id': 'STU013', 'averageGrade': None}


### b) Completion Rate by Course

In [11]:
# Calculating Completion Rate by course
try:
    completion_rate = db.submissions.aggregate([
        {
            "$group": {
                "_id": "$courseId",
                "completed": {
                    "$sum": {"$cond": [{"$eq": ["$status", "graded"]}, 1, 0]}
                },
                "totalSubmissions": {"$sum": 1}
            }
        },
        {
            "$project": {
                "_id": 1,  # keep courseId
                "completionRate": {
                    "$multiply": [
                        {"$divide": ["$completed", "$totalSubmissions"]},
                        100
                    ]
                }
            }
        },
        {"$sort": {"completionRate": -1}}  # optional: sort by highest rate
    ])

    print("Completion rate by course:")
    for item in completion_rate:
        print(item)

except Exception as e:
    print(f"Error fetching completion rate: {e}")

Completion rate by course:
{'_id': 'CRS004', 'completionRate': 100.0}
{'_id': 'CRS007', 'completionRate': 100.0}
{'_id': 'CRS008', 'completionRate': 100.0}
{'_id': 'CRS005', 'completionRate': 100.0}
{'_id': 'CRS001', 'completionRate': 100.0}
{'_id': 'CRS002', 'completionRate': 25.0}
{'_id': 'CRS006', 'completionRate': 0.0}


### c) Top-performing Students

In [12]:
# Calculating Top performing students
try:
    top_students = db.submissions.aggregate([
        {
            "$group": {
                "_id": "$studentId",
                "averageGrade": {"$avg": "$grade"}
            }
        },
        {"$sort": {"averageGrade": -1}},
        {"$limit": 5}
    ])
    print("Top-performing students:")
    for item in top_students:
        print(item)
except Exception as e:
    print(f"Error fetching top students: {e}")

Top-performing students:
{'_id': 'STU004', 'averageGrade': 98.0}
{'_id': 'STU003', 'averageGrade': 92.0}
{'_id': 'STU011', 'averageGrade': 82.0}
{'_id': 'STU009', 'averageGrade': 80.0}
{'_id': 'STU010', 'averageGrade': 76.0}


### INSTRUCTOR ANALYTICS

### a) Total Students Taught by each Instructor

In [13]:
# Calculating Total Student taught by each instructor
try:
    students_per_instructor = db.courses.aggregate([
        {
            "$lookup": {
                "from": "enrollments",
                "localField": "courseId",
                "foreignField": "courseId",
                "as": "enrolled"
            }
        },
        {
            "$project": {
                "instructorId": 1,
                "studentCount": {"$size": "$enrolled"}  
            }
        },
        {
            "$group": {
                "_id": "$instructorId",
                "totalStudents": {"$sum": "$studentCount"}  
            }
        },
        {
            "$sort": {"totalStudents": -1}  
        }
    ])

    print("Total students per instructor:")
    for item in students_per_instructor:
        print(item)

except Exception as e:
    print(f"Error fetching students per instructor: {e}")

Total students per instructor:
{'_id': 'INST002', 'totalStudents': 8}
{'_id': 'INST003', 'totalStudents': 4}
{'_id': 'INST005', 'totalStudents': 2}
{'_id': 'INST001', 'totalStudents': 0}


### b) Average Course Rating per Instructor

In [14]:
# Calculating Average Course rating  per Instructor
try:
    avg_rating_per_instructor = db.courses.aggregate([
        {
            "$lookup": {
                "from": "submissions",
                "localField": "courseId",
                "foreignField": "courseId",
                "as": "grades"
            }
        },
        {"$unwind": "$grades"},
        {
            "$group": {
                "_id": "$instructorId",
                "averageRating": {"$avg": "$grades.grade"}
            }
        },
        {
            "$sort": {"averageRating": -1}   # Sort by average rating descending
        }
    ])

    print("Average course rating per instructor:")
    for item in avg_rating_per_instructor:
        print(item)

except Exception as e:
    print(f"Error fetching average rating per instructor: {e}")

Average course rating per instructor:
{'_id': 'INST005', 'averageRating': 99.0}
{'_id': 'INST003', 'averageRating': 92.0}
{'_id': 'INST002', 'averageRating': 69.75}
{'_id': 'INST001', 'averageRating': 60.0}


### c) Revenue Generated per Instructor

In [15]:
# Total Revenue generated per Instrictor
try:
    revenue_per_instructor = db.courses.aggregate([
        {
            "$lookup": {
                "from": "enrollments",
                "localField": "courseId",
                "foreignField": "courseId",
                "as": "enrolled"
            }
        },
        {
            "$project": {
                "instructorId": 1,
                "revenue": {"$multiply": [{"$size": "$enrolled"}, "$price"]}
            }
        },
        {
            "$group": {
                "_id": "$instructorId",
                "totalRevenue": {"$sum": "$revenue"}
            }
        },
        {
            "$sort": {"totalRevenue": -1}   
        }
    ])

    print("Revenue per instructor (sorted):")
    for item in revenue_per_instructor:
        print(item)

except Exception as e:
    print(f"Error fetching revenue per instructor: {e}")

Revenue per instructor (sorted):
{'_id': 'INST003', 'totalRevenue': 664.1}
{'_id': 'INST002', 'totalRevenue': 216.36}
{'_id': 'INST005', 'totalRevenue': 187.34}
{'_id': 'INST001', 'totalRevenue': 0.0}


### ADVANCED ANALTICS

### a) Monthly Enrollment Trends

In [16]:
# Calculating Monthly trends
try:
    monthly_trends = db.enrollments.aggregate([
        {
            "$group": {
                "_id": {"year": {"$year": "$enrolledAt"}, "month": {"$month": "$enrolledAt"}},
                "totalEnrollments": {"$sum": 1}
            }
        },
        {"$sort": {"_id.year": 1, "_id.month": 1}}
    ])
    print("Monthly enrollment trends:")
    for item in monthly_trends:
        print(item)
except Exception as e:
    print(f"Error fetching monthly enrollment trends: {e}")

Monthly enrollment trends:
{'_id': {'year': 2024, 'month': 10}, 'totalEnrollments': 1}
{'_id': {'year': 2024, 'month': 12}, 'totalEnrollments': 1}
{'_id': {'year': 2025, 'month': 1}, 'totalEnrollments': 3}
{'_id': {'year': 2025, 'month': 2}, 'totalEnrollments': 1}
{'_id': {'year': 2025, 'month': 3}, 'totalEnrollments': 2}
{'_id': {'year': 2025, 'month': 4}, 'totalEnrollments': 1}
{'_id': {'year': 2025, 'month': 5}, 'totalEnrollments': 1}
{'_id': {'year': 2025, 'month': 7}, 'totalEnrollments': 3}
{'_id': {'year': 2025, 'month': 9}, 'totalEnrollments': 1}
{'_id': {'year': 2025, 'month': 10}, 'totalEnrollments': 1}


### b) Most Popular Course Categories

In [17]:
# Calculating Most Popular Categories
try:
    popular_categories = db.enrollments.aggregate([
        {
            "$lookup": {
                "from": "courses",
                "localField": "courseId",
                "foreignField": "courseId",
                "as": "course"
            }
        },
        {"$unwind": "$course"},
        {
            "$group": {
                "_id": "$course.category",
                "totalEnrollments": {"$sum": 1}
            }
        },
        {"$sort": {"totalEnrollments": -1}}
    ])
    print("Most popular course categories:")
    for item in popular_categories:
        print(item)
except Exception as e:
    print(f"Error fetching popular course categories: {e}")

Most popular course categories:
{'_id': 'Data Science', 'totalEnrollments': 8}
{'_id': 'Web Development', 'totalEnrollments': 4}
{'_id': 'Cloud Computing', 'totalEnrollments': 2}


### c) Student Engagement Metrics

In [18]:
# Student Engagement Metrics
try:
    engagement_metrics = db.submissions.aggregate([
        {
            "$group": {
                "_id": "$studentId",
                "totalSubmissions": {"$sum": 1}
            }
        },
        {"$sort": {"totalSubmissions": -1}}
    ])
    print("Student engagement metrics:")
    for item in engagement_metrics:
        print(item)
except Exception as e:
    print(f"Error fetching student engagement metrics: {e}")

Student engagement metrics:
{'_id': 'STU009', 'totalSubmissions': 2}
{'_id': 'STU011', 'totalSubmissions': 2}
{'_id': 'STU003', 'totalSubmissions': 2}
{'_id': 'STU005', 'totalSubmissions': 1}
{'_id': 'STU004', 'totalSubmissions': 1}
{'_id': 'STU014', 'totalSubmissions': 1}
{'_id': 'STU012', 'totalSubmissions': 1}
{'_id': 'STU013', 'totalSubmissions': 1}
{'_id': 'STU010', 'totalSubmissions': 1}
