# ADVANCED QUERIES AND AGGREGATION

In [5]:
import pandas as pd
from datetime import datetime, timedelta
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['eduhub_db']


In [6]:
print("Collections in eduhub:")
print(db.list_collection_names())

Collections in eduhub:
['enrollments', 'assignments', 'submissions', 'users', 'lessons', 'courses']


## COMPLEX QUERIES

### a) Defining Function to get Courses with Price between $50 & $200

In [39]:
# Finding Courses with price btw $50 & &200
def get_courses_by_price(db, min_price, max_price):

    try:
        # Query courses in the price range
        courses_in_price_range = db.courses.find(
            {"price": {"$gte": min_price, "$lte": max_price}},
            {"_id": 0, "courseId": 1, "title": 1, "price": 1}
        )

        courses_list = list(courses_in_price_range)

        # Convert list to DataFrame
        df_courses_price = pd.DataFrame(courses_list)

        if not df_courses_price.empty:
            df_courses_price = df_courses_price[['courseId', 'title', 'price']]
            df_courses_price = df_courses_price.sort_values(by='price', ascending=False).reset_index(drop=True)
            
            print(f"Courses priced between ${min_price} and ${max_price}:\n")
            return df_courses_price
        else:
            print("No courses found in the specified price range.")
            return pd.DataFrame()

    except Exception as e:
        print(f"An error occurred while fetching courses by price: {e}")
        return pd.DataFrame()
    

# Example 
min_price = 50
max_price = 200

get_courses_by_price(db, min_price, max_price)



Courses priced between $50 and $200:



Unnamed: 0,courseId,title,price
0,CRS003,Frontend Development with React,197.09
1,CRS008,Introduction to Data Science,172.47
2,CRS002,Introduction to Data Science,170.17
3,CRS007,Serverless Applications on Cloud,170.09
4,CRS004,Serverless Applications on Cloud,150.43
5,CRS001,DevOps Fundamentals,124.38
6,CRS006,Introduction to Data Science,82.44
7,CRS005,Python for Data Analysis,62.27


### b) Users Who Joined In the Last 6 Month

In [40]:
# Finding users who join in thelast 6 month 
def get_recent_users(db, months=6):
    try:
        # Calculate the cutoff date
        cutoff_date = datetime.now() - timedelta(days=months*30)  # approximate month as 30 days

        # Query recent users
        recent_users = db.users.find(
            {"dateJoined": {"$gte": cutoff_date}},
            {"_id": 0, "userId": 1, "firstName": 1, "lastName": 1, "dateJoined": 1}
        )

        recent_users_list = list(recent_users)

        # Convert to DataFrame
        df_recent_users = pd.DataFrame(recent_users_list)

        if not df_recent_users.empty:
            df_recent_users = df_recent_users[['userId', 'firstName', 'lastName', 'dateJoined']]
            df_recent_users = df_recent_users.sort_values(by='dateJoined', ascending=False).reset_index(drop=True)
            print(f"Users who joined in the last {months} months:\n")
            return df_recent_users
        else:
            print(f"No users joined in the last {months} months.")
            return pd.DataFrame()

    except Exception as e:
        print(f"An error occurred while fetching recent users: {e}")
        return pd.DataFrame()
    
get_recent_users(db, months=6)

Users who joined in the last 6 months:



Unnamed: 0,userId,firstName,lastName,dateJoined
0,STU006,Crystal,Hudson,2025-09-04 18:08:16.949
1,STU005,Deborah,Christian,2025-08-08 01:22:12.842
2,STU001,Thomas,Hammond,2025-07-25 06:31:40.089
3,STU004,Ian,Contreras,2025-06-06 17:54:33.497
4,STU013,Matthew,Bryant,2025-05-27 03:56:35.943
5,STU008,Kenneth,Juarez,2025-05-15 02:29:46.152
6,STU012,Kelly,Marquez,2025-05-13 04:17:42.166
7,STU007,Brian,Hudson,2025-04-10 11:03:58.498


### c) Courses With Specific Tags

In [41]:
# Finding Courses that Have Specific Tags using $in Operator
def get_courses_by_tags(db, tags_list):
    try:
        # Fetch courses with the specified tags
        courses_with_tags = db.courses.find(
            {"tags": {"$in": tags_list}},
            {"_id": 0, "courseId": 1, "title": 1, "tags": 1}
        )

        courses_list = list(courses_with_tags)

        # Convert list to DataFrame
        df_courses_tags = pd.DataFrame(courses_list)

        if not df_courses_tags.empty:
            df_courses_tags = df_courses_tags[['courseId', 'title', 'tags']]
            print(f"Courses with tags {tags_list}:\n")
            return df_courses_tags
        else:
            print(f"No courses found with the specified tags: {tags_list}")
            return pd.DataFrame()

    except Exception as e:
        print(f"An error occurred while fetching courses with tags: {e}")
        return pd.DataFrame()

# Use Case    
tags_to_search = ["machinelearning ", "satistics", "ai"]
df_courses = get_courses_by_tags(db, tags_to_search)
df_courses

Courses with tags ['machinelearning ', 'satistics', 'ai']:



Unnamed: 0,courseId,title,tags
0,CRS005,Python for Data Analysis,"[machinelearning, ai, statistics]"
1,CRS006,Introduction to Data Science,"[machinelearning, ai, statistics]"


### d) Assignments Due Next Week

In [16]:
# Finding Assignmenst due next week
def get_upcoming_assignments(db, days_ahead=7):
    
    try:
        now = datetime.now()
        end_date = now + timedelta(days=days_ahead)

        # Query upcoming assignments
        upcoming_assignments = db.assignments.find(
            {"dueDate": {"$gte": now, "$lte": end_date}},
            {"_id": 0, "assignmentId": 1, "title": 1, "dueDate": 1}
        )

        assignments_list = list(upcoming_assignments)

        # Convert to DataFrame
        df_upcoming_assignments = pd.DataFrame(assignments_list)

        if not df_upcoming_assignments.empty:
            df_upcoming_assignments = df_upcoming_assignments[['assignmentId', 'title', 'dueDate']]
            df_upcoming_assignments = df_upcoming_assignments.sort_values(by='dueDate', ascending=True).reset_index(drop=True)
            print(f"Assignments due in the next {days_ahead} days:")
            print(df_upcoming_assignments)
            return df_upcoming_assignments
        else:
            print(f"No assignments are due in the next {days_ahead} days.")
            return pd.DataFrame()

    except Exception as e:
        print(f"An error occurred while fetching upcoming assignments: {e}")
        return pd.DataFrame()
    

# Use Case
df_assignments = get_upcoming_assignments(db, days_ahead=7)
df_assignments

No assignments are due in the next 7 days.


## AGGREGATION PIPELINE

### COURSE ENROLLMENT STATISTICS

### a) Total Enrollment Per Course

In [18]:
# Counting total enrollments per course
def get_total_enrollments_per_course(db):
    try:
        enrollments_per_course = db.enrollments.aggregate([
            {
                "$group": {
                    "_id": "$courseId",
                    "TotalEnrollments": {"$sum": 1}
                }
            },
            {
                "$lookup": {
                    "from": "courses",
                    "localField": "_id",
                    "foreignField": "courseId",
                    "as": "course"
                }
            },
            {"$unwind": "$course"},
            {
                "$project": {
                    "_id": 0,
                    "CourseId": "$_id",
                    "Title": "$course.title",
                    "TotalEnrollments": 1
                }
            },
            {"$sort": {"TotalEnrollments": -1}}
        ])

        # Convert aggregation result to DataFrame
        df_enrollments = pd.DataFrame(list(enrollments_per_course))
        if not df_enrollments.empty:
            df_enrollments = df_enrollments[["CourseId", "Title", "TotalEnrollments"]]
            print("Total enrollments per course:\n")
            
            return df_enrollments
        else:
            print("No enrollment data found.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching enrollments per course: {e}")
        return pd.DataFrame()
    
#Use Case 
get_total_enrollments_per_course(db)


Total enrollments per course:



Unnamed: 0,CourseId,Title,TotalEnrollments
0,CRS005,Python for Data Analysis,3
1,CRS004,Serverless Applications on Cloud,3
2,CRS006,Introduction to Data Science,3
3,CRS001,DevOps Fundamentals,2
4,CRS003,Frontend Development with React,1
5,CRS007,Serverless Applications on Cloud,1
6,CRS008,Introduction to Data Science,1
7,CRS002,Introduction to Data Science,1


### b) Average Course Rating

In [21]:
#  Calculating average course rating
def get_average_course_ratings(db):
    # Aggregate average rating per course and return a sorted DataFrame
    try:
        avg_course_rating = db.submissions.aggregate([
            {
                "$group": {
                    "_id": "$courseId",
                    "AverageRating": {"$avg": "$grade"}
                }
            },
            {
                "$lookup": {
                    "from": "courses",
                    "localField": "_id",
                    "foreignField": "courseId",
                    "as": "course"
                }
            },
            {"$unwind": "$course"},
            {
                "$project": {
                    "_id": 0,
                    "CourseId": "$_id",
                    "Title": "$course.title",
                    "AverageRating": {"$round": ["$AverageRating", 2]}
                }
            },
            {"$sort": {"AverageRating": -1}}
        ])

        df_avg_ratings = pd.DataFrame(list(avg_course_rating))
        if not df_avg_ratings.empty:          
            print("Average course rating per course:\n")
            return df_avg_ratings
        else:
            print("No rating data found.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching average course rating: {e}")
        return pd.DataFrame()
    
#Use Case 
get_average_course_ratings(db)

Average course rating per course:



Unnamed: 0,CourseId,Title,AverageRating
0,CRS001,DevOps Fundamentals,92.0
1,CRS002,Introduction to Data Science,74.0
2,CRS008,Introduction to Data Science,65.5
3,CRS007,Serverless Applications on Cloud,64.0
4,CRS004,Serverless Applications on Cloud,62.0
5,CRS003,Frontend Development with React,


### c)  Group by Course Category 

In [22]:
#  Group by course category (average rating per category)
def get_average_rating_by_category(db):
    # Aggregate average course rating per category and total courses
    try:
        avg_rating_by_category = db.courses.aggregate([
            {
                "$lookup": {
                    "from": "submissions",
                    "localField": "courseId",
                    "foreignField": "courseId",
                    "as": "grades"
                }
            },
            {"$unwind": "$grades"},
            {
                "$group": {
                    "_id": "$category",
                    "AverageRating": {"$avg": "$grades.grade"},
                    "TotalCourses": {"$sum": 1}
                }
            },
            {
                "$project": {
                    "_id": 0,
                    "Category": "$_id",           
                    "AverageRating": {"$round": ["$AverageRating", 2]},
                    "TotalCourses": 1
                }
            },
            {"$sort": {"AverageRating": -1}}
        ])

        df_avg_rating_category = pd.DataFrame(list(avg_rating_by_category))
        if not df_avg_rating_category.empty:
            df_avg_rating_category = df_avg_rating_category[["Category", "TotalCourses", "AverageRating"]]
            print("Average rating by category:\n")
            return df_avg_rating_category
        else:
            print("No rating data found by category.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching rating by category: {e}")
        return pd.DataFrame()

#Use Case   
get_average_rating_by_category(db)

Average rating by category:



Unnamed: 0,Category,TotalCourses,AverageRating
0,Cloud Computing,6,70.5
1,Data Science,5,69.75
2,Web Development,1,


### STUDENT PERFORMANCE ANALYSIS

### a) Average Grade per Student

In [24]:
# Calculating Average grade per student
def get_average_grade_per_student(db):
    # Aggregate average grade per student and return a sorted DataFrame
    try:
        avg_grade_per_student = db.submissions.aggregate([
            {
                "$group": {
                    "_id": "$studentId",
                    "averageGrade": {"$avg": "$grade"}
                }
            },
            {"$sort": {"averageGrade": -1}},
            {
                "$lookup": {
                    "from": "users",
                    "localField": "_id",
                    "foreignField": "userId",
                    "as": "student"
                }
            },
            {"$unwind": "$student"},
            {
                "$project": {
                    "_id": 0,
                    "FirstName": "$student.firstName",
                    "LastName": "$student.lastName",
                    "Email": "$student.email",
                    "AverageGrade": {"$round": ["$averageGrade", 2]}
                }
            }
        ])

        df_avg_grade_student = pd.DataFrame(list(avg_grade_per_student))
        if not df_avg_grade_student.empty:
            print("Average grade per student:\n")
            return df_avg_grade_student
        else:
            print("No grade data found for students.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching average grade per student: {e}")
        return pd.DataFrame()
    
# Use Case 
get_average_grade_per_student(db)

Average grade per student:



Unnamed: 0,FirstName,LastName,Email,AverageGrade
0,Thomas,Hammond,floresmichael@example.net,86.5
1,Jennifer,Hoffman,ddowns@example.com,67.0
2,Crystal,Hudson,mlawrence@example.org,65.5
3,Deborah,Christian,susan32@example.com,64.0
4,Brian,Hudson,rodriguezmichael@example.com,62.0
5,Matthew,Bryant,tara39@example.net,
6,Kenneth,Juarez,kyle36@example.com,


### b) Completion Rate by Course

In [26]:
# Calculating Completion Rate by course
def get_completion_rate_per_course(db):
    # Aggregate completion rate per course and return a sorted DataFrame
    try:
        completion_rate = db.submissions.aggregate([
            {
                "$group": {
                    "_id": "$courseId",
                    "completed": {
                        "$sum": {"$cond": [{"$eq": ["$status", "graded"]}, 1, 0]}
                    },
                    "totalSubmissions": {"$sum": 1}
                }
            },
            {
                "$project": {
                    "_id": 1,
                    "completionRate": {
                        "$multiply": [
                            {"$divide": ["$completed", "$totalSubmissions"]},
                            100
                        ]
                    }
                }
            },
            {
                "$lookup": {
                    "from": "courses",
                    "localField": "_id",
                    "foreignField": "courseId",
                    "as": "course"
                }
            },
            {"$unwind": "$course"},
            {
                "$project": {
                    "_id": 0,
                    "CourseId": "$_id",
                    "Title": "$course.title",
                    "CompletionRate": {"$round": ["$completionRate", 2]}
                }
            },
            {"$sort": {"CompletionRate": -1}}
        ])

        df_completion = pd.DataFrame(list(completion_rate))
        if not df_completion.empty:
            print("Completion rate by course:\n")
            return df_completion
        else:
            print("No submissions found for completion rate.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching completion rate: {e}")
        return pd.DataFrame()
    
# Use Case 
get_completion_rate_per_course(db)

Completion rate by course:



Unnamed: 0,CourseId,Title,CompletionRate
0,CRS008,Introduction to Data Science,100.0
1,CRS004,Serverless Applications on Cloud,100.0
2,CRS002,Introduction to Data Science,66.67
3,CRS007,Serverless Applications on Cloud,66.67
4,CRS001,DevOps Fundamentals,50.0
5,CRS003,Frontend Development with React,0.0


### c) Top-performing Students

In [29]:
# Calculating Top performing students
def get_top_students(db, top_n=5):
    # Aggregate top N students by average grade and return a DataFrame
    try:
        top_students = db.submissions.aggregate([
            {
                "$group": {
                    "_id": "$studentId",
                    "averageGrade": {"$avg": "$grade"}
                }
            },
            {"$sort": {"averageGrade": -1}},
            {"$limit": top_n},
            {
                "$lookup": {
                    "from": "users",
                    "localField": "_id",
                    "foreignField": "userId",
                    "as": "student"
                }
            },
            {"$unwind": "$student"},
            {
                "$project": {
                    "_id": 0,
                    "FirstName": "$student.firstName",
                    "LastName": "$student.lastName",
                    "Email": "$student.email",
                    "AverageGrade": {"$round": ["$averageGrade", 2]}
                }
            }
        ])

        df_top_students = pd.DataFrame(list(top_students))
        if not df_top_students.empty:
            print("Top Performing Students:\n")
            return df_top_students
        else:
            print("No student grade data found.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching top students: {e}")
        return pd.DataFrame()
    
# Use Case 
get_top_students(db, top_n=5)

Top Performing Students:



Unnamed: 0,FirstName,LastName,Email,AverageGrade
0,Thomas,Hammond,floresmichael@example.net,86.5
1,Jennifer,Hoffman,ddowns@example.com,67.0
2,Crystal,Hudson,mlawrence@example.org,65.5
3,Deborah,Christian,susan32@example.com,64.0
4,Brian,Hudson,rodriguezmichael@example.com,62.0


### INSTRUCTOR ANALYTICS

### a) Total Students Taught by each Instructor

In [32]:
# Calculating Total Student taught by each instructor
def get_students_per_instructor(db):
    # Aggregate total students taught by each instructor and return a DataFrame
    try:
        students_per_instructor = db.courses.aggregate([
            {
                "$lookup": {
                    "from": "enrollments",
                    "localField": "courseId",
                    "foreignField": "courseId",
                    "as": "enrolled"
                }
            },
            {
                "$project": {
                    "instructorId": 1,
                    "studentCount": {"$size": "$enrolled"}  
                }
            },
            {
                "$group": {
                    "_id": "$instructorId",
                    "totalStudents": {"$sum": "$studentCount"}  
                }
            },
            {
                "$lookup": {
                    "from": "users",
                    "localField": "_id",
                    "foreignField": "userId",
                    "as": "instructor"
                }
            },
            {"$unwind": "$instructor"},
            {
                "$project": {
                    "_id": 0,
                    "FirstName": "$instructor.firstName",
                    "LastName": "$instructor.lastName",
                    "Email": "$instructor.email",
                    "TotalStudents": "$totalStudents"
                }
            },
            {"$sort": {"TotalStudents": -1}}
        ])

        df_students_per_instructor = pd.DataFrame(list(students_per_instructor))
        if not df_students_per_instructor.empty:
            print("Total students taught by each instructor:\n")
            return df_students_per_instructor
        else:
            print("No data found for students per instructor.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching students per instructor: {e}")
        return pd.DataFrame()
    
# Use Case 
get_students_per_instructor(db)

Total students taught by each instructor:



Unnamed: 0,FirstName,LastName,Email,TotalStudents
0,Mallory,Henson,susanford@example.org,7
1,Matthew,Palmer,abishop@example.com,4
2,Kristen,Mclean,xcarpenter@example.net,4


### b) Average Course Rating per Instructor

In [33]:
# Calculating Average Course rating  per Instructor
def get_avg_rating_per_instructor(db):
    # Aggregate average course rating per instructor and return a DataFrame
    try:
        avg_rating_per_instructor = db.courses.aggregate([
            {
                "$lookup": {
                    "from": "submissions",
                    "localField": "courseId",
                    "foreignField": "courseId",
                    "as": "grades"
                }
            },
            {"$unwind": "$grades"},
            {
                "$group": {
                    "_id": "$instructorId",
                    "averageRating": {"$avg": "$grades.grade"}
                }
            },
            {
                "$lookup": {
                    "from": "users",
                    "localField": "_id",
                    "foreignField": "userId",
                    "as": "instructor"
                }
            },
            {"$unwind": "$instructor"},
            {
                "$project": {
                    "_id": 0,
                    "InstructorID": "$_id",
                    "FirstName": "$instructor.firstName",
                    "LastName": "$instructor.lastName",
                    "Email": "$instructor.email",
                    "AverageRating": {"$round": ["$averageRating", 2]}
                }
            },
            {"$sort": {"AverageRating": -1}}
        ])

        df_avg_rating = pd.DataFrame(list(avg_rating_per_instructor))
        if not df_avg_rating.empty:
            print("Average course rating per instructor:\n")
            return df_avg_rating
        else:
            print("No data found for average rating per instructor.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching average rating per instructor: {e}")
        return pd.DataFrame()
    
#Use Case 
get_avg_rating_per_instructor(db)

Average course rating per instructor:



Unnamed: 0,InstructorID,FirstName,LastName,Email,AverageRating
0,INST005,Matthew,Palmer,abishop@example.com,73.6
1,INST002,Mallory,Henson,susanford@example.org,65.5
2,INST004,Kristen,Mclean,xcarpenter@example.net,62.0


### c) Revenue Generated per Instructor

In [34]:
# Total Revenue generated per Instrictor
def get_revenue_per_instructor(db):
    # Aggregate total revenue per instructor and return a DataFrame
    try:
        revenue_per_instructor = db.courses.aggregate([
            {
                "$lookup": {
                    "from": "enrollments",
                    "localField": "courseId",
                    "foreignField": "courseId",
                    "as": "enrolled"
                }
            },
            {
                "$project": {
                    "instructorId": 1,
                    "revenue": {"$multiply": [{"$size": "$enrolled"}, "$price"]}
                }
            },
            {
                "$group": {
                    "_id": "$instructorId",
                    "totalRevenue": {"$sum": "$revenue"}
                }
            },
            {
                "$lookup": {
                    "from": "users",
                    "localField": "_id",
                    "foreignField": "userId",
                    "as": "instructor"
                }
            },
            {"$unwind": "$instructor"},
            {
                "$project": {
                    "_id": 0,
                    "FirstName": "$instructor.firstName",
                    "LastName": "$instructor.lastName",
                    "Email": "$instructor.email",
                    "TotalRevenue": {"$round": ["$totalRevenue", 2]}
                }
            },
            {"$sort": {"TotalRevenue": -1}}
        ])

        df_revenue = pd.DataFrame(list(revenue_per_instructor))
        if not df_revenue.empty:
            print("Revenue per Instructor:\n")
            return df_revenue
        else:
            print("No revenue data found.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching revenue per instructor: {e}")
        return pd.DataFrame()
    
# Use Case 
get_revenue_per_instructor(db)

Revenue per Instructor:



Unnamed: 0,FirstName,LastName,Email,TotalRevenue
0,Kristen,Mclean,xcarpenter@example.net,648.38
1,Mallory,Henson,susanford@example.org,606.6
2,Matthew,Palmer,abishop@example.com,589.02


### ADVANCED ANALTICS

### a) Monthly Enrollment Trends

In [36]:
# Calculating Monthly trends
def get_monthly_enrollment_trends(db):
    # Aggregate monthly enrollment trends and return a DataFrame
    try:
        monthly_trends = db.enrollments.aggregate([
            {
                "$group": {
                    "_id": {"year": {"$year": "$enrolledAt"}, "month": {"$month": "$enrolledAt"}},
                    "totalEnrollments": {"$sum": 1}
                }
            },
            {"$sort": {"_id.year": 1, "_id.month": 1}}
        ])

        trends_list = list(monthly_trends)

        if trends_list:
            df_trends = pd.DataFrame(trends_list)
            # Flatten the _id dictionary to separate columns
            df_trends['Year'] = df_trends['_id'].apply(lambda x: x['year'])
            df_trends['Month'] = df_trends['_id'].apply(lambda x: x['month'])
            df_trends = df_trends.drop(columns=['_id'])
            df_trends = df_trends[['Year', 'Month', 'totalEnrollments']]

            print("Monthly Enrollment Trends:\n")
            return df_trends
        else:
            print("No enrollment data found.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching monthly enrollment trends: {e}")
        return pd.DataFrame()
    
# Use Case 
get_monthly_enrollment_trends(db)

Monthly Enrollment Trends:



Unnamed: 0,Year,Month,totalEnrollments
0,2024,10,1
1,2024,12,2
2,2025,1,2
3,2025,2,1
4,2025,3,3
5,2025,4,1
6,2025,5,2
7,2025,7,1
8,2025,8,2


### b) Most Popular Course Categories

In [37]:
# Calculating Most Popular Categories
def get_popular_course_categories(db):
    # Aggregate total enrollments per category and return a sorted DataFrame
    try:
        popular_categories = db.enrollments.aggregate([
            {
                "$lookup": {
                    "from": "courses",
                    "localField": "courseId",
                    "foreignField": "courseId",
                    "as": "course"
                }
            },
            {"$unwind": "$course"},
            {
                "$group": {
                    "_id": "$course.category",
                    "totalEnrollments": {"$sum": 1}
                }
            },
            {"$sort": {"totalEnrollments": -1}}
        ])

        categories_list = list(popular_categories)

        if categories_list:
            df_categories = pd.DataFrame(categories_list)
            df_categories = df_categories.rename(columns={"_id": "Category", "totalEnrollments": "TotalEnrollments"})
            print("Most popular Course Categories:\n")
            return df_categories
        else:
            print("No enrollment data found.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching popular course categories: {e}")
        return pd.DataFrame()
    
# Use Case 
get_popular_course_categories(db)

Most popular Course Categories:



Unnamed: 0,Category,TotalEnrollments
0,Data Science,8
1,Cloud Computing,6
2,Web Development,1


### c) Student Engagement Metrics

In [38]:
# Student Engagement Metric
def get_student_engagement_metrics(db):
    # Aggregate total submissions per student and return a sorted DataFrame
    try:
        engagement_metrics = db.submissions.aggregate([
            {
                "$group": {
                    "_id": "$studentId",
                    "totalSubmissions": {"$sum": 1}
                }
            },
            {"$sort": {"totalSubmissions": -1}}
        ])

        engagement_list = list(engagement_metrics)

        if engagement_list:
            df_engagement = pd.DataFrame(engagement_list)
            df_engagement = df_engagement.rename(columns={"_id": "StudentID", "totalSubmissions": "TotalSubmissions"})

            print("Student Engagement Metrics:\n")
            return df_engagement
        else:
            print("No submission data found.")
            return pd.DataFrame()

    except Exception as e:
        print(f"Error fetching student engagement metrics: {e}")
        return pd.DataFrame()
    
# Use Case 
get_student_engagement_metrics(db)

Student Engagement Metrics:



Unnamed: 0,StudentID,TotalSubmissions
0,STU007,2
1,STU008,2
2,STU005,2
3,STU006,2
4,STU001,2
5,STU014,1
6,STU013,1
