In [1]:
import pandas as pd
from datetime import datetime, timedelta
from pymongo import MongoClient

# Open a client on the local MongoDB server, then pick the EduHub database
# that every later cell reaches through the module-level name `db`.
client = MongoClient(host='mongodb://localhost:27017/')
db = client['eduhub_db']


### Defining a Function for Course Content Search

### Creating the Text Index for Course Content Search

In [None]:
# Build a single compound text index on db.courses so that `$text` queries
# can match against any of the listed course fields.
db.courses.create_index(
    [(field_name, "text") for field_name in ("title", "description", "category", "tags", "level")]
)

print("Text index created successfully on title, description, category, tags, and level.")

Text index created successfully on title, description, category, tags, and level.


### Function for Course Content Search 

In [34]:
# Full-text search over the course catalogue
def search_courses(search_term):
    """Return the course documents that match ``search_term``.

    Runs a ``$text`` / ``$search`` query against ``db.courses`` and prints how
    many documents were found.

    Args:
        search_term: Value passed unchanged as the ``$search`` argument.

    Returns:
        list: The matching documents as returned by the query, or an empty
        list when any exception is raised (the error is printed, not raised).
    """
    text_query = {"$text": {"$search": search_term}}
    try:
        cursor = db.courses.find(text_query)
        matches = [course for course in cursor]
        print(f"Found {len(matches)} courses matching '{search_term}'")
    except Exception as err:
        # Best-effort by design: report the problem and hand back no matches.
        print("Error searching courses:", err)
        matches = []
    return matches


In [None]:
# Example usage: get matching courses
matching_courses = search_courses("Python")

# Convert to a DataFrame holding only the display columns, in a fixed order.
# reindex (rather than df[[...]]) keeps every column present even when the
# search returns nothing or a document lacks a field, so the cell shows an
# empty/partial table instead of raising KeyError.
df_matching_courses = pd.DataFrame(matching_courses).reindex(
    columns=['title', 'description', 'category', 'level', 'tags']
)

# Join tags into a single string for display. Only non-empty lists/tuples are
# joined; anything else (None, NaN from a missing field, []) becomes "No tags".
# NaN is truthy, so a plain `if x` check would pass it to str.join and fail.
df_matching_courses['tags'] = df_matching_courses['tags'].apply(
    lambda x: ", ".join(map(str, x)) if isinstance(x, (list, tuple)) and x else "No tags"
)

# Display the DataFrame
df_matching_courses

Found 3 courses matching 'Python'


Unnamed: 0,title,description,category,level,tags
0,Introduction to Data Science,"Master Python libraries such as Pandas, NumPy,...",Data Science,beginner,"data, python, analytics"
1,DevOps Fundamentals,Understand cloud architecture and work hands-o...,Cloud Computing,advanced,"cloud, aws, infrastructure, AI, Data Science, ..."
2,Python for Data Analysis,"Learn the fundamentals of data science, from c...",Data Science,advanced,"machinelearning, ai, statistics"


### Function for Recommendation System

In [28]:
def top_courses_recommendation(limit=5):
    """Recommend the courses with the most enrollments.

    Groups ``db.enrollments`` by ``courseId``, keeps the ``limit`` most
    enrolled ids and looks up their display details in ``db.courses``.

    Args:
        limit: Maximum number of courses to return (default 5). A value
            of 0 or less returns an empty list without querying.

    Returns:
        list[dict]: One dict per course with the keys ``"Title"``,
        ``"Category"``, ``"Level"``, ``"Tags"`` and ``"TotalEnrollments"``,
        ordered by enrollment count descending (ties broken by ascending
        ``courseId``). Ids without a matching course document are skipped, so
        fewer than ``limit`` entries may come back. On any exception the error
        is printed and ``[]`` is returned.
    """
    try:
        if limit <= 0:
            return []

        # Aggregate enrollments to find top courses. The secondary sort key
        # makes the order of equally popular courses stable between runs.
        pipeline = [
            {"$group": {"_id": "$courseId", "totalEnrollments": {"$sum": 1}}},
            {"$sort": {"totalEnrollments": -1, "_id": 1}},
            {"$limit": limit}
        ]
        top_enrollments = list(db.enrollments.aggregate(pipeline))
        if not top_enrollments:
            return []

        # Fetch all needed course documents in one query instead of one
        # find_one round trip per course.
        course_ids = [row["_id"] for row in top_enrollments]
        courses_by_id = {}
        for course_doc in db.courses.find({"courseId": {"$in": course_ids}}):
            # setdefault keeps the first document seen for a duplicated courseId
            courses_by_id.setdefault(course_doc.get("courseId"), course_doc)

        results = []
        for row in top_enrollments:
            course_info = courses_by_id.get(row["_id"])
            if course_info:
                results.append({
                    "Title": course_info.get("title", "No title"),
                    "Category": course_info.get("category", "No category"),
                    "Level": course_info.get("level", "No level"),
                    "Tags": course_info.get("tags", []),
                    "TotalEnrollments": row["totalEnrollments"]
                })

        return results

    except Exception as e:
        print("Error generating recommendations:", e)
        return []


### Example of Recommendation System

In [None]:
# Get top courses
top_courses = top_courses_recommendation(limit=5)

# Convert to DataFrame
df_top_courses = pd.DataFrame(top_courses)

# If you want, join tags into a single string for display
df_top_courses['Tags'] = df_top_courses['Tags'].apply(lambda x: ", ".join(x) if x else "No tags")

#Display only selected columns in a specific order
df_top_courses = df_top_courses[['Title', 'Category', 'Level', 'Tags', 'TotalEnrollments']]
df_top_courses

Unnamed: 0,Title,Category,Level,Tags,TotalEnrollments
0,Introduction to Data Science,Data Science,beginner,"data, python, analytics",6
1,Serverless Applications on Cloud,Cloud Computing,beginner,"devops, automation, ci/cd",2
2,Full-Stack Web Development,Web Development,intermediate,"react, javascript, frontend",2
3,Python for Data Analysis,Data Science,advanced,"machinelearning, ai, statistics",2
4,Frontend Development with React,Web Development,beginner,"nodejs, express, backend",1


### Function for Data Archiving Strategy for Old Enrollments

In [None]:
# Defining function for archiving old enrollments
def archive_old_enrollments(days_old=365):
    """Move enrollments older than ``days_old`` days into the archive collection.

    Documents in ``db.enrollments`` whose ``enrolledAt`` is earlier than
    ``now - days_old`` are copied to ``db.enrollments_archive`` and then
    removed from ``db.enrollments``.

    The move is safe to re-run: documents already present in the archive
    (e.g. left there by an interrupted earlier run) are not inserted again,
    and only the documents that were actually read are deleted, by ``_id``.

    Args:
        days_old: Minimum age in days for an enrollment to be archived
            (default 365). Negative values are rejected, because they would
            put the cutoff in the future and archive every enrollment.

    Returns:
        int: Number of enrollments moved out of ``db.enrollments``; 0 when
        there was nothing to archive, ``days_old`` was negative, or an error
        occurred (the error is printed, not raised).
    """
    if days_old < 0:
        print("Error archiving enrollments: days_old must not be negative.")
        return 0

    # NOTE(review): datetime.now() is naive local time - confirm that
    # enrolledAt values are written with the same convention.
    archive_date = datetime.now() - timedelta(days=days_old)

    try:
        # Find old enrollments
        old_enrollments = list(db.enrollments.find({"enrolledAt": {"$lt": archive_date}}))

        if not old_enrollments:
            print("No enrollments to archive.")
            return 0

        old_ids = [doc["_id"] for doc in old_enrollments]

        # Skip documents that a previous, interrupted run already copied;
        # inserting them again would fail on the duplicate _id and block
        # every later attempt.
        already_archived = {
            doc["_id"]
            for doc in db.enrollments_archive.find({"_id": {"$in": old_ids}}, {"_id": 1})
        }
        to_insert = [doc for doc in old_enrollments if doc["_id"] not in already_archived]

        # Insert into archive collection
        if to_insert:
            db.enrollments_archive.insert_many(to_insert)

        # Delete from main collection exactly the documents that were archived.
        # Re-running the date filter here could also remove documents that
        # appeared after the find above and were never copied.
        db.enrollments.delete_many({"_id": {"$in": old_ids}})

        print(f"Archived {len(old_enrollments)} enrollments older than {days_old} days.")
        return len(old_enrollments)

    except Exception as e:
        print("Error archiving enrollments:", e)
        return 0

# Example usage: archive enrollments older than 365 days and keep the count moved
archived_count = archive_old_enrollments(365)

No enrollments to archive.
