In [2]:
!pip install pymongo

Collecting pymongo
  Downloading pymongo-4.15.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)
Downloading pymongo-4.15.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (2.0 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
Downloading dnspython-2.8.0-py3-none-any.whl (331 kB)
Installing collected packages: dnspython, pymongo
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [pymongo]━━━[0m [32m1/2[0m [pymongo]
Successfully installed dnspython-2.8.0 pymongo-4.15.3


In [3]:
from pymongo import MongoClient

# MongoClient connects lazily: the constructor returns immediately even when
# no server is listening on the given address. Send a cheap 'ping' command so
# the success message below is only printed after a real round trip.
client = MongoClient("mongodb://localhost:27017/")
client.admin.command("ping")  # raises a pymongo connection error if the server is unreachable
print("MongoDB connected successfully!")

MongoDB connected successfully!


In [4]:
from pymongo import MongoClient, InsertOne, UpdateOne, DeleteOne

# Handles on the local server, the university database and its courses collection.
client = MongoClient('mongodb://localhost:27017/')
db = client['university_db']
courses_collection = db['courses']

# One (course, enrollments, department) row per course to load.
course_rows = [
    ('Math 101', 30, 'Mathematics'),
    ('CS 102', 25, 'Computer Science'),
    ('History 201', 20, 'History'),
    ('Physics 202', 15, 'Physics'),
]

# Wrap every row in an InsertOne request and send them all in one bulk_write call.
operations = [
    InsertOne({'course': title, 'enrollments': seats, 'department': dept})
    for title, seats, dept in course_rows
]
courses_collection.bulk_write(operations)
print('Courses inserted successfully.')

Courses inserted successfully.


In [5]:
# ============================================================
# bulk_write demo: load the courses, then update and delete in a single batch.
# ============================================================

from pymongo import MongoClient, InsertOne, UpdateOne, DeleteOne

client = MongoClient('mongodb://localhost:27017/')
db = client['university_db']
courses_collection = db['courses']

# Remove every existing document before the inserts below.
courses_collection.delete_many({})

# One (course, enrollments, department) row per course to load.
course_rows = [
    ('Math 101', 30, 'Mathematics'),
    ('CS 102', 25, 'Computer Science'),
    ('History 201', 20, 'History'),
    ('Physics 202', 15, 'Physics'),
]
operations = [
    InsertOne({'course': title, 'enrollments': seats, 'department': dept})
    for title, seats, dept in course_rows
]

courses_collection.bulk_write(operations)
print("Task 1: Courses inserted successfully.\n")

print(" Current Courses Data:")
for course_doc in courses_collection.find():
    print(course_doc)

# ============================================================
# Task 2: two updates and one delete sent as a single bulk_write
# ============================================================

set_math_enrollment = UpdateOne({'course': 'Math 101'}, {'$set': {'enrollments': 35}})  # overwrite the value
grow_cs_enrollment = UpdateOne({'course': 'CS 102'}, {'$inc': {'enrollments': 5}})      # add 5 to the stored value
drop_history_course = DeleteOne({'course': 'History 201'})                              # remove the course
bulk_ops = [set_math_enrollment, grow_cs_enrollment, drop_history_course]

result = courses_collection.bulk_write(bulk_ops)

print("\n Task 2: Bulk operation completed.")
print(f"Modified: {result.modified_count}, Deleted: {result.deleted_count}")

print("\n Updated Courses Data:")
for course_doc in courses_collection.find():
    print(course_doc)


Task 1: Courses inserted successfully.

 Current Courses Data:
{'_id': ObjectId('690bf424c8677e31a8074e4a'), 'course': 'Math 101', 'enrollments': 30, 'department': 'Mathematics'}
{'_id': ObjectId('690bf424c8677e31a8074e4b'), 'course': 'CS 102', 'enrollments': 25, 'department': 'Computer Science'}
{'_id': ObjectId('690bf424c8677e31a8074e4c'), 'course': 'History 201', 'enrollments': 20, 'department': 'History'}
{'_id': ObjectId('690bf424c8677e31a8074e4d'), 'course': 'Physics 202', 'enrollments': 15, 'department': 'Physics'}

 Task 2: Bulk operation completed.
Modified: 2, Deleted: 1

 Updated Courses Data:
{'_id': ObjectId('690bf424c8677e31a8074e4a'), 'course': 'Math 101', 'enrollments': 35, 'department': 'Mathematics'}
{'_id': ObjectId('690bf424c8677e31a8074e4b'), 'course': 'CS 102', 'enrollments': 30, 'department': 'Computer Science'}
{'_id': ObjectId('690bf424c8677e31a8074e4d'), 'course': 'Physics 202', 'enrollments': 15, 'department': 'Physics'}


In [6]:
# ============================================================
# Querying the courses collection with comparison and set-membership filters
# ============================================================

from pymongo import MongoClient, InsertOne

client = MongoClient('mongodb://localhost:27017/')
db = client['university_db']
courses_collection = db['courses']

# Seed the collection only when it holds no documents at all.
if courses_collection.count_documents({}) > 0:
    print(" Existing data found in the collection.\n")
else:
    print(" No data found. Inserting sample dataset...\n")
    seed_rows = [
        ('Math 101', 35, 'Mathematics'),
        ('CS 102', 30, 'Computer Science'),
        ('History 201', 20, 'History'),
        ('Physics 202', 15, 'Physics'),
    ]
    sample_data = [
        InsertOne({'course': title, 'enrollments': seats, 'department': dept})
        for title, seats, dept in seed_rows
    ]
    courses_collection.bulk_write(sample_data)
    print(" Sample data inserted successfully!\n")

# ============================================================
# Task 1: courses whose 'enrollments' value is strictly greater than 20
# ============================================================

print(" Courses with enrollments greater than 20 students:")
over_twenty = {'enrollments': {'$gt': 20}}
for match in courses_collection.find(over_twenty):
    print(match)

# ============================================================
# Task 2: courses whose department is either
# 'Computer Science' or 'Mathematics'
# ============================================================

print("\n Courses from Computer Science or Mathematics departments:")
cs_or_math = {'department': {'$in': ['Computer Science', 'Mathematics']}}
for match in courses_collection.find(cs_or_math):
    print(match)

 Existing data found in the collection.

 Courses with enrollments greater than 20 students:
{'_id': ObjectId('690bf424c8677e31a8074e4a'), 'course': 'Math 101', 'enrollments': 35, 'department': 'Mathematics'}
{'_id': ObjectId('690bf424c8677e31a8074e4b'), 'course': 'CS 102', 'enrollments': 30, 'department': 'Computer Science'}

 Courses from Computer Science or Mathematics departments:
{'_id': ObjectId('690bf424c8677e31a8074e4a'), 'course': 'Math 101', 'enrollments': 35, 'department': 'Mathematics'}
{'_id': ObjectId('690bf424c8677e31a8074e4b'), 'course': 'CS 102', 'enrollments': 30, 'department': 'Computer Science'}


In [7]:
# ============================================================
# Aggregation Framework for Data Analysis
# ============================================================

from pymongo import MongoClient, InsertOne

client = MongoClient('mongodb://localhost:27017/')
db = client['university_db']
courses_collection = db['courses']

# ============================================================
# Step 1: Ensure data exists
# ============================================================

# The sample set is written only when the collection is completely empty;
# documents already present are kept and analysed as they are.
if courses_collection.count_documents({}) == 0:
    print(" No data found. Inserting sample dataset...\n")
    sample_data = [
        InsertOne({'course': 'Math 101', 'enrollments': 35, 'department': 'Mathematics'}),
        InsertOne({'course': 'CS 102', 'enrollments': 30, 'department': 'Computer Science'}),
        InsertOne({'course': 'History 201', 'enrollments': 20, 'department': 'History'}),
        InsertOne({'course': 'Physics 202', 'enrollments': 15, 'department': 'Physics'}),
        InsertOne({'course': 'CS 201', 'enrollments': 45, 'department': 'Computer Science'}),
        InsertOne({'course': 'Math 202', 'enrollments': 25, 'department': 'Mathematics'})
    ]
    courses_collection.bulk_write(sample_data)
    print(" Sample data inserted successfully!\n")
else:
    print(" Existing data found in the collection.\n")

# ============================================================
# Task 1: Use aggregation to find the average enrollment per department
# ============================================================

print(" Average enrollment per department:")
pipeline = [
    # Group by department and average the 'enrollments' field within each group.
    {'$group': {'_id': '$department', 'average_enrollment': {'$avg': '$enrollments'}}}
]
for result in courses_collection.aggregate(pipeline):
    print(result)

# ============================================================
# Task 2: Create an aggregation pipeline that finds
# the maximum enrollment for each department
# ============================================================

print("\n Maximum enrollment per department:")
pipeline = [
    # The $group specification must itself be a document: '_id' is the group
    # key and '$max' keeps the largest 'enrollments' value in each group.
    {'$group': {'_id': '$department', 'max_enrollment': {'$max': '$enrollments'}}}
]
for result in courses_collection.aggregate(pipeline):
    print(result)

 Existing data found in the collection.

 Average enrollment per department:
{'_id': 'Computer Science', 'average_enrollment': 30.0}
{'_id': 'Physics', 'average_enrollment': 15.0}
{'_id': 'Mathematics', 'average_enrollment': 35.0}

 Maximum enrollment per department:
{'_id': 'Mathematics', 'max_enrollment': 35}
{'_id': 'Computer Science', 'max_enrollment': 30}
{'_id': 'Physics', 'max_enrollment': 15}


In [8]:
# ============================================================
# Reshaping documents with the $project and $addFields stages
# ============================================================

from pymongo import MongoClient, InsertOne

client = MongoClient('mongodb://localhost:27017/')
db = client['university_db']
courses_collection = db['courses']

# ============================================================
#  Step 1: seed the collection, but only when it is empty
# ============================================================

if courses_collection.count_documents({}) > 0:
    print(" Existing data found in the collection.\n")
else:
    print(" No data found. Inserting sample dataset...\n")
    seed_rows = [
        ('Math 101', 35, 'Mathematics'),
        ('CS 102', 30, 'Computer Science'),
        ('History 201', 20, 'History'),
        ('Physics 202', 15, 'Physics'),
        ('CS 201', 45, 'Computer Science'),
        ('Math 202', 25, 'Mathematics'),
    ]
    sample_data = [
        InsertOne({'course': title, 'enrollments': seats, 'department': dept})
        for title, seats, dept in seed_rows
    ]
    courses_collection.bulk_write(sample_data)
    print(" Sample data inserted successfully!\n")

# ============================================================
# Task 1: $project that renames two fields and keeps 'enrollments'
# ============================================================

print(" Projection Result (Renaming Fields):")
rename_fields = {
    '$project': {
        '_id': 0,                          # suppress the default _id field
        'course_name': '$course',          # 'course' shown as 'course_name'
        'department_name': '$department',  # 'department' shown as 'department_name'
        'enrollments': 1,                  # passed through unchanged
    }
}
pipeline = [rename_fields]
for result in courses_collection.aggregate(pipeline):
    print(result)

# ============================================================
# Task 2: $addFields that derives 'enrollment_category'
# ============================================================

print("\n Adding Enrollment Category Field:")

# 'high' when enrollments > 20, otherwise 'low'.
is_large_course = {'$gt': ['$enrollments', 20]}
category_expr = {'$cond': {'if': is_large_course, 'then': 'high', 'else': 'low'}}
add_category = {'$addFields': {'enrollment_category': category_expr}}

# Trailing projection that hides _id so the printed documents stay compact.
tidy_output = {
    '$project': {
        '_id': 0,
        'course': 1,
        'department': 1,
        'enrollments': 1,
        'enrollment_category': 1,
    }
}

pipeline = [add_category, tidy_output]
for result in courses_collection.aggregate(pipeline):
    print(result)

 Existing data found in the collection.

 Projection Result (Renaming Fields):
{'enrollments': 35, 'course_name': 'Math 101', 'department_name': 'Mathematics'}
{'enrollments': 30, 'course_name': 'CS 102', 'department_name': 'Computer Science'}
{'enrollments': 15, 'course_name': 'Physics 202', 'department_name': 'Physics'}

 Adding Enrollment Category Field:
{'course': 'Math 101', 'enrollments': 35, 'department': 'Mathematics', 'enrollment_category': 'high'}
{'course': 'CS 102', 'enrollments': 30, 'department': 'Computer Science', 'enrollment_category': 'high'}
{'course': 'Physics 202', 'enrollments': 15, 'department': 'Physics', 'enrollment_category': 'low'}


In [9]:
from pymongo import MongoClient

# Connect and take handles on the two collections used by the tasks below.
client = MongoClient('mongodb://localhost:27017/')
db = client['university_db']
courses_collection = db['courses']
students_collection = db['students']

# Empty both collections so this cell always works on the data inserted here.
for collection in (courses_collection, students_collection):
    collection.delete_many({})

# Course documents, built from (course, enrollments, department) rows.
courses_data = [
    {'course': title, 'enrollments': seats, 'department': dept}
    for title, seats, dept in [
        ('Math 101', 35, 'Mathematics'),
        ('CS 102', 30, 'Computer Science'),
        ('Physics 202', 15, 'Physics'),
        ('CS 201', 40, 'Computer Science'),
        ('History 201', 10, 'History'),
    ]
]
courses_collection.insert_many(courses_data)

# Student documents, built from (student_name, enrolled_course) rows.
students_data = [
    {'student_name': name, 'enrolled_course': course}
    for name, course in [
        ('Alice', 'Math 101'),
        ('Bob', 'CS 102'),
        ('Charlie', 'CS 201'),
        ('Diana', 'CS 201'),
        ('Eve', 'Physics 202'),
    ]
]
students_collection.insert_many(students_data)

# ============================================================
# Task 1: number of course documents per department
# ============================================================
print("\n--- Task 1: Course Count per Department ---")
count_by_department = {'$group': {'_id': '$department', 'course_count': {'$sum': 1}}}
pipeline_1 = [count_by_department]
for result in courses_collection.aggregate(pipeline_1):
    print(result)

# ============================================================
# Task 2: $match 'Computer Science' courses with enrollments > 25,
# then $group them to count the courses and average their enrollments
# ============================================================
print("\n--- Task 2: Filtered Courses (Enrollments > 25, CS only) ---")
only_large_cs = {'$match': {'department': 'Computer Science', 'enrollments': {'$gt': 25}}}
summarise_matches = {
    '$group': {
        '_id': '$department',
        'total_courses': {'$sum': 1},
        'avg_enrollment': {'$avg': '$enrollments'},
    }
}
pipeline_2 = [only_large_cs, summarise_matches]
for result in courses_collection.aggregate(pipeline_2):
    print(result)

# ============================================================
# Task 3: $lookup that attaches matching student documents to each course
# ============================================================
print("\n--- Task 3: Join Courses with Students ---")
attach_students = {
    '$lookup': {
        'from': 'students',                 # collection to join against
        'localField': 'course',             # field on the course document
        'foreignField': 'enrolled_course',  # field on the student document
        'as': 'enrolled_students',          # array field receiving the matches
    }
}
# Keep the course, its department and only the names of the joined students.
keep_names_only = {'$project': {'course': 1, 'department': 1, 'enrolled_students.student_name': 1}}
pipeline_3 = [attach_students, keep_names_only]
for result in courses_collection.aggregate(pipeline_3):
    print(result)



--- Task 1: Course Count per Department ---
{'_id': 'History', 'course_count': 1}
{'_id': 'Computer Science', 'course_count': 2}
{'_id': 'Physics', 'course_count': 1}
{'_id': 'Mathematics', 'course_count': 1}

--- Task 2: Filtered Courses (Enrollments > 25, CS only) ---
{'_id': 'Computer Science', 'total_courses': 2, 'avg_enrollment': 35.0}

--- Task 3: Join Courses with Students ---
{'_id': ObjectId('690bffbec8677e31a8074e52'), 'course': 'Math 101', 'department': 'Mathematics', 'enrolled_students': [{'student_name': 'Alice'}]}
{'_id': ObjectId('690bffbec8677e31a8074e53'), 'course': 'CS 102', 'department': 'Computer Science', 'enrolled_students': [{'student_name': 'Bob'}]}
{'_id': ObjectId('690bffbec8677e31a8074e54'), 'course': 'Physics 202', 'department': 'Physics', 'enrolled_students': [{'student_name': 'Eve'}]}
{'_id': ObjectId('690bffbec8677e31a8074e55'), 'course': 'CS 201', 'department': 'Computer Science', 'enrolled_students': [{'student_name': 'Charlie'}, {'student_name': 'Dian