<a href="https://colab.research.google.com/github/AnshuKamath/DB-Analytics-Assignment/blob/main/Section3_MongoDB_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Section 3: MongoDB Implementation for Advanced Business Analytics
# Part 2: Designing NoSQL Schema for Company
# Student: Anshu Kamleshkumar Kamath

# Install required libraries
!pip install pymongo dnspython

import json
import os
from datetime import datetime, timezone

import pymongo
from bson import ObjectId
from pymongo import MongoClient

# Database Connection Configuration
# The connection string embeds a username and password, so it is read from the
# MONGODB_URI environment variable instead of being committed with the notebook.
# Set it before running this cell, for example:
#   os.environ["MONGODB_URI"] = "mongodb+srv://<user>:<password>@<cluster-host>/"
# NOTE(review): any credential that was previously committed in this file
# should be rotated.
CONNECTION_STRING = os.environ.get("MONGODB_URI")
DATABASE_NAME = "CompanyDB"

if not CONNECTION_STRING:
    raise RuntimeError(
        "MONGODB_URI is not set; provide the MongoDB Atlas connection string "
        "through the environment before running this notebook."
    )

print("=== Section 3.2: NoSQL Schema Design for Company ===")
print("Connecting to MongoDB Atlas...")

try:
    # Establish connection to MongoDB Atlas
    client = MongoClient(CONNECTION_STRING)
    db = client[DATABASE_NAME]

    # Test connection: the ping forces a round trip to the server
    client.admin.command('ping')
    print("Successfully connected to MongoDB Atlas")
    print(f"Working with database: {DATABASE_NAME}")

except pymongo.errors.PyMongoError as e:
    print(f"Connection failed: {e}")
    # Everything below uses `client` and `db`; continuing would only produce
    # confusing follow-on errors, so stop here with the original cause.
    raise

# =============================================================================
# DESIGNING NOSQL SCHEMA WITH VALIDATION RULES
# =============================================================================

print("\n" + "="*70)
print("DESIGNING NOSQL SCHEMA FOR COMPANY - 8 COLLECTIONS")
print("="*70)

# 1. CUSTOMERS COLLECTION SCHEMA
# The dict is unpacked into create_collection(), so its single "validator" key
# becomes the collection's $jsonSchema validator.
print("\n🏗️  1. CUSTOMERS COLLECTION SCHEMA")
customers_schema = {
    "validator": {
        "$jsonSchema": {
            "bsonType": "object",
            "title": "Customers Collection Schema",
            # Only these four fields are mandatory; everything else is optional.
            "required": ["name", "gender", "age", "phone_number"],
            "properties": {
                "_id": {
                    "bsonType": "objectId",
                    "description": "Unique customer identifier"
                },
                "name": {
                    "bsonType": "string",
                    "description": "Customer full name"
                },
                "gender": {
                    "bsonType": "string",
                    "enum": ["Male", "Female", "Other"],
                    "description": "Customer gender"
                },
                "age": {
                    "bsonType": "int",
                    "minimum": 18,
                    "maximum": 120,
                    "description": "Customer age (18-120)"
                },
                "phone_number": {
                    "bsonType": "string",
                    "description": "Customer contact number"
                },
                # Addresses are embedded sub-documents rather than references.
                "addresses": {
                    "bsonType": "array",
                    "description": "Array of customer addresses",
                    "items": {
                        "bsonType": "object",
                        "properties": {
                            "_id": {"bsonType": "objectId"},
                            "house": {"bsonType": "string"},
                            "street": {"bsonType": "string"},
                            "city": {"bsonType": "string"},
                            "post_code": {"bsonType": "string"},
                            # Point-typed location; the contents of
                            # "coordinates" are not constrained further here.
                            "location": {
                                "bsonType": "object",
                                "properties": {
                                    "type": {"bsonType": "string", "enum": ["Point"]},
                                    "coordinates": {"bsonType": "array"}
                                }
                            }
                        }
                    }
                },
                # Item structure of current orders is left unvalidated.
                "current_orders": {
                    "bsonType": "array",
                    "description": "Array of current orders"
                }
            }
        }
    }
}

try:
    db.create_collection("customers", **customers_schema)
    print("Customers collection schema created successfully")
    print("Features: Embedded addresses, geospatial location, current orders")
except pymongo.errors.CollectionInvalid:
    # The collection already exists. Drop it (discarding its documents) and
    # recreate it so the validator above is guaranteed to be in effect.
    db.customers.drop()
    db.create_collection("customers", **customers_schema)
    print("Customers collection schema recreated successfully")
except pymongo.errors.PyMongoError as e:
    # Any other driver/server failure used to be swallowed silently.
    print(f"Customers collection schema creation failed: {e}")

# 2. PRODUCTS COLLECTION SCHEMA
# Unpacked into create_collection(), so "validator" becomes the collection's
# $jsonSchema validator.
print("\n🏗️  2. PRODUCTS COLLECTION SCHEMA")
products_schema = {
    "validator": {
        "$jsonSchema": {
            "bsonType": "object",
            "title": "Products Collection Schema",
            # Mandatory fields; descriptive and pricing extras are optional.
            "required": ["product_name", "std_price", "category"],
            "properties": {
                "_id": {"bsonType": "objectId"},
                "product_name": {"bsonType": "string"},
                "short_desc": {"bsonType": "string"},
                "dimensions": {
                    "bsonType": "object",
                    "properties": {
                        "length": {"bsonType": "number"},
                        "width": {"bsonType": "number"},
                        "height": {"bsonType": "number"}
                    }
                },
                "quantity_per_unit": {"bsonType": "string"},
                # Ratings are bounded to the 0-5 range; prices must be >= 0.
                "avg_ratings": {"bsonType": "number", "minimum": 0, "maximum": 5},
                "std_price": {"bsonType": "number", "minimum": 0},
                "supp_price": {"bsonType": "number", "minimum": 0},
                "category": {
                    "bsonType": "string",
                    "enum": ["Fresh", "Books", "CDs", "Phones", "Home_appliances"]
                }
            }
        }
    }
}

try:
    db.create_collection("products", **products_schema)
    print("Products collection schema created successfully")
    print("Features: Multiple categories, pricing validation, rating constraints")
except pymongo.errors.CollectionInvalid:
    # The collection already exists. Drop it (discarding its documents) and
    # recreate it so the validator above is guaranteed to be in effect.
    db.products.drop()
    db.create_collection("products", **products_schema)
    print("Products collection schema recreated successfully")
except pymongo.errors.PyMongoError as e:
    # Any other driver/server failure used to be swallowed silently.
    print(f"Products collection schema creation failed: {e}")

# 3. SUPPLIERS COLLECTION SCHEMA
# Unpacked into create_collection(), so "validator" becomes the collection's
# $jsonSchema validator.
print("\n🏗️  3. SUPPLIERS COLLECTION SCHEMA")
suppliers_schema = {
    "validator": {
        "$jsonSchema": {
            "bsonType": "object",
            "title": "Suppliers Collection Schema",
            "required": ["name", "address", "city"],
            "properties": {
                "_id": {"bsonType": "objectId"},
                "name": {"bsonType": "string"},
                "address": {"bsonType": "string"},
                "city": {"bsonType": "string"},
                "post_code": {"bsonType": "string"},
                # Point-typed location; the contents of "coordinates" are not
                # constrained further here.
                "location": {
                    "bsonType": "object",
                    "properties": {
                        "type": {"bsonType": "string", "enum": ["Point"]},
                        "coordinates": {"bsonType": "array"}
                    }
                },
                # Item structure of inventory entries is left unvalidated.
                "realtime_inventory": {
                    "bsonType": "array",
                    "description": "Real-time inventory tracking"
                }
            }
        }
    }
}

try:
    db.create_collection("suppliers", **suppliers_schema)
    print("Suppliers collection schema created successfully")
    print("Features: Geospatial location, embedded inventory tracking")
except pymongo.errors.CollectionInvalid:
    # The collection already exists. Drop it (discarding its documents) and
    # recreate it so the validator above is guaranteed to be in effect.
    db.suppliers.drop()
    db.create_collection("suppliers", **suppliers_schema)
    print("Suppliers collection schema recreated successfully")
except pymongo.errors.PyMongoError as e:
    # Any other driver/server failure used to be swallowed silently.
    print(f"Suppliers collection schema creation failed: {e}")

# 4. PARTNERS COLLECTION SCHEMA
# Unpacked into create_collection(), so "validator" becomes the collection's
# $jsonSchema validator.
print("\n🏗️  4. PARTNERS COLLECTION SCHEMA")
partners_schema = {
    "validator": {
        "$jsonSchema": {
            "bsonType": "object",
            "title": "Partners Collection Schema",
            "required": ["name", "age", "phone", "email"],
            "properties": {
                "_id": {"bsonType": "objectId"},
                "name": {"bsonType": "string"},
                # Partner age is restricted to 18-70.
                "age": {"bsonType": "int", "minimum": 18, "maximum": 70},
                "gender": {"bsonType": "string", "enum": ["Male", "Female", "Other"]},
                "phone": {"bsonType": "string"},
                "email": {"bsonType": "string"},
                # Embedded sub-documents; their inner fields are not validated.
                "bank_account": {
                    "bsonType": "object",
                    "description": "Embedded bank account details"
                },
                "availability": {
                    "bsonType": "object",
                    "description": "Real-time availability status"
                },
                # Delivery statistics must all be non-negative.
                "deliveries_made": {"bsonType": "int", "minimum": 0},
                "avg_per_week": {"bsonType": "number", "minimum": 0},
                "best_week": {"bsonType": "number", "minimum": 0},
                "number_of_week": {"bsonType": "int", "minimum": 0}
            }
        }
    }
}

try:
    db.create_collection("partners", **partners_schema)
    print("Partners collection schema created successfully")
    print("Features: Age validation, embedded bank details, delivery statistics")
except pymongo.errors.CollectionInvalid:
    # The collection already exists. Drop it (discarding its documents) and
    # recreate it so the validator above is guaranteed to be in effect.
    db.partners.drop()
    db.create_collection("partners", **partners_schema)
    print("Partners collection schema recreated successfully")
except pymongo.errors.PyMongoError as e:
    # Any other driver/server failure used to be swallowed silently.
    print(f"Partners collection schema creation failed: {e}")

# 5-8. REMAINING COLLECTIONS (pastOrders, ratings, dailyInventoryRecord, partnerHistory)
# These are created without a validator; each entry is (collection name, purpose).
remaining_collections = [
    ("pastOrders", "Historical order records with customer references"),
    ("ratings", "Product ratings and customer feedback"),
    ("dailyInventoryRecord", "Daily inventory tracking for analytics"),
    ("partnerHistory", "Partner delivery history and performance")
]

# Numbering starts at 5 to continue after the four validated collections.
for i, (collection_name, description) in enumerate(remaining_collections, 5):
    print(f"\n  {i}. {collection_name.upper()} COLLECTION SCHEMA")
    try:
        db.create_collection(collection_name)
    except pymongo.errors.CollectionInvalid:
        # Unlike the validated collections, an existing one is kept as-is.
        print(f"{collection_name} collection already exists")
    except pymongo.errors.PyMongoError as e:
        # Any other driver/server failure used to be swallowed silently.
        print(f"{collection_name} collection creation failed: {e}")
    else:
        print(f"{collection_name} collection schema created successfully")
        print(f"    Purpose: {description}")

# =============================================================================
# SCHEMA EXAMPLES WITH SAMPLE DOCUMENTS
# =============================================================================

for banner_line in ("\n" + "=" * 70,
                    "NOSQL SCHEMA EXAMPLES - SAMPLE DOCUMENTS",
                    "=" * 70):
    print(banner_line)

# Example 1: Customer Document
# Illustrative only: the document is printed, not inserted into the database.
print("\n📄 EXAMPLE 1: CUSTOMER DOCUMENT STRUCTURE")

# Scalar profile fields first ...
customer_example = {
    "_id": ObjectId(),
    "name": "John Smith",
    "gender": "Male",
    "age": 35,
    "phone_number": "+441234567890",
}

# ... then one embedded address carrying a Point location ...
customer_example["addresses"] = [
    {
        "_id": ObjectId(),
        "house": "123",
        "street": "Main Street",
        "city": "London",
        "post_code": "SW1A 1AA",
        "location": {"type": "Point", "coordinates": [-0.1276, 51.5074]},
    }
]

# ... and one embedded in-flight order with its references and recommendations.
customer_example["current_orders"] = [
    {
        "_id": ObjectId(),
        "date": datetime.now(timezone.utc),
        "order_status": "Paid",
        "order_details": {
            "total_cost": 150.50,
            "partner_id": ObjectId(),
            "shipping_id": ObjectId(),
            "supplier_id": ObjectId(),
        },
        "recommended_products": [
            {"product_id": ObjectId(), "avg_rating": 4.5},
        ],
    }
]

# default=str renders the ObjectId and datetime values as plain strings.
customer_json = json.dumps(customer_example, default=str, indent=2)
print(customer_json)

# Example 2: Product Document
# Illustrative only: the document is printed, not inserted into the database.
print("\n📄 EXAMPLE 2: PRODUCT DOCUMENT STRUCTURE")

# Keyword arguments keep the same key order as a literal would.
product_example = dict(
    _id=ObjectId(),
    product_name="iPhone 14 Pro",
    short_desc="Latest Apple iPhone with advanced camera system",
    dimensions=dict(length=15.7, width=7.65, height=0.79),
    quantity_per_unit="1 unit",
    avg_ratings=4.6,
    std_price=999.99,
    supp_price=750.00,
    category="Phones",
    # Category-specific details live in a sub-document named after the category.
    phones=dict(
        brand="Apple",
        model="iPhone 14 Pro",
        colour="Deep Purple",
        features="A16 Bionic chip, Pro camera system, Dynamic Island",
    ),
)

# default=str renders the ObjectId value as a plain string.
product_json = json.dumps(product_example, default=str, indent=2)
print(product_json)

# Example 3: Supplier Document
# Illustrative only: the document is printed, not inserted into the database.
print("\n📄 EXAMPLE 3: SUPPLIER DOCUMENT STRUCTURE")

# Identity and postal address.
supplier_example = {
    "_id": ObjectId(),
    "name": "FreshFarms Ltd",
    "address": "123 Supply Road",
    "city": "Manchester",
    "post_code": "M1 1AA",
}

# Point location of the supplier.
supplier_example["location"] = {
    "type": "Point",
    "coordinates": [-2.2426, 53.4808],
}

# One timestamped stock reading for a single product.
supplier_example["realtime_inventory"] = [
    {
        "product_id": ObjectId(),
        "timestamp": datetime.now(timezone.utc),
        "quantity": 75,
    }
]

# Extra contact details embedded alongside the other fields.
supplier_example["contact_info"] = {
    "phone": "+4498765432",
    "email": "contact@freshfarms.co.uk",
}

# default=str renders the ObjectId and datetime values as plain strings.
supplier_json = json.dumps(supplier_example, default=str, indent=2)
print(supplier_json)

# Example 4: Partner Document
# Illustrative only: the document is printed, not inserted into the database.
print("\n📄 EXAMPLE 4: PARTNER DOCUMENT STRUCTURE")

# Personal and contact details.
partner_example = {
    "_id": ObjectId(),
    "name": "Michael Johnson",
    "age": 28,
    "gender": "Male",
    "phone": "+447654321",
    "email": "michael.j@deliverypartners.com",
}

# Embedded payout account.
partner_example["bank_account"] = {
    "account_name": "Michael Johnson",
    "account_number": "12345678",
    "sort_code": "123456",
}

# Embedded availability flags plus the partner's Point location.
partner_example["availability"] = {
    "is_active": 1,
    "on_delivery": 0,
    "location": {"type": "Point", "coordinates": [-0.1275, 51.5072]},
}

# Delivery statistics.
partner_example.update(
    deliveries_made=245,
    avg_per_week=12,
    best_week=20,
    number_of_week=8,
)

# default=str renders the ObjectId value as a plain string.
partner_json = json.dumps(partner_example, default=str, indent=2)
print(partner_json)

# =============================================================================
# SCHEMA VALIDATION DEMONSTRATION
# =============================================================================

print("\n" + "="*70)
print("SCHEMA VALIDATION DEMONSTRATION")
print("="*70)

# Test schema validation by inserting sample documents
print("\n🧪 Testing Schema Validation:")

# Server error code MongoDB returns when a document fails the validator.
DOCUMENT_VALIDATION_FAILURE = 121

# Test 1: Valid customer insertion
try:
    result = db.customers.insert_one({
        "name": "Alice Johnson",
        "gender": "Female",
        "age": 29,
        "phone_number": "+447890123456"
    })
    print(f"Valid customer document inserted: {result.inserted_id}")
except pymongo.errors.PyMongoError as e:
    print(f"Customer insertion failed: {e}")

# Test 2: Invalid customer insertion (age violation)
try:
    result = db.customers.insert_one({
        "name": "Young Person",
        "gender": "Male",
        "age": 15,  # Below minimum age
        "phone_number": "+447890123456"
    })
    print(f"This should not succeed: {result.inserted_id}")
except pymongo.errors.WriteError as e:
    # Only a genuine validator rejection proves the schema is enforced; any
    # other write error must not be reported as a passing test.
    if e.code == DOCUMENT_VALIDATION_FAILURE:
        print(f"Schema validation working - rejected invalid age: {str(e)[:100]}...")
    else:
        print(f"Invalid customer insertion failed for an unexpected reason: {e}")
except pymongo.errors.PyMongoError as e:
    # Network/auth/etc. failures say nothing about schema validation.
    print(f"Invalid customer insertion could not be attempted: {e}")

# Test 3: Valid product insertion
try:
    result = db.products.insert_one({
        "product_name": "Organic Bananas",
        "std_price": 2.99,
        "category": "Fresh"
    })
    print(f"Valid product document inserted: {result.inserted_id}")
except pymongo.errors.PyMongoError as e:
    print(f"Product insertion failed: {e}")

# =============================================================================
# SCHEMA INFORMATION SUMMARY
# =============================================================================

print("\n" + "="*70)
print("NOSQL SCHEMA DESIGN SUMMARY")
print("="*70)

# Display all collections and their document counts
print("\n Database Collections Overview:")
try:
    collections = db.list_collection_names()
    total_collections = len(collections)

    for i, collection in enumerate(collections, 1):
        doc_count = db[collection].count_documents({})
        print(f"   {i}. {collection}: {doc_count} documents")

    print(f"\n Total Collections Created: {total_collections}")

    # Schema validation status
    print("\n🔒 Schema Validation Status:")
    validated_collections = ['customers', 'products', 'suppliers', 'partners']
    existing_collections = set(collections)

    for collection_name in validated_collections:
        # A missing collection used to produce no output at all.
        if collection_name not in existing_collections:
            print(f"     {collection_name}: Collection not found")
            continue

        try:
            # options() returns the creation options, including any validator.
            options = db[collection_name].options()
        except pymongo.errors.PyMongoError as e:
            print(f"    {collection_name}: Error checking validation: {e}")
            continue

        if 'validator' in options:
            print(f"    {collection_name}: Schema validation enabled")
        else:
            print(f"     {collection_name}: No schema validation")

except pymongo.errors.PyMongoError as e:
    print(f" Error retrieving collection information: {e}")

# Final report: only claim success when all eight collections actually exist,
# and always release the connection, even if the check itself fails.
expected_collections = {
    "customers", "products", "suppliers", "partners",
    "pastOrders", "ratings", "dailyInventoryRecord", "partnerHistory",
}

try:
    missing_collections = sorted(
        expected_collections - set(db.list_collection_names())
    )

    print("\n" + "="*70)
    if missing_collections:
        print(" NOSQL SCHEMA DESIGN INCOMPLETE")
        print(f" Missing collections: {', '.join(missing_collections)}")
    else:
        print(" NOSQL SCHEMA DESIGN COMPLETED SUCCESSFULLY!")
        print(" All 8 collections created with appropriate schemas")
        print(" Schema validation implemented for core collections")
        print(" Sample documents demonstrate proper structure")
        print(" Business requirements addressed through flexible design")
    print("="*70)
finally:
    # Close database connection
    client.close()
    print("\n🔒 Database connection closed.")

=== Section 3.2: NoSQL Schema Design for Company ===
Connecting to MongoDB Atlas...
Successfully connected to MongoDB Atlas
Working with database: CompanyDB

DESIGNING NOSQL SCHEMA FOR COMPANY - 8 COLLECTIONS

🏗️  1. CUSTOMERS COLLECTION SCHEMA
Customers collection schema recreated successfully

🏗️  2. PRODUCTS COLLECTION SCHEMA
Products collection schema recreated successfully

🏗️  3. SUPPLIERS COLLECTION SCHEMA
Suppliers collection schema recreated successfully

🏗️  4. PARTNERS COLLECTION SCHEMA
Partners collection schema recreated successfully

  5. PASTORDERS COLLECTION SCHEMA
pastOrders collection already exists

  6. RATINGS COLLECTION SCHEMA
ratings collection already exists

  7. DAILYINVENTORYRECORD COLLECTION SCHEMA
dailyInventoryRecord collection already exists

  8. PARTNERHISTORY COLLECTION SCHEMA
partnerHistory collection already exists

NOSQL SCHEMA EXAMPLES - SAMPLE DOCUMENTS

📄 EXAMPLE 1: CUSTOMER DOCUMENT STRUCTURE
{
  "_id": "682f094f5d38dfbc46349947",
  "name": "Joh