In [None]:
# MongoDB `$jsonSchema` validator document for the imported records.
# Every document must be an object carrying all five listed fields, and each
# of those fields must be a BSON string. The `description` entries are
# human-readable annotations of what each field holds.
{
  "$jsonSchema": {
    "bsonType": "object",
    "required": ["entity_name", "category", "regulator", "fip_stage", "fiu_stage"],
    "properties": {
      "entity_name": {
        "bsonType": "string",
        "description": "Name of the entity"
      },
      "category": {
        "bsonType": "string",
        "description": "License type held by the entity"
      },
      "regulator": {
        "bsonType": "string",
        "description": "Regulatory authority of the entity"
      },
      "fip_stage": {
        "bsonType": "string",
        "description": "FIP implementation stage"
      },
      "fiu_stage": {
        "bsonType": "string",
        "description": "FIU implementation stage"
      }
    }
  }
}


In [5]:
import pandas as pd
from pymongo import MongoClient
from pymongo.errors import CollectionInvalid, BulkWriteError

# Load CSV data.
# dtype=str keeps every cell as text: the collection validator below accepts
# only string values, and pandas would otherwise infer int/float dtypes for
# numeric-looking columns, which fillna('') does not convert back to strings.
csv_path = 'Sahamati.csv'
df = pd.read_csv(csv_path, dtype=str)

# Rename columns (by position) to match MongoDB keys.
# Check the column count first so a malformed CSV fails with a clear message
# instead of pandas' generic length-mismatch error.
expected_columns = ['entity_name', 'category', 'regulator', 'fip_stage', 'fiu_stage']
if len(df.columns) != len(expected_columns):
    raise ValueError(
        f"Expected {len(expected_columns)} columns in {csv_path}, "
        f"found {len(df.columns)}: {list(df.columns)}"
    )
df.columns = expected_columns

# Replace NaN with empty strings to comply with MongoDB string schema
df = df.fillna('')

# MongoDB connection
client = MongoClient("mongodb://localhost:27017/")
db = client["sahamati"]
collection_name = "fip_fiu_status"

# Define JSON schema for validation: all five fields required, all strings
schema = {
    "$jsonSchema": {
        "bsonType": "object",
        "required": ["entity_name", "category", "regulator", "fip_stage", "fiu_stage"],
        "properties": {
            "entity_name": {"bsonType": "string"},
            "category": {"bsonType": "string"},
            "regulator": {"bsonType": "string"},
            "fip_stage": {"bsonType": "string"},
            "fiu_stage": {"bsonType": "string"}
        }
    }
}

# Drop collection if it exists (for re-import purposes)
if collection_name in db.list_collection_names():
    db.drop_collection(collection_name)

# Create collection with schema validator
try:
    db.create_collection(collection_name, validator=schema)
except CollectionInvalid:
    # Only reachable if the collection reappeared between the drop and the create.
    print("Collection already exists.")

collection = db[collection_name]

# Convert DataFrame to list of dicts (one dict per CSV row)
data = df.to_dict(orient="records")

# Insert data into MongoDB with error handling
if not data:
    # insert_many rejects an empty document list, so skip the write entirely
    # when the CSV contains a header but no rows.
    print("No rows found in CSV; nothing to import.")
else:
    try:
        collection.insert_many(data)
        print("Data imported successfully into MongoDB.")
    except BulkWriteError as bwe:
        # Raised for write failures, including documents rejected by the validator.
        print("Bulk write error occurred:")
        print(bwe.details)


Data imported successfully into MongoDB.


In [6]:
# (1) Total number of records -- an empty filter matches every document
total_records = collection.count_documents({})
print(f"[Test 1] Total number of records: {total_records}")

# (2) Count of entities that are 'Live' in FIP Implementation Stage
# The pattern is anchored at both ends and matched case-insensitively.
live_fip_filter = {"fip_stage": {"$regex": "^Live$", "$options": "i"}}
live_fip_count = collection.count_documents(live_fip_filter)
print(f"[Test 2] Entities with 'Live' FIP Implementation Stage: {live_fip_count}")

# (3) Count of entities for each Regulator
print("[Test 3] Entity count by Regulator:")
# Stage 1: bucket documents by regulator and count each bucket.
group_by_regulator = {"$group": {"_id": "$regulator", "count": {"$sum": 1}}}
# Stage 2 (optional): largest buckets first.
sort_by_count_desc = {"$sort": {"count": -1}}
pipeline = [group_by_regulator, sort_by_count_desc]
results = collection.aggregate(pipeline)
for entry in results:
    print(f" - Regulator: {entry['_id']}, Count: {entry['count']}")

[Test 1] Total number of records: 815
[Test 2] Entities with 'Live' FIP Implementation Stage: 178
[Test 3] Entity count by Regulator:
 - Regulator: RBI, Count: 478
 - Regulator: SEBI, Count: 264
 - Regulator: IRDAI, Count: 69
 - Regulator: PFRDA, Count: 3
 - Regulator: DoR, Count: 1
