In [1]:
import os
import glob
import logging
import pandas as pd
from pymongo import MongoClient
from dotenv import load_dotenv

In [2]:
# --------------------------
# 1. Setup Logging
# --------------------------
# Configure the root logger: INFO and above, timestamped messages.
#
# force=True removes any handlers already attached to the root logger before
# applying this configuration. Without it, basicConfig() is a silent no-op
# whenever the root logger already has a handler -- e.g. when this cell is
# re-run after tweaking the format, or when an imported library configured
# logging first -- and the level/format below would never take effect.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    force=True,
)

In [3]:
# --------------------------
# 2. Load environment variables
# --------------------------
# load_dotenv() runs first so that values it provides (typically from a local
# .env file) are visible to the lookups below.
load_dotenv()

# MONGO_URI and DB_NAME have no default; DATA_PATH falls back to "data".
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
DATA_PATH = os.getenv("DATA_PATH", "data")

# Stop right away when either required setting is unset or an empty string.
if not (MONGO_URI and DB_NAME):
    raise ValueError("❌ Missing required environment variables: MONGO_URI or DB_NAME")

In [6]:
# --------------------------
# 3. Connect to MongoDB with context manager
# --------------------------
# The client is closed automatically when the `with` block exits.
with MongoClient(MONGO_URI) as client:
    # MongoClient connects lazily, so constructing it says nothing about
    # whether the server is reachable. Ping once up front: a bad URI or an
    # unreachable server then fails here, instead of every file below waiting
    # out server selection and being logged as a per-file failure.
    client.admin.command("ping")

    db = client[DB_NAME]
    logging.info(f"Connected to MongoDB Database: {DB_NAME}")

    # --------------------------
    # 4. Read all CSV files from data folder
    # --------------------------
    # The search is recursive (DATA_PATH and all sub-folders). sorted() makes
    # the processing order deterministic; glob itself guarantees no order.
    csv_files = sorted(
        glob.glob(os.path.join(DATA_PATH, "**", "*.csv"), recursive=True)
    )
    logging.info(f"Found {len(csv_files)} CSV files in {DATA_PATH}")

    # --------------------------
    # 5. Upload each CSV as a collection
    # --------------------------
    # Collection names come from the file's base name only, so two CSVs with
    # the same name in different sub-folders map to the same collection.
    # Remember which file claimed each name so a later namesake cannot wipe
    # the data that was just uploaded.
    claimed_collections = {}

    for file in csv_files:
        # "<dir>/customer_data.csv" -> "customer_data"
        collection_name = os.path.splitext(os.path.basename(file))[0]

        if collection_name in claimed_collections:
            logging.error(
                f"❌ Skipped {file}: collection name {collection_name!r} "
                f"is already used by {claimed_collections[collection_name]}"
            )
            continue
        claimed_collections[collection_name] = file

        logging.info(f"Processing {file} -> Collection: {collection_name}")

        try:
            df = pd.read_csv(file)
            if df.empty:
                # Header-only file: nothing to insert, leave the collection as is.
                logging.warning(f"Skipped empty file: {file}")
                continue

            # One dict per row, keyed by column name.
            records = df.to_dict(orient="records")

            # Replace the collection's contents with the file's rows.
            # NOTE: delete + insert are two separate operations; if the insert
            # fails part-way the collection is left empty or partially filled.
            db[collection_name].delete_many({})
            db[collection_name].insert_many(records)

            logging.info(f"✅ Uploaded {len(records)} records to collection: {collection_name}")

        except pd.errors.EmptyDataError:
            # A zero-byte file makes read_csv raise instead of returning an
            # empty frame; treat it like any other empty file.
            logging.warning(f"Skipped empty file: {file}")

        except Exception as e:
            # Best effort per file: report (with traceback) and move on to the
            # next file rather than aborting the whole upload.
            logging.exception(f"❌ Failed to process {file}: {e}")

2025-09-15 00:46:06,465 - INFO - Connected to MongoDB Database: fraud_detection
2025-09-15 00:46:06,465 - INFO - Found 10 CSV files in D:/fraud_detection/data
2025-09-15 00:46:06,465 - INFO - Processing D:/fraud_detection/data\Customer Profiles\account_activity.csv -> Collection: account_activity
2025-09-15 00:46:07,050 - INFO - ✅ Uploaded 1000 records to collection: account_activity
2025-09-15 00:46:07,050 - INFO - Processing D:/fraud_detection/data\Customer Profiles\customer_data.csv -> Collection: customer_data
2025-09-15 00:46:07,204 - INFO - ✅ Uploaded 1000 records to collection: customer_data
2025-09-15 00:46:07,204 - INFO - Processing D:/fraud_detection/data\Fraudulent Patterns\fraud_indicators.csv -> Collection: fraud_indicators
2025-09-15 00:46:07,313 - INFO - ✅ Uploaded 1000 records to collection: fraud_indicators
2025-09-15 00:46:07,313 - INFO - Processing D:/fraud_detection/data\Fraudulent Patterns\suspicious_activity.csv -> Collection: suspicious_activity
2025-09-15 00:46:

In [None]:
# Open a separate client for this read; it is closed when the block exits.
with MongoClient(MONGO_URI) as client:
    db = client[DB_NAME]

    try:
        # Pull every document of the collection into memory, then tabulate.
        documents = list(db["transaction_records"].find())
        transaction_records = pd.DataFrame(documents)

        logging.info("Sample data from transaction_records collection:")
        # Render only the first rows rather than the whole frame.
        display(transaction_records.head())

    except Exception as e:
        # Any failure above is logged instead of raised.
        logging.error(f"❌ Failed to fetch data: {e}")

2025-09-15 00:50:14,505 - INFO - Sample data from transaction_records collection:


Unnamed: 0,_id,TransactionID,Amount,CustomerID
0,68c71478eeeb4d385a4cfc4d,1,55.530334,1952
1,68c71478eeeb4d385a4cfc4e,2,12.88118,1027
2,68c71478eeeb4d385a4cfc4f,3,50.176322,1955
3,68c71478eeeb4d385a4cfc50,4,41.634001,1796
4,68c71478eeeb4d385a4cfc51,5,78.122853,1946
