## 📝 1. Synthetic Log Simulation & NoSQL Ingestion
This notebook generates synthetic enterprise logs (sales, user activity, and inventory) and uploads them to MongoDB Atlas for downstream NoSQL ETL processing.

### 🔧 Setup & Imports

In [None]:
!pip install faker pymongo dnspython --quiet

import json
import random
import datetime
from faker import Faker
from pymongo import MongoClient

# Seed both random sources so the generated data is repeatable between runs.
# Faker draws from its own random source, so seeding the stdlib `random`
# module alone leaves values produced through `fake` (e.g. the UUIDs)
# different on every run.
fake = Faker()
Faker.seed(42)
random.seed(42)


### 🛒 Step 1: Generate Synthetic Sales Logs

In [None]:
def generate_sales_logs(n=10):
    """Build a list of ``n`` synthetic sales records.

    Every record is a dict holding an ISO-formatted ``timestamp`` taken from
    ``fake.date_time_this_year()``, ``customer_id`` and ``product_id`` values
    from ``fake.uuid4()``, a randomly chosen ``region`` and ``payment_type``,
    a ``price`` drawn from 5.0-200.0 and rounded to two decimals, and an
    integer ``quantity`` between 1 and 10 inclusive.
    """
    regions = ["North", "South", "East", "West"]
    payment_types = ["card", "cash", "upi"]
    return [
        {
            "timestamp": fake.date_time_this_year().isoformat(),
            "customer_id": fake.uuid4(),
            "product_id": fake.uuid4(),
            "region": random.choice(regions),
            "price": round(random.uniform(5.0, 200.0), 2),
            "quantity": random.randint(1, 10),
            "payment_type": random.choice(payment_types),
        }
        for _ in range(n)
    ]

# Generate ten sales records, then pretty-print the first two as a sample.
sales_logs = generate_sales_logs(n=10)
print(json.dumps(sales_logs[0:2], indent=2))

### 📱 Step 2: Generate Synthetic User Activity Logs

In [None]:
def generate_user_activity_logs(n=10):
    """Build a list of ``n`` synthetic user-activity records.

    Every record is a dict holding an ISO-formatted ``timestamp`` taken from
    ``fake.date_time_this_year()``, a ``user_id`` from ``fake.uuid4()``, a
    randomly chosen ``action``, ``device`` and ``browser``, and a
    ``session_duration`` (in minutes) drawn from 0.5-5.0 and rounded to two
    decimals.
    """
    actions = ["click", "scroll", "view", "purchase"]
    devices = ["mobile", "desktop", "tablet"]
    browsers = ["chrome", "firefox", "safari", "edge"]
    return [
        {
            "timestamp": fake.date_time_this_year().isoformat(),
            "user_id": fake.uuid4(),
            "action": random.choice(actions),
            "device": random.choice(devices),
            "browser": random.choice(browsers),
            "session_duration": round(random.uniform(0.5, 5.0), 2),
        }
        for _ in range(n)
    ]

# Generate ten user-activity records, then pretty-print the first two as a sample.
activity_logs = generate_user_activity_logs(n=10)
print(json.dumps(activity_logs[0:2], indent=2))

### 🏬 Step 3: Generate Synthetic Inventory Logs

In [None]:
def generate_inventory_logs(n=10):
    """Build a list of ``n`` synthetic inventory-event records.

    Every record is a dict holding an ISO-formatted ``timestamp`` taken from
    ``fake.date_time_this_year()``, ``product_id`` and ``warehouse_id``
    values from ``fake.uuid4()``, an integer ``quantity_added`` between 1 and
    50 inclusive, an integer ``quantity_removed`` between 0 and 30 inclusive,
    and a randomly chosen ``event_type``.
    """
    event_types = ["restock", "depletion", "transfer"]
    return [
        {
            "timestamp": fake.date_time_this_year().isoformat(),
            "product_id": fake.uuid4(),
            "warehouse_id": fake.uuid4(),
            "quantity_added": random.randint(1, 50),
            "quantity_removed": random.randint(0, 30),
            "event_type": random.choice(event_types),
        }
        for _ in range(n)
    ]

# Generate ten inventory records, then pretty-print the first two as a sample.
inventory_logs = generate_inventory_logs(n=10)
print(json.dumps(inventory_logs[0:2], indent=2))

### ☁️ Step 4: Connect to MongoDB Atlas

In [None]:
import os

# Prefer the MONGO_URI environment variable so real credentials never have to
# be saved inside the notebook. If it is not set, the inline value below is
# used instead; replace its <...> parts with your actual MongoDB connection
# details in that case.
_DEFAULT_MONGO_URI = "mongodb+srv://<username>:<password>@<cluster>.mongodb.net/?retryWrites=true&w=majority"
MONGO_URI = os.environ.get("MONGO_URI", _DEFAULT_MONGO_URI)

# Fail fast with an actionable message: a URI that still contains the template
# placeholders cannot point at a real cluster.
if any(token in MONGO_URI for token in ("<username>", "<password>", "<cluster>")):
    raise ValueError(
        "MongoDB connection string still contains placeholders; set the "
        "MONGO_URI environment variable or edit the inline connection string."
    )

client = MongoClient(MONGO_URI)
# All log collections in this notebook live in the "enterprise_logs" database.
db = client["enterprise_logs"]

### ☁️ Step 5: Upload Logs to Collections

In [None]:
def upload_logs_to_mongodb(logs, collection_name):
    """Insert ``logs`` into the ``collection_name`` collection of ``db``.

    Args:
        logs: Iterable of dict records to insert. The records themselves are
            left unmodified.
        collection_name: Name of the target collection in the module-level
            ``db`` database.

    Returns:
        None. A one-line summary is printed instead.
    """
    # insert_many adds an ``_id`` field to each dict it is handed. Insert
    # shallow copies so the caller's records stay JSON-serializable and
    # re-running the upload does not resend the same ``_id`` values (which
    # fails with a duplicate-key error).
    documents = [dict(log) for log in logs]
    if not documents:
        # insert_many rejects an empty batch, so report and skip instead.
        print(f"⚠️ No records to upload to '{collection_name}'; skipping.")
        return

    collection = db[collection_name]
    result = collection.insert_many(documents)
    print(f"✅ Uploaded {len(result.inserted_ids)} records to '{collection_name}'")

# Push each generated batch to its own collection, in the same order as the
# generation steps above.
for records, target_collection in (
    (sales_logs, "sales_logs"),
    (activity_logs, "user_activity_logs"),
    (inventory_logs, "inventory_events"),
):
    upload_logs_to_mongodb(records, target_collection)

### ✅ Summary
- Generated synthetic logs for Sales, User Activity, and Inventory.
- Ingested all records into MongoDB Atlas collections.
- Collections are ready for distributed PySpark ETL in the next stage.