# In [3]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Set random seeds for reproducibility.
# Record generation draws from both the standard-library `random` module and
# NumPy's global generator. The two keep independent state, so seeding only
# one of them still produces different data on every run.
random.seed(42)
np.random.seed(42)

# Define parameters
n_records = 300  # number of expense claims to generate
employees = [f"E{str(i).zfill(4)}" for i in range(1, 51)]  # E0001 .. E0050
categories = ["Meals", "Travel", "Lodging", "Supplies", "Training"]
approval_managers = [f"M{str(i).zfill(3)}" for i in range(1, 11)]  # M001 .. M010

# Helper functions
def random_date(start, end):
    """Return ``start`` shifted forward by a random whole number of days.

    The offset is drawn with ``random.randint`` from 0 to
    ``(end - start).days``, both bounds included, so the result never falls
    before ``start`` or after ``end``. The draw uses the standard-library
    ``random`` module's shared generator.
    """
    span_days = (end - start).days
    offset_days = random.randint(0, span_days)
    return start + timedelta(days=offset_days)

# Generate data
# Window that expense dates are drawn from (passed to random_date).
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 5, 1)

# (mean, standard deviation) of the normal distribution that claim amounts are
# drawn from, per category. Storing the parameters rather than pre-drawn
# samples means each record draws exactly one value, instead of sampling all
# five categories and discarding four.
amount_params = {
    "Meals": (60, 30),
    "Travel": (300, 150),
    "Lodging": (500, 200),
    "Supplies": (100, 40),
    "Training": (200, 100),
}

data = []
for i in range(n_records):
    employee_id = random.choice(employees)
    expense_date = random_date(start_date, end_date)
    category = random.choice(categories)

    # Draw the amount for this record's category only, floored at 10 so the
    # lower tail of the normal distribution cannot yield tiny or negative
    # claims.
    mean, std_dev = amount_params[category]
    amount = round(max(10, np.random.normal(mean, std_dev)), 2)

    # 85% of claims carry a receipt.
    receipt_uploaded = np.random.choice(["Yes", "No"], p=[0.85, 0.15])

    # Determine approval details: 90% of claims are approved, 0-14 days after
    # the expense date, by a randomly chosen manager. Unapproved claims leave
    # both approval fields empty.
    approved = np.random.choice([True, False], p=[0.9, 0.1])
    if approved:
        approval_date = expense_date + timedelta(days=random.randint(0, 14))
        approved_by = random.choice(approval_managers)
    else:
        approval_date = None
        approved_by = None

    # Claim IDs are sequential and 1-based: C00001, C00002, ...
    claim_id = f"C{str(i+1).zfill(5)}"

    data.append({
        "claim_id": claim_id,
        "employee_id": employee_id,
        "expense_date": expense_date.date(),
        "category": category,
        "amount": amount,
        "receipt_uploaded": receipt_uploaded,
        "approved_by": approved_by,
        "approval_date": approval_date.date() if approval_date is not None else None,
    })

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV (index=False keeps the DataFrame's row index out of the file)
csv_path = "expense_claims.csv"
df.to_csv(csv_path, index=False)