In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set random seed for reproducibility
np.random.seed(42)

# Helper function to generate random dates
def random_dates(start, end, n):
    """Return ``n`` random timestamps drawn uniformly from ``[start, end)``.

    Sampling is done at whole-second resolution using NumPy's global random
    state, so the result is reproducible after ``np.random.seed(...)``.

    Args:
        start: Inclusive lower bound. Anything ``pd.Timestamp`` accepts
            (a ``pd.Timestamp``, a ``datetime`` or a date string).
        end: Exclusive upper bound, same accepted types as ``start``.
        n: Number of timestamps to generate.

    Returns:
        A ``pd.DatetimeIndex`` of length ``n``.

    Raises:
        ValueError: If ``start`` is not earlier than ``end`` (raised by
            ``np.random.randint`` when ``low >= high``).
    """
    # Timestamp.value is nanoseconds since the Unix epoch; reduce it to
    # whole seconds so the bounds can be sampled as plain integers.
    start_u = pd.Timestamp(start).value // 10**9
    end_u = pd.Timestamp(end).value // 10**9
    # Ask for 64-bit integers explicitly: the default integer type used by
    # np.random.randint is platform-dependent and may be 32-bit, which
    # cannot represent epoch seconds beyond January 2038.
    seconds = np.random.randint(start_u, end_u, n, dtype=np.int64)
    return pd.to_datetime(seconds, unit='s')

# Row counts for the generated tables; each output file gets at least 500 rows.
# Transactions outnumber customers so that some customers buy more than once.
num_transactions = 1_000
num_inventory_items = 500
num_customers = 500

# Customer table: one row per customer, ids running from 1 to num_customers.
# The random columns are drawn in the order listed (age, then income) so the
# output stays reproducible under the fixed seed.
customer_ids = range(1, num_customers + 1)
customer_data = dict(
    customer_id=customer_ids,
    name=[f"Customer_{i}" for i in customer_ids],
    # Integer ages from 18 to 69 (the upper bound of randint is exclusive).
    age=np.random.randint(18, 70, size=num_customers),
    # Normally distributed income (mean 50000, std 15000), rounded to 2 decimals.
    income=np.round(np.random.normal(50000, 15000, size=num_customers), 2),
    email=[f"customer{i}@store.com" for i in customer_ids],
)
customers_df = pd.DataFrame(customer_data)

# Inventory table: one row per product, ids running from 1 to
# num_inventory_items. Random columns are drawn in the order listed
# (category, price, stock_quantity) to keep seeded output reproducible.
product_ids = range(1, num_inventory_items + 1)
product_categories = ["Electronics", "Clothing", "Home", "Toys", "Sports"]
inventory_data = dict(
    product_id=product_ids,
    product_name=[f"Product_{i}" for i in product_ids],
    # Each product gets one of the categories, chosen uniformly at random.
    category=np.random.choice(product_categories, size=num_inventory_items),
    # Uniformly distributed price between 10 and 500, rounded to 2 decimals.
    price=np.round(np.random.uniform(10, 500, size=num_inventory_items), 2),
    # Integer stock level from 0 to 999 (the upper bound is exclusive).
    stock_quantity=np.random.randint(0, 1000, size=num_inventory_items),
)
inventory_df = pd.DataFrame(inventory_data)

# Sales table: num_transactions rows, each pairing a randomly chosen existing
# customer with a randomly chosen existing product. Random columns are drawn
# in the order listed to keep seeded output reproducible.
# NOTE(review): total_amount is drawn independently of quantity and of the
# product's price, so it will not equal price * quantity.
sales_period_start = pd.to_datetime('2022-01-01')
sales_period_end = pd.to_datetime('2023-01-01')
transaction_data = dict(
    transaction_id=range(1, num_transactions + 1),
    customer_id=np.random.choice(customers_df["customer_id"], size=num_transactions),
    product_id=np.random.choice(inventory_df["product_id"], size=num_transactions),
    # Integer quantity from 1 to 9 (the upper bound is exclusive).
    quantity=np.random.randint(1, 10, size=num_transactions),
    # Uniformly distributed amount between 20 and 2000, rounded to 2 decimals.
    total_amount=np.round(np.random.uniform(20, 2000, size=num_transactions), 2),
    transaction_date=random_dates(sales_period_start, sales_period_end, num_transactions),
)
sales_df = pd.DataFrame(transaction_data)

# Write each table to its own CSV file (relative path), omitting the index column.
for frame, filename in (
    (customers_df, "customers.csv"),
    (inventory_df, "inventory.csv"),
    (sales_df, "sales_transactions.csv"),
):
    frame.to_csv(filename, index=False)

# Preview the first rows of each in-memory table. This prints the DataFrames
# themselves; it does not read the CSV files back from disk.
for heading, frame in (
    ("Sample Customer Data:\n", customers_df),
    ("\nSample Inventory Data:\n", inventory_df),
    ("\nSample Sales Transactions Data:\n", sales_df),
):
    print(heading, frame.head())


Sample Customer Data:
    customer_id        name  age    income                email
0            1  Customer_1   56  81832.34  customer1@store.com
1            2  Customer_2   69  65486.98  customer2@store.com
2            3  Customer_3   46  27209.45  customer3@store.com
3            4  Customer_4   32  42736.49  customer4@store.com
4            5  Customer_5   60  69003.67  customer5@store.com

Sample Inventory Data:
    product_id product_name     category   price  stock_quantity
0           1    Product_1  Electronics  398.55             660
1           2    Product_2         Home  330.95             637
2           3    Product_3         Home  496.15             903
3           4    Product_4     Clothing  149.09             301
4           5    Product_5       Sports  185.98             359

Sample Sales Transactions Data:
    transaction_id  customer_id  product_id  quantity  total_amount  \
0               1          107         417         9       1307.84   
1               