In [52]:
import pandas as pd
import random
from faker import Faker
from datetime import datetime, timedelta

In [53]:
# Create the Faker instance and seed both random sources (Faker's shared
# generator and the stdlib `random` module) so that re-running the notebook
# from a fresh kernel repeats the same random draws.
# NOTE: date columns are computed relative to the current day, so they still
# shift from one day to the next.
SEED = 42
fake = Faker()
Faker.seed(SEED)
random.seed(SEED)

In [54]:
# Generate fake customers data
def generate_customers(num_records: int) -> pd.DataFrame:
    """Build a DataFrame of synthetic customer records.

    Args:
        num_records: Number of customer rows to generate.

    Returns:
        DataFrame with the columns CustomerID, FirstName, Address, Email,
        PhoneNumber, Job and Company. CustomerID values are unique,
        zero-padded digit strings: three characters wide, wider only when
        more than 1000 rows are requested.

    Raises:
        ValueError: If num_records is negative.
    """
    # Sample without replacement: independent 3-digit draws collide heavily
    # (only 1000 possible values), which breaks an identifier column.
    id_width = max(3, len(str(num_records - 1)))
    customer_ids = [
        str(value).zfill(id_width)
        for value in random.sample(range(10 ** id_width), num_records)
    ]
    return pd.DataFrame({
        'CustomerID': customer_ids,
        # first_name() rather than name(): name() yields a full name, which
        # does not match the FirstName column.
        'FirstName': [fake.first_name() for _ in range(num_records)],
        # Faker addresses are multi-line; flatten them to a single CSV-friendly line.
        'Address': [fake.address().replace('\n', ', ') for _ in range(num_records)],
        'Email': [fake.email() for _ in range(num_records)],
        'PhoneNumber': [fake.phone_number() for _ in range(num_records)],
        'Job': [fake.job() for _ in range(num_records)],
        'Company': [fake.company() for _ in range(num_records)]
    })

In [55]:
# Generate fake products data
def generate_products(num_records: int, supplier_ids=None) -> pd.DataFrame:
    """Build a DataFrame of synthetic product records.

    Each row gets one category; its CategoryID and ProductName (a brand) are
    both derived from that same category, so the three columns agree.

    Args:
        num_records: Number of product rows to generate.
        supplier_ids: Optional collection of existing supplier IDs to draw the
            SupplierID column from. When omitted, random 3-digit IDs are used.
            Must be non-empty if given and num_records > 0.

    Returns:
        DataFrame with the columns ProductID, SupplierID, CategoryID, Category,
        Description, Price, StockQuantity and ProductName. ProductID values
        are unique zero-padded digit strings.

    Raises:
        ValueError: If num_records is negative.
        IndexError: If supplier_ids is empty while num_records > 0.
    """
    product_brands = {
        'CPVC Pipes': ['Ashirvad Pipes', 'Astral Pipes', 'Finolex Industries', 'Prince Pipes', 'Supreme Pipes'],
        'UPVC Pipes': ['Apollo Pipes', 'Ashirvad Pipes', 'Finolex Industries', 'Prince Pipes', 'Supreme Pipes'],
        'SWR Pipes': ['Ashirvad Pipes', 'Astral Pipes', 'Finolex Industries', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'Underground Pipes': ['Astral Pipes', 'Jain Irrigation Systems Ltd.', 'Finolex Industries', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'Ball Valves': ['Ashirvad Pipes', 'Astral Pipes', 'Finolex Industries', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'Riser Pipes': ['Cera Sanitaryware Ltd.', 'Jaquar Group', 'Kohler India Corporation Pvt. Ltd.'],
        'HDPE Pipes': ['Astral Pipes', 'Finolex Industries', 'Jain Irrigation Systems Ltd.', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'Casing Pipes': ['Ashirvad Pipes', 'Jain Irrigation Systems Ltd.', 'Finolex Industries', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'Pressure Pipes': ['Ashirvad Pipes', 'Jain Irrigation Systems Ltd.', 'Finolex Industries', 'Supreme Industries Ltd.', 'Prince Pipes'],
        'Suction Pipes': ['Ashirvad Pipes', 'Jain Irrigation Systems Ltd.', 'Finolex Industries', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'Duct Pipes': ['Ashirvad Pipes', 'Jain Irrigation Systems Ltd.', 'Finolex Industries', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'Steel Wire Reinforced Pipes': ['Ashirvad Pipes', 'Jain Irrigation Systems Ltd.', 'Finolex Industries', 'Prince Pipes', 'Supreme Industries Ltd.'],
        'PTMP Taps and Fittings Pipes': ['Cera Sanitaryware Ltd.', 'Kohler India Corporation Pvt. Ltd.', 'Hindware', 'Jaquar Group', 'Parryware'],
        'Flushing Cistern': ['Cera Sanitaryware Ltd.', 'Kohler India Corporation Pvt. Ltd.', 'Hindware', 'Jaquar Group', 'Parryware'],
        'Bathroom Accessories Pipes': ['Cera Sanitaryware Ltd.', 'Kohler India Corporation Pvt. Ltd.', 'Hindware', 'Jaquar Group', 'Parryware']
    }
    categories = list(product_brands)

    # One stable ID per category ('001', '002', ...) so that the same
    # Category always carries the same CategoryID.
    category_ids = {
        category: str(position).zfill(3)
        for position, category in enumerate(categories, start=1)
    }

    # Draw the category ONCE per row and reuse it for Category, CategoryID and
    # ProductName; drawing it separately per column pairs brands with
    # categories they do not belong to.
    row_categories = random.choices(categories, k=num_records)

    # Sample without replacement so ProductID never repeats.
    id_width = max(3, len(str(num_records - 1)))
    product_ids = [
        str(value).zfill(id_width)
        for value in random.sample(range(10 ** id_width), num_records)
    ]

    if supplier_ids is None:
        supplier_column = [str(random.randrange(1000)).zfill(3) for _ in range(num_records)]
    else:
        supplier_column = random.choices(list(supplier_ids), k=num_records)

    return pd.DataFrame({
        'ProductID': product_ids,
        'SupplierID': supplier_column,
        'CategoryID': [category_ids[category] for category in row_categories],
        'Category': row_categories,
        'Description': [fake.sentence() for _ in range(num_records)],
        'Price': [round(random.uniform(10, 1000), 2) for _ in range(num_records)],
        'StockQuantity': [random.randint(0, 1000) for _ in range(num_records)],
        'ProductName': [random.choice(product_brands[category]) for category in row_categories]
    })

In [56]:
# Generate fake orders data
def generate_orders(num_records: int, customer_ids=None, shipper_ids=None) -> pd.DataFrame:
    """Build a DataFrame of synthetic order records.

    Args:
        num_records: Number of order rows to generate.
        customer_ids: Optional collection of existing customer IDs to draw the
            CustomerID column from. When omitted, random 3-digit IDs are used.
        shipper_ids: Optional collection of existing shipper IDs to draw the
            ShipperID column from. When omitted, random 3-digit IDs are used.

    Returns:
        DataFrame with the columns OrderID, ShipperID, CustomerID,
        TotalOrderAmount and OrderDate. OrderID values are unique zero-padded
        digit strings; OrderDate is a 'YYYY-MM-DD' string 1 to 365 days
        before the current date.

    Raises:
        ValueError: If num_records is negative.
        IndexError: If a supplied ID collection is empty while num_records > 0.
    """
    def _pick_ids(pool):
        # Draw (with replacement) from the caller's pool when one is given,
        # otherwise fall back to arbitrary 3-digit IDs.
        if pool is None:
            return [str(random.randrange(1000)).zfill(3) for _ in range(num_records)]
        return random.choices(list(pool), k=num_records)

    # Sample without replacement so OrderID never repeats.
    id_width = max(3, len(str(num_records - 1)))
    order_ids = [
        str(value).zfill(id_width)
        for value in random.sample(range(10 ** id_width), num_records)
    ]

    # Read the clock once so every row is dated relative to the same instant.
    today = datetime.now()

    return pd.DataFrame({
        'OrderID': order_ids,
        'ShipperID': _pick_ids(shipper_ids),
        'CustomerID': _pick_ids(customer_ids),
        'TotalOrderAmount': [random.randint(1, 1000000) for _ in range(num_records)],
        'OrderDate': [
            (today - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')
            for _ in range(num_records)
        ]
    })

In [57]:
# Generate fake order details data
def generate_order_details(num_records: int, order_ids=None, product_ids=None) -> pd.DataFrame:
    """Build a DataFrame of synthetic order line items.

    Args:
        num_records: Number of line-item rows to generate.
        order_ids: Optional collection of existing order IDs to draw the
            OrderID column from. When omitted, random 3-digit IDs are used.
        product_ids: Optional collection of existing product IDs to draw the
            ProductID column from. When omitted, random 3-digit IDs are used.

    Returns:
        DataFrame with the columns OrderID, ProductID, Quantity (integer 1-10)
        and UnitPrice (float between 10 and 100, rounded to 2 decimals).

    Raises:
        IndexError: If a supplied ID collection is empty while num_records > 0.
    """
    def _pick_ids(pool):
        # Draw (with replacement) from the caller's pool when one is given,
        # otherwise fall back to arbitrary 3-digit IDs.
        if pool is None:
            return [str(random.randrange(1000)).zfill(3) for _ in range(num_records)]
        return random.choices(list(pool), k=num_records)

    return pd.DataFrame({
        'OrderID': _pick_ids(order_ids),
        'ProductID': _pick_ids(product_ids),
        'Quantity': [random.randint(1, 10) for _ in range(num_records)],
        'UnitPrice': [round(random.uniform(10, 100), 2) for _ in range(num_records)]
    })

In [58]:
# Generate fake categories data
def generate_categories(num_records: int) -> pd.DataFrame:
    """Build a DataFrame of synthetic category records.

    Args:
        num_records: Number of category rows to generate.

    Returns:
        DataFrame with the columns CategoryID and CategoryName. CategoryID
        values are unique zero-padded digit strings: three characters wide,
        wider only when more than 1000 rows are requested. CategoryName is a
        random Faker word and may repeat.

    Raises:
        ValueError: If num_records is negative.
    """
    # Sample without replacement: independent 3-digit draws collide heavily
    # (only 1000 possible values), which breaks an identifier column.
    id_width = max(3, len(str(num_records - 1)))
    category_ids = [
        str(value).zfill(id_width)
        for value in random.sample(range(10 ** id_width), num_records)
    ]
    return pd.DataFrame({
        'CategoryID': category_ids,
        'CategoryName': [fake.word() for _ in range(num_records)]
    })

In [59]:
# Generate fake payments data
def generate_payments(num_records: int, order_ids=None) -> pd.DataFrame:
    """Build a DataFrame of synthetic payment records.

    Args:
        num_records: Number of payment rows to generate.
        order_ids: Optional collection of existing order IDs to draw the
            OrderID column from. When omitted, random 3-digit IDs are used.

    Returns:
        DataFrame with the columns PaymentID, OrderID, PaymentType and
        PaymentDate. PaymentID values are unique zero-padded digit strings;
        PaymentType is one of 'Credit Card', 'Debit Card', 'PayPal', 'Cash';
        PaymentDate is a 'YYYY-MM-DD' string 1 to 365 days before the
        current date.

    Raises:
        ValueError: If num_records is negative.
        IndexError: If order_ids is empty while num_records > 0.
    """
    payment_types = ('Credit Card', 'Debit Card', 'PayPal', 'Cash')

    # Sample without replacement so PaymentID never repeats.
    id_width = max(3, len(str(num_records - 1)))
    payment_ids = [
        str(value).zfill(id_width)
        for value in random.sample(range(10 ** id_width), num_records)
    ]

    if order_ids is None:
        order_column = [str(random.randrange(1000)).zfill(3) for _ in range(num_records)]
    else:
        order_column = random.choices(list(order_ids), k=num_records)

    # Read the clock once so every row is dated relative to the same instant.
    today = datetime.now()

    return pd.DataFrame({
        'PaymentID': payment_ids,
        'OrderID': order_column,
        'PaymentType': [random.choice(payment_types) for _ in range(num_records)],
        'PaymentDate': [
            (today - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')
            for _ in range(num_records)
        ]
    })

In [60]:
# Generate fake suppliers data
def generate_suppliers(num_records: int) -> pd.DataFrame:
    """Build a DataFrame of synthetic supplier records.

    Args:
        num_records: Number of supplier rows to generate.

    Returns:
        DataFrame with the columns SupplierID, SupplierName, PostalCode, City,
        Country, State, Email and Phone. SupplierID values are unique
        zero-padded digit strings: three characters wide, wider only when
        more than 1000 rows are requested.

    Raises:
        ValueError: If num_records is negative.
    """
    # Sample without replacement: independent 3-digit draws collide heavily
    # (only 1000 possible values), which breaks an identifier column.
    id_width = max(3, len(str(num_records - 1)))
    supplier_ids = [
        str(value).zfill(id_width)
        for value in random.sample(range(10 ** id_width), num_records)
    ]
    # NOTE(review): the location columns are drawn independently, so a row's
    # city, state, country and postal code are not guaranteed to agree.
    return pd.DataFrame({
        'SupplierID': supplier_ids,
        'SupplierName': [fake.company() for _ in range(num_records)],
        'PostalCode': [fake.postalcode() for _ in range(num_records)],
        'City': [fake.city() for _ in range(num_records)],
        'Country': [fake.country() for _ in range(num_records)],
        'State': [fake.state() for _ in range(num_records)],
        'Email': [fake.email() for _ in range(num_records)],
        'Phone': [fake.phone_number() for _ in range(num_records)]
    })

In [61]:
# Generate fake shippers data
def generate_shippers(num_records: int) -> pd.DataFrame:
    """Build a DataFrame of synthetic shipper records.

    Args:
        num_records: Number of shipper rows to generate.

    Returns:
        DataFrame with the columns ShipperID, ShipperName and Phone.
        ShipperID values are unique zero-padded digit strings: three
        characters wide, wider only when more than 1000 rows are requested.

    Raises:
        ValueError: If num_records is negative.
    """
    # Sample without replacement: independent 3-digit draws collide heavily
    # (only 1000 possible values), which breaks an identifier column.
    id_width = max(3, len(str(num_records - 1)))
    shipper_ids = [
        str(value).zfill(id_width)
        for value in random.sample(range(10 ** id_width), num_records)
    ]
    return pd.DataFrame({
        'ShipperID': shipper_ids,
        'ShipperName': [fake.company() for _ in range(num_records)],
        'Phone': [fake.phone_number() for _ in range(num_records)]
    })

In [62]:
# Build every table with the same row count; change NUM_RECORDS to resize
# the whole dataset at once.
NUM_RECORDS = 600

customers_df = generate_customers(NUM_RECORDS)
orders_df = generate_orders(NUM_RECORDS)
order_details_df = generate_order_details(NUM_RECORDS)
categories_df = generate_categories(NUM_RECORDS)
products_df = generate_products(NUM_RECORDS)
shippers_df = generate_shippers(NUM_RECORDS)
suppliers_df = generate_suppliers(NUM_RECORDS)
payments_df = generate_payments(NUM_RECORDS)

In [63]:
# Write each generated table to its own CSV file in the working directory,
# leaving out the DataFrame row index.
csv_outputs = {
    'customers.csv': customers_df,
    'orders.csv': orders_df,
    'orderdetails.csv': order_details_df,
    'categories.csv': categories_df,
    'products.csv': products_df,
    'shippers.csv': shippers_df,
    'suppliers.csv': suppliers_df,
    'payments.csv': payments_df,
}
for csv_name, table in csv_outputs.items():
    table.to_csv(csv_name, index=False)
