In [5]:
from faker import Faker
import pandas as pd
import random

# Shared Faker instance. The generate_* functions in this notebook read it as the
# global name `fake`; without this line they fail with NameError on a fresh kernel.
fake = Faker()



In [6]:
def generate_product_types(num_rows):
    """Build the product-types table.

    Args:
        num_rows: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns:
          - 'product_type_id': consecutive integers 1..num_rows
          - 'product_type_name': 'Pipe', 'Fitting', 'Valve', 'Flange' repeated
            in that order for as many rows as requested.
    """
    base_names = ['Pipe', 'Fitting', 'Valve', 'Flange']
    product_types_data = {
        'product_type_id': range(1, num_rows + 1),
        # Cycle through the names by position so the column always has exactly
        # num_rows entries. Multiplying the list by num_rows // 4 came up short
        # whenever num_rows was not a multiple of 4, and DataFrame construction
        # then failed on the length mismatch.
        'product_type_name': [base_names[i % len(base_names)] for i in range(num_rows)],
    }
    return pd.DataFrame(product_types_data)


In [7]:
def generate_customers(num_rows):
    """Build a customers table filled with fake personal data.

    Relies on the module-level Faker instance `fake` for name, email, phone
    and address values.

    Args:
        num_rows: Number of customers to generate.

    Returns:
        pandas.DataFrame with columns 'customer_id', 'name', 'email', 'phone'
        and 'address'. 'customer_id' values are unique.
    """
    # Draw ids without replacement so customer_id can act as a key. Independent
    # draws from 1000-9999 almost always collide at a thousand rows. The pool is
    # that same 1000-9999 range, widened only if more rows are requested than it
    # can hold.
    id_pool = range(1000, max(10000, 1000 + num_rows))
    customers_data = {
        'customer_id': random.sample(id_pool, num_rows),
        'name': [fake.name() for _ in range(num_rows)],
        'email': [fake.email() for _ in range(num_rows)],
        'phone': [fake.phone_number() for _ in range(num_rows)],
        'address': [fake.address() for _ in range(num_rows)]
    }
    return pd.DataFrame(customers_data)


In [8]:
def generate_sales(num_rows, product_types):
    """Build a sales table whose rows reference existing product types.

    Relies on the module-level Faker instance `fake` for customer ids.

    Args:
        num_rows: Number of sales to generate.
        product_types: DataFrame with a 'product_type_id' column; every sale's
            product_type_id is picked at random from it.

    Returns:
        pandas.DataFrame with columns:
          - 'sale_id': consecutive integers starting at 10000
          - 'customer_id': random integer in 1000..9999
          - 'product_type_id': value taken from `product_types`
          - 'quantity': random integer in 1..100
          - 'unit_price': random float in 10..1000 rounded to 2 decimals

    NOTE(review): customer_id is drawn independently of any customers table, so
    a sale may reference an id that no generated customer has.
    """
    # Materialise the ids as a plain list once. random.choice indexes its
    # argument with an integer, and on a Series that is a label lookup, which
    # fails when the frame does not carry a default 0..n-1 index.
    type_ids = product_types['product_type_id'].tolist()
    sales_data = {
        'sale_id': range(10000, 10000 + num_rows),
        'customer_id': [fake.random_int(min=1000, max=9999) for _ in range(num_rows)],
        'product_type_id': [random.choice(type_ids) for _ in range(num_rows)],
        'quantity': [random.randint(1, 100) for _ in range(num_rows)],
        'unit_price': [round(random.uniform(10, 1000), 2) for _ in range(num_rows)]
    }
    return pd.DataFrame(sales_data)


In [9]:
def generate_products(num_rows, product_types):
    """Build a products table whose rows reference existing product types.

    Relies on the module-level Faker instance `fake` for names and descriptions.

    Args:
        num_rows: Number of products to generate.
        product_types: DataFrame with a 'product_type_id' column; every
            product's product_type_id is picked at random from it.

    Returns:
        pandas.DataFrame with columns 'product_id' (consecutive integers
        1..num_rows), 'product_name' (a fake word), 'product_type_id' and
        'description' (a fake sentence).
    """
    # Materialise the ids as a plain list once. random.choice indexes its
    # argument with an integer, and on a Series that is a label lookup, which
    # fails when the frame does not carry a default 0..n-1 index.
    type_ids = product_types['product_type_id'].tolist()
    products_data = {
        'product_id': range(1, num_rows + 1),
        'product_name': [fake.word() for _ in range(num_rows)],
        'product_type_id': [random.choice(type_ids) for _ in range(num_rows)],
        'description': [fake.sentence() for _ in range(num_rows)]
    }
    return pd.DataFrame(products_data)



In [10]:
def generate_orders(num_rows, sales):
    """Create an orders table, one order per distinct sale.

    Args:
        num_rows: Number of orders to generate; random.sample raises ValueError
            if this exceeds the number of rows in `sales`.
        sales: DataFrame with a 'sale_id' column to draw from.

    Returns:
        pandas.DataFrame with columns 'order_id' (1..num_rows), 'sale_id'
        (sampled from `sales` without replacement) and 'order_date' (a fake
        datetime between one year ago and now, from the global `fake`).
    """
    available_sale_ids = sales['sale_id'].tolist()
    # Sampling without replacement: no sale is attached to two orders.
    chosen_sale_ids = random.sample(available_sale_ids, num_rows)

    order_dates = []
    for _ in range(num_rows):
        order_dates.append(fake.date_time_between(start_date='-1y', end_date='now'))

    return pd.DataFrame({
        'order_id': range(1, num_rows + 1),
        'sale_id': chosen_sale_ids,
        'order_date': order_dates,
    })


In [11]:
def generate_payments(num_rows, sales):
    """Create a payments table, one payment per distinct sale.

    Args:
        num_rows: Number of payments to generate; random.sample raises
            ValueError if this exceeds the number of rows in `sales`.
        sales: DataFrame with a 'sale_id' column to draw from.

    Returns:
        pandas.DataFrame with columns 'payment_id' (1..num_rows), 'sale_id'
        (sampled from `sales` without replacement), 'payment_date' (a fake
        datetime between one year ago and now, from the global `fake`) and
        'amount' (random float in 10..1000 rounded to 2 decimals; it is not
        derived from the sale's quantity or unit price).
    """
    available_sale_ids = sales['sale_id'].tolist()
    # Sampling without replacement: no sale is paid twice.
    paid_sale_ids = random.sample(available_sale_ids, num_rows)

    payment_dates = []
    for _ in range(num_rows):
        payment_dates.append(fake.date_time_between(start_date='-1y', end_date='now'))

    amounts = []
    for _ in range(num_rows):
        amounts.append(round(random.uniform(10, 1000), 2))

    return pd.DataFrame({
        'payment_id': range(1, num_rows + 1),
        'sale_id': paid_sale_ids,
        'payment_date': payment_dates,
        'amount': amounts,
    })


In [12]:
def generate_suppliers(num_rows):
    """Create a suppliers table from fake company data.

    Args:
        num_rows: Number of suppliers to generate.

    Returns:
        pandas.DataFrame with columns 'supplier_id' (1..num_rows),
        'supplier_name' (a fake company name) and 'contact_info' (a fake
        postal address), both produced by the global `fake`.
    """
    company_names = [fake.company() for _ in range(num_rows)]
    contact_addresses = [fake.address() for _ in range(num_rows)]

    suppliers = pd.DataFrame({
        'supplier_id': range(1, num_rows + 1),
        'supplier_name': company_names,
        'contact_info': contact_addresses,
    })
    return suppliers


In [13]:
def generate_inventory(num_rows, products):
    """Create an inventory table with one stock row per distinct product.

    Args:
        num_rows: Number of inventory rows; random.sample raises ValueError if
            this exceeds the number of rows in `products`.
        products: DataFrame with a 'product_id' column to draw from.

    Returns:
        pandas.DataFrame with columns 'product_id' (sampled from `products`
        without replacement) and 'quantity_on_hand' (random integer in
        1..1000 inclusive).
    """
    candidate_ids = products['product_id'].tolist()
    # Without replacement, so a product never gets two stock rows.
    stocked_ids = random.sample(candidate_ids, num_rows)

    stock_levels = []
    for _ in range(num_rows):
        stock_levels.append(random.randint(1, 1000))

    return pd.DataFrame({
        'product_id': stocked_ids,
        'quantity_on_hand': stock_levels,
    })


In [14]:
# Row count shared by every generated table; change it here to scale the dataset.
# Orders, payments and inventory sample their parent ids without replacement, so
# they cannot have more rows than the sales / products tables they draw from.
NUM_ROWS = 1000

# Seed both random sources (stdlib `random` and Faker) so re-running the notebook
# draws the same values. Dates are still generated relative to 'now', so they
# shift with the day the notebook is run.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Generate data for product types (sales and products pick their type ids from it)
product_types_df = generate_product_types(NUM_ROWS)

# Generate data for customers
customers_df = generate_customers(NUM_ROWS)

# Generate data for sales
sales_df = generate_sales(NUM_ROWS, product_types_df)

# Generate data for products
products_df = generate_products(NUM_ROWS, product_types_df)

# Generate data for orders
orders_df = generate_orders(NUM_ROWS, sales_df)

# Generate data for payments
payments_df = generate_payments(NUM_ROWS, sales_df)

# Generate data for suppliers
suppliers_df = generate_suppliers(NUM_ROWS)

# Generate data for inventory
inventory_df = generate_inventory(NUM_ROWS, products_df)


In [15]:
# Show the generated products table as the cell output for a visual spot-check.
products_df

Unnamed: 0,product_id,product_name,product_type_id,description
0,1,item,10,Receive let group population environmental.
1,2,official,335,Candidate director remember product husband.
2,3,actually,514,Property poor exist send include guy get improve.
3,4,debate,403,Scene blue technology onto science environment...
4,5,seem,53,Find language hundred sport.
...,...,...,...,...
995,996,body,675,Group consider theory window.
996,997,president,774,But scientist image course rest business.
997,998,hit,830,Do deal throw.
998,999,again,563,Quality people figure no less theory than or.


In [16]:
# Show the generated payments table as the cell output for a visual spot-check.
payments_df

Unnamed: 0,payment_id,sale_id,payment_date,amount
0,1,10190,2023-08-24 20:50:29,171.17
1,2,10788,2023-05-21 00:40:56,325.61
2,3,10932,2023-11-24 21:35:38,26.56
3,4,10287,2023-08-27 19:16:49,232.45
4,5,10293,2024-03-18 23:01:22,318.44
...,...,...,...,...
995,996,10263,2023-06-04 06:05:14,21.34
996,997,10420,2023-10-29 19:56:01,572.98
997,998,10861,2024-04-19 00:06:05,887.42
998,999,10488,2023-05-30 02:48:13,345.49


In [17]:
# Write each generated table to its own CSV file in the working directory,
# leaving out the DataFrame index column.
csv_exports = [
    (product_types_df, 'product_types.csv'),
    (customers_df, 'customers.csv'),
    (sales_df, 'sales.csv'),
    (products_df, 'products.csv'),
    (orders_df, 'orders.csv'),
    (payments_df, 'payments.csv'),
    (suppliers_df, 'suppliers.csv'),
    (inventory_df, 'inventory.csv'),
]
for table_df, csv_path in csv_exports:
    table_df.to_csv(csv_path, index=False)
