In [3]:
from typing import List, Dict
from faker import Faker
import pandas as pd
import random

# Shared Faker instance; generate_customer_info() draws names, phone numbers
# and addresses from it.
fake = Faker()
# Price per unit for every product/service that can appear on an order.
# This table is the single source of truth for the catalogue.
UNIT_PRICES = {
    'Web Development': 1200,
    'Maintenance Plan': 300,
    'SSL Certificate': 75,
    'Brand Strategy Consultation': 950,
    'Logo Design': 600,
    'IT Infrastructure Setup': 2000,
    'On-Site Support': 500,
    'Training Session': 250,
}

# Product names, in the same order as the price table. Derived from
# UNIT_PRICES so a product can never be listed without a price (which would
# raise KeyError when an order item looks up its unit price).
PRODUCTS = list(UNIT_PRICES)

DEFAULT_TAX_RATE = 0.1  # 10% tax

def generate_customer_info() -> Dict:
    """Build one synthetic customer record.

    Returns:
        A dict with the keys ``name``, ``email``, ``phone`` and ``address``.
        The email is derived from the generated name (spaces removed,
        lower-cased) and the address is flattened onto a single line.
    """
    full_name = fake.name()
    # The mailbox part of the email is the name without spaces, lower-cased.
    mailbox = full_name.replace(' ', '').lower()
    phone_number = fake.phone_number()
    # Faker addresses contain newlines; join the lines with ", " instead.
    single_line_address = fake.address().replace('\n', ', ')

    return dict(
        name=full_name,
        email=f"{mailbox}@gmail.com",
        phone=phone_number,
        address=single_line_address,
    )

def generate_order_items(customer_name: str, num_items: int) -> List[Dict]:
    """Create the line items of a single randomly generated order.

    Args:
        customer_name: Accepted for interface compatibility; the body does
            not currently use it.
        num_items: Number of line items to create. Zero or a negative value
            yields an empty list.

    Returns:
        A list of dicts, one per line item, all sharing the same ``cart_id``.
        Each dict holds ``cart_id``, ``product_description``, ``quantity``,
        ``unit_price``, ``tax_rate`` and ``discount``.
    """
    # One identifier per call, so every item of the order carries the same id.
    invoice_ref = f"INV-{random.randint(100000, 999999)}"

    def build_line() -> Dict:
        # Products are drawn independently, so the same product may appear
        # on more than one line of an order.
        chosen = random.choice(PRODUCTS)
        return {
            'cart_id': invoice_ref,
            'product_description': chosen,
            'quantity': random.randint(1, 3),
            'unit_price': UNIT_PRICES[chosen],
            'tax_rate': DEFAULT_TAX_RATE,
            # Discount between 0% and 15%, rounded to two decimals.
            'discount': round(random.uniform(0, 15), 2),
        }

    return [build_line() for _ in range(num_items)]

# Column layout of the generated dataset: the customer fields followed by the
# keys of each order item produced by generate_order_items().
_DATASET_COLUMNS = [
    'customer_name', 'customer_email', 'customer_phone', 'customer_address',
    'cart_id', 'product_description', 'quantity', 'unit_price',
    'tax_rate', 'discount',
]


def generate_dataset(number_of_customers: int = 10, min_items: int = 1, max_items: int = 5) -> pd.DataFrame:
    """Generate a flat table of synthetic customers and their order items.

    Each customer gets one order with a random number of line items; every
    line item becomes one row that repeats the customer's details.

    Args:
        number_of_customers: How many customers (orders) to generate. Zero or
            a negative value produces an empty table.
        min_items: Smallest number of line items per order (inclusive).
        max_items: Largest number of line items per order (inclusive).

    Returns:
        A DataFrame with one row per line item. When no rows are produced the
        frame is empty but still carries the expected columns, so writing it
        to CSV yields a header and column access keeps working.

    Raises:
        ValueError: From ``random.randint`` when ``min_items > max_items`` and
            at least one customer is generated.
    """
    data = []

    for _ in range(number_of_customers):
        customer = generate_customer_info()
        order_items = generate_order_items(
            customer['name'],
            random.randint(min_items, max_items),
        )

        # The customer columns are identical for every row of this order.
        customer_fields = {
            'customer_name': customer['name'],
            'customer_email': customer['email'],
            'customer_phone': customer['phone'],
            'customer_address': customer['address'],
        }
        data.extend({**customer_fields, **item} for item in order_items)

    if not data:
        # pd.DataFrame([]) would have no columns at all; keep the schema.
        return pd.DataFrame(columns=_DATASET_COLUMNS)

    return pd.DataFrame(data)

if __name__ == "__main__":
    # Generate a small sample dataset, save it, and show it.
    df = generate_dataset(number_of_customers=10, min_items=1, max_items=5)
    df.to_csv('orders.csv', index=False)
    try:
        # `display` is only available inside IPython/Jupyter sessions.
        display(df)
    except NameError:
        # Plain Python script: fall back to a text rendering.
        print(df)

Unnamed: 0,customer_name,customer_email,customer_phone,customer_address,cart_id,product_description,quantity,unit_price,tax_rate,discount
0,Natasha Bennett,natashabennett@gmail.com,(839)677-7190x683,"153 Castaneda Village, Johnstad, MS 46008",INV-673880,On-Site Support,3,500,0.1,12.67
1,Natasha Bennett,natashabennett@gmail.com,(839)677-7190x683,"153 Castaneda Village, Johnstad, MS 46008",INV-673880,Web Development,2,1200,0.1,11.4
2,Natasha Bennett,natashabennett@gmail.com,(839)677-7190x683,"153 Castaneda Village, Johnstad, MS 46008",INV-673880,Maintenance Plan,2,300,0.1,7.24
3,Natasha Bennett,natashabennett@gmail.com,(839)677-7190x683,"153 Castaneda Village, Johnstad, MS 46008",INV-673880,On-Site Support,2,500,0.1,12.42
4,Ryan Ford,ryanford@gmail.com,847-419-1183x39355,"306 Atkins Rapids, North Vincentbury, OH 82065",INV-360900,On-Site Support,3,500,0.1,11.04
5,Ryan Ford,ryanford@gmail.com,847-419-1183x39355,"306 Atkins Rapids, North Vincentbury, OH 82065",INV-360900,Maintenance Plan,3,300,0.1,11.76
6,Ryan Ford,ryanford@gmail.com,847-419-1183x39355,"306 Atkins Rapids, North Vincentbury, OH 82065",INV-360900,On-Site Support,2,500,0.1,10.66
7,Ryan Ford,ryanford@gmail.com,847-419-1183x39355,"306 Atkins Rapids, North Vincentbury, OH 82065",INV-360900,Maintenance Plan,2,300,0.1,5.1
8,Jesse Smith,jessesmith@gmail.com,001-895-429-8143x47912,"5181 Ronald Vista, South Williamville, NH 27596",INV-842314,IT Infrastructure Setup,3,2000,0.1,7.58
9,Jesse Smith,jessesmith@gmail.com,001-895-429-8143x47912,"5181 Ronald Vista, South Williamville, NH 27596",INV-842314,Brand Strategy Consultation,3,950,0.1,6.48
