<h2 style='color: crimson; font-family: Colonna MT; font-weight: 600; font-size: 35px; text-align: center'>Customer Dataset Generation</h2>

---

<h2 style='font-family: Colonna MT; font-weight: 600; font-size: 20px; text-align: left'>1.0. Import Required Libraries and Modules</h2>

In [None]:
from typing import List, Dict, Optional
from faker import Faker
import pandas as pd
import random

# Shared Faker instance; DatasetGenerator reads it to produce customer names,
# phone numbers and addresses. No seed is set in this cell.
fake = Faker()

<h2 style='font-family: Colonna MT; font-weight: 600; font-size: 20px; text-align: left'>2.0. Class Construction </h2>

In [35]:
class DatasetGenerator:
    """
    Generate a synthetic e-commerce dataset of customers and their order lines.

    Each generated customer gets one cart (invoice) containing a random number
    of line items. Every line item becomes one row of the resulting DataFrame,
    with the customer's details repeated on each row.

    Notes:
        * ``tax_rate`` is stored as a fraction (0.1 means 10%), whereas
          ``discount`` is stored in percent (a value between 0 and 15).
        * Cart ids are drawn at random and are not guaranteed to be unique
          across customers.
        * The same product may appear more than once in a single cart.
    """

    # Column order of the frame returned by ``generate_dataset``. Declared once
    # so that an empty dataset still exposes the full schema.
    COLUMNS = (
        'customer_name', 'customer_email', 'customer_phone', 'customer_address',
        'cart_id', 'product_description', 'quantity', 'unit_price',
        'tax_rate', 'discount',
    )

    def __init__(self, seed: Optional[int] = None, faker: Optional["Faker"] = None):
        """
        Set up the product catalogue and the random sources.

        Args:
            seed: When given, order data is drawn from a private
                ``random.Random(seed)`` and the Faker instance in use is
                re-seeded with the same value, making the output reproducible.
                When omitted, the global ``random`` module is used, so an
                externally applied ``random.seed(...)`` keeps working.
            faker: Object providing ``name()``, ``phone_number()``,
                ``address()`` (and ``seed_instance()`` when ``seed`` is used).
                Defaults to the notebook-level ``fake`` instance.
        """
        self.unit_prices = {
            'Web Development': 1200,
            'Maintenance Plan': 300,
            'SSL Certificate': 75,
            'Brand Strategy Consultation': 950,
            'Logo Design': 600,
            'IT Infrastructure Setup': 2000,
            'On-Site Support': 500,
            'Training Session': 250
        }

        # Derived from the price table so the two can never drift apart.
        self.products = list(self.unit_prices)

        self.default_tax_rate = 0.1  # 10% tax

        self._fake = faker if faker is not None else fake
        self._rng = random.Random(seed) if seed is not None else random
        if seed is not None:
            # Note: when the shared notebook-level instance is used, this
            # re-seeds it for every other user of that instance as well.
            self._fake.seed_instance(seed)

    @staticmethod
    def _email_from_name(name: str) -> str:
        """
        Build a gmail address from a person's name.

        Only ASCII letters and digits are kept, so honorifics and suffixes
        such as "Dr." or "Jr." cannot leave stray dots or apostrophes in the
        local part. Falls back to ``customer`` if nothing usable remains.
        """
        local_part = ''.join(
            ch for ch in name.lower() if ch.isascii() and ch.isalnum()
        )
        return f"{local_part or 'customer'}@gmail.com"

    def _generate_customer_info(self) -> Dict:
        """
        Generate basic customer information.

        Returns:
            Dict with the keys ``name``, ``email``, ``phone`` and ``address``.
            The address is flattened to a single line.
        """
        name = self._fake.name()
        return {
            'name': name,
            'email': self._email_from_name(name),
            'phone': self._fake.phone_number(),
            'address': self._fake.address().replace('\n', ', ')
        }

    def _generate_order_items(self, customer_name: str, num_items: int) -> List[Dict]:
        """
        Generate the line items of one cart.

        Args:
            customer_name: Currently unused; kept for interface compatibility.
            num_items: Number of line items to create. Values below 1 yield
                an empty list.

        Returns:
            List of dicts, one per line item, all sharing the same ``cart_id``.
        """
        items = []
        cart_id = f"INV-{self._rng.randint(100000, 999999)}"

        for _ in range(num_items):
            product = self._rng.choice(self.products)
            quantity = self._rng.randint(1, 3)
            unit_price = self.unit_prices[product]
            discount = round(self._rng.uniform(0, 15), 2)  # Discount between 0% and 15%

            items.append({
                'cart_id': cart_id,
                'product_description': product,
                'quantity': quantity,
                'unit_price': unit_price,
                'tax_rate': self.default_tax_rate,
                'discount': discount
            })

        return items

    def generate_dataset(self, number_of_customers: int = 10, min_items: int = 1, max_items: int = 5) -> pd.DataFrame:
        """
        Generate a complete dataset with customer and order information.

        Args:
            number_of_customers: Number of customers to generate (>= 0)
            min_items: Minimum number of items per order (>= 0)
            max_items: Maximum number of items per order (>= min_items)

        Returns:
            pandas.DataFrame: One row per order line, with the columns listed
            in ``COLUMNS``. The columns are present even when no rows are
            generated.

        Raises:
            ValueError: If ``number_of_customers`` or ``min_items`` is
                negative, or if ``max_items`` is smaller than ``min_items``.
        """
        if number_of_customers < 0:
            raise ValueError(
                f"number_of_customers must be >= 0, got {number_of_customers}"
            )
        if min_items < 0 or max_items < min_items:
            raise ValueError(
                "expected 0 <= min_items <= max_items, "
                f"got min_items={min_items}, max_items={max_items}"
            )

        data = []

        for _ in range(number_of_customers):
            customer = self._generate_customer_info()
            num_items = self._rng.randint(min_items, max_items)
            order_items = self._generate_order_items(customer['name'], num_items)

            for item in order_items:
                data.append({
                    'customer_name': customer['name'],
                    'customer_email': customer['email'],
                    'customer_phone': customer['phone'],
                    'customer_address': customer['address'],
                    **item
                })

        return pd.DataFrame(data, columns=list(self.COLUMNS))

    def save_to_csv(self, df: pd.DataFrame, filename: str = "orders.csv") -> None:
        """
        Save the generated dataset to a CSV file.

        Args:
            df: Dataset to write; the index is not written.
            filename: Destination path of the CSV file.
        """
        df.to_csv(filename, index=False)
        print(f"Dataset saved to {filename}")

<h2 style='font-family: Colonna MT; font-weight: 600; font-size: 20px; text-align: left'>3.0. Dataset Generation and Saving </h2>

In [36]:
def main(number_of_customers: int = 10, min_items: int = 1, max_items: int = 5,
         filename: str = "orders.csv") -> None:
    """
    Generate a dataset, show it, and write it to a CSV file.

    Args:
        number_of_customers: Number of customers to generate.
        min_items: Minimum number of items per order.
        max_items: Maximum number of items per order.
        filename: Path of the CSV file to write.
    """
    # Initialize the generator
    generator = DatasetGenerator()

    # Generate dataset
    df = generator.generate_dataset(
        number_of_customers=number_of_customers,
        min_items=min_items,
        max_items=max_items,
    )

    # Display the dataset. `display` is supplied by IPython/Jupyter; fall back
    # to plain printing when this code runs as an ordinary script.
    try:
        display(df)
    except NameError:
        print(df)

    # Save to CSV
    generator.save_to_csv(df, filename)

if __name__ == "__main__":
    main()

Unnamed: 0,customer_name,customer_email,customer_phone,customer_address,cart_id,product_description,quantity,unit_price,tax_rate,discount
0,Alex Rivas,alexrivas@gmail.com,001-536-950-0023x933,"2781 Powers Ridge, East Elizabeth, ND 77262",INV-414607,Logo Design,2,600,0.1,10.14
1,Cheyenne Mcclain,cheyennemcclain@gmail.com,(870)701-4141,"2252 Walls Lodge, Danielville, ME 08856",INV-349412,Maintenance Plan,3,300,0.1,9.75
2,Cheyenne Mcclain,cheyennemcclain@gmail.com,(870)701-4141,"2252 Walls Lodge, Danielville, ME 08856",INV-349412,Brand Strategy Consultation,3,950,0.1,2.99
3,Cheyenne Mcclain,cheyennemcclain@gmail.com,(870)701-4141,"2252 Walls Lodge, Danielville, ME 08856",INV-349412,Training Session,3,250,0.1,2.05
4,Steven Alexander,stevenalexander@gmail.com,(771)370-9337,"260 Juan Shores Apt. 329, North Michaelburgh, ...",INV-418746,On-Site Support,3,500,0.1,5.22
5,Steven Alexander,stevenalexander@gmail.com,(771)370-9337,"260 Juan Shores Apt. 329, North Michaelburgh, ...",INV-418746,On-Site Support,2,500,0.1,9.6
6,Gina Davis,ginadavis@gmail.com,+1-341-447-3476x67726,"74008 Pamela Street Apt. 573, Skinnerberg, WI ...",INV-376222,Training Session,3,250,0.1,4.91
7,Gina Davis,ginadavis@gmail.com,+1-341-447-3476x67726,"74008 Pamela Street Apt. 573, Skinnerberg, WI ...",INV-376222,Maintenance Plan,2,300,0.1,14.62
8,Scott Thomas,scottthomas@gmail.com,866-791-2219x05914,"349 Garza Islands Apt. 830, Danielport, CT 49857",INV-780647,Brand Strategy Consultation,2,950,0.1,7.02
9,Scott Thomas,scottthomas@gmail.com,866-791-2219x05914,"349 Garza Islands Apt. 830, Danielport, CT 49857",INV-780647,Training Session,2,250,0.1,0.0


Dataset saved to orders.csv
