# In [1]:
import random
from faker import Faker

# Initialize Faker instance
# One module-level generator is created here and reused by every
# generate_record() call. It is constructed with no arguments, so Faker's
# default locale is used, and no seed is set in this cell.
fake = Faker()

# Predefined lists
# Each list below is a pool of allowed values; generate_record() draws one
# entry from the relevant pool with random.choice for every record.

# How the order was paid for.
payment_types = [
    'Card',
    'Internet Banking',
    'UPI',
    'Wallet',
]

# Country the customer is located in.
countries = [
    'USA',
    'UK',
    'Germany',
    'India',
    'Canada',
]

# Product category labels.
categories = [
    'Electronics',
    'Clothing',
    'Home & Kitchen',
    'Books',
    'Sports',
]

# Storefront the order was placed through.
sites = [
    'Amazon',
    'eBay',
    'Flipkart',
    'Walmart',
    'Shopify',
]

# Payment outcome flag: 'Y' = success, 'N' = failure.
payment_status = [
    'Y',
    'N',
]

# Reasons attached to failed payments. None is part of the pool, so a failed
# payment can end up with no recorded reason.
failure_reasons = [
    'Insufficient funds',
    'Payment gateway error',
    'Card expired',
    None,
]


# Predefined product name list
# Pool of everyday product names (groceries, toiletries, cleaning supplies,
# kitchenware, appliances, pet and personal-care items, ...) from which
# generate_record() picks the 'Product Name' field with random.choice.
# NOTE: generate_record() draws the name and the 'Product Category'
# independently, so a generated name/category pair is not guaranteed to match
# (e.g. "Milk" may be paired with 'Electronics').
product_name = [
    "Milk", "Bread", "Coffee", "Eggs", "Juice", "Toothpaste", "Shampoo", "Soap", "Deodorant",
    "Lotion", "Detergent", "Trash Bags", "Paper Towels", "Cleaning Spray", "Light Bulbs",
    "Smartphones", "Laptops", "Headphones", "Chargers", "Batteries", "Gasoline",
    "Car Maintenance Products (e.g., motor oil)", "Public Transport Cards", "Pens", "Notebooks",
    "Printer Paper", "Sticky Notes", "Stapler", "Hand Sanitizer", "Tissues", "Aluminum Foil",
    "Plastic Wrap", "Dish Soap", "Sponges", "Mop", "Broom", "Dustpan", "Vacuum Cleaner",
    "Dishwasher Detergent", "Laundry Softener", "Fabric Freshener", "Shower Curtain",
    "Toilet Paper", "Facial Tissue", "Laundry Detergent", "Scrub Brushes", "Squeegee", "Bleach",
    "Air Freshener", "Oven Cleaner", "Glass Cleaner", "Disinfectant Wipes", "Baking Paper",
    "Coffee Filters", "Tea Bags", "Cooking Oil", "Salt", "Sugar", "Pepper", "Spices", "Canned Goods",
    "Pasta", "Rice", "Frozen Vegetables", "Frozen Fruits", "Meat", "Cheese", "Yogurt",
    "Granola Bars", "Cereal", "Peanut Butter", "Jam", "Honey", "Nutritious Snacks",
    "Protein Powder", "Vitamin Supplements", "Pet Food", "Pet Treats", "Cat Litter", "Dog Leash",
    "Dog Bed", "Cat Toy", "Bird Seed", "Fish Food", "Grooming Supplies", "First Aid Kit",
    "Prescription Medications", "Over-the-Counter Medications", "Sunscreen", "Lip Balm",
    "Conditioner", "Hair Gel", "Hair Brush", "Hair Dryer", "Shaving Cream", "Razor",
    "Cotton Balls", "Q-tips", "Nail Clippers", "Tweezers", "Perfume", "Cologne", "Makeup Remover",
    "Moisturizer", "Eye Cream", "Face Masks", "Anti-Aging Cream", "Body Wash", "Scrub",
    "Sponge", "Loofah", "Hand Cream", "Foot Cream", "Laundry Basket", "Iron", "Ironing Board",
    "Clothes Hangers", "Towel Rack", "Dish Rack", "Utensil Holder", "Cutting Board",
    "Knife Set", "Cookware", "Bakeware", "Measuring Cups", "Measuring Spoons", "Can Opener",
    "Peeler", "Grater", "Mixing Bowls", "Colander", "Kitchen Timer", "Food Processor",
    "Blender", "Microwave", "Toaster", "Coffee Maker", "Electric Kettle", "Rice Cooker",
    "Slow Cooker", "Pressure Cooker", "Air Fryer", "Dishwasher", "Refrigerator", "Oven",
    "Stove", "Washing Machine", "Dryer", "Clothes Dryer", "Heater", "Fan", "Air Conditioner",
    "Humidifier", "Dehumidifier", "Electric Blanket", "Bed Sheets", "Pillows", "Blankets",
    "Mattress Protector", "Towel Set", "Shower Mat", "Bathrobe", "Slippers", "Laundry Detergent Pods",
    "Fabric Softener Sheets", "Dish Soap Pods", "Cleaning Cloths", "Microfiber Cloths",
    "Cleaning Gloves", "Bucket", "Plunger", "Toilet Brush", "Shower Head", "Toothbrush Holder",
    "Soap Dispenser", "Shower Caddy", "Towel Hooks", "Bath Mat", "Kitchen Sink Mat",
    "Window Cleaner", "Floor Cleaner", "Furniture Polish", "Metal Polish", "Wood Cleaner",
    "Pots and Pans", "Cookware Set", "Bakeware Set", "Knife Sharpener", "Corkscrew", "Bottle Opener",
    "Ice Tray", "Thermos", "Lunch Box", "Reusable Water Bottle", "Coffee Mug", "Travel Mug",
    "Glassware", "Plates", "Bowls", "Cutlery Set", "Serving Utensils", "Tablecloth", "Napkins",
    "Coasters", "Trivet", "Oven Mitts", "Pot Holders", "Dish Towels", "Kitchen Scale", "Thermometer"
]

# Function to generate a single record
def generate_record():
    """Build one synthetic e-commerce transaction as a dict.

    Identifiers, the customer name, the city and the order timestamp come
    from the shared ``fake`` generator; every other field is drawn with the
    ``random`` module from the module-level value pools.

    Returns:
        dict: Keys in insertion order, from 'Order Id' through
        'Reason for payment failure'. 'Quantity ordered' is an int in
        1..5 inclusive, 'Price' is a float drawn from [10, 1000] and
        rounded to 2 decimals, and 'Payment Success or Failure' is 'Y'
        or 'N'.
    """
    # Draw the payment outcome first; it decides whether a failure reason
    # is sampled at the end.
    outcome = random.choice(payment_status)

    # Fields are filled one by one so the key order (which callers may use
    # as the CSV column order) is explicit.
    record = {}
    record['Order Id'] = fake.uuid4()
    record['Customer Id'] = fake.uuid4()
    record['Customer Name'] = fake.name()
    record['Product Id'] = fake.uuid4()
    record['Product Name'] = random.choice(product_name)
    record['Product Category'] = random.choice(categories)
    record['Payment Type'] = random.choice(payment_types)
    record['Quantity ordered'] = random.randint(1, 5)
    record['Price'] = round(random.uniform(10, 1000), 2)
    record['Date and time when order was placed'] = fake.date_time_this_decade()
    record['Customer Country'] = random.choice(countries)
    record['Customer City'] = fake.city()
    record['Site from where order was placed'] = random.choice(sites)
    record['Payment Transaction Confirmation Id'] = fake.uuid4()
    record['Payment Success or Failure'] = outcome

    # Only failed payments sample a reason. The pool itself contains None,
    # so a failed payment may still end up without one.
    reason = None
    if outcome == 'N':
        reason = random.choice(failure_reasons)
    record['Reason for payment failure'] = reason

    return record


'''
This Python script generates realistic e-commerce transaction records using the `Faker` library. It initializes predefined lists for payment types, countries, product categories, and other attributes. The script also includes a comprehensive list of product names. 
The `generate_record` function creates a single record with fields such as `Order Id`, `Customer Name`, `Product Name`, and `Price`. It uses `Faker` to generate unique identifiers, names, and cities, while random choices from predefined lists determine product details and payment types. The quantity and price are randomly generated within specified ranges, and the datetime reflects orders placed in the current decade. For each record, the payment success status and, if applicable, a reason for payment failure are included. 
This setup is well suited to creating a large, diverse dataset for analysis: it contains both successful and failed payments, and failed payments carry a varied (sometimes missing) failure reason.

'''

# In [2]:
import csv

# Generate and save 10000 records to a CSV file
# - encoding is pinned to UTF-8 so the output does not depend on the
#   platform's default locale encoding.
# - newline='' lets the csv module control line endings itself.
with open('transaction_data.csv', mode='w', newline='', encoding='utf-8') as file:
    # The column order is taken from the key order of a generated record.
    # That first record is written out as well instead of being discarded,
    # so exactly 10000 records are generated in total.
    first_record = generate_record()
    writer = csv.DictWriter(file, fieldnames=list(first_record))
    writer.writeheader()
    writer.writerow(first_record)
    # Stream the remaining records straight into the writer.
    writer.writerows(generate_record() for _ in range(10000 - 1))


'''
This script generates and saves 10,000 e-commerce transaction records to a CSV file named `transaction_data.csv`. It uses the `csv` module to create a `DictWriter` with fieldnames from `generate_record()`. It writes headers and iterates 10,000 times to output each record.
'''
