# In [3]:
import csv
import random
from datetime import datetime, timedelta
import os


# Pool of product names that the generator samples from with random.choice.
# Every entry is unique so that each product is equally likely to be drawn
# (the list previously contained "Action Camera" twice, which doubled its
# sampling weight).
product_names = [
    "Laptop", "Smartphone", "Tablet", "Headphones", "TV", "Camera",
    "Smartwatch", "Gaming Console", "Wireless Earbuds", "Bluetooth Speaker",
    "Fitness Tracker", "External Hard Drive", "Printer", "Monitor",
    "Mouse", "Keyboard", "Router", "Power Bank", "Projector",
    "Wireless Charger", "Gaming Mouse", "Gaming Keyboard", "VR Headset",
    "Drone", "Smart Home Hub", "Action Camera", "E-book Reader",
    "Smart Bulb", "Robot Vacuum", "Hair Dryer", "Coffee Maker",
    "Blender", "Microwave", "Air Purifier", "Toaster", "Electric Kettle",
    "Steam Iron", "Food Processor", "Juicer", "Slow Cooker", "Cordless Phone",
    "Digital Thermometer", "Car Charger", "Wireless Mouse", "Fitness Scale",
    "Curling Iron", "GPS Navigator", "Handheld Vacuum", "Humidifier",
    "Robot Lawn Mower", "Security Camera", "Baby Monitor", "Rice Cooker",
    "Outdoor Speaker", "Power Strip", "Wireless Earphones", "Gaming Chair",
    "Portable Fan", "Sleep Tracker", "Smart Lock", "Smart Thermostat",
    "Smart Scale", "Bluetooth Earphones", "Electric Toothbrush", "Smart Mirror",
    "Portable Projector", "Dash Cam", "Water Filter", "Air Fryer",
    "Multifunction Printer", "Wearable Camera", "Shaver", "Noise-Canceling Earphones",
    "Cordless Vacuum", "Smart Refrigerator", "Voice Assistant Speaker",
    "Smart Doorbell", "Induction Cooker", "Digital Photo Frame",
    "Massage Gun", "Smart Pet Feeder", "Karaoke Microphone", "Mini Fridge",
    "Wireless Webcam", "Digital Notepad", "Hand Warmer", "Gaming Monitor",
    "Wireless Keyboard", "Electric Scooter", "Pedometer", "Fitness Band",
    "USB Hub", "External SSD", "Wireless Controller", "Virtual Keyboard",
    "Fitness Roller", "Sleep Mask", "Smart Umbrella", "Smart Glasses"
]

# Pool of country names sampled for both the customer's country and the
# shop's location country.
# Order matters: generate_and_export_csv derives CustomerCountryID and
# ShopLocationCountryID from each name's 1-based position in this list, so
# inserting, removing, or reordering entries renumbers those IDs.
country_names = [
    "United States", "Canada", "United Kingdom", "Australia", "Germany",
    "France", "Italy", "Japan", "South Korea", "China", "India", "Brazil",
    "Mexico", "Russia", "South Africa", "Nigeria", "Egypt", "Saudi Arabia",
    "Turkey", "Iran", "Vietnam", "Thailand", "Indonesia", "Argentina",
    "Chile", "Peru", "Colombia", "Spain", "Portugal", "Netherlands",
    "Switzerland", "Sweden", "Norway", "Finland", "Denmark", "Poland",
    "Greece", "Ireland", "New Zealand", "Singapore", "Malaysia", "Philippines",
    "Pakistan", "Bangladesh", "United Arab Emirates", "Israel", "Kenya",
    "Ghana", "Morocco", "Algeria", "Tunisia", "Ukraine", "Belgium",
    "Austria", "Czech Republic", "Hungary", "Romania", "Bulgaria", "Croatia",
    "Slovakia", "Slovenia", "Estonia", "Latvia", "Lithuania", "Cyprus",
    "Malta", "Iceland", "Greenland", "Fiji", "Samoa", "Solomon Islands",
    "Vanuatu", "Papua New Guinea", "Namibia", "Botswana", "Zimbabwe",
    "Zambia", "Mozambique", "Angola", "Tanzania", "Uganda", "Rwanda",
    "Burundi", "Congo", "Niger", "Mali", "Senegal", "Cameroon", "Gabon",
    "Central African Republic", "Chad", "Sudan", "Somalia", "Ethiopia",
    "Eritrea", "Djibouti", "Yemen", "Oman", "Kuwait", "Iraq"
]

# Generate random dates within a range
def random_date(start, end):
    """Return a random datetime between ``start`` and ``end``, inclusive.

    The result is ``start`` plus a whole number of seconds, so it keeps the
    sub-second component of ``start``.

    Args:
        start: Lower bound (a ``datetime``).
        end: Upper bound (a ``datetime``); must not be earlier than ``start``.

    Returns:
        A ``datetime`` in the closed interval ``[start, end]``.

    Raises:
        ValueError: If ``end`` is earlier than ``start`` (raised by
            ``random.randint`` for an empty range).
    """
    # Truncate the span to whole seconds so it can be fed to randint.
    span_seconds = int((end - start).total_seconds())
    offset_seconds = random.randint(0, span_seconds)
    return start + timedelta(seconds=offset_seconds)

# Generate random customer names
def generate_customer_name():
    """Return a random ``"<first> <last>"`` customer name.

    The first name is drawn before the last name, each uniformly from a
    small fixed pool (6 first names x 5 last names).
    """
    given_pool = ("John", "Jane", "Michael", "Emily", "David", "Olivia")
    family_pool = ("Smith", "Johnson", "Williams", "Brown", "Jones")

    given = random.choice(given_pool)
    family = random.choice(family_pool)
    return " ".join((given, family))

# Generate random shop names
def generate_shop_name():
    """Return a random ``"<prefix> <suffix>"`` shop name.

    The prefix is drawn before the suffix, each uniformly from a small
    fixed pool (5 prefixes x 3 suffixes).
    """
    lead_words = ("Tech", "Gadget", "Electro", "E-Tech", "Digital")
    tail_words = ("Store", "Shop", "Outlets")

    lead = random.choice(lead_words)
    tail = random.choice(tail_words)
    return " ".join((lead, tail))

# Generate a dataset and export it to CSV
def generate_and_export_csv(output_path, num_rows):
    """Write ``num_rows`` synthetic sales records to a CSV file.

    The file is created (or overwritten) at ``output_path`` as UTF-8 with a
    header row. Each record contains:

    * ``SaleID`` - sequential, 1..``num_rows``, so every sale has a unique ID.
    * ``SaleDate`` / ``ShippingDate`` / ``DeliveredDate`` - ``dd/mm/YYYY``;
      the sale date falls between 2023-01-01 and 2026-12-31, shipping is
      1-7 days after the sale, and delivery is 1-7 days after shipping.
    * ``ProductName`` / ``ProductID`` - a name from ``product_names`` and its
      1-based position in that list, so a product always has the same ID.
    * ``ItemPrice`` - ``$``-prefixed amount between 50 and 1000, 2 decimals.
    * ``SoldQuantity`` - integer 1-10.
    * ``CustomerName`` / ``CustomerID`` and ``ShopName`` / ``ShopID`` -
      random name plus a random integer ID.
    * ``CustomerCountry`` / ``ShopLocationCountry`` and their ``...ID``
      columns - a name from ``country_names`` and its 1-based position.

    Args:
        output_path: Path of the CSV file to write.
        num_rows: Number of data rows to generate; 0 writes only the header.

    Raises:
        OSError: If ``output_path`` cannot be opened for writing.
    """
    fieldnames = [
        "SaleID", "SaleDate", "ShippingDate", "DeliveredDate",
        "ProductName", "ProductID", "ItemPrice", "SoldQuantity",
        "CustomerName", "CustomerID", "CustomerCountry",
        "CustomerCountryID", "ShopName", "ShopID",
        "ShopLocationCountry", "ShopLocationCountryID"
    ]

    # Build name -> 1-based position tables once, instead of a linear
    # list.index() scan per row. setdefault keeps the first position if a
    # name ever appears more than once, matching list.index() semantics.
    product_ids = {}
    for position, name in enumerate(product_names, start=1):
        product_ids.setdefault(name, position)
    country_ids = {}
    for position, name in enumerate(country_names, start=1):
        country_ids.setdefault(name, position)

    # Loop invariants: sale-date window and output date format.
    first_sale_day = datetime(2023, 1, 1)
    last_sale_day = datetime(2026, 12, 31)
    date_format = "%d/%m/%Y"

    with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # A running counter guarantees unique sale IDs; random draws from
        # 1..1,000,000 collide heavily once the row count approaches that range.
        for sale_id in range(1, num_rows + 1):
            sale_date = random_date(first_sale_day, last_sale_day)
            shipping_date = sale_date + timedelta(days=random.randint(1, 7))
            delivered_date = shipping_date + timedelta(days=random.randint(1, 7))

            product_name = random.choice(product_names)
            customer_country = random.choice(country_names)
            shop_location_country = random.choice(country_names)

            # NOTE(review): CustomerID and ShopID are still drawn independently
            # of the corresponding name/country columns, so they do not
            # identify a stable customer or shop across rows.
            writer.writerow({
                "SaleID": sale_id,
                "SaleDate": sale_date.strftime(date_format),
                "ShippingDate": shipping_date.strftime(date_format),
                "DeliveredDate": delivered_date.strftime(date_format),
                "ProductName": product_name,
                "ProductID": product_ids[product_name],
                "ItemPrice": f"${random.uniform(50, 1000):.2f}",
                "SoldQuantity": random.randint(1, 10),
                "CustomerName": generate_customer_name(),
                "CustomerID": random.randint(1, 100000),
                "CustomerCountry": customer_country,
                "CustomerCountryID": country_ids[customer_country],
                "ShopName": generate_shop_name(),
                "ShopID": random.randint(1, 100),
                "ShopLocationCountry": shop_location_country,
                "ShopLocationCountryID": country_ids[shop_location_country]
            })

# Destination and size of the generated dataset.
# NOTE: the directory is an absolute path inside one specific Windows user
# profile; change it before running on another machine or account.
output_directory = r"C:\Users\qmart\Documents"
output_filename = "sales_data.csv"
total_rows = 1_000_000
output_path = os.path.join(output_directory, output_filename)

# Make sure the destination folder exists (no error if it already does).
os.makedirs(output_directory, exist_ok=True)

# Write the dataset, then report where it was saved.
generate_and_export_csv(output_path, total_rows)
print(f"CSV export complete. File saved at: {output_path}")

# Output: CSV export complete. File saved at: C:\Users\qmart\Documents\sales_data.csv
