# In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Function to generate random dates within a range
def random_dates(start_date, end_date, n=10):
    """Return ``n`` randomly chosen dates between two endpoints.

    Each result is ``start_date`` plus a whole number of days picked with
    ``random.randint`` from ``0`` to ``(end_date - start_date).days``, so
    both endpoints can be produced and the same date may appear repeatedly.

    Args:
        start_date: Earliest date that can be returned.
        end_date: Latest date that can be returned; must not be earlier
            than ``start_date``.
        n: Number of dates to generate (default 10).

    Returns:
        A list of ``n`` dates.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    span_days = (end_date - start_date).days
    # Fail with a clear message instead of randint's "empty range" error.
    if span_days < 0:
        raise ValueError(
            'end_date ({}) must not be earlier than start_date ({})'.format(
                end_date, start_date
            )
        )
    return [
        start_date + timedelta(days=random.randint(0, span_days))
        for _ in range(n)
    ]

# Function to generate synthetic sales data
def generate_sales_data(num_records):
    """Build a DataFrame of randomly generated sales transactions.

    Every row gets a random date between 2022-01-01 and 2023-12-31, a store
    key from 1 to 7, one of 50 products (``P001`` .. ``P050``), a sales
    amount between 100 and 5000 rounded to two decimals, and 1 to 10 units
    sold. Transaction numbers are sequential, starting at ``TN0001``.

    Product attributes are looked up from a fixed catalogue instead of being
    drawn independently per row, so every row with the same ``Product Key``
    carries the same ``Product Description`` and ``Product Department``.

    Args:
        num_records: Number of rows to generate.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date Key`` (``YYYYMMDD``
        string), ``Store Key``, ``Product Key``, ``Product Department``,
        ``Product Description``, ``Transaction No.#``, ``Sales Amount(CAD)``
        and ``Units Sold``.
    """
    start_date = datetime(2022, 1, 1)
    end_date = datetime(2023, 12, 31)

    departments = [
        'Televisions', 'Smartphones', 'Gaming Consoles', 'Audio Devices',
        'Computing Devices', 'Imaging Devices', 'Drones', 'Wearable Tech',
        'E-readers', 'Action Cameras', 'Smart Home', 'VR Devices',
    ]

    # Product catalogue: key -> (description, department). The description
    # number matches the key number, and departments are assigned round-robin
    # so the key-to-department mapping is identical on every run.
    catalogue = {
        'P{:03d}'.format(i): (
            'Product {}'.format(i),
            departments[(i - 1) % len(departments)],
        )
        for i in range(1, 51)
    }

    dates = random_dates(start_date, end_date, num_records)
    store_keys = random.choices(range(1, 8), k=num_records)
    product_keys = random.choices(list(catalogue), k=num_records)

    transaction_numbers = ['TN{:04d}'.format(i) for i in range(1, num_records + 1)]
    sales_amounts = [round(random.uniform(100, 5000), 2) for _ in range(num_records)]
    units_sold = [random.randint(1, 10) for _ in range(num_records)]

    sales_data = pd.DataFrame({
        'Date Key': [date.strftime('%Y%m%d') for date in dates],
        'Store Key': store_keys,
        'Product Key': product_keys,
        # Both product attributes come from the catalogue entry for the row's
        # key, keeping the three product columns mutually consistent.
        'Product Department': [catalogue[key][1] for key in product_keys],
        'Product Description': [catalogue[key][0] for key in product_keys],
        'Transaction No.#': transaction_numbers,
        'Sales Amount(CAD)': sales_amounts,
        'Units Sold': units_sold,
    })

    return sales_data

# Build the synthetic dataset: 500 sales records
sales_data = generate_sales_data(num_records=500)

# Persist the records as a CSV file, leaving out the DataFrame index column
sales_data.to_csv(path_or_buf='synthetic_sales_data.csv', index=False)
