In [21]:
import random
import pandas as pd
from datetime import datetime, timedelta
import numpy as np

In [22]:
# Catalog of the 30 product names, one per line
products = [
    "Chocolate Cake",
    "Carrot Cake",
    "Cheese Bread",
    "Whole Wheat Bread",
    "Vanilla Ice Cream",
    "Chocolate Ice Cream",
    "Pretzels",
    "Cookies",
    "Lemon Pie",
    "Apple Pie",
    "Cornbread",
    "Baguette",
    "Strawberry Ice Cream",
    "Oatmeal Cookies",
    "Chocolate Pie",
    "Banana Bread",
    "Garlic Bread",
    "Coconut Ice Cream",
    "Shortbread Cookies",
    "Pecan Pie",
    "Red Velvet Cake",
    "Rye Bread",
    "Pistachio Ice Cream",
    "Cheese Crackers",
    "Passion Fruit Pie",
    "Passion Fruit Cake",
    "Potato Bread",
    "Mint Chocolate Chip Ice Cream",
    "Cheese Puffs",
    "Strawberry Pie",
]

# The two product types
types = ["Sweet", "Savory"]

# The five product categories
categories = ["Cakes", "Breads", "Ice Creams", "Cookies", "Pies"]

# (product, type, category) rows; the mapping below is derived from them
_PRODUCT_ROWS = [
    ("Chocolate Cake", "Sweet", "Cakes"),
    ("Carrot Cake", "Sweet", "Cakes"),
    ("Cheese Bread", "Savory", "Breads"),
    ("Whole Wheat Bread", "Savory", "Breads"),
    ("Vanilla Ice Cream", "Sweet", "Ice Creams"),
    ("Chocolate Ice Cream", "Sweet", "Ice Creams"),
    ("Pretzels", "Savory", "Cookies"),
    ("Cookies", "Sweet", "Cookies"),
    ("Lemon Pie", "Sweet", "Pies"),
    ("Apple Pie", "Sweet", "Pies"),
    ("Cornbread", "Savory", "Breads"),
    ("Baguette", "Savory", "Breads"),
    ("Strawberry Ice Cream", "Sweet", "Ice Creams"),
    ("Oatmeal Cookies", "Sweet", "Cookies"),
    ("Chocolate Pie", "Sweet", "Pies"),
    ("Banana Bread", "Sweet", "Breads"),
    ("Garlic Bread", "Savory", "Breads"),
    ("Coconut Ice Cream", "Sweet", "Ice Creams"),
    ("Shortbread Cookies", "Sweet", "Cookies"),
    ("Pecan Pie", "Sweet", "Pies"),
    ("Red Velvet Cake", "Sweet", "Cakes"),
    ("Rye Bread", "Savory", "Breads"),
    ("Pistachio Ice Cream", "Sweet", "Ice Creams"),
    ("Cheese Crackers", "Savory", "Cookies"),
    ("Passion Fruit Pie", "Sweet", "Pies"),
    ("Passion Fruit Cake", "Sweet", "Cakes"),
    ("Potato Bread", "Savory", "Breads"),
    ("Mint Chocolate Chip Ice Cream", "Sweet", "Ice Creams"),
    ("Cheese Puffs", "Savory", "Cookies"),
    ("Strawberry Pie", "Sweet", "Pies"),
]

# Lookup: product name -> (type, category)
product_info = {
    name: (kind, family) for name, kind, family in _PRODUCT_ROWS
}

# Function to generate a sales price
def sales_price_function(categories):
    """Draw a random sales price for one product category.

    The price is sampled uniformly from the category's price band and
    rounded to 2 decimal places.

    Parameters
    ----------
    categories : str
        A single category name: "Breads", "Cakes", "Ice Creams",
        "Cookies" or "Pies". (The plural parameter name is kept so that
        existing keyword calls keep working.)

    Returns
    -------
    float
        The rounded price.

    Raises
    ------
    ValueError
        If ``categories`` is not one of the known category names. The
        function previously fell through and returned ``None``, which only
        surfaced later as a ``TypeError`` when a cost was derived from it.
    """
    # (low, high) bounds of the uniform price band for each category
    price_bands = {
        "Breads": (2.0, 10.0),
        "Cakes": (10.0, 50.0),
        "Ice Creams": (5.0, 20.0),
        "Cookies": (1.0, 5.0),
        "Pies": (15.0, 40.0),
    }
    try:
        low, high = price_bands[categories]
    except (KeyError, TypeError):
        # TypeError covers unhashable input such as a list of categories
        raise ValueError(
            f"Unknown category {categories!r}; expected one of {sorted(price_bands)}"
        ) from None
    return round(random.uniform(low, high), 2)

# Function to generate a cost price (lower than the sales price)
def cost_price_function(sales_price):
    """Derive a cost price as a random fraction of the sales price.

    The fraction is drawn uniformly between 0.45 and 0.7, and the result
    is rounded to 2 decimal places.
    """
    cost_ratio = random.uniform(0.45, 0.7)
    raw_cost = sales_price * cost_ratio
    return round(raw_cost, 2)

# Seed the global RNG so the generated prices are the same on every
# Restart & Run All (later draws from `random` follow deterministically
# when cells are run in order).
SEED = 42
random.seed(SEED)

# Dictionary to store product data:
# product name -> {"Type", "Category", "Sales", "Cost"}
product_data = {}

# Generate one fixed sales/cost price pair for every product.
# The loop targets are deliberately NOT named `types` / `categories`:
# those names are the module-level lists and must not be overwritten
# with a single product's string values.
for product in products:
    product_type, category = product_info[product]
    sales_price = sales_price_function(category)
    cost_price = cost_price_function(sales_price)
    product_data[product] = {
        "Type": product_type,
        "Category": category,
        "Sales": sales_price,
        "Cost": cost_price
    }

In [23]:
# Function to generate random dates within a range
def generate_random_date(start, end):
    """Return a random date between ``start`` and ``end``, both inclusive.

    The result is ``start`` shifted forward by a whole number of days, so
    every calendar day from ``start`` up to and including ``end`` can be
    drawn. (The previous ``randrange(delta.days)`` could never return the
    end date and raised on ``start == end``.)

    Parameters
    ----------
    start, end : datetime
        Bounds of the range.

    Returns
    -------
    datetime
        ``start`` plus 0..N days, where N is the whole-day gap to ``end``.

    Raises
    ------
    ValueError
        If ``end`` is earlier than ``start``.
    """
    span_days = (end - start).days
    if span_days < 0:
        raise ValueError("end must not be earlier than start")
    # randint is inclusive on both ends, so offset == span_days yields `end`
    offset = random.randint(0, span_days)
    return start + timedelta(days=offset)

# Date range for sales
start_date = datetime(2022, 1, 1)
end_date = datetime(2023, 12, 31)

# Number of synthetic sales records to generate
N_SALES = 36000

# Product names are materialized once (not on every iteration); `weights`
# is positionally aligned with this list.
product_names = list(product_data)

# One random weight per product, passed to random.choices below so that
# products are not all picked with equal probability
weights = [random.uniform(0.5, 2.0) for _ in product_names]

# List to store sales data, one row per sale
sales = []
for _ in range(N_SALES):
    product = random.choices(product_names, weights=weights)[0]
    sale_date = generate_random_date(start_date, end_date)
    details = product_data[product]
    # Row order must match the DataFrame columns below.
    # (No local named `type`: that would shadow the builtin.)
    sales.append([
        sale_date,
        product,
        details["Type"],
        details["Category"],
        details["Cost"],
        details["Sales"],
    ])

# Create DataFrame
df_sales = pd.DataFrame(
    sales,
    columns=["Sale Date", "Product", "Type", "Category", "Cost Price", "Sales Price"],
)

# Save the DataFrame to a CSV file; the row index is written under the header "ID"
df_sales.to_csv("sales_products.csv", index_label='ID')

In [24]:
# Show the generated sales DataFrame as this cell's output
df_sales

Unnamed: 0,Sale Date,Product,Type,Category,Cost Price,Sales Price
0,2022-05-28,Garlic Bread,Savory,Breads,5.31,9.22
1,2022-08-19,Cornbread,Savory,Breads,1.41,2.94
2,2022-04-13,Apple Pie,Sweet,Pies,9.73,20.38
3,2022-12-13,Cheese Puffs,Savory,Cookies,2.49,4.71
4,2023-08-03,Cookies,Sweet,Cookies,1.34,2.50
...,...,...,...,...,...,...
35995,2022-01-07,Apple Pie,Sweet,Pies,9.73,20.38
35996,2023-07-05,Carrot Cake,Sweet,Cakes,7.37,14.03
35997,2023-09-07,Red Velvet Cake,Sweet,Cakes,30.92,45.57
35998,2023-03-01,Red Velvet Cake,Sweet,Cakes,30.92,45.57
