In [46]:
import pandas as pd
import numpy as np
import random
from collections import defaultdict
from datetime import timedelta

# Constants
# Inclusive bounds for the random number of customers drawn for each day.
DAILY_CUSTOMER_MIN = 1070
DAILY_CUSTOMER_MAX = 1100
# Added on top of the daily draw on Saturdays and Sundays.
WEEKEND_EXTRA_CUSTOMERS = 50
# Factor applied to a product's base price to get the recorded sale price.
PRICE_MULTIPLIER = 1.15
# Number of units added to inventory for every case ordered.
CASE_SIZE = 12
# Stock levels are expressed in "days of supply"; the script multiplies these
# by 40 to get unit counts for starting stock and restock thresholds.
INITIAL_DAYS_SUPPLY = 3
INITIAL_MILK_SUPPLY = 1.5

# Load product data
# The catalogue is a pipe-delimited text file decoded as ISO-8859-1.
product_file = "/Users/nikitabrahmbhatt/Desktop/study/DW/DWHW-2/Products1.txt"
df_products = pd.read_csv(product_file, sep="|", encoding="ISO-8859-1")

# BasePrice is handled as text here: drop the literal "$" (not a regex), then
# convert the remaining text to float.
base_price_text = df_products["BasePrice"].str.replace("$", "", regex=False)
df_products["BasePrice"] = base_price_text.astype(float)

# SKU -> base price mapping used when pricing each recorded sale.
base_price_by_sku = df_products.set_index("SKU")["BasePrice"]
product_lookup = base_price_by_sku.to_dict()

# Array of every SKU in file order; purchases are sampled from it.
skus = df_products["SKU"].values

# Initialize inventory
# Every SKU starts with the same on-hand count: INITIAL_DAYS_SUPPLY * 40 units,
# but never fewer than one full case.
# NOTE(review): 40 presumably stands for expected units sold per day — confirm.
inventory = dict.fromkeys(skus, max(INITIAL_DAYS_SUPPLY * 40, CASE_SIZE))

# NOTE(review): all other keys come from the SKU column. Confirm "Milk" is an
# actual SKU value; if it is not, this entry is never sold, because purchases
# are drawn from `skus` only.
inventory["Milk"] = max(INITIAL_MILK_SUPPLY * 40, CASE_SIZE)

# Running count of cases ordered per SKU; a SKU not yet restocked reads as 0.
total_cases_ordered = defaultdict(int)

# Generate sales data
# Each simulated day: draw a customer count, top up low inventory, then let
# every customer buy 1-5 randomly chosen items. One row is recorded per unit sold.
sales_data = []
start_date = pd.Timestamp("2024-01-01")
# NOTE(review): 365 consecutive days from 2024-01-01 end on 2024-12-30
# (2024 is a leap year) — confirm that stopping there is intended.
dates = [start_date + timedelta(days=offset) for offset in range(365)]

for date in dates:
    # Formatted once per day; every row recorded for this day shares it.
    date_key = date.strftime("%Y%m%d")

    # weekday() returns 5 for Saturday and 6 for Sunday.
    daily_customers = random.randint(DAILY_CUSTOMER_MIN, DAILY_CUSTOMER_MAX)
    if date.weekday() >= 5:
        daily_customers += WEEKEND_EXTRA_CUSTOMERS

    # Restock process - runs before any sales of the day.
    for stocked_sku in inventory:
        # The "Milk" entry uses the milk supply level (1.5 days); everything
        # else uses the general one (3 days).
        days_of_supply = INITIAL_MILK_SUPPLY if stocked_sku == "Milk" else INITIAL_DAYS_SUPPLY
        reorder_point = days_of_supply * 40

        on_hand = inventory[stocked_sku]
        if on_hand >= reorder_point:
            continue

        # Whole cases only. NOTE(review): when the shortfall is an exact
        # multiple of CASE_SIZE this orders one case more than is needed to
        # reach the reorder point — confirm that is intended.
        cases_to_order = (reorder_point - on_hand) // CASE_SIZE + 1
        inventory[stocked_sku] = on_hand + cases_to_order * CASE_SIZE
        total_cases_ordered[stocked_sku] += cases_to_order

    # Simulate sales
    for customer_id in range(daily_customers):
        basket_size = random.randint(1, 5)
        # Sampling with replacement: a customer may pick the same SKU twice.
        basket = np.random.choice(skus, size=basket_size, replace=True)

        for wanted_sku in basket:
            sold_sku = wanted_sku
            if inventory[wanted_sku] <= 0:
                # Out of stock: substitute a random SKU that still has units.
                in_stock = [s for s in skus if inventory[s] > 0]
                if not in_stock:
                    # Nothing left anywhere; this item is simply not sold.
                    continue
                sold_sku = random.choice(in_stock)

            inventory[sold_sku] -= 1
            sales_data.append([
                date_key,
                customer_id,
                sold_sku,
                round(product_lookup[sold_sku] * PRICE_MULTIPLIER, 2),
                inventory[sold_sku],
                total_cases_ordered[sold_sku],
            ])

# Save results to CSV
# Column order matches the order of values in each recorded sale row.
sales_columns = [
    "Date",
    "Customer#",
    "SKU",
    "Price",
    "ItemsLeft",
    "TotalCasesOrdered",
]
df_sales = pd.DataFrame(data=sales_data, columns=sales_columns)
# index=False keeps the DataFrame's row index out of the file.
df_sales.to_csv("updated_grocery_data.csv", index=False)
print("✅ Updated grocery data has been successfully saved as 'updated_grocery_data.csv'")


✅ Updated grocery data has been successfully saved as 'updated_grocery_data.csv'
