<a href="https://colab.research.google.com/github/Syamabbas/Burger_King/blob/main/Make_dummy_dataset_burger_king_sales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
# Directories that contain no files produce no output.
kaggle_input_files = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file in kaggle_input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import random

# ==========================================================
# 1. CONFIGURATION
# ==========================================================
N_DAYS = 365  # number of consecutive days to simulate
output_path = "burger_king_realistic_multi_item.xlsx"  # workbook written by the export step

# Seed every random source this notebook imports. Seeding NumPy alone leaves
# draws from the stdlib `random` module different on each run, which makes the
# generated dataset non-reproducible.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# ==========================================================
# 2. MENU DATA + CATEGORY
# ==========================================================
# Column layout of each menu tuple below, in order.
menu_columns = ["code", "burger_name", "burger_ingredients", "true_category"]

# One tuple per menu item: (code, display name, ingredient list, category).
menu_data = [
    (
        "whopper",
        "Whopper Burger",
        "Beef Patty, Lettuce, Tomato, Onion, Pickles, Mayo",
        "Beef",
    ),
    (
        "bk_chicken",
        "BK Chicken Burger",
        "Crispy Chicken, Lettuce, Mayo",
        "Chicken",
    ),
    (
        "cheeseburger",
        "Cheeseburger",
        "Beef Patty, Cheese, Pickles, Ketchup, Mustard",
        "Beef",
    ),
    (
        "double_steakhouse",
        "Double Steakhouse Burger",
        "Double Beef Patty, Crispy Onion, BBQ Sauce",
        "Premium",
    ),
    (
        "plant_based_whopper",
        "Plant Based Whopper",
        "Plant Patty, Lettuce, Tomato, Vegan Mayo",
        "Veggie",
    ),
]
df_menu = pd.DataFrame(data=menu_data, columns=menu_columns)

# Purchase proportion of each menu item; positions line up with the rows of
# menu_data / df_menu.
menu_weights = [0.35, 0.20, 0.25, 0.10, 0.10]

# Available burger sizes paired with the probability of each being picked.
size_distribution = [("S", 0.25), ("M", 0.50), ("L", 0.25)]
sizes = [label for label, _ in size_distribution]
size_weights = [weight for _, weight in size_distribution]

# Base price per menu code; this is the size-"S" price because the "S"
# multiplier below is 1.0.
base_price = dict(
    whopper=35000,
    bk_chicken=30000,
    cheeseburger=28000,
    double_steakhouse=48000,
    plant_based_whopper=42000,
)

# Factor applied to the base price for each size.
size_multiplier = dict(zip(sizes, (1.0, 1.2, 1.4)))

# ==========================================================
# 3. GENERATE DATE RANGE
# ==========================================================
# First simulated day; every later entry is a whole-day offset from it, giving
# N_DAYS consecutive timestamps.
start_date = pd.to_datetime("2025-01-01")
one_day = pd.Timedelta(days=1)
dates = [start_date + offset * one_day for offset in range(N_DAYS)]

# ==========================================================
# 4. SIMULATE DAILY ORDERS
# ==========================================================
def realistic_time():
    """Return a random "HH:MM:SS" string weighted toward meal-time peaks.

    50% of draws fall in the lunch window (hours 11-13), 30% in the dinner
    window (hours 17-19) and the remaining 20% anywhere in hours 9-21.
    Minutes and seconds are uniform over 0-59.
    """
    # Draw from NumPy's global generator (not the stdlib `random` module) so
    # the np.random.seed(...) call in the configuration covers these draws too.
    r = np.random.random()
    if r < 0.50:
        hour = np.random.randint(11, 14)  # lunch
    elif r < 0.80:
        hour = np.random.randint(17, 20)  # dinner
    else:
        hour = np.random.randint(9, 22)
    minute = np.random.randint(0, 60)
    second = np.random.randint(0, 60)
    return f"{hour:02d}:{minute:02d}:{second:02d}"


all_rows = []
burger_id_counter = 1  # global counter for all burger items

for date in dates:

    # Weekends are busier than weekdays (randint's upper bound is exclusive).
    if date.weekday() in [5, 6]:  # Saturday/Sunday
        num_orders = np.random.randint(180, 260)
    else:
        num_orders = np.random.randint(120, 180)

    # order_id restarts at 1 for every day, so it is only unique in
    # combination with order_date.
    for order_id in range(1, num_orders + 1):

        # Number of line items in this order.
        num_items = np.random.choice([1, 2, 3, 4], p=[0.55, 0.30, 0.10, 0.05])

        # All items of one order share the same transaction time.
        order_time = realistic_time()

        # Pick the menu entry for each line item (with replacement, so the
        # same burger can appear more than once in an order).
        sampled_menu = df_menu.sample(n=num_items, replace=True, weights=menu_weights).reset_index(drop=True)

        for _, row in sampled_menu.iterrows():

            burger_size = np.random.choice(sizes, p=size_weights)

            # Round instead of truncating: the multipliers are binary floats,
            # so e.g. 42000 * 1.4 evaluates to 58799.99999999999 and int()
            # would price that item one unit too low.
            unit_price = int(round(base_price[row["code"]] * size_multiplier[burger_size]))
            # 90% of items get no discount; the rest get a flat amount off.
            discount = np.random.choice([0, 2000, 3000, 5000], p=[0.90, 0.04, 0.04, 0.02])
            final_unit_price = unit_price - discount

            quantity = np.random.choice([1, 2, 3], p=[0.70, 0.25, 0.05])

            all_rows.append({
                "burger_id": burger_id_counter,
                "order_id": order_id,
                "order_date": date,
                "order_time": order_time,

                "code": row["code"],
                "burger_name": row["burger_name"],
                "burger_ingredients": row["burger_ingredients"],
                "burger_category": row["true_category"],
                "burger_size": burger_size,
                "burger_name_id": row["code"] + "_" + burger_size.lower(),

                "quantity": quantity,
                "unit_price": unit_price,
                "discount": discount,
                "final_unit_price": final_unit_price,
                "total_price": final_unit_price * quantity
            })

            burger_id_counter += 1

# ==========================================================
# 5. FINALIZE DATAFRAME
# ==========================================================
# Output columns, in the order they appear in the exported sheet. The
# per-row "code" key is intentionally not exported.
cols = [
    "burger_id", "order_id", "burger_name_id",
    "quantity", "order_date", "order_time",
    "unit_price", "discount", "final_unit_price",
    "total_price",
    "burger_size", "burger_category",
    "burger_ingredients", "burger_name"
]

# Passing `columns=` selects and orders the fields while the frame is built.
# Unlike `pd.DataFrame(all_rows)[cols]`, this also works when all_rows is
# empty (it yields an empty frame with these columns instead of a KeyError).
df = pd.DataFrame(all_rows, columns=cols)

# ==========================================================
# 6. EXPORT
# ==========================================================
# Write the dataset to the workbook at output_path (no index column), then
# report the row count and the destination.
with pd.ExcelWriter(output_path) as workbook:
    df.to_excel(workbook, index=False)

print(f"Data selesai dibuat! Total rows: {len(df)}")
print("File tersimpan di:", output_path)
