# First, we have to break the data down per day (and meal period) for each revenue center.

In [4]:
import pandas as pd

# Location of the raw workbook, relative to the current working directory.
file_path = "../data/raw/Hotel_Revenue_Data.xlsx"

# The source is an Excel workbook, so it is loaded with read_excel rather
# than read_csv; the "Date" column is parsed as dates while loading.
df = pd.read_excel(io=file_path, parse_dates=["Date"])

# Sanity check: print (rows, columns), then display the first five records.
print(df.shape)
df.head(n=5)


(120562, 4)


Unnamed: 0,Date,MealPeriod,CheckTotal,RevenueCenterName
0,2023-04-18,Dinner,125.0,RevenueCenter_1
1,2023-04-18,Dinner,37.5,RevenueCenter_1
2,2023-04-18,Dinner,0.0,RevenueCenter_1
3,2023-04-18,Dinner,0.0,RevenueCenter_1
4,2023-04-18,Dinner,70.0,RevenueCenter_2


In [9]:
import pandas as pd
import itertools

# Build a complete (Date, MealPeriod, RevenueCenterName) table of summed
# check totals: every combination of the observed key values gets a row, and
# combinations with no checks are reported with a total of 0.
#
# NOTE(review): rows are grouped by distinct "Date" value, so this is "per
# day" only if "Date" carries no time-of-day component -- confirm.

# The three columns that identify one row of the output.
key_columns = ["Date", "MealPeriod", "RevenueCenterName"]

# Step 1: Get all unique values of each key.
# Missing values are excluded because groupby (Step 3) drops NaN keys by
# default; leaving them in would add all-zero rows keyed by NaN to the grid.
all_dates = df["Date"].dropna().unique()
all_meals = df["MealPeriod"].dropna().unique()
all_centers = df["RevenueCenterName"].dropna().unique()

# Step 2: Create the full cartesian product of all combinations.
# MultiIndex.from_product keeps each key's dtype (so "Date" stays a datetime
# column, even for empty input) and avoids materialising a list of tuples.
all_combinations = pd.MultiIndex.from_product(
    [all_dates, all_meals, all_centers],
    names=key_columns,
).to_frame(index=False)

# Step 3: Sum the check totals that actually occur for each combination.
actual_totals = (
    df.groupby(key_columns)["CheckTotal"]
    .sum()
    .reset_index()
    .rename(columns={"CheckTotal": "TotalCheckAmount"})
)

# Step 4: Left-merge onto the full grid so combinations without any checks
# are kept, then fill their missing totals with 0.
final_df = (
    all_combinations
    .merge(actual_totals, on=key_columns, how="left")
    .fillna({"TotalCheckAmount": 0})
    .sort_values(by=key_columns)
)

# Step 5: Save to CSV (written to the current working directory).
final_df.to_csv("complete_daily_revenue_totals.csv", index=False)
