In [3]:
import pandas as pd
from datetime import datetime, timedelta
from calendar import monthrange
from collections import Counter

# Load the dataset (path is relative to the notebook's working directory).
# Columns read later in this cell: month_year, week, and the <meal>_kg / <meal>_items
# pairs for breakfast, lunch and dinner.
file_path = "aggregated_daily_data_with_date_aug2023_aug2024.csv"
data = pd.read_csv(file_path)

# Helper function to parse kg data into a dictionary for analysis
def parse_kg_data(kg_column, items_column):
    """Pair each dish in a ';'-separated list with its quantity.

    Args:
        kg_column: ';'-separated quantities, e.g. "12;7;3". The i-th quantity
            belongs to the i-th item.
        items_column: ';'-separated dish names, in the same order.

    Returns:
        dict mapping the stripped dish name to its integer quantity. Pairs
        whose quantity is not made of digits only are skipped. If the two
        lists differ in length, the unmatched tail is ignored. A dish listed
        more than once has its quantities added together. Missing input
        (None / NaN) yields an empty dict.
    """
    # pandas hands back NaN (a float) for an empty CSV cell, which has no
    # .split(); treat a missing side as "nothing recorded" instead of crashing.
    if pd.isna(kg_column) or pd.isna(items_column):
        return {}

    # str() lets a cell that pandas parsed as a bare number (single item)
    # go through the same path as a ';'-joined string.
    names = str(items_column).split(";")
    amounts = str(kg_column).split(";")

    totals = {}
    for name, amount in zip(names, amounts):
        amount = amount.strip()
        if not amount.isdigit():
            continue
        dish = name.strip()
        # Accumulate rather than overwrite so a repeated dish is not undercounted.
        totals[dish] = totals.get(dish, 0) + int(amount)
    return totals

# Function to calculate the accurate date range for each week
def calculate_week_date_range(month_year, week):
    """Return the date span of the N-th 7-day block of a month.

    Week 1 starts on the 1st, week 2 on the 8th, and so on. The last block is
    cut off at the final day of the month.

    Args:
        month_year: month label in "%b%Y" form, e.g. "Aug2023".
        week: 1-based week number, as an int or a numeric string.

    Returns:
        str formatted as "dd/mm/YYYY-dd/mm/YYYY".

    Raises:
        ValueError: if month_year does not match "%b%Y", if week is not an
            integer, or if that week does not start inside the month (e.g.
            week 5 of a 28-day February). Previously such a week silently
            produced a range in the following month.
    """
    month_start = datetime.strptime(month_year, "%b%Y")
    week_number = int(week)

    # Measure the month from its own first day, not from the shifted start,
    # so an overflowing week is detected rather than clamped to the wrong month.
    days_in_month = monthrange(month_start.year, month_start.month)[1]
    first_day = (week_number - 1) * 7 + 1
    if not 1 <= first_day <= days_in_month:
        raise ValueError(
            f"week {week_number} does not start within {month_year} "
            f"({days_in_month} days)"
        )

    last_day = min(first_day + 6, days_in_month)
    start_of_week = month_start.replace(day=first_day)
    end_of_week = month_start.replace(day=last_day)
    return f"{start_of_week.strftime('%d/%m/%Y')}-{end_of_week.strftime('%d/%m/%Y')}"

# Function to generate weekly report with aggregated most/least consumed dishes in a single cell
def generate_weekly_report(data, output_path="weekly_report.csv", top_n=3):
    """Aggregate per-dish quantities by week and meal, then write a CSV summary.

    Rows are grouped by "<month_year>_week<N>", where N is the row's 'week'
    value with the literal text "week" removed. For each group and meal the
    quantities returned by parse_kg_data are summed per dish, and the top_n
    largest and top_n smallest totals are joined into single cells as
    "<dish>: <qty>kg" pairs separated by "; ".

    Args:
        data: DataFrame with columns month_year, week (a string), and the
            breakfast/lunch/dinner "_kg" and "_items" columns.
        output_path: destination CSV. Defaults to "weekly_report.csv".
        top_n: how many dishes to list in each of the most/least cells.

    Raises:
        ValueError: if top_n is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    meal_columns = {
        'Breakfast': ('breakfast_kg', 'breakfast_items'),
        'Lunch': ('lunch_kg', 'lunch_items'),
        'Dinner': ('dinner_kg', 'dinner_items'),
    }
    week_data = {}

    for _, row in data.iterrows():
        week_number = row['week'].replace('week', '')
        week_key = f"{row['month_year']}_week{week_number}"

        if week_key not in week_data:
            # The date range depends only on (month_year, week), so compute it
            # once per week instead of once per row.
            week_data[week_key] = {
                'Date Range': calculate_week_date_range(row['month_year'], week_number),
                'Totals': {meal: Counter() for meal in meal_columns},
            }

        totals = week_data[week_key]['Totals']
        for meal, (kg_col, items_col) in meal_columns.items():
            totals[meal].update(parse_kg_data(row[kg_col], row[items_col]))

    weekly_summary = []
    for week_key, week in week_data.items():
        for meal, counter in week['Totals'].items():
            most = counter.most_common(top_n)
            # Reverse slice from the tail: the smallest totals, smallest first.
            least = counter.most_common()[:-(top_n + 1):-1]
            weekly_summary.append({
                'Week': week_key,
                'Date Range': week['Date Range'],
                'Meal': meal,
                'Most Consumed Dishes (kg)': "; ".join(f"{item}: {qty}kg" for item, qty in most),
                'Least Consumed Dishes (kg)': "; ".join(f"{item}: {qty}kg" for item, qty in least),
            })

    # Explicit columns keep the header row even when there is nothing to
    # report, so the file can still be read back with pd.read_csv.
    weekly_summary_df = pd.DataFrame(
        weekly_summary,
        columns=['Week', 'Date Range', 'Meal',
                 'Most Consumed Dishes (kg)', 'Least Consumed Dishes (kg)'],
    )
    weekly_summary_df.to_csv(output_path, index=False)

# Function to generate monthly report with aggregated most/least consumed dishes in a single cell
def generate_monthly_report(data, output_path="monthly_report.csv", top_n=3):
    """Aggregate per-dish quantities by month and meal, then write a CSV summary.

    Rows are grouped by their 'month_year' value. For each month and meal the
    quantities returned by parse_kg_data are summed per dish, and the top_n
    largest and top_n smallest totals are joined into single cells as
    "<dish>: <qty>kg" pairs separated by "; ".

    Args:
        data: DataFrame with a month_year column and the breakfast/lunch/dinner
            "_kg" and "_items" columns.
        output_path: destination CSV. Defaults to "monthly_report.csv".
        top_n: how many dishes to list in each of the most/least cells.

    Raises:
        ValueError: if top_n is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    meal_columns = {
        'Breakfast': ('breakfast_kg', 'breakfast_items'),
        'Lunch': ('lunch_kg', 'lunch_items'),
        'Dinner': ('dinner_kg', 'dinner_items'),
    }
    month_data = {}

    for _, row in data.iterrows():
        # setdefault creates the per-meal counters the first time a month appears.
        totals = month_data.setdefault(
            row['month_year'], {meal: Counter() for meal in meal_columns}
        )
        for meal, (kg_col, items_col) in meal_columns.items():
            totals[meal].update(parse_kg_data(row[kg_col], row[items_col]))

    monthly_summary = []
    for month_year, meals in month_data.items():
        for meal, counter in meals.items():
            most = counter.most_common(top_n)
            # Reverse slice from the tail: the smallest totals, smallest first.
            least = counter.most_common()[:-(top_n + 1):-1]
            monthly_summary.append({
                'Month': month_year,
                'Meal': meal,
                'Most Consumed Dishes (kg)': "; ".join(f"{item}: {qty}kg" for item, qty in most),
                'Least Consumed Dishes (kg)': "; ".join(f"{item}: {qty}kg" for item, qty in least),
            })

    # Explicit columns keep the header row even when there is nothing to
    # report, so the file can still be read back with pd.read_csv.
    monthly_summary_df = pd.DataFrame(
        monthly_summary,
        columns=['Month', 'Meal',
                 'Most Consumed Dishes (kg)', 'Least Consumed Dishes (kg)'],
    )
    monthly_summary_df.to_csv(output_path, index=False)

# Generate both reports from the loaded daily data. Each call returns nothing and
# writes its CSV (weekly_report.csv, then monthly_report.csv) to the working directory.
generate_weekly_report(data)
generate_monthly_report(data)
print("Weekly and Monthly reports saved as 'weekly_report.csv' and 'monthly_report.csv'.")


Weekly and Monthly reports saved as 'weekly_report.csv' and 'monthly_report.csv'.


In [4]:
def most_expanded_weekly(df, output_path='most_expanded_weekly_report.csv'):
    """Expand weekly 'Most Consumed Dishes (kg)' cells into one row per dish.

    Each cell holds ';'-separated "<dish>: <qty>kg" pairs. Every pair becomes
    a row carrying the source row's Week, Date Range and Meal. The result is
    written to output_path as CSV and a confirmation line is printed.

    Args:
        df: DataFrame with columns 'Week', 'Date Range', 'Meal' and
            'Most Consumed Dishes (kg)'.
        output_path: destination CSV. Defaults to the original file name.

    Raises:
        ValueError: if a non-blank pair contains no ':' separator.
    """
    most_expanded_data = []

    for _, row in df.iterrows():
        cell = row['Most Consumed Dishes (kg)']
        # An empty cell comes back from pd.read_csv as NaN (a float), which
        # has no .split(); there is nothing to expand for such a row.
        if not isinstance(cell, str):
            continue

        for dish in cell.split(';'):
            if not dish.strip():
                continue
            # Split on the last colon so a dish name containing ':' stays whole.
            dish_name, quantity_kg = dish.rsplit(':', 1)

            # Quantities are stored as "<number>kg". Without removing the unit
            # the numeric check below never passes and every value is None.
            quantity_kg = quantity_kg.strip()
            if quantity_kg.endswith('kg'):
                quantity_kg = quantity_kg[:-2].strip()
            is_number = quantity_kg.replace('.', '', 1).isdigit()

            most_expanded_data.append({
                'Week': row['Week'],
                'Date Range': row['Date Range'],
                'Meal': row['Meal'],
                'Dish Name': dish_name.strip(),
                'Quantity (kg)': float(quantity_kg) if is_number else None
            })

    # Explicit columns keep the header even when no dish rows were produced.
    most_expanded_df = pd.DataFrame(
        most_expanded_data,
        columns=['Week', 'Date Range', 'Meal', 'Dish Name', 'Quantity (kg)'],
    )
    most_expanded_df.to_csv(output_path, index=False)

    print(f"Expanded weekly report for most consumed dishes saved as '{output_path}'.")



def least_expanded_weekly(df, output_path='least_expanded_weekly_report.csv'):
    """Expand weekly 'Least Consumed Dishes (kg)' cells into one row per dish.

    Each cell holds ';'-separated "<dish>: <qty>kg" pairs. Every pair becomes
    a row carrying the source row's Week, Date Range and Meal. The result is
    written to output_path as CSV and a confirmation line is printed.

    Args:
        df: DataFrame with columns 'Week', 'Date Range', 'Meal' and
            'Least Consumed Dishes (kg)'.
        output_path: destination CSV. Defaults to the original file name.

    Raises:
        ValueError: if a non-blank pair contains no ':' separator.
    """
    least_expanded_data = []

    for _, row in df.iterrows():
        cell = row['Least Consumed Dishes (kg)']
        # An empty cell comes back from pd.read_csv as NaN (a float), which
        # has no .split(); there is nothing to expand for such a row.
        if not isinstance(cell, str):
            continue

        for dish in cell.split(';'):
            if not dish.strip():
                continue
            # Split on the last colon so a dish name containing ':' stays whole.
            dish_name, quantity_kg = dish.rsplit(':', 1)

            # Quantities are stored as "<number>kg". Without removing the unit
            # the numeric check below never passes and every value is None.
            quantity_kg = quantity_kg.strip()
            if quantity_kg.endswith('kg'):
                quantity_kg = quantity_kg[:-2].strip()
            is_number = quantity_kg.replace('.', '', 1).isdigit()

            least_expanded_data.append({
                'Week': row['Week'],
                'Date Range': row['Date Range'],
                'Meal': row['Meal'],
                'Dish Name': dish_name.strip(),
                'Quantity (kg)': float(quantity_kg) if is_number else None
            })

    # Explicit columns keep the header even when no dish rows were produced.
    least_expanded_df = pd.DataFrame(
        least_expanded_data,
        columns=['Week', 'Date Range', 'Meal', 'Dish Name', 'Quantity (kg)'],
    )
    least_expanded_df.to_csv(output_path, index=False)

    print(f"Expanded weekly report for least consumed dishes saved as '{output_path}'.")


def most_expanded_monthly(df, output_path='most_expanded_monthly_report.csv'):
    """Expand monthly 'Most Consumed Dishes (kg)' cells into one row per dish.

    Each cell holds ';'-separated "<dish>: <qty>kg" pairs. Every pair becomes
    a row carrying the source row's Month and Meal. The result is written to
    output_path as CSV and a confirmation line is printed.

    Args:
        df: DataFrame with columns 'Month', 'Meal' and
            'Most Consumed Dishes (kg)'.
        output_path: destination CSV. Defaults to the original file name.

    Raises:
        ValueError: if a non-blank pair contains no ':' separator.
    """
    most_expanded_data = []

    for _, row in df.iterrows():
        cell = row['Most Consumed Dishes (kg)']
        # An empty cell comes back from pd.read_csv as NaN (a float), which
        # has no .split(); there is nothing to expand for such a row.
        if not isinstance(cell, str):
            continue

        for dish in cell.split(';'):
            if not dish.strip():
                continue
            # Split on the last colon so a dish name containing ':' stays whole.
            dish_name, quantity_kg = dish.rsplit(':', 1)

            # Quantities are stored as "<number>kg". Without removing the unit
            # the numeric check below never passes and every value is None.
            quantity_kg = quantity_kg.strip()
            if quantity_kg.endswith('kg'):
                quantity_kg = quantity_kg[:-2].strip()
            is_number = quantity_kg.replace('.', '', 1).isdigit()

            most_expanded_data.append({
                'Month': row['Month'],
                'Meal': row['Meal'],
                'Dish Name': dish_name.strip(),
                'Quantity (kg)': float(quantity_kg) if is_number else None
            })

    # Explicit columns keep the header even when no dish rows were produced.
    most_expanded_df = pd.DataFrame(
        most_expanded_data,
        columns=['Month', 'Meal', 'Dish Name', 'Quantity (kg)'],
    )
    most_expanded_df.to_csv(output_path, index=False)

    print(f"Expanded monthly report for most consumed dishes saved as '{output_path}'.")


def least_expanded_monthly(df, output_path='least_expanded_monthly_report.csv'):
    """Expand monthly 'Least Consumed Dishes (kg)' cells into one row per dish.

    Each cell holds ';'-separated "<dish>: <qty>kg" pairs. Every pair becomes
    a row carrying the source row's Month and Meal. The result is written to
    output_path as CSV and a confirmation line is printed.

    Args:
        df: DataFrame with columns 'Month', 'Meal' and
            'Least Consumed Dishes (kg)'.
        output_path: destination CSV. Defaults to the original file name.

    Raises:
        ValueError: if a non-blank pair contains no ':' separator.
    """
    least_expanded_data = []

    for _, row in df.iterrows():
        cell = row['Least Consumed Dishes (kg)']
        # An empty cell comes back from pd.read_csv as NaN (a float), which
        # has no .split(); there is nothing to expand for such a row.
        if not isinstance(cell, str):
            continue

        for dish in cell.split(';'):
            if not dish.strip():
                continue
            # Split on the last colon so a dish name containing ':' stays whole.
            dish_name, quantity_kg = dish.rsplit(':', 1)

            # Quantities are stored as "<number>kg". Without removing the unit
            # the numeric check below never passes and every value is None.
            quantity_kg = quantity_kg.strip()
            if quantity_kg.endswith('kg'):
                quantity_kg = quantity_kg[:-2].strip()
            is_number = quantity_kg.replace('.', '', 1).isdigit()

            least_expanded_data.append({
                'Month': row['Month'],
                'Meal': row['Meal'],
                'Dish Name': dish_name.strip(),
                'Quantity (kg)': float(quantity_kg) if is_number else None
            })

    # Explicit columns keep the header even when no dish rows were produced.
    least_expanded_df = pd.DataFrame(
        least_expanded_data,
        columns=['Month', 'Meal', 'Dish Name', 'Quantity (kg)'],
    )
    least_expanded_df.to_csv(output_path, index=False)

    print(f"Expanded monthly report for least consumed dishes saved as '{output_path}'.")


# Load the summary reports written by generate_weekly_report / generate_monthly_report.
# Note: pd.read_csv turns empty cells into NaN, so a meal with no dishes arrives as a float.
df1 = pd.read_csv('weekly_report.csv')   # weekly summary (Week, Date Range, Meal, most/least cells)
df2 = pd.read_csv('monthly_report.csv')  # monthly summary (Month, Meal, most/least cells)

# Weekly expanded reports: each call writes its own CSV and prints a confirmation
most_expanded_weekly(df1)
least_expanded_weekly(df1)

# Monthly expanded reports: each call writes its own CSV and prints a confirmation
most_expanded_monthly(df2)
least_expanded_monthly(df2)



Expanded weekly report for most consumed dishes saved as 'most_expanded_weekly_report.csv'.
Expanded weekly report for least consumed dishes saved as 'least_expanded_weekly_report.csv'.
Expanded monthly report for most consumed dishes saved as 'most_expanded_monthly_report.csv'.
Expanded monthly report for least consumed dishes saved as 'least_expanded_monthly_report.csv'.
