In [1]:
import pandas as pd
import random

# Step 1: Read the restaurant menu export into a DataFrame
restaurant_df = pd.read_csv("restaurant_menu_data.csv")
print("Loaded restaurant data with", len(restaurant_df), "dishes.")

# Step 2: Reduce to one row per dish name, keeping only the columns used downstream
dish_columns = ["Dish Name", "Cuisine", "Category"]
dish_table = restaurant_df.loc[:, dish_columns]
# keep="first": when a dish name repeats, the cuisine/category of its first row is retained
unique_dishes = dish_table.drop_duplicates(subset="Dish Name", keep="first")
print("Found", len(unique_dishes), "unique dishes.")

# Step 3: Define nutritional ranges (tweaked for project targets)
def _macro_ranges(calories, protein, carbs, fat):
    """Bundle four (low, high) bounds into one range record.

    Each argument is a 2-tuple of integers; the returned dict maps the
    nutrient name to that tuple, in Calories/Protein/Carbs/Fat order.
    """
    return {"Calories": calories, "Protein": protein, "Carbs": carbs, "Fat": fat}


# Lookup table: cuisine -> category -> nutrient -> (low, high).
# Argument order below is always calories, protein, carbs, fat.
nutrition_ranges = {
    "North Indian": {
        "Main Course": _macro_ranges((500, 800), (15, 25), (40, 70), (15, 30)),  # Fat tightened to 15-30g
        "Side": _macro_ranges((200, 400), (5, 10), (30, 50), (10, 20)),
        "Appetizer": _macro_ranges((150, 300), (5, 15), (20, 40), (5, 15)),
    },
    "South Indian": {
        "Main Course": _macro_ranges((200, 400), (8, 15), (40, 70), (5, 15)),  # Protein bumped to 8-15g
        "Beverage": _macro_ranges((50, 150), (1, 5), (5, 20), (2, 10)),
        "Appetizer": _macro_ranges((100, 250), (2, 8), (20, 40), (5, 15)),
        "Side": _macro_ranges((100, 200), (2, 6), (20, 40), (2, 10)),
    },
    "Biryani & Rice": {
        "Main Course": _macro_ranges((600, 800), (20, 35), (70, 100), (15, 30)),  # Calories capped at 800, Fat 15-30g
        "Side": _macro_ranges((150, 300), (3, 8), (30, 50), (5, 15)),
    },
    "Street Food": {
        "Appetizer": _macro_ranges((100, 200), (2, 5), (20, 40), (5, 15)),
        "Main Course": _macro_ranges((200, 400), (5, 10), (30, 60), (10, 20)),
    },
    "Chinese": {
        "Main Course": _macro_ranges((300, 600), (15, 20), (50, 80), (10, 25)),  # Protein bumped to 15-20g
        "Appetizer": _macro_ranges((150, 300), (5, 10), (20, 40), (5, 15)),
        "Side": _macro_ranges((150, 300), (3, 8), (30, 50), (5, 15)),
        "Beverage": _macro_ranges((50, 150), (1, 5), (10, 20), (2, 10)),
    },
    "Desserts & Beverages": {
        "Dessert": _macro_ranges((150, 300), (2, 5), (20, 40), (10, 20)),
        "Beverage": _macro_ranges((100, 200), (3, 8), (15, 30), (2, 10)),
        "Main Course": _macro_ranges((300, 500), (5, 15), (40, 70), (10, 20)),
    },
}

# Default range for unmatched cases
default_range = _macro_ranges((100, 300), (5, 10), (20, 40), (5, 15))

# Step 4: Generate nutritional data
# A dedicated, seeded generator makes the synthetic dataset reproducible from
# run to run and leaves the global `random` state untouched. Change the seed
# to obtain a different (but still repeatable) dataset.
NUTRITION_SEED = 42
nutrition_rng = random.Random(NUTRITION_SEED)

# Output column -> key in a range record. The dict order fixes both the column
# order of each record and the order in which values are drawn.
nutrient_columns = {
    "Calories (kcal)": "Calories",
    "Protein (g)": "Protein",
    "Carbs (g)": "Carbs",
    "Fat (g)": "Fat",
}

nutrition_data = []

# Plain tuples are cheaper than the per-row Series that iterrows() builds.
dish_rows = unique_dishes[["Dish Name", "Cuisine", "Category"]].itertuples(index=False, name=None)
for dish, cuisine, category in dish_rows:
    # Use the cuisine/category-specific range when one is tabulated,
    # otherwise fall back to the generic default range.
    ranges = nutrition_ranges.get(cuisine, {}).get(category, default_range)

    # NOTE: every nutrient is drawn independently, so the calorie value is not
    # derived from (and need not agree with) the sampled protein/carbs/fat.
    record = {"Dish Name": dish}
    for column, nutrient in nutrient_columns.items():
        low, high = ranges[nutrient]
        record[column] = nutrition_rng.randint(low, high)  # both bounds inclusive
    nutrition_data.append(record)

# Step 5: Assemble the generated records into a table and write it to disk
nutrition_df = pd.DataFrame(data=nutrition_data)
# 'utf-8-sig' writes a UTF-8 byte-order mark at the start of the file
# (presumably so spreadsheet tools detect the encoding — confirm with consumers).
nutrition_df.to_csv(
    "nutritional_data.csv",
    encoding='utf-8-sig',
    index=False,
)
print(f"Generated nutritional dataset with {len(nutrition_df)} unique dishes. Saved as 'nutritional_data.csv'.")

Loaded restaurant data with 579 dishes.
Found 51 unique dishes.
Generated nutritional dataset with 51 unique dishes. Saved as 'nutritional_data.csv'.
