# In [2]:
import random
import pandas as pd

# Menu catalogue.
# Maps each cuisine name to a list of (dish name, (lowest price, highest price))
# pairs. The price pair is consumed as inclusive bounds by the generator below.
cuisines = {
    "North Indian": [
        ("Paneer Butter Masala", (200, 350)),
        ("Dal Makhani", (180, 300)),
        ("Butter Naan", (40, 80)),
        ("Aloo Paratha", (50, 120)),
        ("Chole Bhature", (100, 180)),
        ("Rajma Chawal", (150, 250)),
        ("Shahi Paneer", (220, 380)),
        ("Tandoori Roti", (30, 70)),
        ("Kadhi Chawal", (120, 200)),
    ],
    "South Indian": [
        ("Masala Dosa", (80, 150)),
        ("Idli Sambar", (50, 120)),
        ("Vada", (40, 100)),
        ("Pongal", (60, 130)),
        ("Uttapam", (70, 140)),
        ("Rava Dosa", (90, 170)),
        ("Appam with Stew", (150, 250)),
        ("Filter Coffee", (40, 100)),
        ("Mysore Bonda", (80, 150)),
    ],
    "Biryani & Rice": [
        ("Hyderabadi Biryani", (250, 450)),
        ("Veg Pulao", (150, 250)),
        ("Jeera Rice", (100, 180)),
        ("Curd Rice", (80, 140)),
        ("Egg Biryani", (200, 350)),
        ("Mutton Biryani", (300, 550)),
        ("Chicken Fried Rice", (180, 320)),
        ("Ghee Rice", (120, 200)),
    ],
    "Street Food": [
        ("Pani Puri", (40, 100)),
        ("Pav Bhaji", (100, 180)),
        ("Vada Pav", (30, 80)),
        ("Samosa", (20, 60)),
        ("Dabeli", (50, 100)),
        ("Dahi Puri", (80, 150)),
        ("Sev Puri", (70, 140)),
        ("Misal Pav", (90, 160)),
    ],
    "Chinese": [
        ("Hakka Noodles", (150, 250)),
        ("Manchurian", (120, 220)),
        ("Spring Rolls", (100, 180)),
        ("Chili Paneer", (150, 280)),
        ("Fried Rice", (120, 230)),
        ("Schezwan Noodles", (160, 270)),
        ("Hot & Sour Soup", (100, 180)),
        ("Dim Sum", (140, 260)),
    ],
    "Desserts & Beverages": [
        ("Gulab Jamun", (50, 120)),
        ("Rasgulla", (50, 120)),
        ("Mango Lassi", (80, 150)),
        ("Cold Coffee", (100, 180)),
        ("Masala Chai", (30, 80)),
        ("Badam Milk", (60, 130)),
        ("Falooda", (120, 200)),
        ("Ice Cream Sundae", (150, 250)),
        ("Kulfi", (80, 160)),
    ],
}

# Pool of 50 distinct restaurant names; the generator draws its restaurants
# from here without replacement (the default run needs 40 of them).
restaurant_names = [
    "Tandoori Treats",
    "Royal Biryani House",
    "Flavors of South",
    "Chaat Junction",
    "Golden Spoon",
    "The Chinese Wok",
    "Sweet Symphony",
    "Fusion Delight",
    "Punjabi Dhaba",
    "Urban Tandoor",
    "Spices & Herbs",
    "Bombay Bites",
    "South Spice",
    "Mughlai Magic",
    "Crispy Crunch",
    "Desi Tadka",
    "Kolkata Rolls",
    "Andhra Spice",
    "Malabar Coast",
    "Cafe Aroma",
    "Himalayan Kitchen",
    "Taste of Kerala",
    "Barbecue Nation",
    "The Curry House",
    "Chai & More",
    "Zayka Junction",
    "Biryani Blues",
    "The Street Eatery",
    "Dilli Wale Paratha",
    "Rajasthan Rasoi",
    "Kebab Factory",
    "The Grand Tiffin",
    "Indori Chatpata",
    "Nukkad Cafe",
    "Momo Mania",
    "Authentic Flavors",
    "Heritage Kitchen",
    "Royal Nawabs",
    "Lazeez Darbar",
    "The Midnight Crave",
    "Darjeeling Flavors",
    "Bangalore Bites",
    "Lucknowi Handi",
    "Dosa Delight",
    "Mehfil-e-Khana",
    "Taste of Hyderabad",
    "Pind Baluchi",
    "Kolkata Mishti",
    "The Grilled Affair",
    "Kathi Rolls Express",
]

# Meal categories: category label -> dish names that belong to it.
# NOTE(review): "Butter Chicken" and "Naan" have no matching dish in `cuisines`,
# while "Vada Pav" and "Dabeli" are not listed under any category, so the
# generator falls back to its "Main Course" default for them - confirm intent.
categories = {
    "Appetizer": [
        "Samosa",
        "Spring Rolls",
        "Vada",
        "Pani Puri",
        "Sev Puri",
        "Dahi Puri",
        "Mysore Bonda",
        "Chili Paneer",
        "Hot & Sour Soup",
        "Dim Sum",
    ],
    "Main Course": [
        "Paneer Butter Masala",
        "Dal Makhani",
        "Butter Chicken",
        "Shahi Paneer",
        "Kadhi Chawal",
        "Rajma Chawal",
        "Chole Bhature",
        "Aloo Paratha",
        "Masala Dosa",
        "Uttapam",
        "Rava Dosa",
        "Appam with Stew",
        "Pongal",
        "Hyderabadi Biryani",
        "Egg Biryani",
        "Mutton Biryani",
        "Hakka Noodles",
        "Manchurian",
        "Schezwan Noodles",
        "Chicken Fried Rice",
        "Pav Bhaji",
        "Misal Pav",
    ],
    "Side": [
        "Butter Naan",
        "Tandoori Roti",
        "Naan",
        "Jeera Rice",
        "Fried Rice",
        "Veg Pulao",
        "Curd Rice",
        "Ghee Rice",
        "Idli Sambar",
    ],
    "Dessert": [
        "Gulab Jamun",
        "Rasgulla",
        "Falooda",
        "Ice Cream Sundae",
        "Kulfi",
    ],
    "Beverage": [
        "Mango Lassi",
        "Cold Coffee",
        "Masala Chai",
        "Badam Milk",
        "Filter Coffee",
    ],
}

# Function to generate dataset with a given number of restaurants
def generate_restaurant_data(
    num_restaurants: int = 40,
    *,
    output_path: str = "restaurant_menu_data.csv",
    seed=None,
) -> "pd.DataFrame":
    """Generate a random restaurant-menu dataset and write it to a CSV file.

    Each restaurant gets a unique name from ``restaurant_names`` and 1-3
    randomly chosen cuisines from ``cuisines``. For every chosen cuisine,
    between 5 and 12 dishes are requested, capped at the number of dishes that
    cuisine actually offers. Each dish row receives a random integer price
    within the dish's inclusive price range and a rating drawn uniformly from
    3.8-5.0, rounded to one decimal place.

    Args:
        num_restaurants: How many restaurants to generate. Must be between 0
            and ``len(restaurant_names)`` because names are drawn without
            replacement.
        output_path: Destination of the CSV file. Defaults to
            ``"restaurant_menu_data.csv"`` in the current working directory.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the output is reproducible and the global random state is
            left untouched. When ``None``, the module-level ``random``
            functions are used.

    Returns:
        A DataFrame with one row per (restaurant, dish) and the columns
        "Dish Name", "Cuisine", "Price", "Rating", "Restaurant Name" and
        "Category".

    Raises:
        ValueError: If ``num_restaurants`` is negative or exceeds the number of
            available restaurant names.
    """
    if not 0 <= num_restaurants <= len(restaurant_names):
        raise ValueError(
            f"num_restaurants must be between 0 and {len(restaurant_names)}, "
            f"got {num_restaurants}"
        )

    # The `random` module exposes the same sample/randint/uniform API as a
    # Random instance, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    # Build the dish -> category lookup once instead of rescanning every
    # category list per dish. setdefault keeps the first category that lists a
    # dish, matching a first-match scan over `categories`.
    category_of = {}
    for cat, dish_list in categories.items():
        for listed_dish in dish_list:
            category_of.setdefault(listed_dish, cat)

    columns = ["Dish Name", "Cuisine", "Price", "Rating", "Restaurant Name", "Category"]
    cuisine_names = list(cuisines)
    restaurants = []

    # Pick unique restaurant names
    for name in rng.sample(restaurant_names, k=num_restaurants):
        # Pick 1-3 random cuisine types for this restaurant
        cuisine_types = rng.sample(cuisine_names, k=rng.randint(1, 3))

        for cuisine in cuisine_types:
            menu = cuisines[cuisine]
            # Ask for 5-12 dishes but never more than the cuisine offers
            num_dishes = min(rng.randint(5, 12), len(menu))

            for dish, (low, high) in rng.sample(menu, k=num_dishes):
                restaurants.append({
                    "Dish Name": dish,
                    "Cuisine": cuisine,
                    "Price": rng.randint(low, high),
                    "Rating": round(rng.uniform(3.8, 5.0), 1),
                    "Restaurant Name": name,
                    # Dishes missing from every category default to "Main Course"
                    "Category": category_of.get(dish, "Main Course"),
                })

    # Pin the columns so an empty result still carries (and writes) the header
    df = pd.DataFrame(restaurants, columns=columns)
    df.to_csv(output_path, index=False, encoding="utf-8")
    print(f"Generated dataset with {len(df)} dishes across {num_restaurants} restaurants. Saved as '{output_path}'.")
    return df

# Script entry point: build the dataset only when this file is executed directly
if __name__ == "__main__":
    generate_restaurant_data(num_restaurants=40)  # 40 is also the function's default

# Output: Generated dataset with 579 dishes across 40 restaurants. Saved as 'restaurant_menu_data.csv'.
