In [1]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Step 1: Read the restaurant menu CSV (one row per dish) into a DataFrame
restaurant_df = pd.read_csv("restaurant_menu_data.csv")
# Report how many rows were read as a quick sanity check
print("Loaded restaurant data with", restaurant_df.shape[0], "dishes.")

# Step 2: Build the list of user IDs
num_users = 50  # How many synthetic users to create
# Each ID is "U" followed by a counter zero-padded to three digits: U001 ... U050
users = [f"U{n:03d}" for n in range(1, num_users + 1)]

# Step 3: Random date generator
start_date = datetime(2025, 1, 1)
end_date = datetime(2025, 3, 15)


def random_date():
    """Return a random timestamp in the window [start_date, end_date].

    The window bounds are read from the module-level ``start_date`` and
    ``end_date`` at call time. The result is aligned to a whole minute, so
    its seconds are always zero.

    Returns:
        datetime: A naive datetime no earlier than ``start_date`` and no
        later than ``end_date``.
    """
    # Draw a single minute offset across the whole window. Picking a day
    # offset of up to delta.days and then adding a separate time of day can
    # overshoot end_date by as much as 23 hours 59 minutes.
    window_minutes = (end_date - start_date) // timedelta(minutes=1)
    return start_date + timedelta(minutes=random.randint(0, window_minutes))

# Step 4: Pool of special-request phrases that an order may carry
special_requests = [
    "extra spicy",
    "no onions",
    "less oil",
    "extra cheese",
    "no garlic",
    "mild flavor",
    "extra sauce",
    "well done",
    "no salt",
    "extra veggies",
]

# Step 5: Generate user order data
# Each user gets one favorite cuisine and one favorite restaurant, then places
# 5-20 orders that are biased towards those favorites. Every order is appended
# to `orders` as a dict, one key per output column.
orders = []

# The candidate favorites do not depend on the user, so compute them once.
# dropna() stops a missing value from being picked as a favorite: filtering on
# NaN matches no rows, and sampling from an empty frame raises ValueError.
# tolist() hands random.choice an ordinary Python list.
cuisine_options = restaurant_df["Cuisine"].dropna().unique().tolist()
restaurant_options = restaurant_df["Restaurant Name"].dropna().unique().tolist()

for user in users:
    num_orders = random.randint(5, 20)  # 5-20 orders per user
    fav_cuisine = random.choice(cuisine_options)  # Favorite cuisine
    fav_restaurant = random.choice(restaurant_options)  # Favorite restaurant

    # Both favorite menus are the same for every order of this user, so
    # filter once per user instead of once per order.
    fav_cuisine_dishes = restaurant_df[restaurant_df["Cuisine"] == fav_cuisine]
    fav_restaurant_dishes = restaurant_df[restaurant_df["Restaurant Name"] == fav_restaurant]

    for _ in range(num_orders):
        # 70% chance to order from favorite cuisine or restaurant
        if random.random() < 0.7:  # 70% preference (can change to 0.6 for 60%)
            if random.random() < 0.5:  # Half the time, use favorite cuisine
                possible_dishes = fav_cuisine_dishes
            else:  # Other half, use favorite restaurant
                possible_dishes = fav_restaurant_dishes
        else:
            possible_dishes = restaurant_df  # 30% random

        # Sample one whole row so that restaurant, dish, cuisine and price all
        # come from the same menu entry
        dish = possible_dishes.sample(1).iloc[0]

        # Set order quantity (1-3)
        quantity = random.randint(1, 3)

        # Calculate total price
        total_price = dish["Price"] * quantity

        # Rating (50% chance, 3.0-5.0, one decimal place); None means not rated
        rating = round(random.uniform(3.0, 5.0), 1) if random.random() < 0.5 else None

        # Special request (30% chance); None means no request
        request = random.choice(special_requests) if random.random() < 0.3 else None

        # Add the order with consistent restaurant and dish
        orders.append({
            "User ID": user,
            "Restaurant Name": dish["Restaurant Name"],
            "Dish Name": dish["Dish Name"],
            "Cuisine": dish["Cuisine"],
            "Price": dish["Price"],
            "Order Date & Time": random_date().strftime("%Y-%m-%d %H:%M:%S"),  # Formatted for sorting
            "Order Quantity": quantity,
            "Total Price": total_price,
            "Rating Given": rating,
            "Special Requests": request
        })

# Step 6: Turn the collected orders into a table, order it by timestamp, and export it
user_df = pd.DataFrame(orders).sort_values("Order Date & Time")
# index=False leaves the row index out of the file; utf-8-sig adds a BOM for Excel
user_df.to_csv("user_order_data.csv", index=False, encoding="utf-8-sig")
print(f"Generated user dataset with {len(user_df)} orders across {num_users} users. Saved as 'user_order_data.csv'.")

Loaded restaurant data with 579 dishes.
Generated user dataset with 602 orders across 50 users. Saved as 'user_order_data.csv'.
