In [None]:
import pandas as pd

try:
    df = pd.read_csv('final_food_delivery_dataset.csv')
except FileNotFoundError:
    # The merged dataset is absent, so rebuild it from the three raw sources.
    # Only a missing file triggers the rebuild: any other failure (a parse
    # error in an existing file, a keyboard interrupt, ...) propagates instead
    # of being silently masked by the fallback.
    import csv
    import re

    orders_df = pd.read_csv('orders.csv')
    users_df = pd.read_json('users.json')

    # Captures the value list between the parentheses of an INSERT statement.
    values_pattern = re.compile(r"\((.*)\);")

    restaurant_data = []
    with open('restaurants.sql', 'r') as f:
        for line in f:
            if not line.strip().startswith("INSERT INTO restaurants VALUES"):
                continue
            match = values_pattern.search(line)
            if not match:
                continue
            # Tokenize with the csv module, treating single quotes as the
            # quote character, so a comma inside a quoted value stays within
            # one field instead of producing an extra column.
            fields = next(
                csv.reader([match.group(1)], quotechar="'", skipinitialspace=True)
            )
            restaurant_data.append([field.strip() for field in fields])

    restaurants_df = pd.DataFrame(
        restaurant_data,
        columns=['restaurant_id', 'restaurant_name', 'cuisine', 'rating'],
    )
    # Parsed values are all strings; the join key must be numeric to be
    # converted before merging on it.
    restaurants_df['restaurant_id'] = pd.to_numeric(restaurants_df['restaurant_id'])

    # Left joins keep every order row even when the user or restaurant
    # lookup finds no match.
    merged = pd.merge(orders_df, users_df, on='user_id', how='left')
    df = pd.merge(
        merged,
        restaurants_df,
        on='restaurant_id',
        how='left',
        suffixes=('_order', '_details'),
    )

# Sum total_amount within each user_id group to get one spend figure per user.
user_total_spend = df.groupby('user_id')['total_amount'].agg('sum')

# Keep only the users whose summed spend is strictly greater than 1000.
high_value_users = user_total_spend.loc[user_total_spend.gt(1000)]

# Number of non-null entries remaining after the filter.
count_high_value_users = high_value_users.count()

print(f"Number of users with total spend > 1000: {count_high_value_users}")