<a href="https://colab.research.google.com/github/Jinendra-Gambhir/DoorDash-Analytics/blob/main/Doordash_Data_Cleaning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pyspark py4j
from pyspark.sql import SparkSession

# Create a SparkSession
spark = SparkSession.builder \
    .appName("doordash_cleaning") \
    .getOrCreate()



In [None]:
from pyspark.sql.functions import col, split, explode, countDistinct, count

# Read the raw export with Spark: the first row supplies the column names and
# column types are inferred from the data.
df = spark.read.csv(path='/content/doordash_raw.csv', header=True, inferSchema=True)

# Break the pipe-delimited 'cuisines' string into an array of labels ...
cuisine_tokens = split(col("cuisines"), "\\|")

# ... and emit one row per label in a new 'cuisine' column.
df_exploded = df.withColumn("cuisine", explode(cuisine_tokens))

# Number of exploded rows per cuisine label (a frequency table, one row per
# distinct label).
rows_per_cuisine = count("*").alias("count")
unique_cuisines_count = df_exploded.groupBy("cuisine").agg(rows_per_cuisine)

# Print the first rows of the frequency table
unique_cuisines_count.show()

+-------------+-----+
|      cuisine|count|
+-------------+-----+
|   Fried Rice|  404|
|       Muffin|   53|
|   Bubble Tea|  109|
|       Nachos|   83|
|Frozen Yogurt|   29|
|      Gourmet|   40|
|       Shrimp|  212|
|           Ve|    3|
|        Roses|   12|
|   Pet Treats|   27|
|        Salad|  118|
|      Tempura|   30|
|     Beer Bar|   15|
| Kid Friendly|   30|
|   Tater Tots|   19|
|     Tortilla|    4|
|   Cat Litter|    9|
|         meat|    3|
|      Falafel|   66|
|         Keto|   15|
+-------------+-----+
only showing top 20 rows



In [None]:
# Preview five rows. Limiting on the Spark side first means only those rows are
# collected to the driver, instead of converting the entire dataset to pandas
# just to display its head.
df.limit(5).toPandas()

Unnamed: 0,searched_zipcode,searched_lat,searched_lng,searched_address,searched_state,searched_city,searched_metro,city_slug,latitude,longitude,...,delivery_fee_raw,delivery_fee,delivery_time_raw,delivery_time,service_fee_raw,service_fee,phone,review_count,review_rating,RunDate
0,10003,40.732473,-73.98738,"41 Cooper Square, 7 E 7th St, New York, NY 100...",NY,New York,NY,,40.675464,-73.980782,...,"$0.00 delivery fee, first order",0,24 - 34,29,,,,1364,4.8,25/04/22 7:01
1,11219,40.631075,-73.997695,"4701 15th Ave, Brooklyn, NY 11219, USA",NY,Brooklyn,NY,,40.675464,-73.980782,...,"$0.00 delivery fee, first order",0,24 - 34,29,,,,1364,4.8,25/04/22 7:01
2,11216,40.678832,-73.950677,"300 Madison St, Brooklyn, NY 11216, USA",NY,Brooklyn,NY,,40.675464,-73.980782,...,"$0.00 delivery fee, first order",0,24 - 34,29,,,,1364,4.8,25/04/22 7:01
3,11219,40.631075,-73.997695,"4701 15th Ave, Brooklyn, NY 11219, USA",NY,Brooklyn,NY,,40.672978,-73.950462,...,"$0.00 delivery fee, first order",0,23 - 33,28,,,,2000,4.6,25/04/22 7:01
4,10003,40.732473,-73.98738,"41 Cooper Square, 7 E 7th St, New York, NY 100...",NY,New York,NY,,40.672978,-73.950462,...,"$0.00 delivery fee, first order",0,23 - 33,28,,,,2000,4.6,25/04/22 7:01


In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
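# Extended mapping for specialty items: a list of single-key dicts, each mapping
# a raw cuisine label to the list of categories it belongs to.
# Some labels (e.g. "Hot Dogs", "Soul Food") appear in more than one entry.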
food_category_mapping = [
    {"Hot Dogs": ["Fast Food", "American"]},
    {"Muffin": ["Bakery", "Desserts"]},
    {"Bubble Tea": ["Beverages"]},
    {"Nachos": ["Mexican", "Appetizers"]},
    {"Frozen Yogurt": ["Desserts", "Ice Cream & Frozen Yogurt"]},
    {"Gourmet": ["Specialty Foods"]},
    {"Shrimp": ["Seafood"]},
    {"Ve": ["Vegetarian"]},
    {"Roses": ["Florist"]},
    {"Pet Treats": ["Pet Food"]},
    {"Salad": ["Healthy", "Salads"]},
    {"Tempura": ["Japanese", "Appetizers"]},
    {"Beer Bar": ["Beer", "Bars"]},
    {"Kid Friendly": ["Family Friendly"]},
    {"Tater Tots": ["American", "Appetizers"]},
    {"Tortilla": ["Mexican"]},
    {"Cat Litter": ["Pet Care"]},
    {"meat": ["Meat Shops"]},
    {"Falafel": ["Middle Eastern", "Vegetarian"]},
    {"Keto": ["Specialty Foods", "Low Carb"]},
    {"Pita": ["Middle Eastern", "Bakery"]},
    {"Plant Based": ["Vegetarian", "Specialty Foods"]},
    {"Bulgogi": ["Korean"]},
    {"Mexican": ["Mexican"]},
    {"Unagi": ["Japanese", "Seafood"]},
    {"Sauce": ["Condiments"]},
    {"Bakery": ["Bakery"]},
    {"Ice Coffee": ["Coffee", "Beverages"]},
    {"Pasta & Burgers": ["Italian", "Burgers", "Pasta"]},
    {"Cheeseburger": ["American", "Burgers"]},
    {"Bar": ["Bars"]},
    {"Cheese Fries": ["American", "Appetizers"]},
    {"Veal Parm": ["Italian"]},
    {"BBQ": ["BBQ"]},
    {"Okonomiyaki": ["Japanese"]},
    {"Roast Beef Sandwhich": ["American", "Sandwiches"]},
    {"Espresso": ["Coffee", "Beverages"]},
    {"Comfort": ["Comfort Food"]},
    {"Custom Cocktails": ["Cocktail Bars"]},
    {"Smoothies": ["Smoothies", "Beverages"]},
    {"Melts": ["American", "Sandwiches"]},
    {"French Toast": ["Breakfast", "Desserts"]},
    {"Stew": ["Comfort Food", "Soups"]},
    {"Lobster Rolls": ["Seafood", "Sandwiches"]},
    {"Turkish": ["Turkish"]},
    {"Gluten-Free": ["Specialty Foods"]},
    {"Bakeries": ["Bakery"]},
    {"Roti": ["Indian", "Bread"]},
    {"Creperies": ["French", "Desserts"]},
    {"Persian": ["Middle Eastern"]},
    {"Sweet Potato Fries": ["American", "Appetizers"]},
    {"Gastropubs": ["American", "Bars"]},
    {"Organic": ["Specialty Foods"]},
    {"Chimichanga": ["Mexican"]},
    {"Chicken Tikka Masala": ["Indian"]},
    {"Mozzarella Sticks": ["Italian", "Appetizers"]},
    {"Udon": ["Japanese", "Noodles"]},
    {"Draft Beer": ["Beer", "Bars"]},
    {"Breakfast and Lunch": ["American", "Breakfast", "Lunch"]},
    {"Ethiopian": ["African"]},
    {"Chirashi Sushi": ["Japanese", "Sushi"]},
    {"Cheetos": ["Snacks"]},
    {"Thai": ["Thai"]},
    {"Venues & Event Spaces": ["Venues"]},
    {"Wood Fire Pizza": ["Italian", "Pizza"]},
    {"Curry": ["Indian"]},
    {"Build Your Own": ["Custom"]},
    {"Biryani": ["Indian"]},
    {"American Breakfast": ["American", "Breakfast"]},
    {"Pollo": ["Mexican"]},
    {"Breakfast Burrito": ["American", "Breakfast"]},
    {"Waffles": ["American", "Breakfast"]},
    {"New American restaurant": ["American"]},
    {"Fries": ["American", "Appetizers"]},
    {"Toast": ["American", "Breakfast"]},
    {"Takeout": ["Delivery"]},
    {"Indian": ["Indian"]},
    {"Southern": ["American", "Southern"]},
    {"Sushi": ["Japanese", "Sushi"]},
    {"Roasted Pork": ["Chinese"]},
    {"Al Pastor": ["Mexican"]},
    {"Banh Mi": ["Vietnamese"]},
    {"Toasties": ["American", "Sandwiches"]},
    {"Chinese": ["Chinese"]},
    {"Wine Bars": ["Bars"]},
    {"Halal": ["Halal"]},
    {"Heroes": ["American", "Sandwiches"]},
    {"Bar and Grill": ["Bars", "American"]},
    {"Cupcakes": ["Desserts"]},
    {"Soul Food": ["American", "Southern"]},
    {"Florist": ["Florist"]},
    {"Pretzels": ["Snacks"]},
    {"Macy": ["Department Store"]},
    {"Tacos": ["Mexican", "Tacos"]},
    {"Beer, Wine & Spirits": ["Beer", "Wine & Spirits"]},
    {"Steak": ["American", "Steak"]},
    {"petco": ["Pet Stores"]},
    {"African": ["African"]},
    {"Tikka": ["Indian"]},
    {"Margarita": ["Cocktail Bars"]},
    {"Bibimpap": ["Korean"]},
    {"Beef": ["Meat Shops"]},
    {"Healthy Smoothies": ["Smoothies", "Healthy"]},
    {"Gourmet Gifts": ["Specialty Foods"]},
    {"Kimchi Jeon": ["Korean"]},
    {"Cantonese": ["Chinese"]},
    {"Crawfish": ["Seafood"]},
    {"Donuts": ["Desserts"]},
    {"Baby": ["Kids"]},
    {"Juice Bars & Smoothies": ["Smoothies", "Beverages"]},
    {"Chow Mein": ["Chinese"]},
    {"Cold Beers": ["Beer", "Bars"]},
    {"Nails": ["Beauty"]},
    {"Szechuan": ["Chinese"]},
    {"Family Friendly": ["Family Friendly"]},
    {"Israeli cuisine": ["Israeli"]},
    {"Chicken": ["American", "Chicken"]},
    {"Cheesecake": ["Desserts"]},
    {"Taiwanese": ["Taiwanese"]},
    {"Cold Brew": ["Coffee", "Beverages"]},
    {"Salmon": ["Seafood"]},
    {"Baklava": ["Desserts"]},
    {"Comfort Food": ["Comfort Food"]},
    {"Sausage": ["Meat Shops"]},
    {"Chicken Sandwiches": ["American", "Chicken", "Sandwiches"]},
    {"Oatmeal": ["Breakfast"]},
    {"Irish": ["Irish"]},
    {"Hot and Cold Sandwiches": ["American", "Sandwiches"]},
    {"Deep Dish Pizza": ["Italian", "Pizza"]},
    {"Enchiladas": ["Mexican"]},
    {"burger": ["American", "Burgers"]},
    {"Loaded Fries": ["American", "Appetizers"]},
    {"7-11": ["Convenience"]},
    {"Delivery": ["Delivery"]},
    {"Spaghetti": ["Italian", "Pasta"]},
    {"Dog Toys": ["Pets"]},
    {"Chocolate": ["Desserts"]},
    {"Grocery": ["Grocery"]},
    {"Meat Shops": ["Meat Shops"]},
    {"Sicilian": ["Italian", "Pizza"]},
    {"British": ["British"]},
    {"Florals": ["Florist"]},
    {"Brunch": ["American", "Brunch"]},
    {"Chocolate chip cookie": ["Desserts"]},
    {"Nuggets": ["American", "Chicken"]},
    {"Empanadas": ["Latin American"]},
    {"Breakfast & Brunch": ["American", "Brunch", "Breakfast"]},
    {"Electronics": ["Electronics"]},
    {"Sandwich": ["American", "Sandwiches"]},
    {"Bagels": ["Bakery"]},
    {"Crab": ["Seafood"]},
    {"Rice Bowl": ["Asian", "Bowls"]},
    {"Hawaiian": ["Hawaiian"]},
    {"Craft Beer": ["Beer"]},
    {"Milk Tea": ["Tea", "Beverages"]},
    {"Peruvian": ["Peruvian"]},
    {"Spicy Chicken": ["American", "Chicken"]},
    {"Japanese": ["Japanese"]},
    {"Cheesesteaks": ["American", "Sandwiches"]},
    {"Hamburger Steak": ["American", "Steak"]},
    {"Takoyaki": ["Japanese"]},
    {"Salads": ["Salads"]},
    {"Brisket": ["American", "Meat"]},
    {"Yakisoba": ["Japanese", "Noodles"]},
    {"Pancakes": ["American", "Breakfast"]},
    {"Latin American": ["Latin American"]},
    {"Lobster": ["Seafood"]},
    {"vegetable": ["Vegetarian"]},
    {"Australian": ["Australian"]},
    {"Diners": ["American", "Diners"]},
    {"Fast Food": ["Fast Food"]},
    {"Filipino": ["Filipino"]},
    {"Catering": ["Catering"]},
    {"Pudding": ["Desserts"]},
    {"Kimchi": ["Korean"]},
    {"Dog Food": ["Pets"]},
    {"Ink & Toner": ["Office Supplies"]},
    {"Vietnamese": ["Vietnamese"]},
    {"Spanish": ["Spanish"]},
    {"Cakes": ["Desserts"]},
    {"Cat Treats": ["Pets"]},
    {"Fresh Lobster": ["Seafood"]},
    {"Tenders": ["American", "Chicken"]},
    {"Himalayan/Nepalese": ["Nepalese"]},
    {"Pakistani": ["Pakistani"]},
    {"Burgers": ["Burgers"]},
    {"Indian Cuisine": ["Indian"]},
    {"Haitian": ["Haitian"]},
    {"Breakfast, Salads & Paninis": ["American", "Breakfast", "Salads", "Paninis"]},
    {"Naan": ["Indian", "Bread"]},
    {"Pizza": ["Italian", "Pizza"]},
    {"Chicken Wings": ["American", "Chicken"]},
    {"Fruit Tea": ["Tea", "Beverages"]},
    {"Potato": ["Vegetarian"]},
    {"Non-Vegan": ["Specialty Foods"]},
    {"Pet Cleanup": ["Pets"]},
    {"Burmese": ["Burmese"]},
    {"Onion Rings": ["American", "Appetizers"]},
    {"Arancini": ["Italian", "Appetizers"]},
    {"Chowder": ["Soups"]},
    {"Breakfast": ["American", "Breakfast"]},
    {"Dumplings": ["Asian"]},
    {"Bouquet": ["Florist"]},
    {"Black Owned": ["Specialty Foods"]},
    {"Avocado Toast": ["Vegetarian"]},
    {"Hummus": ["Middle Eastern"]},
    {"Grilled Cheese": ["American", "Sandwiches"]},
    {"7/11": ["Convenience"]},
    {"Pickup": ["Delivery"]},
    {"Coffee & Tea": ["Coffee", "Tea"]},
    {"Bolognese": ["Italian", "Pasta"]},
    {"Family Meals": ["Family Friendly"]},
    {"Austrian": ["Austrian"]},
    {"Milk Shakes": ["Desserts", "Milk Shakes"]},
    {"Juice": ["Juice Bars & Smoothies", "Beverages"]},
    {"Dominican": ["Dominican"]},
    {"Dog Treats": ["Pets"]},
    {"Italian": ["Italian"]},
    {"Lebanese": ["Lebanese"]},
    {"Opens daily": ["Special Occasions"]},
    {"Guatemalan": ["Guatemalan"]},
    {"Caribbean": ["Caribbean"]},
    {"Thai Tea": ["Thai", "Tea"]},
    {"Beverages": ["Beverages"]},
    {"Panini": ["Italian", "Sandwiches"]},
    {"Latte": ["Coffee"]},
    {"Cold Brew Tea": ["Tea", "Beverages"]},
    {"Dinners": ["Special Occasions"]},
    {"Office Supplies": ["Office Supplies"]},
    {"Butcher": ["Meat Shops"]},
    {"brownies": ["Desserts"]},
    {"Fried Food": ["Fast Food"]},
    {"Wings": ["American", "Chicken"]},
    {"Lunch": ["American", "Lunch"]},
    {"Crepes": ["French", "Desserts"]},
    {"Cake Shop": ["Desserts"]},
    {"uyghur": ["Chinese"]},
    {"Dessert": ["Desserts"]},
    {"Party": ["Special Occasions"]},
    {"Slush": ["Beverages"]},
    {"Gyro": ["Greek"]},
    {"Sushi Bars": ["Japanese", "Sushi"]},
    {"Croissant": ["French", "Bakery"]},
    {"Pastries": ["Desserts"]},
    {"Craft Cocktails": ["Cocktail Bars"]},
    {"Fruit": ["Fruits"]},
    {"Soup": ["Soups"]},
    {"Pharmacy": ["Pharmacy"]},
    {"Kung Pao Chicken": ["Chinese"]},
    {"Cafes": ["Cafes"]},
    {"Sandwiches": ["American", "Sandwiches"]},
    {"Caterers": ["Catering"]},
    {"Ramen": ["Japanese", "Ramen"]},
    {"Tortas": ["Mexican", "Sandwiches"]},
    {"Bacon Egg & Cheese": ["American", "Breakfast"]},
    {"Bun": ["Bakery"]},
    {"Churros": ["Desserts"]},
    {"Valentine's Day": ["Special Occasions"]},
    {"Ribs": ["American", "Meat"]},
    {"Coffee": ["Coffee"]},
    {"Vegan Friendly": ["Vegetarian"]},
    {"Hot Pot": ["Asian", "Hot Pot"]},
    {"Focaccia": ["Italian", "Bread"]},
    {"Korean Fried Chicken": ["Korean", "Chicken"]},
    {"Cat Toys": ["Pets"]},
    {"Paper": ["Office Supplies"]},
    {"Hot Dogs": ["Fast Food", "American"]},
    {"Vegetarian": ["Vegetarian"]},
    {"Roast Beef": ["American", "Meat"]},
    {"Chocolates": ["Desserts"]},
    {"Italian Sausage": ["Italian", "Meat"]},
    {"Protein Shakes": ["Specialty Foods"]},
    {"breakfast": ["American", "Breakfast"]},
    {"Russian": ["Russian"]},
    {"Ice Cream & Frozen Yogurt": ["Desserts", "Ice Cream & Frozen Yogurt"]},
    {"Candy": ["Desserts"]},
    {"Kebabs": ["Middle Eastern"]},
    {"Sashimi": ["Japanese", "Sushi"]},
    {"Sa": ["Specialty Foods"]},
    {"Acai Bowls": ["Healthy", "Bowls"]},
    {"Best Buy": ["Electronics"]},
    {"Shawarma": ["Middle Eastern"]},
    {"Mexican Food": ["Mexican"]},
    {"Korean": ["Korean"]},
    {"Asian Food": ["Asian"]},
    {"Pet Health": ["Pets"]},
    {"Tonkotsu": ["Japanese", "Ramen"]},
    {"Famil": ["Family Friendly"]},
    {"Dinner": ["Special Occasions"]},
    {"Medicine": ["Pharmacy"]},
    {"Accessories": ["Fashion"]},
    {"Sukiyaki": ["Japanese"]},
    {"smoothie": ["Smoothies", "Beverages"]},
    {"French": ["French"]},
    {"Rolls": ["Bakery"]},
    {"japanese food": ["Japanese"]},
    {"Carne Asada": ["Mexican"]},
    {"Vegan": ["Vegetarian"]},
    {"Middle East": ["Middle Eastern"]},
    {"Malaysian": ["Malaysian"]},
    {"Fresh": ["Grocery"]},
    {"Local": ["Specialty Foods"]},
    {"Seafood boil": ["Seafood"]},
    {"Italian Ice": ["Desserts"]},
    {"Desserts": ["Desserts"]},
    {"Colombian": ["Colombian"]},
    {"Pet Stores": ["Pets"]},
    {"Pet Food": ["Pets"]},
    {"Staples": ["Office Supplies"]},
    {"Calzones": ["Italian", "Pizza"]},
    {"Kids": ["Kids"]},
    {"Rice": ["Asian"]},
    {"Fresh Steak": ["Meat Shops"]},
    {"Pet Grooming Products": ["Pets"]},
    {"Sweet & Spicy Chicken": ["American", "Chicken"]},
    {"Arabian": ["Middle Eastern"]},
    {"Jamaican": ["Jamaican"]},
    {"Gelato": ["Desserts"]},
    {"Snacks & Sweets": ["Snacks", "Desserts"]},
    {"Beer": ["Beer"]},
    {"Personal Care": ["Personal Care"]},
    {"Pork": ["Meat Shops"]},
    {"Popsicle": ["Desserts"]},
    {"Grill": ["American", "Grill"]},
    {"Ice Tea": ["Tea", "Beverages"]},
    {"Liquor Store": ["Beer", "Wine & Spirits"]},
    {"Juice Bar": ["Juice Bars & Smoothies", "Beverages"]},
    {"carrot cake": ["Desserts"]},
    {"Barbeque": ["BBQ"]},
    {"shakes": ["Desserts", "Milk Shakes"]},
    {"10024 S Figueroa St": ["Address"]},
    {"Iced Teas": ["Tea", "Beverages"]},
    {"Beer to go": ["Beer"]},
    {"Bento Box": ["Asian"]},
    {"Appetizers": ["Appetizers"]},
    {"sports bars": ["Bars", "Sports"]},
    {"Salvadorian": ["Salvadorian"]},
    {"Fried Chicken Sandwich": ["American", "Chicken", "Sandwiches"]},
    {"Fried Chicken": ["American", "Chicken"]},
    {"Middle Eastern": ["Middle Eastern"]},
    {"Vegetarian Friendly": ["Vegetarian"]},
    {"Seafood": ["Seafood"]},
    {"Hamburgers": ["American", "Burgers"]},
    {"Ice Cream": ["Desserts", "Ice Cream & Frozen Yogurt"]},
    {"Dim Sum": ["Chinese"]},
    {"Shaved Ice": ["Desserts"]},
    {"Pho": ["Vietnamese", "Soups"]},
    {"Fish & Chips": ["Seafood"]},
    {"Egg Rolls": ["Asian", "Appetizers"]},
    {"Tostadas": ["Mexican", "Appetizers"]},
    {"Tantanmen": ["Japanese", "Ramen"]},
    {"Alcohol": ["Beer", "Wine & Spirits"]},
    {"Tandoori": ["Indian"]},
    {"Bowls": ["Bowls"]},
    {"Birria": ["Mexican"]},
    {"Hot Subs": ["Sandwiches"]},
    {"Plantain": ["Vegetarian"]},
    {"Chocolatiers & Shops": ["Desserts"]},
    {"Gluten Free": ["Specialty Foods"]},
    {"Sal": ["Salads"]},
    {"Pie": ["Desserts"]},
    {"Deli": ["Deli"]},
    {"Ceviche": ["Latin American"]},
    {"Chilaquiles": ["Mexican"]},
    {"Octopus": ["Seafood"]},
    {"Lamb": ["Meat Shops"]},
    {"Party Platers": ["Special Occasions"]},
    {"Juice & Smoothie": ["Juice Bars & Smoothies", "Beverages"]},
    {"Red Snapper": ["Seafood"]},
    {"Wraps": ["Sandwiches"]},
    {"Tapas/Small Plates": ["Spanish"]},
    {"Sushi 1": ["Japanese", "Sushi"]},
    {"pets": ["Pets"]},
    {"Poutineries": ["Canadian"]},
    {"Pet Toys": ["Pets"]},
    {"Spring Rolls": ["Asian", "Appetizers"]},
    {"Burritos": ["Mexican"]},
    {"spirits": ["Beer", "Wine & Spirits"]},
    {"Beer, W": ["Beer", "Wine & Spirits"]},
    {"Brazilian": ["Brazilian"]},
    {"Meal Prep": ["Meal Prep"]},
    {"Cat Food": ["Pets"]},
    {"Boba": ["Asian"]},
    {"Boneless Wings": ["American", "Chicken"]},
    {"Pet Care": ["Pets"]},
    {"Breakfast, Salads & Sandwiches": ["American", "Breakfast", "Salads", "Sandwiches"]},
    {"Greek": ["Greek"]},
    {"chips": ["Snacks"]},
    {"Puerto Rican": ["Puerto Rican"]},
    {"Popcorn": ["Snacks"]},
    {"Hispanic": ["Hispanic"]},
    {"Healthy": ["Healthy"]},
    {"Cappuccino": ["Coffee"]},
    {"P": ["Specialty Foods"]},
    {"Asian": ["Asian"]},
    {"indian food": ["Indian"]},
    {"Cuban": ["Cuban"]},
    {"Fajitas": ["Mexican"]},
    {"Special Occasions": ["Special Occasions"]},
    {"Street Food": ["Street Food"]},
    {"Chai": ["Tea"]},
    {"Pad Thai": ["Thai"]},
    {"Home Goods": ["Home Goods"]},
    {"Cinnamon Roll": ["Desserts"]},
    {"Cold pressed juices": ["Juice Bars & Smoothies", "Beverages"]},
    {"Hot Spicy Chicken": ["American", "Chicken"]},
    {"Fish": ["Seafood"]},
    {"Cof": ["Coffee"]},
    {"Steakhouses": ["American", "Steak"]},
    {"Sides": ["Sides"]},
    {"Noodles": ["Asian"]},
    {"Teriyaki": ["Japanese"]},
    {"Gyros": ["Greek"]},
    {"Hot Side Choices": ["American"]},
    {"Moroccan": ["Moroccan"]},
    {"Delis": ["Deli"]},
    {"Korean BBQ": ["Korean"]},
    {"Cheesebread": ["Bakery"]},
    {"Chicharron": ["Latin American"]},
    {"Nigiri": ["Japanese", "Sushi"]},
    {"Chorizo": ["Meat Shops"]},
    {"Convenience": ["Convenience"]},
    {"Cafe": ["Cafes"]},
    {"Corn": ["Vegetarian"]},
    {"Surf & Turf": ["American", "Seafood", "Steak"]},
    {"Spicy": ["Spicy"]},
    {"Hardware": ["Hardware"]},
    {"American (New)": ["American"]},
    {"Asian Fusion": ["Asian"]},
    {"French Macarons": ["French", "Desserts"]},
    {"Box Lunch": ["American"]},
    {"Meatballs": ["Italian"]},
    {"German": ["German"]},
    {"Karaage": ["Japanese"]},
    {"Vermicelli": ["Vietnamese"]},
    {"Gyoza": ["Japanese"]},
    {"Scallops": ["Seafood"]},
    {"Sweets": ["Desserts"]},
    {"Coffee Shop": ["Coffee"]},
    {"Hondurenos": ["Honduran"]},
    {"samosa": ["Indian"]},
    {"Organic Shakes": ["Healthy", "Shakes"]},
    {"Biryanis": ["Indian"]},
    {"Mediterranean": ["Mediterranean"]},
    {"Wine": ["Wine"]},
    {"Toys": ["Toys"]},
    {"7 Eleven": ["Convenience"]},
    {"Produce": ["Grocery"]},
    {"Black-Owned": ["Specialty Foods"]},
    {"Pubs": ["Bars"]},
    {"Chips and Salsa": ["Mexican", "Appetizers"]},
    {"Hair": ["Beauty"]},
    {"Venezuelan": ["Venezuelan"]},
    {"Cat Health": ["Pets"]},
    {"Snacks": ["Snacks"]},
    {"latin": ["Latin American"]},
    {"Chicken Tenders": ["American", "Chicken"]},
    {"Barbecue": ["BBQ"]},
    {"Poke": ["Hawaiian"]},
    {"Supplements": ["Health"]},
    {"Specialty Food": ["Specialty Foods"]},
    {"American": ["American"]},
    {"Egg Benedict": ["American", "Breakfast"]},
    {"Miso Soup": ["Japanese", "Soups"]},
    {"Afghan": ["Afghan"]},
    {"Omelets": ["American", "Breakfast"]},
    {"Vitamins": ["Health"]},
    {"Belgian": ["Belgian"]},
    {"Candy Stores": ["Desserts"]},
    {"Kimchi Jigae": ["Korean"]},
    {"storage": ["Storage"]},
    {"Stromboli": ["Italian"]},
    {"Souvlaki": ["Greek"]},
    {"Quesadillas": ["Mexican"]},
    {"Kosher": ["Kosher"]},
    {"Plants": ["Home Goods"]},
    {"Gift": ["Special Occasions"]},
    {"Hibachi": ["Japanese"]},
    {"Cookies": ["Desserts"]},
    {"Eggs": ["Eggs"]},
    {"Sliders": ["American", "Sandwiches"]},
    {"Rotisserie": ["American", "Meat"]},
    {"Makeup": ["Beauty"]},
    {"Tex-Mex": ["Mexican"]},
    {"Nigerian": ["Nigerian"]},
    {"Dog Health": ["Pets"]},
    {"Kabobs": ["Middle Eastern"]},
    {"Side Vegetables": ["Vegetarian", "Sides"]},
    {"Baguette": ["French", "Bakery"]},
    {"Rice Dishes": ["Asian"]},
    {"Gift Shops": ["Gifts"]},
    {"Kids Menu": ["Kids"]},
    {"Plants & Flowers": ["Florist"]},
    {"Vegetarian/Vegan": ["Vegetarian"]},
    {"Fruit Platters": ["Fruits"]},
    {"Crepes & Waffles": ["French", "Desserts"]},
    {"Convenience Stores": ["Convenience"]},
    {"Dips": ["Appetizers"]},
    {"Californian": ["Californian"]},
    {"Keto Friendly": ["Specialty Foods", "Low Carb"]},
    {"Fried Rice": ["Asian"]},
    {"food stands": ["Food Stands"]},
    {"Turkish Coffee": ["Turkish", "Coffee"]},
    {"Hair & Beauty": ["Beauty"]},
    {"Tapas Bars": ["Spanish"]},
    {"Local Business": ["Specialty Foods"]},
    {"Honey": ["Specialty Foods"]},
    {"Food Court": ["Food Court"]},
    {"Indian Sweets": ["Indian", "Desserts"]},
    {"Yogurt": ["Desserts", "Yogurt"]},
    {"Sushi Bars & Bowls": ["Japanese", "Sushi", "Bowls"]},
    {"street vendors": ["Street Food"]},
    {"Gourmet Chocolate": ["Desserts"]},
    {"Fabric Stores": ["Fabric"]},
    {"Beef Jerky": ["Snacks"]},
    {"Mac & Cheese": ["American", "Comfort Food"]},
    {"Groceries": ["Grocery"]},
    {"Tasting Classes": ["Tasting Classes"]},
    {"French Fries": ["American", "Appetizers"]},
    {"Health Market": ["Health"]},
    {"Donut": ["Desserts"]},
    {"Wagyu": ["Meat Shops"]},
    {"Wraps & Salads": ["Salads", "Sandwiches"]},
    {"Bubble Tea, Coffee & Tea": ["Beverages", "Coffee", "Tea"]},
    {"Chicken Parm": ["American", "Chicken"]},
    {"Chinese BBQ": ["Chinese"]},
    {"Produce Market": ["Grocery"]},
    {"Acai Bowl": ["Healthy", "Bowls"]},
    {"Asian Fusion restaurant": ["Asian"]},
    {"Shrimp Po Boys": ["American", "Sandwiches", "Seafood"]},
    {"Fast food restaurant": ["Fast Food"]},
    {"Butcher Shop": ["Meat Shops"]},
    {"Sports Bar": ["Bars", "Sports"]},
    {"Indian Food": ["Indian"]},
    {"Boba Tea": ["Asian", "Tea", "Beverages"]},
    {"Roast Beef Sandwich": ["American", "Sandwiches", "Meat"]},
    {"Meat Store": ["Meat Shops"]},
    {"Dim Sum Restaurant": ["Chinese"]},
    {"Ethiopian Restaurant": ["African"]},
    {"Italian Food": ["Italian"]},
    {"Brazilian Steakhouse": ["Brazilian", "Steak"]},
    {"Soul Food Restaurant": ["American", "Southern"]},
    {"Creperie": ["French", "Desserts"]},
    {"Greek Food": ["Greek"]},
    {"Deli & Grocery": ["Deli", "Grocery"]},
    {"Soul Food": ["American", "Southern"]},
    {"Mexican Restaurants": ["Mexican"]},
    {"Lebanese Food": ["Lebanese"]},
    {"Pharmacy & Health": ["Pharmacy", "Health"]},
    {"Tacos & Tequila": ["Mexican"]},
    {"Persian Food": ["Persian"]},
    {"Tavern": ["Bars"]},
    {"Sushi Restaurant": ["Japanese", "Sushi"]},
    {"Italian restaurant": ["Italian"]},
    {"Sushi Bar": ["Japanese", "Sushi"]},
    {"Gyro Restaurant": ["Greek"]},
    {"Vietnamese Cuisine": ["Vietnamese"]},
    {"Pub": ["Bars"]},
    {"Juice Bar & Smoothies": ["Juice Bars & Smoothies", "Beverages"]},
    {"Bar & Grill": ["Bars", "American"]},
    {"Pharmacy and Health": ["Pharmacy", "Health"]},
    {"Juice Bar & Smoothies": ["Juice Bars & Smoothies", "Beverages"]},
    {"Japanese Food": ["Japanese"]},
    {"Soul Food": ["American", "Southern"]},
    {"Creperie": ["French", "Desserts"]},
    {"Deli & Grocery": ["Deli", "Grocery"]},
    {"Mexican Restaurants": ["Mexican"]},
    {"Lebanese Food": ["Lebanese"]},
    {"Pharmacy & Health": ["Pharmacy", "Health"]},
    {"Tacos & Tequila": ["Mexican"]},
    {"Persian Food": ["Persian"]},
    {"Tavern": ["Bars"]},
    {"Sushi Restaurant": ["Japanese", "Sushi"]},
    {"Italian restaurant": ["Italian"]},
    {"Sushi Bar": ["Japanese", "Sushi"]},
    {"Gyro Restaurant": ["Greek"]},
    {"Vietnamese Cuisine": ["Vietnamese"]},
    {"Pub": ["Bars"]},
    {"Juice Bar & Smoothies": ["Juice Bars & Smoothies", "Beverages"]},
    {"Bar & Grill": ["Bars", "American"]},
    {"Pharmacy and Health": ["Pharmacy", "Health"]},
    {"Juice Bar & Smoothies": ["Juice Bars & Smoothies", "Beverages"]},
];


def _merge_category_entries(entries):
    """Collapse a list of ``{label: [categories]}`` dicts into one dict.

    Labels that occur in several entries are merged into a single list that
    keeps first-seen order and contains each category only once.

    The result never shares list objects with ``entries``. Storing the source
    list directly and later calling ``extend`` on it would both mutate the
    source data and repeat categories for every duplicated label.

    Args:
        entries: iterable of dicts mapping a label to a list of categories.

    Returns:
        dict mapping each label to its de-duplicated list of categories.
    """
    merged = {}
    for entry in entries:
        for category, cuisines_types in entry.items():
            # setdefault creates a fresh list per label, so the input lists
            # are only ever read, never modified.
            bucket = merged.setdefault(category, [])
            for value in cuisines_types:
                if value not in bucket:
                    bucket.append(value)
    return merged


# Flatten the list of single-key dicts into one label -> categories lookup
processed_mapping = _merge_category_entries(food_category_mapping)

for category, cuisines_types in processed_mapping.items():
    print(f"{category}: {cuisines_types}")

Hot Dogs: ['Fast Food', 'American', 'Fast Food', 'American']
Muffin: ['Bakery', 'Desserts']
Bubble Tea: ['Beverages']
Nachos: ['Mexican', 'Appetizers']
Frozen Yogurt: ['Desserts', 'Ice Cream & Frozen Yogurt']
Gourmet: ['Specialty Foods']
Shrimp: ['Seafood']
Ve: ['Vegetarian']
Roses: ['Florist']
Pet Treats: ['Pet Food']
Salad: ['Healthy', 'Salads']
Tempura: ['Japanese', 'Appetizers']
Beer Bar: ['Beer', 'Bars']
Kid Friendly: ['Family Friendly']
Tater Tots: ['American', 'Appetizers']
Tortilla: ['Mexican']
Cat Litter: ['Pet Care']
meat: ['Meat Shops']
Falafel: ['Middle Eastern', 'Vegetarian']
Keto: ['Specialty Foods', 'Low Carb']
Pita: ['Middle Eastern', 'Bakery']
Plant Based: ['Vegetarian', 'Specialty Foods']
Bulgogi: ['Korean']
Mexican: ['Mexican']
Unagi: ['Japanese', 'Seafood']
Sauce: ['Condiments']
Bakery: ['Bakery']
Ice Coffee: ['Coffee', 'Beverages']
Pasta & Burgers: ['Italian', 'Burgers', 'Pasta']
Cheeseburger: ['American', 'Burgers']
Bar: ['Bars']
Cheese Fries: ['American', 'Appeti

In [None]:
# Destination file for the per-cuisine counts
csv_path = '/content/unique_cuisines_count.csv'

# Bring the Spark aggregation into pandas, then write it out without the
# pandas index column.
unique_cuisines_count_pd = unique_cuisines_count.toPandas()
unique_cuisines_count_pd.to_csv(path_or_buf=csv_path, index=False)

In [None]:
import pandas as pd

# Read the raw export with pandas. This rebinds `df`, which the Spark cell
# above bound to a Spark DataFrame.
df = pd.read_csv(filepath_or_buffer='/content/doordash_raw.csv')

# Cuisine labels that survive cleaning; every other label is discarded
accepted_cuisines = [
    "Italian", "Mexican", "Chinese", "Japanese",
    "American", "Indian", "Thai", "Mediterranean",
    "French", "Spanish", "Korean", "Vietnamese",
    "Greek", "African", "Middle Eastern", "Caribbean",
    "German", "Turkish", "Russian", "Brazilian",
    "Peruvian", "Argentinian", "European", "Seafood"
]


def clean_cuisines(cuisine_string):
    """Reduce a pipe-delimited cuisine string to its accepted cuisines.

    The value is converted with ``str()``, split on ``'|'``, and each piece is
    stripped of surrounding whitespace. Pieces found in ``accepted_cuisines``
    are kept in their original order (repeats included).

    Returns:
        The kept labels joined with ``'|'``, or ``None`` when nothing is kept.
    """
    kept = []
    for token in str(cuisine_string).split('|'):
        label = token.strip()
        if label in accepted_cuisines:
            kept.append(label)

    if not kept:
        return None
    return '|'.join(kept)

# Replace each row's raw cuisine string with its cleaned form
# (None when no accepted cuisine is present).
cleaned_cuisines = df['cuisines'].apply(clean_cuisines)
df['cuisines'] = cleaned_cuisines

# Keep only the rows that still have a cuisine after cleaning
df = df.dropna(subset=['cuisines'])

# Write the filtered rows to a new CSV file, without the pandas index column
df.to_csv(path_or_buf='/content/cleaned_v1.csv', index=False)

In [None]:
import pandas as pd

# Start again from the raw export; `df` is rebound to a fresh pandas DataFrame.
df = pd.read_csv(filepath_or_buffer='/content/doordash_raw.csv')

# Define lists and mappings for cuisines and categories.
# NOTE: accepted_cuisines is not read by classify_cuisines below; it is kept
# because other cells may rely on the name.
accepted_cuisines = [
    "Italian", "Mexican", "Chinese", "Japanese", "American", "Indian",
    "Thai", "Mediterranean", "French", "Spanish", "Korean", "Vietnamese",
    "Greek", "African", "Middle Eastern", "Caribbean", "German", "Turkish",
    "Russian", "Brazilian", "Peruvian", "Argentinian", "European", "Seafood"
]

# Raw label -> value written to the 'Specialty Items' column
specialty_items_map = {
    "Pizza": "Pizza", "Sushi": "Sushi", "Burgers": "Burgers", "Tacos": "Tacos",
    "Sandwiches": "Sandwiches", "Pasta": "Pasta", "Ramen": "Ramen", "Salad": "Salad",
    "Nachos": "Mexican", "Curry": "Indian", "Steak": "American", "Shawarma": "Middle Eastern",
    "Pho": "Vietnamese", "Falafel": "Middle Eastern", "Donuts": "Desserts", "Banh Mi": "Vietnamese"
}

# Raw label -> value written to the 'Meal Types' column
meal_types_map = {
    "Breakfast": "Breakfast", "Brunch": "Brunch", "Lunch": "Lunch", "Dinner": "Dinner",
    "Snacks": "Snacks", "Appetizers": "Appetizers", "Main Course": "Main Course", "Desserts": "Desserts",
    "Beverages": "Beverages"
}

# Raw label -> value written to the 'Dietary Preferences' column
dietary_preferences_map = {
    "Vegetarian": "Vegetarian", "Vegan": "Vegan", "Gluten-Free": "Gluten-Free",
    "Dairy-Free": "Dairy-Free", "Keto": "Keto", "Paleo": "Paleo", "Low-Carb": "Low-Carb",
    "Halal": "Halal", "Kosher": "Kosher", "Vitamins": "Health"
}

# Combine all mappings for easy lookup (later maps win on duplicate keys)
combined_mapping = {**specialty_items_map, **meal_types_map, **dietary_preferences_map}


def classify_cuisines(cuisine_string):
    """Classify a pipe-delimited cuisine string into three category strings.

    The value is converted with ``str()``, split on ``'|'`` and each label is
    stripped. Every label is looked up in ``specialty_items_map``,
    ``meal_types_map`` and ``dietary_preferences_map``; a label missing from a
    map contributes ``"Miscellaneous"`` to that map's result.

    Each result is de-duplicated and joined with ``'|'`` in sorted order, so
    the output is identical from run to run (joining a ``set`` directly yields
    an order that can change between interpreter sessions).

    Args:
        cuisine_string: raw value of the 'cuisines' column (any type).

    Returns:
        Tuple ``(specialty_items, meal_types, dietary_preferences)`` of strings.
    """
    items = [item.strip() for item in str(cuisine_string).split('|')]

    # NOTE(review): "Miscellaneous" is added for every unmatched label, so it
    # also appears next to real matches (e.g. "Miscellaneous|Pizza"). Confirm
    # whether it was meant to be a fallback only when nothing matches.
    lookups = (specialty_items_map, meal_types_map, dietary_preferences_map)
    return tuple(
        '|'.join(sorted({lookup.get(item, "Miscellaneous") for item in items}))
        for lookup in lookups
    )

# Classify every row; each result is a 3-tuple
# (specialty items, meal types, dietary preferences).
classified_rows = df['cuisines'].apply(classify_cuisines).tolist()

# Spread the tuples across three new columns
df[['Specialty Items', 'Meal Types', 'Dietary Preferences']] = classified_rows

# Write the augmented DataFrame to a new CSV file, without the pandas index column
df.to_csv(path_or_buf='/content/cleaned_v5.csv', index=False)


In [None]:
import pandas as pd

# Load the dataset that the previous cell saved with the three tag columns added
df = pd.read_csv('/content/cleaned_v5.csv')

# Canonical cuisine labels; anything that cannot be resolved to one of these
# ends up as "Miscellaneous" in clean_and_map_cuisines.
accepted_cuisines = [
    "Italian", "Mexican", "Chinese", "Japanese", "American", "Indian",
    "Thai", "Mediterranean", "French", "Spanish", "Korean", "Vietnamese",
    "Greek", "African", "Middle Eastern", "Caribbean", "German", "Turkish",
    "Russian", "Brazilian", "Peruvian", "Argentinian", "European", "Seafood"
]

# Mapping from specific dishes/entries to a cuisine label.
# Every key must appear exactly once: a repeated key in a dict literal is
# silently overwritten by its last occurrence.
cuisine_mapping = {
    # Japanese
    "Sushi": "Japanese", "Tempura": "Japanese", "Sashimi": "Japanese", "Ramen": "Japanese", "Nigiri": "Japanese",

    # Mexican
    "Tacos": "Mexican", "Burritos": "Mexican", "Quesadillas": "Mexican", "Nachos": "Mexican", "Enchiladas": "Mexican",

    # Italian
    "Pizza": "Italian", "Spaghetti": "Italian", "Calzones": "Italian", "Gelato": "Italian", "Lasagna": "Italian",

    # Chinese
    "Dumplings": "Chinese", "Fried Rice": "Chinese", "Dim Sum": "Chinese", "Kung Pao Chicken": "Chinese", "Chow Mein": "Chinese",

    # American
    "Burgers": "American", "Hot Dogs": "American", "BBQ": "American", "Fried Chicken": "American", "Steak": "American",

    # Indian
    "Tandoori": "Indian", "Biryani": "Indian", "Curry": "Indian", "Samosa": "Indian", "Naan": "Indian",

    # Thai
    "Pad Thai": "Thai", "Green Curry": "Thai", "Red Curry": "Thai", "Thai Iced Tea": "Thai",

    # Mediterranean
    # ("Falafel" used to be listed here as well, but the later Middle Eastern
    # entry overrode it; only the effective entry is kept.)
    "Gyro": "Mediterranean", "Shawarma": "Mediterranean", "Hummus": "Mediterranean",

    # Greek
    "Moussaka": "Greek", "Souvlaki": "Greek", "Greek Salad": "Greek",

    # French
    "Croissant": "French", "Baguette": "French", "Crepes": "French", "Quiche": "French",

    # Korean
    "Korean BBQ": "Korean", "Kimchi": "Korean", "Bibimbap": "Korean", "Tteokbokki": "Korean",

    # Spanish
    "Paella": "Spanish", "Tapas": "Spanish", "Churros": "Spanish",

    # Middle Eastern
    "Kebabs": "Middle Eastern", "Falafel": "Middle Eastern", "Baklava": "Middle Eastern",

    # Caribbean
    "Jerk Chicken": "Caribbean", "Curried Goat": "Caribbean", "Cuban Sandwich": "Caribbean",

    # Brazilian
    "Churrasco": "Brazilian", "Feijoada": "Brazilian", "Coxinha": "Brazilian",

    # Vietnamese
    "Pho": "Vietnamese", "Banh Mi": "Vietnamese", "Spring Rolls": "Vietnamese",

    # Seafood
    "Shrimp": "Seafood", "Lobster": "Seafood", "Crab": "Seafood", "Salmon": "Seafood", "Octopus": "Seafood",

    # Others
    # NOTE: these targets are not in accepted_cuisines, so clean_and_map_cuisines
    # currently ignores them (such entries fall through to "Miscellaneous").
    "Chicken Wings": "Fast Food", "Sandwiches": "Fast Food", "Fries": "Fast Food",
    "Smoothies": "Healthy", "Salads": "Healthy", "Acai Bowls": "Healthy",
    "Pancakes": "Breakfast", "Waffles": "Breakfast", "Omelets": "Breakfast",
    "Coffee": "Cafe", "Tea": "Cafe", "Latte": "Cafe",
    "Ice Cream": "Dessert", "Cakes": "Dessert", "Pies": "Dessert",
    "Beer": "Beverages", "Wine": "Beverages", "Cocktails": "Beverages",
}

# Helper dictionaries for case-insensitive comparison:
# lowercase dish -> cuisine label, and lowercase cuisine -> canonical spelling
cuisine_mapping_lower = {k.lower(): v for k, v in cuisine_mapping.items()}
accepted_cuisines_lower = {cuisine.lower(): cuisine for cuisine in accepted_cuisines}

# Function to clean and map the 'cuisines' column
def clean_and_map_cuisines(cuisine_string, accepted=None, mapping=None):
    """Normalise a '|'-separated cuisine string to the accepted cuisine labels.

    Parameters
    ----------
    cuisine_string : object
        Raw cell value. It is converted with ``str()`` and lowercased, so
        matching is case-insensitive and non-string cells do not raise.
    accepted : dict, optional
        Lowercase cuisine name -> canonical spelling. Defaults to the
        module-level ``accepted_cuisines_lower``.
    mapping : dict, optional
        Lowercase dish/entry -> cuisine label. Defaults to the module-level
        ``cuisine_mapping_lower``.

    Returns
    -------
    str
        The distinct accepted cuisines, sorted alphabetically and joined by
        '|', or ``"Miscellaneous"`` when nothing could be resolved.
    """
    if accepted is None:
        accepted = accepted_cuisines_lower
    if mapping is None:
        mapping = cuisine_mapping_lower

    final_cuisines = set()
    for raw in str(cuisine_string).lower().split('|'):
        key = raw.strip()
        if key in accepted:
            # Already an accepted cuisine: keep its canonical spelling
            final_cuisines.add(accepted[key])
            continue
        # Otherwise try to translate a dish into a cuisine; targets that are
        # not themselves accepted cuisines are dropped.
        target = mapping.get(key)
        if target and target.lower() in accepted:
            final_cuisines.add(accepted[target.lower()])

    # If no valid cuisines found, assign to Miscellaneous
    if not final_cuisines:
        return "Miscellaneous"
    # Sort before joining: set order is arbitrary, and an unsorted join lets the
    # same combination appear as both "A|B" and "B|A" in the output column.
    return '|'.join(sorted(final_cuisines))

# Replace each raw 'cuisines' value with its cleaned, mapped equivalent
df['cuisines'] = df['cuisines'].map(clean_and_map_cuisines)

# Write the result out as a new CSV file
df.to_csv('/content/cleaned_v2.csv', index=False)


In [None]:
# Reload the cuisine-normalised data that the previous cell wrote to disk
df = pd.read_csv('/content/cleaned_v2.csv')

In [None]:
# Count the missing values in every column
df.isna().sum()

searched_zipcode           0
searched_lat               0
searched_lng               0
searched_address           0
searched_state             0
searched_city              0
searched_metro             0
city_slug              11219
latitude                   0
longitude                  0
distance                   0
loc_name                   0
loc_number                 0
url                        0
address                    0
cuisines                   0
delivery_fee_raw           0
delivery_fee              17
delivery_time_raw          0
delivery_time              0
service_fee_raw        11219
service_fee            11219
phone                  11219
review_count               0
review_rating              0
RunDate                    0
Specialty Items            0
Meal Types                 0
Dietary Preferences        0
dtype: int64

In [None]:
# Drop the columns that show 11219 missing values in the null summary
# (city_slug, service_fee_raw, service_fee, phone) together with
# delivery_fee, delivery_fee_raw and url, then discard every row that
# still contains a missing value. The trailing `df` displays the result.
df = df.drop(['city_slug','service_fee_raw','service_fee','phone', 'delivery_fee', 'delivery_fee_raw', 'url'],axis=1)
df = df.dropna(axis=0)
df

Unnamed: 0,searched_zipcode,searched_lat,searched_lng,searched_address,searched_state,searched_city,searched_metro,latitude,longitude,distance,...,address,cuisines,delivery_time_raw,delivery_time,review_count,review_rating,RunDate,Specialty Items,Meal Types,Dietary Preferences
0,10003,40.732473,-73.987380,"41 Cooper Square, 7 E 7th St, New York, NY 100...",NY,New York,NY,40.675464,-73.980782,1.594482,...,"229 5th Avenue, Brooklyn, NY 11215, USA",Vietnamese,24 - 34,29,1364,4.8,25/04/22 7:01,Sandwiches|Vietnamese|Miscellaneous,Miscellaneous,Miscellaneous
1,11219,40.631075,-73.997695,"4701 15th Ave, Brooklyn, NY 11219, USA",NY,Brooklyn,NY,40.675464,-73.980782,1.594482,...,"229 5th Avenue, Brooklyn, NY 11215, USA",Vietnamese,24 - 34,29,1364,4.8,25/04/22 7:01,Sandwiches|Vietnamese|Miscellaneous,Miscellaneous,Miscellaneous
2,11216,40.678832,-73.950677,"300 Madison St, Brooklyn, NY 11216, USA",NY,Brooklyn,NY,40.675464,-73.980782,1.594482,...,"229 5th Avenue, Brooklyn, NY 11215, USA",Vietnamese,24 - 34,29,1364,4.8,25/04/22 7:01,Sandwiches|Vietnamese|Miscellaneous,Miscellaneous,Miscellaneous
3,11219,40.631075,-73.997695,"4701 15th Ave, Brooklyn, NY 11219, USA",NY,Brooklyn,NY,40.672978,-73.950462,0.404577,...,"752 Nostrand Ave, Brooklyn, NY 11216, USA",Mexican,23 - 33,28,2000,4.6,25/04/22 7:01,Tacos|Miscellaneous,Brunch|Miscellaneous,Miscellaneous
4,10003,40.732473,-73.987380,"41 Cooper Square, 7 E 7th St, New York, NY 100...",NY,New York,NY,40.672978,-73.950462,0.404577,...,"752 Nostrand Ave, Brooklyn, NY 11216, USA",Mexican,23 - 33,28,2000,4.6,25/04/22 7:01,Tacos|Miscellaneous,Brunch|Miscellaneous,Miscellaneous
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11214,2176,42.457335,-71.061662,"119 Myrtle St, Melrose, MA 02176, USA",MA,Melrose,BO,42.463513,-71.026401,1.847284,...,"Buffalo Wild Wings, 180 Main St, Saugus, MA 01...",American,35 - 45,40,955,4.0,25/04/22 7:01,Burgers|Miscellaneous,Dinner|Miscellaneous|Lunch,Miscellaneous
11215,2176,42.457335,-71.061662,"119 Myrtle St, Melrose, MA 02176, USA",MA,Melrose,BO,42.478599,-71.063536,1.472188,...,"Dockside Restaurants, 1099 Main St, Wakefield,...",American|Italian,46 - 56,51,82,4.1,25/04/22 7:01,Sandwiches|Miscellaneous|Pizza|Burgers,Miscellaneous,Miscellaneous
11216,2176,42.457335,-71.061662,"119 Myrtle St, Melrose, MA 02176, USA",MA,Melrose,BO,42.460613,-71.027119,1.775243,...,"1201 Broadway, Saugus, MA 01906, USA",Miscellaneous,27 - 37,32,110,4.5,25/04/22 7:01,Miscellaneous,Beverages|Miscellaneous,Miscellaneous
11217,2176,42.457335,-71.061662,"119 Myrtle St, Melrose, MA 02176, USA",MA,Melrose,BO,42.463535,-71.026164,1.859393,...,"180 Main St, Saugus, MA 01906, USA",American,29 - 39,34,23,3.7,25/04/22 7:01,Burgers|Sandwiches|Miscellaneous,Miscellaneous,Miscellaneous


In [None]:
from pyspark.sql.functions import col


# Keep only rows where 'delivery_time' is less than or equal to 180
# (presumably minutes — TODO confirm the unit and the intended cut-off),
# then re-check the per-column missing-value counts.
df = df[df['delivery_time'] <= 180]
df.isna().sum()

searched_zipcode       0
searched_lat           0
searched_lng           0
searched_address       0
searched_state         0
searched_city          0
searched_metro         0
latitude               0
longitude              0
distance               0
loc_name               0
loc_number             0
address                0
cuisines               0
delivery_time_raw      0
delivery_time          0
review_count           0
review_rating          0
RunDate                0
Specialty Items        0
Meal Types             0
Dietary Preferences    0
dtype: int64

In [None]:
# (rows, columns) of the frame after the delivery-time filter
df.shape

(11181, 22)

In [None]:
# Ensure that 'review_rating', 'review_count', and 'delivery_time' contain only numeric values.
# errors='coerce' turns anything unparseable into NaN. assign() replaces each column
# outright, so the result carries the numeric dtype; `df.loc[:, col] = ...` writes into
# the existing column instead and can leave an object dtype in place.
numeric_columns = ['review_rating', 'review_count', 'delivery_time']
df = df.assign(**{
    column: pd.to_numeric(df[column], errors='coerce') for column in numeric_columns
})

# Remove rows where any of the three columns contains NaN
df = df.dropna(subset=numeric_columns)

df.to_csv('/content/doordash.csv', index=False)

In [None]:
# Distinct searched addresses in the cleaned frame; .shape shows how many there are
unique_addresses = df['searched_address'].unique()
unique_addresses.shape

(37,)

In [None]:
# Read the exported CSV back into a separate frame
dfa = pd.read_csv('/content/doordash.csv')

In [None]:
# Distinct location names and distinct searched addresses in the exported data
unique_locations = dfa['loc_name'].unique()
unique_address = dfa['searched_address'].unique()

In [None]:
# Number of distinct location names
unique_locations.shape


(3547,)