In [1]:
pip install pyspark

Collecting pyspark
  Downloading pyspark-3.5.2.tar.gz (317.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.3/317.3 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.2-py2.py3-none-any.whl size=317812363 sha256=8121460223210653bde0731edbac96ea7891946ddaefe6237fef52d95f71e5f0
  Stored in directory: /root/.cache/pip/wheels/34/34/bd/03944534c44b677cd5859f248090daa9fb27b3c8f8e5f49574
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.2


In [22]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col,avg,sum,max

spark=SparkSession.builder.appName("FoodDelivery").getOrCreate()

food_delivert_df=spark.read.csv("/content/food_delivery_data.csv", header=True,inferSchema=True)

In [24]:
# 1. Calculate Total Revenue per Restaurant
total_revenue_per_restaurant = food_delivert_df.withColumn("total_revenue", col("price") * col("quantity")) \
    .groupBy("restaurant_name").agg(sum("total_revenue").alias("total_revenue"))
print("total revenue per restaurant: ")
total_revenue_per_restaurant.show()

# 2.  Find the Fastest Delivery
fastest_delivery=food_delivert_df.orderBy(col("delivery_time_mins").asc()).limit(1)
print("fastest delivery: ")
fastest_delivery.show()

# 3.Calculate Average Delivery Time per Restaurant
avg_delivery_time=food_delivert_df.groupBy("restaurant_name").agg(avg("delivery_time_mins").alias("avg_delivery_time"))
print("average delivery time per restaurant: ")
avg_delivery_time.show()

# 4. Filter Orders for a Specific Customer
order_for_specific_customers=food_delivert_df.filter(col("customer_id")==201)
print("order for specific customers: ")
order_for_specific_customers.show()

# 5. Find Orders Where Total Amount Spent is Greater Than $20
orders_on_amount_spent=food_delivert_df.withColumn("total_amount", col("price") * col("quantity")) \
    .filter(col("total_amount") > 20)
print("orders where total amount spent is greater than $20: ")
orders_on_amount_spent.show()

# 6. Calculate the Total Quantity of Each Food Item Sold
total_quantity_of_each_food_item=food_delivert_df.groupBy("food_item").agg(sum("quantity").alias("total_quantity"))
print("total quantity of each food item sold: ")
total_quantity_of_each_food_item.show()

# 7. Find the Top 3 Most Popular Restaurants by Number of Orders
top_3_restaurants=food_delivert_df.groupBy("restaurant_name").count().orderBy(col("count").desc()).limit(3)
print("top 3 most popular restaurants by number of orders: ")
top_3_restaurants.show()

# 8. Calculate Total Revenue per Day
total_revenue_per_day=food_delivert_df.withColumn("total_revenue", col("price") * col("quantity")) \
    .groupBy("order_d").agg(sum("total_revenue").alias("total_revenue"))
print("total revenue per day: ")
total_revenue_per_day.show()

# 9.  Find the Longest Delivery Time for Each Restaurant
longest_delivery_time=food_delivert_df.groupBy("restaurant_name").agg(max("delivery_time_mins"))
print("longest delivery time for each restaurant: ")
longest_delivery_time.show()

# 10.  Create a New Column for Total Order Value
total_order_value=food_delivert_df.withColumn("total_order_value", col("price") * col("quantity"))
print("new column for total order value: ")
total_order_value.show()

total revenue per restaurant: 
+---------------+------------------+
|restaurant_name|     total_revenue|
+---------------+------------------+
|         Subway|              13.0|
|      Pizza Hut|             12.99|
|    Burger King|              6.99|
|            KFC|             35.96|
|       Domino's|             23.98|
|     McDonald's|20.950000000000003|
|      Starbucks|              13.5|
+---------------+------------------+

fastest delivery: 
+--------+-----------+---------------+---------+--------+-----+------------------+----------+
|order_id|customer_id|restaurant_name|food_item|quantity|price|delivery_time_mins|   order_d|
+--------+-----------+---------------+---------+--------+-----+------------------+----------+
|       6|        205|      Starbucks|   Coffee|       1|  4.5|                15|2023-06-18|
+--------+-----------+---------------+---------+--------+-----+------------------+----------+

average delivery time per restaurant: 
+---------------+---------------