In [1]:
import pandas as pd

# Load the raw order records from disk and preview the first five rows.
orders = pd.read_csv(filepath_or_buffer="orders.csv")
orders.head(n=5)


Unnamed: 0,order_id,user_id,restaurant_id,order_date,total_amount,restaurant_name
0,1,2508,450,18-02-2023,842.97,New Foods Chinese
1,2,2693,309,18-01-2023,546.68,Ruchi Curry House Multicuisine
2,3,2084,107,15-07-2023,163.93,Spice Kitchen Punjabi
3,4,319,224,04-10-2023,1155.97,Darbar Kitchen Non-Veg
4,5,1064,293,25-12-2023,1321.91,Royal Eatery South Indian


In [2]:
import json

# Parse the user records from JSON first, then flatten them into a table
# once the file handle has been released.
with open("users.json") as users_file:
    users_payload = json.load(users_file)

users = pd.json_normalize(users_payload)
users.head(n=5)


Unnamed: 0,user_id,name,city,membership
0,1,User_1,Chennai,Regular
1,2,User_2,Pune,Gold
2,3,User_3,Bangalore,Gold
3,4,User_4,Bangalore,Regular
4,5,User_5,Pune,Gold


In [3]:
import sqlite3

# Build a throwaway in-memory SQLite database from the SQL dump, copy the
# `restaurants` table into a DataFrame, and then release the connection.
conn = sqlite3.connect(":memory:")
try:
    with open("restaurants.sql") as f:
        # The dump is executed as-is; it is expected to create and populate
        # the `restaurants` table queried below.
        conn.executescript(f.read())

    restaurants = pd.read_sql("SELECT * FROM restaurants", conn)
finally:
    # The DataFrame is fully materialised by read_sql, so the connection is
    # no longer needed; close it explicitly instead of leaking it for the
    # lifetime of the kernel.
    conn.close()

restaurants.head()


Unnamed: 0,restaurant_id,restaurant_name,cuisine,rating
0,1,Restaurant_1,Chinese,4.8
1,2,Restaurant_2,Indian,4.1
2,3,Restaurant_3,Mexican,4.3
3,4,Restaurant_4,Chinese,4.1
4,5,Restaurant_5,Chinese,4.8


In [4]:
# Enrich every order with its user's attributes and then its restaurant's
# attributes. Left joins keep all orders even when a lookup row is missing.
#
# validate="many_to_one" makes pandas raise a MergeError if a user_id or
# restaurant_id occurs more than once in the lookup table. Without it, a
# duplicated key would silently duplicate order rows and inflate every
# count and total computed from the merged data.
#
# NOTE: `orders` and `restaurants` both have a `restaurant_name` column, so
# the result carries `restaurant_name_x` (from orders) and
# `restaurant_name_y` (from restaurants) via pandas' default suffixes.
merged = orders.merge(users, on="user_id", how="left", validate="many_to_one")
merged = merged.merge(
    restaurants, on="restaurant_id", how="left", validate="many_to_one"
)


In [5]:
# Persist the merged orders/users/restaurants table; index=False leaves the
# row index out of the written file.
merged.to_csv(
    path_or_buf="final_food_delivery_dataset.csv",
    index=False,
)


In [6]:
import pandas as pd

# Reload the merged dataset from the CSV written earlier so the analysis
# cells work from the persisted file.
df = pd.read_csv("final_food_delivery_dataset.csv")

# Order dates are stored as DD-MM-YYYY (e.g. "18-02-2023"). Passing the
# explicit format makes parsing strict and deterministic, whereas
# dayfirst=True is only a parsing preference.
# Assumes every row uses this layout, as the previewed rows do.
df["order_date"] = pd.to_datetime(df["order_date"], format="%d-%m-%Y")


In [7]:
# Total amount spent by Gold members in each city, largest first.
(
    df.loc[df["membership"].eq("Gold")]
    .groupby("city")["total_amount"]
    .sum()
    .sort_values(ascending=False)
)


city
Chennai      1080909.79
Pune         1003012.32
Bangalore     994702.59
Hyderabad     896740.19
Name: total_amount, dtype: float64

In [8]:
# Average order amount for each cuisine, highest first.
(
    df.groupby("cuisine")["total_amount"]
    .mean()
    .sort_values(ascending=False)
)


cuisine
Mexican    808.021344
Italian    799.448578
Indian     798.466011
Chinese    798.389020
Name: total_amount, dtype: float64

In [10]:
# Average order amount of Gold members in each city, highest first.
(
    df.loc[df["membership"].eq("Gold")]
    .groupby("city")["total_amount"]
    .mean()
    .sort_values(ascending=False)
)


city
Chennai      808.459080
Hyderabad    806.421034
Bangalore    793.223756
Pune         781.162243
Name: total_amount, dtype: float64

In [11]:
# Share of orders placed by Gold members, as a whole-number percentage.
# The mean of the boolean mask is the fraction of matching rows.
round(df["membership"].eq("Gold").mean() * 100)


50

In [12]:
# Among restaurants with fewer than 20 orders, show the one with the
# highest average order value (aov).
(
    df.groupby("restaurant_name_x")
    .agg(
        orders=pd.NamedAgg(column="order_id", aggfunc="count"),
        aov=pd.NamedAgg(column="total_amount", aggfunc="mean"),
    )
    .loc[lambda stats: stats["orders"] < 20]
    .sort_values("aov", ascending=False)
    .head(1)
)


Unnamed: 0_level_0,orders,aov
restaurant_name_x,Unnamed: 1_level_1,Unnamed: 2_level_1
Hotel Dhaba Multicuisine,13,1040.222308


In [13]:
# Total revenue for every (membership, cuisine) pair, largest first.
(
    df.groupby(["membership", "cuisine"])["total_amount"]
    .sum()
    .sort_values(ascending=False)
)


membership  cuisine
Regular     Mexican    1072943.30
            Italian    1018424.75
Gold        Mexican    1012559.79
            Italian    1005779.05
Regular     Indian      992100.27
Gold        Indian      979312.31
            Chinese     977713.74
Regular     Chinese     952790.91
Name: total_amount, dtype: float64

In [14]:
# Tag each order with the quarter of its order date. This adds a `quarter`
# column to `df` in place.
df["quarter"] = df["order_date"].dt.to_period("Q")

# Total revenue per quarter, largest first.
(
    df.groupby("quarter")["total_amount"]
    .sum()
    .sort_values(ascending=False)
)


quarter
2023Q3    2037385.10
2023Q4    2018263.66
2023Q1    1993425.14
2023Q2    1945348.72
2024Q1      17201.50
Freq: Q-DEC, Name: total_amount, dtype: float64

In [15]:
import pandas as pd

# Reload the merged dataset from disk. Rebinding `df` here also discards
# the `quarter` column that was added to the previous frame in place.
df = pd.read_csv("final_food_delivery_dataset.csv")

# Order dates are stored as DD-MM-YYYY (e.g. "18-02-2023"). Passing the
# explicit format makes parsing strict and deterministic, whereas
# dayfirst=True is only a parsing preference.
# Assumes every row uses this layout, as the previewed rows do.
df["order_date"] = pd.to_datetime(df["order_date"], format="%d-%m-%Y")


In [16]:
# Number of orders placed by Gold members (sum of the boolean mask).
df["membership"].eq("Gold").sum()


np.int64(4987)

In [17]:
# Total number of rows (orders) in the merged dataset.
df.shape[0]


10000

In [18]:
# Total revenue from orders whose user city is Hyderabad, rounded to the
# nearest whole number.
round(df.loc[df["city"].eq("Hyderabad"), "total_amount"].sum())


1889367

In [19]:
# Number of distinct users that placed at least one order
# (missing ids are excluded, which is nunique's default).
df["user_id"].nunique(dropna=True)


2883

In [20]:
# Average order amount across all Gold-member orders, to two decimals.
round(df.loc[df["membership"].eq("Gold"), "total_amount"].mean(), 2)


np.float64(797.15)

In [21]:
# Number of orders placed at restaurants rated 4.5 or higher.
df["rating"].ge(4.5).sum()


np.int64(3374)

In [22]:
# City whose Gold members generated the highest total revenue.
is_gold = df["membership"].eq("Gold")
top_city = df.loc[is_gold].groupby("city")["total_amount"].sum().idxmax()

# Number of Gold-member orders placed in that city.
(is_gold & df["city"].eq(top_city)).sum()


np.int64(1337)