In [None]:
import pandas as pd
import sqlite3

# Load every source CSV into its own DataFrame. Each frame keeps a dedicated
# name because later cells inspect them directly (e.g. printing `.columns`).
DATA_DIR = '/content'

df_users = pd.read_csv(f'{DATA_DIR}/users.csv')
df_restaurants = pd.read_csv(f'{DATA_DIR}/restaurants.csv')
df_orders = pd.read_csv(f'{DATA_DIR}/orders.csv')
df_order_details = pd.read_csv(f'{DATA_DIR}/order_details.csv')
df_menu = pd.read_csv(f'{DATA_DIR}/menu.csv')
df_food = pd.read_csv(f'{DATA_DIR}/food.csv')
df_delivery_partner = pd.read_csv(f'{DATA_DIR}/delivery_partner.csv')

# Build an in-memory SQLite database (':memory:' means nothing is written to
# disk; the data lives only as long as `conn` stays open).
conn = sqlite3.connect(':memory:')

# SQL table name -> source frame. Keeping the mapping in one place makes a
# misspelled table name easy to spot; the last table was previously created
# as 'delivery_parter'.
tables = {
    'users': df_users,
    'restaurants': df_restaurants,
    'orders': df_orders,
    'order_details': df_order_details,
    'menu': df_menu,
    'food': df_food,
    'delivery_partner': df_delivery_partner,
}
for table_name, frame in tables.items():
    # if_exists='replace' keeps the cell re-runnable against the same connection.
    frame.to_sql(table_name, conn, index=False, if_exists='replace')

# The connection is deliberately left open: the query cells below reuse `conn`.
print("CSV converted to DB successfully!")

CSV converted to DB successfully!


In [None]:
# Print the column Index of every loaded frame, each followed by a newline
# separator, so the schema of all tables can be read at a glance.
frames = (
    df_users,
    df_restaurants,
    df_orders,
    df_order_details,
    df_menu,
    df_food,
    df_delivery_partner,
)
print(*(piece for frame in frames for piece in (frame.columns, '\n')))

Index(['user_id', 'name', 'email', 'password'], dtype='object') 
 Index(['r_id', 'r_name', 'cuisine'], dtype='object') 
 Index(['order_id', 'user_id', 'r_id', 'amount', 'date', 'partner_id',
       'delivery_time', 'delivery_rating', 'restaurant_rating'],
      dtype='object') 
 Index(['id', 'order_id', 'f_id'], dtype='object') 
 Index(['menu_id', 'r_id', 'f_id', 'price'], dtype='object') 
 Index(['f_id', 'f_name', 'type'], dtype='object') 
 Index(['partner_id', 'partner_name'], dtype='object') 



### Independent Row Subqueries

1. Find all users who never ordered

In [None]:
# Users whose id never appears in `orders`, found with an independent
# (non-correlated) subquery.
#
# The inner query filters out NULL user_ids on purpose: if the subquery of a
# NOT IN ever yields a NULL, the predicate evaluates to NULL for every row
# and the outer query silently returns nothing. DISTINCT is not needed for
# an IN / NOT IN membership test, so it is omitted.
query = """
SELECT *
FROM users
WHERE user_id NOT IN (
    SELECT user_id
    FROM orders
    WHERE user_id IS NOT NULL
)
"""
result = pd.read_sql_query(query, conn)
print(result)

   user_id     name              email password
0        6  Anupama  anupama@gmail.com   46rdw2
1        7  Rishabh  rishabh@gmail.com   4sw123


### Correlated subqueries

2. Find the favorite food of each customer

In [None]:
# Schema reminder before the join-heavy queries: print each frame's column
# Index followed by a newline separator.
frames = (
    df_users,
    df_restaurants,
    df_orders,
    df_order_details,
    df_menu,
    df_food,
    df_delivery_partner,
)
print(*(piece for frame in frames for piece in (frame.columns, '\n')))

Index(['user_id', 'name', 'email', 'password'], dtype='object') 
 Index(['r_id', 'r_name', 'cuisine'], dtype='object') 
 Index(['order_id', 'user_id', 'r_id', 'amount', 'date', 'partner_id',
       'delivery_time', 'delivery_rating', 'restaurant_rating'],
      dtype='object') 
 Index(['id', 'order_id', 'f_id'], dtype='object') 
 Index(['menu_id', 'r_id', 'f_id', 'price'], dtype='object') 
 Index(['f_id', 'f_name', 'type'], dtype='object') 
 Index(['partner_id', 'partner_name'], dtype='object') 



In [None]:
# Favourite food per customer.
#
# Step 1 (CTE `fav_food`): count how many times each user ordered each food.
# Step 2: keep, for every user, the row(s) whose count equals that user's
#         maximum count -- a correlated subquery on user_id. Ties are kept,
#         so a user can appear more than once.
#
# Every selected non-aggregate column is listed in GROUP BY (the query no
# longer relies on SQLite's permissive "bare column" behaviour), and the
# final ORDER BY makes the row order deterministic.
query = """
WITH fav_food AS (
    SELECT u.user_id,
           u.name,
           f.f_name,
           COUNT(*) AS frequency
    FROM order_details AS od
    JOIN food   AS f ON f.f_id = od.f_id
    JOIN orders AS o ON o.order_id = od.order_id
    JOIN users  AS u ON u.user_id = o.user_id
    GROUP BY u.user_id, u.name, f.f_id, f.f_name
)

SELECT *
FROM fav_food AS f1
WHERE frequency = (SELECT MAX(frequency)
                   FROM fav_food AS f2
                   WHERE f2.user_id = f1.user_id)
ORDER BY user_id, f_name
"""

result = pd.read_sql_query(query, conn)
print(result)

   user_id      name            f_name  frequency
0        1    Nitish   Choco Lava cake          5
1        2  Khushboo   Choco Lava cake          3
2        3   Vartika     Chicken Wings          3
3        4     Ankit  Schezwan Noodles          3
4        4     Ankit    Veg Manchurian          3
5        5      Neha   Choco Lava cake          5


3. Display the average rating of all restaurants

In [None]:

# Average restaurant rating, computed per r_id in a derived table and then
# joined to `restaurants` to pick up the display name.
#
# The alias is written as a plain identifier: the previous single-quoted
# form ('avg_rating') is a string literal that SQLite only accepts as an
# alias for backwards compatibility.
# The join is an inner join, so a restaurant with no rows in `orders` does
# not appear in the result.
query = """
SELECT r_name, avg_rating
FROM (
    SELECT r_id, AVG(restaurant_rating) AS avg_rating
    FROM orders
    GROUP BY r_id
) AS t1
JOIN restaurants AS t2
  ON t1.r_id = t2.r_id
"""
result = pd.read_sql_query(query, conn)
print(result)

       r_name  avg_rating
0     dominos    1.666667
1         kfc    2.200000
2        box8    4.666667
3  Dosa Plaza    3.666667
4  China Town    3.666667


4. Delete the records of all customers who have never ordered

In [None]:

# Remove every user that has no order.
#
# A DELETE produces no result set, so it must not be sent through
# pd.read_sql_query: pandas then fails with
# "TypeError: 'NoneType' object is not iterable" while trying to read rows
# back. The statement is executed directly on the connection instead.
#
# The subquery excludes NULL user_ids so that a NULL in orders.user_id cannot
# turn the NOT IN predicate into NULL for every row.
query = """
DELETE FROM users
WHERE user_id NOT IN (
    SELECT user_id
    FROM orders
    WHERE user_id IS NOT NULL
)
"""
# `with conn` commits when the block succeeds and rolls back if it raises.
with conn:
    cursor = conn.execute(query)

# rowcount is the number of rows removed; it is 0 when the cell is re-run.
result = cursor.rowcount
print(f"Deleted {result} user(s) who never placed an order")

TypeError: 'NoneType' object is not iterable

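Note: `pd.read_sql_query` only works for statements that return rows. A `DELETE` returns no result set, so running it through `pd.read_sql_query` raises `TypeError: 'NoneType' object is not iterable`, even though the rows are still deleted (the next query confirms this). Statements that modify data should be run with `conn.execute(...)` instead.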



In [None]:
# List the user ids currently stored in the `users` table.
query = (
    "\n"
    "SELECT user_id FROM users\n"
)
result = pd.read_sql_query(sql=query, con=conn)
print(result)

   user_id
0        1
1        2
2        3
3        4
4        5


5. Find top 2 most paying customers of each month

In [None]:
# Top 2 paying customers of each month.
#
# monthly_spend: total amount per (month, user).
# ranked:        DENSE_RANK within each month by that total, highest first.
#                DENSE_RANK keeps ties, so a month can return more than two
#                rows when several users share a rank.
#
# The ORDER BY is on the outermost SELECT: an ORDER BY inside a derived
# table does not guarantee the order of the final result.
#
# NOTE(review): the month key is strftime('%m', ...) only, so the same month
# from different years would be merged -- confirm the data covers one year.
query = """
WITH monthly_spend AS (
    SELECT strftime('%m', date) AS month,
           user_id,
           SUM(amount) AS amount
    FROM orders
    GROUP BY strftime('%m', date), user_id
),
ranked AS (
    SELECT month,
           user_id,
           amount,
           DENSE_RANK() OVER (PARTITION BY month ORDER BY amount DESC) AS month_rank
    FROM monthly_spend
)
SELECT month, user_id, amount, month_rank
FROM ranked
WHERE month_rank < 3
ORDER BY month, month_rank, user_id
"""
result = pd.read_sql_query(query, conn)
print(result)

  month  user_id  amount  month_rank
0    05        1     965           1
1    05        3     860           2
2    06        2    1480           1
3    06        4     800           2
4    07        5    3035           1
5    07        2    1190           2


6. Find MoM revenue growth of Zomato

In [None]:
# Month-over-month revenue growth.
#
# monthly_totals: revenue per month.
# with_previous:  the same rows plus the previous month's revenue, computed
#                 once with LAG and given a readable alias.
# final SELECT:   growth % = (current - previous) * 100 / previous.
#                 The first month has no predecessor, so its previous amount
#                 and growth are NULL (NaN in pandas). SQLite also yields
#                 NULL when the previous amount is 0.
#
# NOTE(review): the month key is strftime('%m', ...) only, so LAG would pair
# the wrong months if the data spans several years -- confirm single-year data.
query = """
WITH monthly_totals AS (
    SELECT strftime('%m', date) AS month,
           SUM(amount) AS total_amount
    FROM orders
    GROUP BY strftime('%m', date)
),
with_previous AS (
    SELECT month,
           total_amount,
           LAG(total_amount) OVER (ORDER BY month) AS prev_month_amount
    FROM monthly_totals
)
SELECT month,
       total_amount,
       prev_month_amount,
       (total_amount - prev_month_amount) * 100.0 / prev_month_amount AS revenue_growth_percent
FROM with_previous
ORDER BY month
"""
result = pd.read_sql_query(query, conn)
print(result)

  month  total_amount  LAG(total_amount) OVER (ORDER BY month)  \
0    05          2425                                      NaN   
1    06          3220                                   2425.0   
2    07          4845                                   3220.0   

   revenue_growth_percent  
0                     NaN  
1               32.783505  
2               50.465839  
