In [3]:
import h3 as h3
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy import stats
from pyhive import presto
from keplergl import KeplerGl
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

In [4]:
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 1200)

## Connection

In [5]:
## Connection
connection = presto.connect(
        host='presto-gateway.serving.data.production.internal',
        port=80,
        protocol='http',
        catalog='hive',
        username='manoj.ravirajan@rapido.bike'
)

## Dataset

In [6]:
## Generate date range

start_date = '20230703'
end_date = '20230730'
city = 'Bangalore'
service = 'Delivery'

In [9]:
## datasets.service_mapping

service_mapping = f"""
        SELECT 
            city_display_name AS city,
            service_level AS service_name,
            service_detail_id,
            city_id,
            service_id
        FROM 
            datasets.service_mapping
        WHERE 
            city_display_name = '{city}'
            AND service_level = '{service}'
"""

df_service_mapping = pd.read_sql(service_mapping, connection)
service_detail_id = df_service_mapping.service_detail_id.loc[0]
df_service_mapping
# service_detail_id

Unnamed: 0,city,service_name,service_detail_id,city_id,service_id
0,Bangalore,Delivery,58dceb1f24565ce21202bca4,572ca7ff116b5db3057bd814,58dcea3824565ce21202bca3


In [10]:
df_hex_affluence_tag = pd.read_csv('/Users/rapido/local-datasets/affluence/main/hex_affluence_tag.csv')
df_hex_affluence_tag =df_hex_affluence_tag[['pickup_hex_8', 'affluence_tag']]
df_hex_affluence_tag

Unnamed: 0,pickup_hex_8,affluence_tag
0,88618920a3fffff,High
1,8861892581fffff,High
2,886189258bfffff,High
3,886189258dfffff,High
4,8861892425fffff,High
...,...,...
2680,8860145955fffff,High
2681,8860145957fffff,High
2682,8860145959fffff,High
2683,886014595bfffff,High


In [11]:
df_overlap_details = pd.read_csv('/Users/rapido/local-datasets/affluence/final/Bangalore Affluence List only 70 hex sample.csv')
df_overlap_details = df_overlap_details[['pickup_hex_8' ,'affluence_tag']]
df_overlap_details.head(2)

Unnamed: 0,pickup_hex_8,affluence_tag
0,88618921d3fffff,High Affluence
1,88618921c7fffff,High Affluence


In [8]:
## orders.order_logs_snapshot --> Tips

order_logs_snapshot = f"""
        
        SELECT
            -- yyyymmdd,
            city_name,
            location_hex_8,
            COUNT(DISTINCT CASE WHEN service_obj_service_name = 'Link' THEN order_id END) link_orders,
            COUNT(DISTINCT CASE WHEN service_obj_service_name = 'Auto' THEN order_id END) auto_orders,
            COUNT(DISTINCT CASE WHEN service_obj_service_name IN ('Auto', 'Link') THEN order_id END) taxi_orders,
            COUNT(DISTINCT CASE WHEN service_obj_service_name = 'Delivery' THEN order_id END) swiggy_orders,
            -- COUNT(DISTINCT CASE WHEN service_obj_service_name = 'Delivery' THEN customer_id END) swiggy_customers,
            COALESCE(ROUND(APPROX_PERCENTILE(bill_amount,0.50)), 0) swiggy_median_bill_amount,
            COALESCE(ROUND(AVG(bill_amount)), 0) swiggy_mean_bill_amount,
            COALESCE(SUM(bill_amount), 0) swiggy_total_bill_amount
            
        FROM
        (    
            SELECT  
                yyyymmdd,
                city_name,
                service_obj_service_name,
                order_id,
                customer_id,
                CASE 
                WHEN service_obj_service_name = 'Link' THEN pickup_location_hex_8
                WHEN service_obj_service_name = 'Auto' THEN pickup_location_hex_8
                WHEN service_obj_service_name = 'Delivery' THEN drop_location_hex_8
                END AS location_hex_8,
                delivery_info_bill_amount bill_amount

            FROM

                orders.order_logs_snapshot ols 

            WHERE 
                yyyymmdd >= '{start_date}'
                AND yyyymmdd <= '{end_date}'
                AND city_name = 'Bangalore'
                AND order_status = 'dropped'
                AND service_obj_service_name IN ('Delivery', 'Link', 'Auto')
        )
        GROUP BY 1,2

"""

df_swiggy_orders = pd.read_sql(order_logs_snapshot, connection)
df_swiggy_orders.head(2)

Unnamed: 0,city_name,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_median_bill_amount,swiggy_mean_bill_amount,swiggy_total_bill_amount
0,Bangalore,886014c967fffff,35,7,42,24,373.0,421.0,10098.0
1,Bangalore,8861892195fffff,941,484,1425,681,339.0,437.0,297635.0


In [9]:
df_swiggy_orders.to_csv('/Users/rapido/local-datasets/affluence/raw/raw_ols_swiggy_details_{}_{}_{}_{}.csv' \
                      .format(city,service,start_date,end_date), index = False)

In [12]:
df_swiggy_orders = pd.read_csv('/Users/rapido/local-datasets/affluence/raw/raw_ols_swiggy_details_{}_{}_{}_{}.csv' \
                      .format(city,service,start_date,end_date))

## Analysis

In [13]:
df_swiggy_orders.head()

Unnamed: 0,city_name,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_median_bill_amount,swiggy_mean_bill_amount,swiggy_total_bill_amount
0,Bangalore,886014c967fffff,35,7,42,24,373.0,421.0,10098.0
1,Bangalore,8861892195fffff,941,484,1425,681,339.0,437.0,297635.0
2,Bangalore,8860145a93fffff,15,8,23,2,1597.0,952.0,1903.0
3,Bangalore,8861892d53fffff,12,13,25,3,470.0,450.0,1351.0
4,Bangalore,886014521dfffff,2,0,2,0,0.0,0.0,0.0


In [15]:
df_swiggy_orders.location_hex_8.nunique()

2163

In [16]:
df_swiggy_orders[df_swiggy_orders['link_orders'] < df_swiggy_orders['swiggy_orders']].nunique()

city_name                      1
location_hex_8               367
link_orders                  144
auto_orders                  142
taxi_orders                  166
swiggy_orders                168
swiggy_median_bill_amount    237
swiggy_mean_bill_amount      262
swiggy_total_bill_amount     364
dtype: int64

In [17]:
df_affluence_tag_and_swiggy_orders = pd.merge(df_hex_affluence_tag, 
                                    df_swiggy_orders,
                                    how = 'left',
                                    left_on = ['pickup_hex_8'],
                                    right_on = ['location_hex_8']
                                   )
# df_affluence_tag_and_swiggy_orders['link_orders_contribution'] \
#             = (df_affluence_tag_and_swiggy_orders['link_orders']*100/(df_affluence_tag_and_swiggy_orders['link_orders'] + df_affluence_tag_and_swiggy_orders['swiggy_orders']))

# df_affluence_tag_and_swiggy_orders['auto_orders_contribution'] \
#             = (df_affluence_tag_and_swiggy_orders['auto_orders']*100/(df_affluence_tag_and_swiggy_orders['auto_orders'] + df_affluence_tag_and_swiggy_orders['swiggy_orders']))

# df_affluence_tag_and_swiggy_orders['taxi_orders_contribution'] \
#             = (df_affluence_tag_and_swiggy_orders['taxi_orders']*100/(df_affluence_tag_and_swiggy_orders['taxi_orders'] + df_affluence_tag_and_swiggy_orders['swiggy_orders']))

# df_affluence_tag_and_swiggy_orders['swiggy_orders_contribution'] \
#             = (df_affluence_tag_and_swiggy_orders['swiggy_orders']*100/(df_affluence_tag_and_swiggy_orders['link_orders'] + df_affluence_tag_and_swiggy_orders['swiggy_orders']))


df_affluence_tag_and_swiggy_orders = df_affluence_tag_and_swiggy_orders.round(2)
df_affluence_tag_and_swiggy_orders

Unnamed: 0,pickup_hex_8,affluence_tag,city_name,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_median_bill_amount,swiggy_mean_bill_amount,swiggy_total_bill_amount
0,88618920a3fffff,High,Bangalore,88618920a3fffff,16049.0,11293.0,27342.0,7369.0,271.0,330.0,2432131.0
1,8861892581fffff,High,Bangalore,8861892581fffff,4069.0,4201.0,8270.0,441.0,289.0,363.0,159916.0
2,886189258bfffff,High,Bangalore,886189258bfffff,4293.0,4013.0,8306.0,429.0,301.0,386.0,165752.0
3,886189258dfffff,High,Bangalore,886189258dfffff,5884.0,7033.0,12917.0,1574.0,309.0,387.0,608619.0
4,8861892425fffff,High,Bangalore,8861892425fffff,5397.0,9062.0,14459.0,3512.0,296.0,359.0,1259891.0
...,...,...,...,...,...,...,...,...,...,...,...
2680,8860145955fffff,High,Bangalore,8860145955fffff,672.0,601.0,1273.0,327.0,331.0,427.0,139518.0
2681,8860145957fffff,High,Bangalore,8860145957fffff,2376.0,1695.0,4071.0,141.0,273.0,355.0,50004.0
2682,8860145959fffff,High,Bangalore,8860145959fffff,1027.0,1194.0,2221.0,563.0,312.0,399.0,224795.0
2683,886014595bfffff,High,Bangalore,886014595bfffff,796.0,986.0,1782.0,385.0,337.0,423.0,162768.0


In [18]:
df_affluence_tag_and_swiggy_orders[['link_orders', 'auto_orders', 
                                    'taxi_orders', 'swiggy_orders', 
                                    'swiggy_median_bill_amount']].describe([0.25,0.3,0.6,0.65,0.75,0.8,0.85,0.9,0.95])

Unnamed: 0,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_median_bill_amount
count,2095.0,2095.0,2095.0,2095.0,2095.0
mean,1046.10358,1138.868735,2184.972315,417.336516,313.101671
std,2202.811007,2730.408532,4806.696473,941.001577,224.885497
min,0.0,0.0,0.0,0.0,0.0
25%,4.0,6.0,12.0,1.0,275.0
30%,8.0,10.0,17.0,2.0,290.0
50%,77.0,64.0,147.0,26.0,320.0
60%,290.8,237.0,546.4,94.4,333.0
65%,491.1,432.4,955.9,161.1,340.1
75%,1069.5,1053.5,2133.5,413.5,366.0


In [19]:
df_affluence_tag_and_swiggy_orders['swiggy_order_flag'] = np.where(df_affluence_tag_and_swiggy_orders['swiggy_orders'] >= df_affluence_tag_and_swiggy_orders.swiggy_orders.quantile(0.80) , 'High Orders', 
                                                             np.where(df_affluence_tag_and_swiggy_orders['swiggy_orders'] >= df_affluence_tag_and_swiggy_orders.swiggy_orders.quantile(0.30) , 'Medium Orders', 
                                                             'Less/No Orders'))

df_affluence_tag_and_swiggy_orders['swiggy_bill_amount_flag'] = np.where(df_affluence_tag_and_swiggy_orders['swiggy_median_bill_amount'] >= df_affluence_tag_and_swiggy_orders.swiggy_median_bill_amount.quantile(0.80)  , 'High Price', 
                                                             np.where(df_affluence_tag_and_swiggy_orders['swiggy_median_bill_amount'] >= df_affluence_tag_and_swiggy_orders.swiggy_median_bill_amount.quantile(0.30) , 'Medium Price', 
                                                            'Less/No Price'))

df_affluence_tag_and_swiggy_orders              

Unnamed: 0,pickup_hex_8,affluence_tag,city_name,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_median_bill_amount,swiggy_mean_bill_amount,swiggy_total_bill_amount,swiggy_order_flag,swiggy_bill_amount_flag
0,88618920a3fffff,High,Bangalore,88618920a3fffff,16049.0,11293.0,27342.0,7369.0,271.0,330.0,2432131.0,High Orders,Less/No Price
1,8861892581fffff,High,Bangalore,8861892581fffff,4069.0,4201.0,8270.0,441.0,289.0,363.0,159916.0,Medium Orders,Less/No Price
2,886189258bfffff,High,Bangalore,886189258bfffff,4293.0,4013.0,8306.0,429.0,301.0,386.0,165752.0,Medium Orders,Medium Price
3,886189258dfffff,High,Bangalore,886189258dfffff,5884.0,7033.0,12917.0,1574.0,309.0,387.0,608619.0,High Orders,Medium Price
4,8861892425fffff,High,Bangalore,8861892425fffff,5397.0,9062.0,14459.0,3512.0,296.0,359.0,1259891.0,High Orders,Medium Price
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2680,8860145955fffff,High,Bangalore,8860145955fffff,672.0,601.0,1273.0,327.0,331.0,427.0,139518.0,Medium Orders,Medium Price
2681,8860145957fffff,High,Bangalore,8860145957fffff,2376.0,1695.0,4071.0,141.0,273.0,355.0,50004.0,Medium Orders,Less/No Price
2682,8860145959fffff,High,Bangalore,8860145959fffff,1027.0,1194.0,2221.0,563.0,312.0,399.0,224795.0,Medium Orders,Medium Price
2683,886014595bfffff,High,Bangalore,886014595bfffff,796.0,986.0,1782.0,385.0,337.0,423.0,162768.0,Medium Orders,Medium Price


### Swiggy Flag

In [20]:
## swiggy_order_flag

df_aff_swiggy = df_affluence_tag_and_swiggy_orders \
                    .groupby([ 'swiggy_order_flag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_aff_swiggy['swiggy_orders_per_hex'] = df_aff_swiggy['swiggy_orders']/df_aff_swiggy['location_hex_8']
df_aff_swiggy['hex_distr'] = df_aff_swiggy['location_hex_8']*100.0/df_aff_swiggy.location_hex_8.sum()
df_aff_swiggy['swiggy_orders_distr'] = df_aff_swiggy['swiggy_orders']*100.0/df_aff_swiggy.swiggy_orders.sum()


df_aff_swiggy_1 = df_aff_swiggy.round(2)

In [21]:
## affluence_tag

df_aff_swiggy = df_affluence_tag_and_swiggy_orders \
                    .groupby([ 'affluence_tag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_aff_swiggy['swiggy_orders_per_hex'] = df_aff_swiggy['swiggy_orders']/df_aff_swiggy['location_hex_8']
df_aff_swiggy['hex_distr'] = df_aff_swiggy['location_hex_8']*100.0/df_aff_swiggy.location_hex_8.sum()
df_aff_swiggy['swiggy_orders_distr'] = df_aff_swiggy['swiggy_orders']*100.0/df_aff_swiggy.swiggy_orders.sum()


df_aff_swiggy_2 = df_aff_swiggy.round(2)

In [22]:
## Overlap

df_aff_swiggy = df_affluence_tag_and_swiggy_orders \
                    .groupby([ 'affluence_tag', 'swiggy_order_flag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_aff_swiggy['swiggy_orders_per_hex'] = df_aff_swiggy['swiggy_orders']/df_aff_swiggy['location_hex_8']
df_aff_swiggy['hex_distr'] = df_aff_swiggy['location_hex_8']*100.0/df_aff_swiggy.location_hex_8.sum()
df_aff_swiggy['swiggy_orders_distr'] = df_aff_swiggy['swiggy_orders']*100.0/df_aff_swiggy.swiggy_orders.sum()


df_aff_swiggy_3 = df_aff_swiggy.round(2)

In [23]:
df_aff_swiggy_1

Unnamed: 0,swiggy_order_flag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High Orders,421,1595695.0,1823971.0,3419666.0,735678.0,1747.45,20.1,84.14
1,Less/No Orders,553,3559.0,4322.0,7881.0,157.0,0.28,26.4,0.02
2,Medium Orders,1121,592333.0,557637.0,1149970.0,138485.0,123.54,53.51,15.84


In [24]:
df_aff_swiggy_2

Unnamed: 0,affluence_tag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High,1019,1962256.0,2172881.0,4135137.0,814329.0,799.15,48.64,93.14
1,Less,1076,229331.0,213049.0,442380.0,59991.0,55.75,51.36,6.86


In [25]:
df_aff_swiggy_3

Unnamed: 0,affluence_tag,swiggy_order_flag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High,High Orders,403,1565276.0,1796095.0,3361371.0,719049.0,1784.24,19.24,82.24
1,High,Less/No Orders,162,512.0,914.0,1426.0,40.0,0.25,7.73,0.0
2,High,Medium Orders,454,396468.0,375872.0,772340.0,95240.0,209.78,21.67,10.89
3,Less,High Orders,18,30419.0,27876.0,58295.0,16629.0,923.83,0.86,1.9
4,Less,Less/No Orders,391,3047.0,3408.0,6455.0,117.0,0.3,18.66,0.01
5,Less,Medium Orders,667,195865.0,181765.0,377630.0,43245.0,64.84,31.84,4.95


In [26]:
# 30| 80| 80+
ha = round(403*100/421,2)
la = round(391*100/396,2)
print('High Swiggy with High Affluence signal overlap percentage',ha)
print('Less Swiggy with Less Affluence signal overlap percentage',la)

High Swiggy with High Affluence signal overlap percentage 95.72
Less Swiggy with Less Affluence signal overlap percentage 98.74


### Billing amount 

In [27]:
df_aff_swiggy = df_affluence_tag_and_swiggy_orders \
                    .groupby([ 'swiggy_bill_amount_flag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_aff_swiggy['swiggy_orders_per_hex'] = df_aff_swiggy['swiggy_orders']/df_aff_swiggy['location_hex_8']
df_aff_swiggy['hex_distr'] = df_aff_swiggy['location_hex_8']*100.0/df_aff_swiggy.location_hex_8.sum()
df_aff_swiggy['swiggy_orders_distr'] = df_aff_swiggy['swiggy_orders']*100.0/df_aff_swiggy.swiggy_orders.sum()

df_aff_swiggy_bill_1 = df_aff_swiggy.round(2)

In [28]:
df_aff_swiggy = df_affluence_tag_and_swiggy_orders \
                    .groupby(['affluence_tag', 'swiggy_order_flag', 'swiggy_bill_amount_flag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_aff_swiggy['swiggy_orders_per_hex'] = df_aff_swiggy['swiggy_orders']/df_aff_swiggy['location_hex_8']
df_aff_swiggy['hex_distr'] = df_aff_swiggy['location_hex_8']*100.0/df_aff_swiggy.location_hex_8.sum()
df_aff_swiggy['swiggy_orders_distr'] = df_aff_swiggy['swiggy_orders']*100.0/df_aff_swiggy.swiggy_orders.sum()

df_aff_swiggy_bill_2 = df_aff_swiggy.round(2)

In [29]:
df_aff_swiggy = df_affluence_tag_and_swiggy_orders \
                    .groupby(['affluence_tag', 'swiggy_bill_amount_flag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_aff_swiggy['swiggy_orders_per_hex'] = df_aff_swiggy['swiggy_orders']/df_aff_swiggy['location_hex_8']
df_aff_swiggy['hex_distr'] = df_aff_swiggy['location_hex_8']*100.0/df_aff_swiggy.location_hex_8.sum()
df_aff_swiggy['swiggy_orders_distr'] = df_aff_swiggy['swiggy_orders']*100.0/df_aff_swiggy.swiggy_orders.sum()

df_aff_swiggy_bill_3 = df_aff_swiggy.round(2)

In [30]:
#df_aff_swiggy_1

In [31]:
#df_aff_swiggy_2

In [32]:
df_aff_swiggy_bill_1

Unnamed: 0,swiggy_bill_amount_flag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High Price,420,20268.0,18520.0,38788.0,10173.0,24.22,20.05,1.16
1,Less/No Price,624,414691.0,493507.0,908198.0,149053.0,238.87,29.79,17.05
2,Medium Price,1051,1756628.0,1873903.0,3630531.0,715094.0,680.39,50.17,81.79


In [33]:
df_aff_swiggy_bill_2

Unnamed: 0,affluence_tag,swiggy_order_flag,swiggy_bill_amount_flag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High,High Orders,High Price,2,456.0,251.0,707.0,1466.0,733.0,0.1,0.17
1,High,High Orders,Less/No Price,40,331632.0,411180.0,742812.0,135386.0,3384.65,1.91,15.48
2,High,High Orders,Medium Price,361,1233188.0,1384664.0,2617852.0,582197.0,1612.73,17.23,66.59
3,High,Less/No Orders,High Price,26,90.0,66.0,156.0,26.0,1.0,1.24,0.0
4,High,Less/No Orders,Less/No Price,133,396.0,832.0,1228.0,11.0,0.08,6.35,0.0
5,High,Less/No Orders,Medium Price,3,26.0,16.0,42.0,3.0,1.0,0.14,0.0
6,High,Medium Orders,High Price,79,5498.0,5604.0,11102.0,3827.0,48.44,3.77,0.44
7,High,Medium Orders,Less/No Price,53,58784.0,58950.0,117734.0,8332.0,157.21,2.53,0.95
8,High,Medium Orders,Medium Price,322,332186.0,311318.0,643504.0,83081.0,258.02,15.37,9.5
9,Less,High Orders,Less/No Price,2,5196.0,4350.0,9546.0,2483.0,1241.5,0.1,0.28


In [34]:
df_aff_swiggy_bill_3

Unnamed: 0,affluence_tag,swiggy_bill_amount_flag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High,High Price,107,6044.0,5921.0,11965.0,5319.0,49.71,5.11,0.61
1,High,Less/No Price,226,390812.0,470962.0,861774.0,143729.0,635.97,10.79,16.44
2,High,Medium Price,686,1565400.0,1695998.0,3261398.0,665281.0,969.8,32.74,76.09
3,Less,High Price,313,14224.0,12599.0,26823.0,4854.0,15.51,14.94,0.56
4,Less,Less/No Price,398,23879.0,22545.0,46424.0,5324.0,13.38,19.0,0.61
5,Less,Medium Price,365,191228.0,177905.0,369133.0,49813.0,136.47,17.42,5.7


In [35]:
#  Median order price at hex level (0| 1-50| 50+)
ha = round(107*100/420,2)
la = round(398*100/624,2)
print('High Swiggy with High Affluence signal overlap percentage',ha)
print('Less Swiggy with Less Affluence signal overlap percentage',la)

High Swiggy with High Affluence signal overlap percentage 25.48
Less Swiggy with Less Affluence signal overlap percentage 63.78


## PPKM

In [132]:
start_date = '20230714' ## '20230703' 20230710 20230717
end_date = '20230716' ## 20230716 20230730

In [133]:
ppkm_data = f"""
        WITH city_cluster_hex AS (

                SELECT
                    cch.hex_id AS hex_id,
                    cch.cluster AS cluster
                FROM
                    datasets.city_cluster_hex cch
                WHERE
                    cch.resolution = 8
            ),

            fare_estimates AS (

                SELECT
                    fe_ench.epoch AS epoch,
                    fe_ench.yyyymmdd AS yyyymmdd,
                    fe_ench.city AS city_name,
                    fe_ench.service_name AS service_name,
                    fe_ench.service_detail_id AS service_detail_id,
                    fe_ench.pickup_location_hex_8 AS pickup_location_hex_8,
                    fe_ench.drop_location_hex_8 AS drop_location_hex_8,
                    fe_ench.fare_estimate_id AS fare_estimate_id,
                    fe_ench.sub_total AS sub_total,
                    fe_ench.final_amount AS final_amount,
                    CASE 
                    WHEN fe_ench.ride_distance BETWEEN 0.00001 AND 0.99 THEN 1.0 
                    ELSE ride_distance END AS ride_distance,
                    fe_ench.quarter_hour

                FROM
                    pricing.fare_estimates_enriched fe_ench
                WHERE
                    yyyymmdd >= '{start_date}'
                    AND yyyymmdd <= '{end_date}'
                    AND fe_ench.service_name IN ('Link') 
                    AND fe_ench.city = 'Bangalore'
                    AND fe_ench.ride_distance > 0
                    AND quarter_hour >= '0700' 
                    AND quarter_hour <= '2300'
            ),

            orders AS (

                SELECT
                    ols.yyyymmdd AS yyyymmdd,
                    ols.service_detail_id AS service_detail_id,
                    ols.service_obj_service_name AS service_name,
                    ols.estimate_id AS fare_estimate_id,
                    ols.order_status AS order_status,
                    ols.order_id AS order_id,
                    ols.accept_to_pickup_distance  AS accept_to_pickup_distance,
                    ols.spd_fraud_flag
                FROM
                    orders.order_logs_snapshot AS ols
                WHERE
                    yyyymmdd >= '{start_date}'
                    AND yyyymmdd <= '{end_date}'
                    AND ols.service_obj_service_name IN ('Link')
                    AND ols.city_name = 'Bangalore'
            )

            SELECT 
                fe.yyyymmdd AS yyyymmdd,
                fe.city_name AS city_name,
                fe.service_name AS service_name,
                fe.service_detail_id AS service_detail_id,

                CASE 
                WHEN pic.hex_id IN ('88618921d3fffff', '88618921c7fffff', '8861892665fffff', '8860145a33fffff', '8861892c97fffff', '886189219bfffff', '88618920b3fffff', '8861892e37fffff', '8861892ed9fffff',
                                    '8861892c1dfffff', '8861892c11fffff', '8861892ea5fffff', '8861892c55fffff', '8861892639fffff', '886189246dfffff', '8861892cbdfffff', '8861892ee5fffff', '88618925c9fffff',
                                    '88618925bbfffff', '8860145a61fffff', '8861892431fffff', '88618926adfffff', '8860145b43fffff', '8861892ed3fffff', '88618925a5fffff')
                THEN 'High Affluence'
                WHEN pic.hex_id IN ('886014594bfffff', '8860145863fffff', '8861892ca3fffff', '8860145829fffff', '8860145a05fffff', '8860169669fffff', '8860145943fffff', '8861892dd5fffff', '886014584dfffff',
                                    '886189244dfffff', '88618925a9fffff', '8860145913fffff', '8861892c0bfffff', '8861892cc1fffff', '8860145ad1fffff', '88618925e3fffff', '886016966bfffff', '88618920e9fffff',
                                    '8861892645fffff', '8861892521fffff', '8861892ccbfffff', '8861892e63fffff', '8861892f13fffff', '8860145ad9fffff', '88618921e3fffff', '8860145b51fffff', '8861892e2bfffff', 
                                    '8861892c8bfffff', '8860145b67fffff', '8861892e2dfffff', '8861892db5fffff', '8860145a21fffff', '8861892db7fffff', '8860145a29fffff', '8861892c95fffff', '8861892eb1fffff', 
                                    '8861892eb7fffff', '8861892eb5fffff', '8860145b53fffff', '8861892e93fffff', '8860145b55fffff', '886014595dfffff', '8861892e85fffff', '8860145b59fffff', '8860169663fffff')
                THEN 'Low Affluence'
                ELSE 'OTHER' END affluence,

                pic.hex_id AS pickup_hex_8,
                pic.cluster AS pickup_location,
                fe.fare_estimate_id AS fare_estimate_id,
                fe.quarter_hour AS quarter_hour,
                ord.order_id AS order_id,
                CASE 
                WHEN order_status = 'dropped' AND (spd_fraud_flag != true OR spd_fraud_flag IS NULL)
                THEN order_id END dropped_order_id,
                ord.order_status AS order_status,
                -- fe.sub_total AS sub_total,
                -- fe.final_amount AS final_amount,
                fe.ride_distance AS ride_distance,
                COALESCE(TRY(fe.sub_total/fe.ride_distance),0) ppkm
            FROM
                fare_estimates AS fe

            LEFT JOIN
                city_cluster_hex AS pic
                ON fe.pickup_location_hex_8 = pic.hex_id

            LEFT JOIN
                orders AS ord
                ON fe.yyyymmdd = ord.yyyymmdd
                AND fe.service_detail_id = ord.service_detail_id
                AND fe.service_name = ord.service_name
                AND fe.fare_estimate_id = ord.fare_estimate_id
                
"""


df_fe_ppkm_data = pd.read_sql(ppkm_data, connection)
df_fe_ppkm_data.head(2)

Unnamed: 0,yyyymmdd,city_name,service_name,service_detail_id,affluence,pickup_hex_8,pickup_location,fare_estimate_id,quarter_hour,order_id,dropped_order_id,order_status,ride_distance,ppkm
0,20230716,Bangalore,Link,57370b61a6855d70057417d1,OTHER,8861892e03fffff,Mahadevpura,64b3911adea6433e70bbf935,1200,64b38caa9b33b462d62b03ab,64b38caa9b33b462d62b03ab,dropped,1.146,46.247818
1,20230714,Bangalore,Link,57370b61a6855d70057417d1,OTHER,8860145129fffff,Gattigere,64b1297f7f295fec236eb249,1615,,,,4.244,15.080113


In [121]:
df11 = df_fe_ppkm_data
df11.to_csv('df11.csv', index = False)

In [131]:
df12 = df_fe_ppkm_data
df12.to_csv('df12.csv', index = False)

In [134]:
df13 = df_fe_ppkm_data
df13.to_csv('df13.csv', index = False)

In [107]:
df2 = df_fe_ppkm_data
df2.to_csv('df2.csv', index = False)

In [135]:
df11 = pd.read_csv('df11.csv')
df12 = pd.read_csv('df12.csv')
df13 = pd.read_csv('df13.csv')
df2 = pd.read_csv('df2.csv')

In [136]:
frames = [df11,df12,df13,df2]
df_fe_ppkm_data = pd.concat(frames)

In [137]:
# df_aff_hex = pd.read_clipboard()
# df_aff_hex.head(2)

In [138]:
df_fe_ppkm_data.quarter_hour.unique()

array([1245,  930, 1430, 2115, 1100, 2045, 1800, 2015,  945, 2100, 1600,
       1915, 1145, 2000, 1945, 1015, 1815, 1030,  715,  745, 1700, 1330,
       1300, 1845, 1715, 1445, 1900, 1515, 2300, 1830, 1115, 1645, 1000,
       1745,  830, 1230, 1730,  815, 2030, 1045, 1400,  915,  800,  900,
       2145, 1630, 1215, 1130, 1530, 2230, 1930, 2130, 1615,  730,  845,
       2215, 1315, 1500, 1345, 1415, 2200, 2245, 1545, 1200,  700])

In [139]:
df_fe_ppkm_data['ppkm'] = pd.to_numeric(df_fe_ppkm_data['ppkm'])

In [140]:
df_fe_ppkm_data['ppkm'] = df_fe_ppkm_data['ppkm'].round()
df_fe_ppkm_data

Unnamed: 0,yyyymmdd,city_name,service_name,service_detail_id,affluence,pickup_hex_8,pickup_location,fare_estimate_id,quarter_hour,order_id,dropped_order_id,order_status,ride_distance,ppkm
0,20230704,Bangalore,Link,57370b61a6855d70057417d1,Low Affluence,8860145b55fffff,Majestic,64a3c8a741cb0ca61bf01ac8,1245,,,,3.627,20.0
1,20230704,Bangalore,Link,57370b61a6855d70057417d1,High Affluence,88618920b3fffff,Marathahalli Village,64a39bdc5bc4613b65ab5fae,930,64a39be1a83c242c0c879188,,customerCancelled,4.610,18.0
2,20230704,Bangalore,Link,57370b61a6855d70057417d1,OTHER,8861892463fffff,Hosapalya,64a3e2d0aa513d85217f35d4,1430,64a3e2e916dced4587913a58,,customerCancelled,2.598,16.0
3,20230704,Bangalore,Link,57370b61a6855d70057417d1,OTHER,8860145165fffff,Subramanyapura,64a43eace85f6baddee53470,2115,,,,1.000,41.0
4,20230704,Bangalore,Link,57370b61a6855d70057417d1,High Affluence,88618920b3fffff,Marathahalli Village,64a3b05b25729d0bd99ee6d0,1100,,,,6.756,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12280252,20230725,Bangalore,Link,57370b61a6855d70057417d1,OTHER,886189246bfffff,Kudlu Gate,64bf4b60ddae85643666b3d4,930,64bf4b66139b7a5a366f86ce,,customerCancelled,3.303,15.0
12280253,20230725,Bangalore,Link,57370b61a6855d70057417d1,OTHER,8861892e0dfffff,Mahadevpura RHB Colony,64bfd6023a0d489950f9f242,1930,64bfd60e990dce1a751b9b58,64bfd60e990dce1a751b9b58,dropped,4.749,14.0
12280254,20230725,Bangalore,Link,57370b61a6855d70057417d1,OTHER,8861892cbbfffff,Sahakara Nagar,64bf4f06377996b01db1098e,945,64bf4f09274fd00139c72093,,customerCancelled,5.981,16.0
12280255,20230725,Bangalore,Link,57370b61a6855d70057417d1,OTHER,88601459e9fffff,Dasarahalli,64bfdd029ad3b7db68a0adb6,2000,,,,2.465,21.0


In [169]:
df_fe_ppkm_data.ppkm.describe([0.20,0.3,0.6,0.65,0.75,0.8,0.85,0.9,0.95])

count    2.560189e+07
mean     1.761898e+01
std      7.901521e+00
min      2.000000e+00
20%      1.300000e+01
30%      1.400000e+01
50%      1.500000e+01
60%      1.600000e+01
65%      1.700000e+01
75%      1.800000e+01
80%      2.000000e+01
85%      2.200000e+01
90%      2.600000e+01
95%      3.500000e+01
max      2.470000e+02
Name: ppkm, dtype: float64

In [168]:
## city test

df_hex_level_ppkm = df_fe_ppkm_data[df_fe_ppkm_data['ride_distance'] > 1] \
                    .groupby(['ppkm']) \
                    .agg(
                         fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'),
                         gross_count = pd.NamedAgg('order_id', 'nunique'),
                         net_count = pd.NamedAgg('dropped_order_id', 'nunique')
                        ).reset_index()
df_hex_level_ppkm['fe-rr'] = df_hex_level_ppkm['gross_count']*100.0/df_hex_level_ppkm['fe_count']
df_hex_level_ppkm['g2n'] = df_hex_level_ppkm['net_count']*100.0/df_hex_level_ppkm['gross_count']
df_hex_level_ppkm['fe-net'] = df_hex_level_ppkm['net_count']*100.0/df_hex_level_ppkm['fe_count']

df_hex_level_ppkm = df_hex_level_ppkm.fillna(0)
df_hex_level_ppkm

Unnamed: 0,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net
0,2.0,2,0,0,0.0,0.0,0.0
1,3.0,37,2,0,5.405405,0.0,0.0


In [172]:
df_hex_level_ppkm[df_hex_level_ppkm['fe_count'] >= df_hex_level_ppkm.fe_count.quantile(.9)]

Unnamed: 0,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net
9,11.0,698214,95461,40168,13.672169,42.077917,5.752964
10,12.0,4049269,605684,288639,14.95786,47.655048,7.128175
11,13.0,2780029,427856,202038,15.390343,47.221028,7.267478
12,14.0,2896412,483472,253739,16.6921,52.482667,8.760459
13,15.0,2960852,501338,262589,16.932221,52.377637,8.868697
14,16.0,2262026,369052,191369,16.315109,51.85421,8.460071
15,17.0,2011379,301468,155660,14.988125,51.634004,7.738969
16,18.0,1446265,227193,120704,15.708947,53.128397,8.345912
17,19.0,971421,157741,84760,16.238171,53.733652,8.725362
18,20.0,726194,116493,62632,16.041581,53.764604,8.624693


In [173]:
df_hex_level_ppkm = df_fe_ppkm_data[df_fe_ppkm_data['ride_distance'] > 1] \
                    .groupby(['pickup_location', 'pickup_hex_8','ppkm']) \
                    .agg(
                         fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'),
                         gross_count = pd.NamedAgg('order_id', 'nunique'),
                         net_count = pd.NamedAgg('dropped_order_id', 'nunique')
                        ).reset_index()
df_hex_level_ppkm['fe-rr'] = df_hex_level_ppkm['gross_count']*100.0/df_hex_level_ppkm['fe_count']
df_hex_level_ppkm['g2n'] = df_hex_level_ppkm['net_count']*100.0/df_hex_level_ppkm['gross_count']
df_hex_level_ppkm['fe-net'] = df_hex_level_ppkm['net_count']*100.0/df_hex_level_ppkm['fe_count']

df_hex_level_ppkm = df_hex_level_ppkm.fillna(0)
df_hex_level_ppkm.head(2)

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net
0,AECS Layout Brookefield,88618920a3fffff,7.0,1,0,0,0.0,0.0,0.0
1,AECS Layout Brookefield,88618920a3fffff,8.0,15,2,0,13.333333,0.0,0.0


In [174]:
df_hex_level_ppkm.fe_count.describe()

count    67753.000000
mean       367.897053
std       1461.311703
min          1.000000
25%          4.000000
50%         20.000000
75%        120.000000
max      40177.000000
Name: fe_count, dtype: float64

In [247]:
df_hex_level_ppkm_fe_filter = df_hex_level_ppkm[df_hex_level_ppkm['fe_count'] > df_hex_level_ppkm.fe_count.quantile(0.5)]
# df_hex_level_ppkm_fe_filter = df_hex_level_ppkm

In [248]:
df_hex_level_ppkm_fe_filter = df_hex_level_ppkm_fe_filter.round(2)
df_hex_level_ppkm_fe_filter

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net
2,AECS Layout Brookefield,88618920a3fffff,9.0,86,5,1,5.81,20.00,1.16
3,AECS Layout Brookefield,88618920a3fffff,10.0,314,43,7,13.69,16.28,2.23
4,AECS Layout Brookefield,88618920a3fffff,11.0,2488,411,197,16.52,47.93,7.92
5,AECS Layout Brookefield,88618920a3fffff,12.0,22586,3682,1971,16.30,53.53,8.73
6,AECS Layout Brookefield,88618920a3fffff,13.0,15240,2214,1106,14.53,49.95,7.26
...,...,...,...,...,...,...,...,...,...
67737,Yeshwanthpur - Mathikere,886014594bfffff,45.0,35,3,3,8.57,100.00,8.57
67738,Yeshwanthpur - Mathikere,886014594bfffff,46.0,61,14,4,22.95,28.57,6.56
67739,Yeshwanthpur - Mathikere,886014594bfffff,47.0,27,4,3,14.81,75.00,11.11
67740,Yeshwanthpur - Mathikere,886014594bfffff,48.0,29,1,1,3.45,100.00,3.45


In [249]:
df_hex_level_fe_threshold = df_hex_level_ppkm_fe_filter \
                                    .groupby(['pickup_location', 'pickup_hex_8']) \
                                    .agg(fe_threshold=pd.NamedAgg('fe_count', lambda x: x.quantile(0.90)))\
                                    .reset_index()
df_hex_level_fe_threshold

Unnamed: 0,pickup_location,pickup_hex_8,fe_threshold
0,AECS Layout Brookefield,88618920a3fffff,13460.5
1,Adugodi,8861892581fffff,6026.5
2,Adugodi,886189258bfffff,5978.0
3,Adugodi Traffic Station,886189258dfffff,6754.8
4,Agara Lake,8861892425fffff,5227.4
...,...,...,...
2043,Yeshwanthpur,8860145955fffff,1264.5
2044,Yeshwanthpur,8860145957fffff,3303.0
2045,Yeshwanthpur,8860145959fffff,1084.4
2046,Yeshwanthpur,886014595bfffff,1003.6


In [250]:
df_hex_level_threshold_merge = pd.merge(df_hex_level_ppkm_fe_filter,
                                        df_hex_level_fe_threshold,
                                        how = 'inner',
                                        left_on = ['pickup_location', 'pickup_hex_8'],
                                        right_on = ['pickup_location', 'pickup_hex_8']
                                       )
df_hex_level_threshold_merge = df_hex_level_threshold_merge[df_hex_level_threshold_merge['fe_count'] >= df_hex_level_threshold_merge['fe_threshold']]
df_hex_level_threshold_merge

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,fe_threshold
3,AECS Layout Brookefield,88618920a3fffff,12.0,22586,3682,1971,16.30,53.53,8.73,13460.5
4,AECS Layout Brookefield,88618920a3fffff,13.0,15240,2214,1106,14.53,49.95,7.26,13460.5
5,AECS Layout Brookefield,88618920a3fffff,14.0,16055,2863,1536,17.83,53.65,9.57,13460.5
6,AECS Layout Brookefield,88618920a3fffff,15.0,20049,3784,2057,18.87,54.36,10.26,13460.5
7,AECS Layout Brookefield,88618920a3fffff,16.0,14392,2654,1330,18.44,50.11,9.24,13460.5
...,...,...,...,...,...,...,...,...,...,...
33386,Yeshwanthpur - Mathikere,886014594bfffff,12.0,7174,1131,660,15.77,58.36,9.20,2897.0
33387,Yeshwanthpur - Mathikere,886014594bfffff,13.0,4632,724,431,15.63,59.53,9.30,2897.0
33388,Yeshwanthpur - Mathikere,886014594bfffff,14.0,5555,1003,647,18.06,64.51,11.65,2897.0
33389,Yeshwanthpur - Mathikere,886014594bfffff,15.0,5608,1143,666,20.38,58.27,11.88,2897.0


In [251]:
df_hex_level_max_fe_net_rate = df_hex_level_threshold_merge \
                                    .groupby(['pickup_location', 'pickup_hex_8']) \
                                    .agg( max_fe_rr = pd.NamedAgg('fe-rr' , 'max'), 
                                         max_fe_net = pd.NamedAgg('fe-net' , 'max'))\
                                    .reset_index()
df_hex_level_max_fe_net_rate

Unnamed: 0,pickup_location,pickup_hex_8,max_fe_rr,max_fe_net
0,AECS Layout Brookefield,88618920a3fffff,18.87,10.26
1,Adugodi,8861892581fffff,15.70,7.32
2,Adugodi,886189258bfffff,17.80,7.99
3,Adugodi Traffic Station,886189258dfffff,15.93,7.57
4,Agara Lake,8861892425fffff,18.20,10.25
...,...,...,...,...
2043,Yeshwanthpur,8860145955fffff,12.50,7.19
2044,Yeshwanthpur,8860145957fffff,19.61,10.38
2045,Yeshwanthpur,8860145959fffff,17.84,11.26
2046,Yeshwanthpur,886014595bfffff,15.11,9.70


In [252]:
df_hex_level_threshold_merge1 = pd.merge(df_hex_level_threshold_merge,
                                        df_hex_level_max_fe_net_rate,
                                        how = 'inner',
                                        left_on = ['pickup_location', 'pickup_hex_8'],
                                        right_on = ['pickup_location', 'pickup_hex_8']
                                       )
df_hex_level_threshold_merge1

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,fe_threshold,max_fe_rr,max_fe_net
0,AECS Layout Brookefield,88618920a3fffff,12.0,22586,3682,1971,16.30,53.53,8.73,13460.5,18.87,10.26
1,AECS Layout Brookefield,88618920a3fffff,13.0,15240,2214,1106,14.53,49.95,7.26,13460.5,18.87,10.26
2,AECS Layout Brookefield,88618920a3fffff,14.0,16055,2863,1536,17.83,53.65,9.57,13460.5,18.87,10.26
3,AECS Layout Brookefield,88618920a3fffff,15.0,20049,3784,2057,18.87,54.36,10.26,13460.5,18.87,10.26
4,AECS Layout Brookefield,88618920a3fffff,16.0,14392,2654,1330,18.44,50.11,9.24,13460.5,18.87,10.26
...,...,...,...,...,...,...,...,...,...,...,...,...
4483,Yeshwanthpur - Mathikere,886014594bfffff,12.0,7174,1131,660,15.77,58.36,9.20,2897.0,20.38,12.77
4484,Yeshwanthpur - Mathikere,886014594bfffff,13.0,4632,724,431,15.63,59.53,9.30,2897.0,20.38,12.77
4485,Yeshwanthpur - Mathikere,886014594bfffff,14.0,5555,1003,647,18.06,64.51,11.65,2897.0,20.38,12.77
4486,Yeshwanthpur - Mathikere,886014594bfffff,15.0,5608,1143,666,20.38,58.27,11.88,2897.0,20.38,12.77


In [253]:
df_hex_level_threshold = df_hex_level_threshold_merge1[
                                                       (df_hex_level_threshold_merge1['fe-rr'] == df_hex_level_threshold_merge1['max_fe_rr'])

#                                                         (df_hex_level_threshold_merge['fe-net'] == df_hex_level_threshold_merge['max_fe_rr'])
                                                    ]
df_hex_level_threshold

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,fe_threshold,max_fe_rr,max_fe_net
3,AECS Layout Brookefield,88618920a3fffff,15.0,20049,3784,2057,18.87,54.36,10.26,13460.5,18.87,10.26
8,Adugodi,8861892581fffff,15.0,6760,1061,495,15.70,46.65,7.32,6026.5,15.70,7.32
12,Adugodi,886189258bfffff,15.0,7433,1323,594,17.80,44.90,7.99,5978.0,17.80,7.99
16,Adugodi Traffic Station,886189258dfffff,15.0,7432,1184,555,15.93,46.88,7.47,6754.8,15.93,7.57
21,Agara Lake,8861892425fffff,16.0,5461,994,560,18.20,56.34,10.25,5227.4,18.20,10.25
...,...,...,...,...,...,...,...,...,...,...,...,...
4471,Yeshwanthpur,8860145955fffff,14.0,1336,167,96,12.50,57.49,7.19,1264.5,12.50,7.19
4475,Yeshwanthpur,8860145957fffff,15.0,3574,701,365,19.61,52.07,10.21,3303.0,19.61,10.38
4479,Yeshwanthpur,8860145959fffff,15.0,1368,244,154,17.84,63.11,11.26,1084.4,17.84,11.26
4482,Yeshwanthpur,886014595bfffff,14.0,1072,162,104,15.11,64.20,9.70,1003.6,15.11,9.70


In [254]:
df_hex_level_threshold['ppkm_flag'] = np.where(df_hex_level_threshold['ppkm'] > 15, 'High ppkm', 'Less ppkm')
df_hex_level_threshold  = pd.merge(df_hex_level_threshold,
                                   df_hex_affluence_tag,
                                   how = 'left',
                                   left_on = ['pickup_hex_8'],
                                   right_on = ['pickup_hex_8']
                                  )
df_hex_level_threshold

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,fe_threshold,max_fe_rr,max_fe_net,ppkm_flag,affluence_tag
0,AECS Layout Brookefield,88618920a3fffff,15.0,20049,3784,2057,18.87,54.36,10.26,13460.5,18.87,10.26,Less ppkm,High
1,Adugodi,8861892581fffff,15.0,6760,1061,495,15.70,46.65,7.32,6026.5,15.70,7.32,Less ppkm,High
2,Adugodi,886189258bfffff,15.0,7433,1323,594,17.80,44.90,7.99,5978.0,17.80,7.99,Less ppkm,High
3,Adugodi Traffic Station,886189258dfffff,15.0,7432,1184,555,15.93,46.88,7.47,6754.8,15.93,7.57,Less ppkm,High
4,Agara Lake,8861892425fffff,16.0,5461,994,560,18.20,56.34,10.25,5227.4,18.20,10.25,High ppkm,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2051,Yeshwanthpur,8860145955fffff,14.0,1336,167,96,12.50,57.49,7.19,1264.5,12.50,7.19,Less ppkm,High
2052,Yeshwanthpur,8860145957fffff,15.0,3574,701,365,19.61,52.07,10.21,3303.0,19.61,10.38,Less ppkm,High
2053,Yeshwanthpur,8860145959fffff,15.0,1368,244,154,17.84,63.11,11.26,1084.4,17.84,11.26,Less ppkm,High
2054,Yeshwanthpur,886014595bfffff,14.0,1072,162,104,15.11,64.20,9.70,1003.6,15.11,9.70,Less ppkm,High


In [255]:
df_hex_level_threshold

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,fe_threshold,max_fe_rr,max_fe_net,ppkm_flag,affluence_tag
0,AECS Layout Brookefield,88618920a3fffff,15.0,20049,3784,2057,18.87,54.36,10.26,13460.5,18.87,10.26,Less ppkm,High
1,Adugodi,8861892581fffff,15.0,6760,1061,495,15.70,46.65,7.32,6026.5,15.70,7.32,Less ppkm,High
2,Adugodi,886189258bfffff,15.0,7433,1323,594,17.80,44.90,7.99,5978.0,17.80,7.99,Less ppkm,High
3,Adugodi Traffic Station,886189258dfffff,15.0,7432,1184,555,15.93,46.88,7.47,6754.8,15.93,7.57,Less ppkm,High
4,Agara Lake,8861892425fffff,16.0,5461,994,560,18.20,56.34,10.25,5227.4,18.20,10.25,High ppkm,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2051,Yeshwanthpur,8860145955fffff,14.0,1336,167,96,12.50,57.49,7.19,1264.5,12.50,7.19,Less ppkm,High
2052,Yeshwanthpur,8860145957fffff,15.0,3574,701,365,19.61,52.07,10.21,3303.0,19.61,10.38,Less ppkm,High
2053,Yeshwanthpur,8860145959fffff,15.0,1368,244,154,17.84,63.11,11.26,1084.4,17.84,11.26,Less ppkm,High
2054,Yeshwanthpur,886014595bfffff,14.0,1072,162,104,15.11,64.20,9.70,1003.6,15.11,9.70,Less ppkm,High


In [256]:
df_hex_level_threshold[df_hex_level_threshold['pickup_hex_8'].isin(['8861892cd7fffff', '8861892c8bfffff'])]

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,fe_threshold,max_fe_rr,max_fe_net,ppkm_flag,affluence_tag
842,Hebbal,8861892c8bfffff,15.0,3282,571,298,17.4,52.19,9.08,2968.8,17.4,9.08,Less ppkm,Less
845,Hebbal,8861892cd7fffff,14.0,3825,598,363,15.63,60.7,9.49,2575.1,15.63,9.49,Less ppkm,Less


In [257]:
df_hex_level_threshold[df_hex_level_threshold['pickup_hex_8'] == '8861892cd7fffff']

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,fe_threshold,max_fe_rr,max_fe_net,ppkm_flag,affluence_tag
845,Hebbal,8861892cd7fffff,14.0,3825,598,363,15.63,60.7,9.49,2575.1,15.63,9.49,Less ppkm,Less


In [258]:
df_hex_level_threshold_dis = df_hex_level_threshold \
                        .groupby(['affluence_tag', 'ppkm_flag']) \
                        .agg({'pickup_hex_8' : 'nunique'}) \
                        .reset_index()
# df_hex_level_threshold_dis['hex_dist'] = df_hex_level_threshold_dis['pickup_hex_8']*100.0/df_hex_level_threshold_dis.pickup_hex_8.sum()

df_hex_level_threshold_dis.round(2)

Unnamed: 0,affluence_tag,ppkm_flag,pickup_hex_8
0,High,High ppkm,68
1,High,Less ppkm,895
2,Less,High ppkm,17
3,Less,Less ppkm,1061


In [259]:
test_ppkm = df_hex_level_threshold \
                        .groupby(['affluence_tag', 'pickup_hex_8', 'ppkm_flag']) \
                        .agg({'fe_count' : 'sum'}) \
                        .reset_index()

In [260]:
test_ppkm.to_clipboard(index=False)

In [429]:
# less aff overlap
764/(338+764)

0.6932849364791288

In [430]:
# high aff overlap
630/(331+630)

0.6555671175858481

In [432]:
df_hex_level_threshold[(df_hex_level_threshold['affluence_tag'] == 'Less' )
                        &
                        (df_hex_level_threshold['ppkm_flag'] == 'Less ppkm')
                        ]

Unnamed: 0,pickup_location,pickup_hex_8,ppkm,fe_count,gross_count,net_count,fe-rr,g2n,fe-net,max_fe_net,ppkm_flag,affluence_tag
60,Attibele,8861893401fffff,11.0,90,8,4,8.89,50.0,4.44,4.44,Less ppkm,Less
61,Attibele,8861893403fffff,13.0,37,15,5,40.54,33.33,13.51,13.51,Less ppkm,Less
62,Attibele,8861893405fffff,10.0,20,4,1,20.0,25.0,5.0,5.0,Less ppkm,Less
63,Attibele,8861893407fffff,14.0,65,10,7,15.38,70.0,10.77,10.77,Less ppkm,Less
65,Attibele,886189340bfffff,10.0,23,3,1,13.04,33.33,4.35,4.35,Less ppkm,Less
66,Attibele,886189340dfffff,11.0,93,11,5,11.83,45.45,5.38,5.38,Less ppkm,Less
67,Attibele,8861893411fffff,14.0,35,1,1,2.86,100.0,2.86,2.86,Less ppkm,Less
70,Attibele,8861893415fffff,12.0,39,2,1,5.13,50.0,2.56,2.56,Less ppkm,Less
71,Attibele,8861893417fffff,10.0,39,3,2,7.69,66.67,5.13,5.13,Less ppkm,Less
72,Attibele,8861893423fffff,12.0,25,5,1,20.0,20.0,4.0,4.0,Less ppkm,Less


## Other details 

In [291]:
df_overlap_details.head()

Unnamed: 0,pickup_hex_8,affluence_tag
0,88618921d3fffff,High Affluence
1,88618921c7fffff,High Affluence
2,8861892665fffff,High Affluence
3,8860145a33fffff,High Affluence
4,8861892c97fffff,High Affluence


In [177]:
df_overlap_and_swiggy_orders = pd.merge(df_overlap_details, 
                                    df_swiggy_orders,
                                    how = 'inner',
                                    left_on = ['pickup_hex_8'],
                                    right_on = ['location_hex_8']
                                   )

In [178]:
df_overlap_and_swiggy_orders.swiggy_orders.describe([0.75,0.8,0.85,0.9,0.95])

count      70.000000
mean      697.485714
std       937.462328
min         1.000000
50%       350.000000
75%       858.750000
80%      1056.800000
85%      1179.000000
90%      1279.700000
95%      2066.050000
max      5667.000000
Name: swiggy_orders, dtype: float64

In [194]:
df_overlap_and_swiggy_orders['swiggy_order_flag'] = np.where(df_overlap_and_swiggy_orders['swiggy_orders'] >= df_overlap_and_swiggy_orders.swiggy_orders.quantile(0.80) , 'High Swiggy', 
                                                             np.where(df_overlap_and_swiggy_orders['swiggy_orders'] >= df_overlap_and_swiggy_orders.swiggy_orders.quantile(0.20) , 'Medium Swiggy', 
                                                             'Less Swiggy'))
df_overlap_and_swiggy_orders.head(2)

Unnamed: 0,pickup_hex_8,affluence_tag,city_name,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_median_bill_amount,swiggy_mean_bill_amount,swiggy_total_bill_amount,swiggy_order_flag,swiggy_bill_amount_flag
0,88618921d3fffff,High Affluence,Bangalore,88618921d3fffff,5513,4892,10405,3059,326.0,407.0,1246327.0,High Swiggy,Medium Price
1,88618921c7fffff,High Affluence,Bangalore,88618921c7fffff,2074,1301,3375,1186,336.0,432.0,511943.0,High Swiggy,Medium Price


In [195]:
df_overlap_swiggy = df_overlap_and_swiggy_orders \
                    .groupby(['swiggy_order_flag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_overlap_swiggy['swiggy_orders_per_hex'] = df_overlap_swiggy['swiggy_orders']/df_overlap_swiggy['location_hex_8']
df_overlap_swiggy['hex_distr'] = df_overlap_swiggy['location_hex_8']*100.0/df_overlap_swiggy.location_hex_8.sum()
df_overlap_swiggy['swiggy_orders_distr'] = df_overlap_swiggy['swiggy_orders']*100.0/df_overlap_swiggy.swiggy_orders.sum()


df_overlap_swiggy_1 = df_overlap_swiggy.round(2)

In [196]:
df_overlap_swiggy = df_overlap_and_swiggy_orders \
                    .groupby(['affluence_tag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_overlap_swiggy['swiggy_orders_per_hex'] = df_overlap_swiggy['swiggy_orders']/df_overlap_swiggy['location_hex_8']
df_overlap_swiggy['hex_distr'] = df_overlap_swiggy['location_hex_8']*100.0/df_overlap_swiggy.location_hex_8.sum()
df_overlap_swiggy['swiggy_orders_distr'] = df_overlap_swiggy['swiggy_orders']*100.0/df_overlap_swiggy.swiggy_orders.sum()


df_overlap_swiggy_2 = df_overlap_swiggy.round(2)

In [197]:
df_overlap_swiggy = df_overlap_and_swiggy_orders \
                    .groupby(['affluence_tag','swiggy_order_flag']) \
                    .agg(
                        location_hex_8 = pd.NamedAgg('location_hex_8', 'nunique'),
                        link_orders = pd.NamedAgg('link_orders', 'sum'),
                        auto_orders = pd.NamedAgg('auto_orders', 'sum'),
                        taxi_orders = pd.NamedAgg('taxi_orders', 'sum'),
                        swiggy_orders = pd.NamedAgg('swiggy_orders', 'sum')
                        ).reset_index()
df_overlap_swiggy['swiggy_orders_per_hex'] = df_overlap_swiggy['swiggy_orders']/df_overlap_swiggy['location_hex_8']
df_overlap_swiggy['hex_distr'] = df_overlap_swiggy['location_hex_8']*100.0/df_overlap_swiggy.location_hex_8.sum()
df_overlap_swiggy['swiggy_orders_distr'] = df_overlap_swiggy['swiggy_orders']*100.0/df_overlap_swiggy.swiggy_orders.sum()


df_overlap_swiggy_3 = df_overlap_swiggy.round(2)

In [198]:
df_overlap_swiggy_1

Unnamed: 0,swiggy_order_flag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High Swiggy,14,60995,69815,130810,28676,2048.29,20.0,58.73
1,Less Swiggy,14,24856,20963,45819,1164,83.14,20.0,2.38
2,Medium Swiggy,42,110990,104703,215693,18984,452.0,60.0,38.88


In [199]:
df_overlap_swiggy_2

Unnamed: 0,affluence_tag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High Affluence,25,95031,97137,192168,34908,1396.32,35.71,71.5
1,Low Affluence,45,101810,98344,200154,13916,309.24,64.29,28.5


In [200]:
df_overlap_swiggy_3

Unnamed: 0,affluence_tag,swiggy_order_flag,location_hex_8,link_orders,auto_orders,taxi_orders,swiggy_orders,swiggy_orders_per_hex,hex_distr,swiggy_orders_distr
0,High Affluence,High Swiggy,13,59185,68755,127940,27372,2105.54,18.57,56.06
1,High Affluence,Medium Swiggy,12,35846,28382,64228,7536,628.0,17.14,15.44
2,Low Affluence,High Swiggy,1,1810,1060,2870,1304,1304.0,1.43,2.67
3,Low Affluence,Less Swiggy,14,24856,20963,45819,1164,83.14,20.0,2.38
4,Low Affluence,Medium Swiggy,30,75144,76321,151465,11448,381.6,42.86,23.45


In [201]:
# 20| 80| 80+
ha = round(13*100/14,2)
la = round(14*100/14,2)
print('High Swiggy with High Affluence signal overlap percentage',ha)
print('Less Swiggy with Less Affluence signal overlap percentage',la)

High Swiggy with High Affluence signal overlap percentage 92.86
Less Swiggy with Less Affluence signal overlap percentage 100.0


### Summary
#### Less 0-50 quantile | Medium 50-80 quantile | High 80 Above quantile

- Bangalore city all hex<br>
    High Swiggy with High Affluence signal overlap percentage 98.06<br>
    Less Swiggy with Less Affluence signal overlap percentage 67.43<br><br>
- 70 hex<br>
    High Swiggy with High Affluence signal overlap percentage 92.86<br>
    Less Swiggy with Less Affluence signal overlap percentage 97.14<br><br><br>
    
#### Less 0-35 quantile | Medium 35-75 quantile | High 75 Above quantile

- Bangalore city all hex<br>
    High Swiggy with High Affluence signal overlap percentage 97.53<br>
    Less Swiggy with Less Affluence signal overlap percentage 69.87<br><br>
- 70 hex<br>
    High Swiggy with High Affluence signal overlap percentage 83.33<br>
    Less Swiggy with Less Affluence signal overlap percentage 96.0<br><br><br>
    
#### Less 0-20 quantile | Medium 20-80 quantile | High 80 Above quantile

- Bangalore city all hex<br>
    High Swiggy with High Affluence signal overlap percentage 98.06<br>
    Less Swiggy with Less Affluence signal overlap percentage 72.46<br><br>
- 70 hex<br>
    High Swiggy with High Affluence signal overlap percentage 92.86<br>
    Less Swiggy with Less Affluence signal overlap percentage 100.0

## 30% of Low Affluence hex_8 with negative funnel rate.

BGS Kengeri	- 8860145ad1fffff -> Minor change in PPKM (-0.03) 
<br>
Mathikere Lake	- 8860145943fffff -> PPKM Increased by (0.04)
<br>
Mysore rd	- 8860145a05fffff -> PPKM Increased by (0.11)
<br>
Sahakara Nagar	- 8861892c95fffff -> Minor change in PPKM (-0.03)
<br>
Vijayanagar	- 886014584dfffff -> Minor change in PPKM (-0.03) 
<br>
Yelanhanka	- 8861892db7fffff -> Minor change in PPKM (-0.07) 
<br>
Yeshwanthpur - Mathikere	886014594bfffff -> PPKM Increased by (0.06) 