## Imports

In [1]:
import h3 as h3
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy import stats
from pyhive import presto
from keplergl import KeplerGl
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

In [2]:
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 500)

## Connection

In [3]:
## Connection
connection = presto.connect(
        host='presto-gateway.serving.data.production.internal',
        port=80,
        protocol='http',
        catalog='hive',
        username='manoj.ravirajan@rapido.bike'
)

## Dataset

In [4]:
## Generate date range

start_date = '20230703'
end_date = '20230730'
city = 'Bangalore'
service = 'Auto'

In [5]:
## datasets.service_mapping

service_mapping = f"""
        SELECT 
            city_display_name AS city,
            service_level AS service_name,
            service_detail_id,
            city_id,
            service_id
        FROM 
            datasets.service_mapping
        WHERE 
            city_display_name = '{city}'
            AND service_level = '{service}'
"""

df_service_mapping = pd.read_sql(service_mapping, connection)
service_detail_id = df_service_mapping.service_detail_id.loc[0]
df_service_mapping
# service_detail_id

Unnamed: 0,city,service_name,service_detail_id,city_id,service_id
0,Bangalore,Auto,5c53562fceb6fc9241980547,572ca7ff116b5db3057bd814,5bd6c6e2e79cc313a94728d0


In [91]:
df_hex_affluence_tag = pd.read_csv('/Users/rapido/local-datasets/affluence/main/hex_affluence_tag.csv')
df_hex_affluence_tag =df_hex_affluence_tag[['city_name', 'pickup_hex_8', 'affluence_tag', 'demand_bucket']]
df_hex_affluence_tag

Unnamed: 0,city_name,pickup_hex_8,affluence_tag,demand_bucket
0,Bangalore,88618920a3fffff,High,Highest
1,Bangalore,8861892581fffff,High,Highest
2,Bangalore,886189258bfffff,High,Highest
3,Bangalore,886189258dfffff,High,Highest
4,Bangalore,8861892425fffff,High,Highest
...,...,...,...,...
2680,Bangalore,8860145955fffff,High,High
2681,Bangalore,8860145957fffff,High,Highest
2682,Bangalore,8860145959fffff,High,High
2683,Bangalore,886014595bfffff,High,High


In [None]:
## orders.order_logs_snapshot --> Tips

order_logs_snapshot = f"""
        
        SELECT 
            yyyymmdd,
            city_name,
            hex.cluster,
            pickup_location_hex_8,
            order_id,
            order_status,
            tip_amount,
            tip_status,
            CASE 
            WHEN tip_status = 'paid' THEN 1
            END AS tipped_orders
        FROM
            orders.order_logs_snapshot ols 
            
        LEFT JOIN      
            datasets.city_cluster_hex hex
            ON pickup_location_hex_8 = hex_id
            AND hex.resolution = 8
            AND hex.city = '{city}'
            
        WHERE 
            yyyymmdd >= '{start_date}'
            AND yyyymmdd <= '{end_date}'
            AND service_detail_id = '{service_detail_id}'
            AND order_status = 'dropped'
            AND (spd_fraud_flag = false OR spd_fraud_flag IS NULL)
            
        GROUP BY 1,2,3,4,5,6,7,8,9
"""

df_tip_details = pd.read_sql(order_logs_snapshot, connection)
df_tip_details.head()

In [None]:
df_tip_details.to_csv('/Users/rapido/local-datasets/affluence/raw/raw_ols_tip_details_{}_{}_{}_{}.csv' \
                      .format(city,service,start_date,end_date), index = False)

In [92]:
df_tip_details = pd.read_csv('/Users/rapido/local-datasets/affluence/raw/raw_ols_tip_details_{}_{}_{}_{}.csv' \
                      .format(city,service,start_date,end_date))

In [93]:
df_tip_details.head(3)

Unnamed: 0,yyyymmdd,city_name,cluster,pickup_location_hex_8,order_id,order_status,tip_amount,tip_status,tipped_orders
0,20230706,Bangalore,Doddanekundi,8861892e57fffff,64a658ef7a50846a2bdbaed7,dropped,,,
1,20230706,Bangalore,Frazer Town,8861892ebbfffff,64a656ca0af101632c8a90c0,dropped,,,
2,20230706,Bangalore,Banaswadi,8861892eadfffff,64a6b28bc7f9b07136c23a45,dropped,,,


## Analysis

In [94]:
df_hex_level_tip_details = df_tip_details \
                                .groupby(['cluster', 'pickup_location_hex_8']) \
                                .agg(
                                    total_orders = pd.NamedAgg('order_id', 'nunique'),
                                    tip_orders = pd.NamedAgg('tipped_orders', 'sum'),
                                    tip_amount = pd.NamedAgg('tip_amount', 'sum')
                                    ) \
                                .reset_index()
print(df_hex_level_tip_details.shape)
df_hex_level_tip_details['tip_orders'] = df_hex_level_tip_details['tip_orders'].astype(int)
df_hex_level_tip_details['tip_amount'] = df_hex_level_tip_details['tip_amount'].astype(int)
df_hex_level_tip_details.head(2)

(1957, 5)


Unnamed: 0,cluster,pickup_location_hex_8,total_orders,tip_orders,tip_amount
0,AECS Layout Brookefield,88618920a3fffff,11233,555,17489
1,Adugodi,8861892581fffff,4183,267,8065


In [95]:
df_hex_level_tip_details.describe()

Unnamed: 0,total_orders,tip_orders,tip_amount
count,1957.0,1957.0,1957.0
mean,1214.07,58.73,1820.36
std,2797.88,159.63,5141.84
min,1.0,0.0,0.0
25%,10.0,0.0,0.0
50%,97.0,4.0,110.0
75%,1193.0,43.0,1140.0
max,31993.0,2126.0,66100.0


## Merge affluence hex tag vs. hex level tip data

In [124]:
df_overlap_details = pd.read_csv('/Users/rapido/local-datasets/affluence/final/Bangalore Affluence List only 70 hex sample.csv')
df_overlap_details = df_overlap_details[['pickup_hex_8' ,'affluence_tag']]
df_overlap_details.head(2)

Unnamed: 0,pickup_hex_8,affluence_tag
0,88618921d3fffff,High Affluence
1,88618921c7fffff,High Affluence


In [97]:
df_aff_hex = pd.merge(df_hex_affluence_tag, df_overlap_details, how = 'left', 
                     left_on = ['pickup_hex_8'],
                     right_on = ['pickup_hex_8']
                     ).reset_index()
df_aff_hex = df_aff_hex[['pickup_hex_8', 'affluence_tag_x', 'affluence_tag_y']]
df_aff_hex.describe()

Unnamed: 0,pickup_hex_8,affluence_tag_x,affluence_tag_y
count,2685,2685,70
unique,2685,2,2
top,88618920a3fffff,Less,Low Affluence
freq,1,1548,45


In [98]:
df_affluence_tag_and_tip = pd.merge(df_aff_hex, 
                                    df_hex_level_tip_details,
                                    how = 'left',
                                    left_on = ['pickup_hex_8'],
                                    right_on = ['pickup_location_hex_8']
                                   )
df_affluence_tag_and_tip['tip_orders'] = df_affluence_tag_and_tip['tip_orders'].fillna(0)
df_affluence_tag_and_tip['tip_orders_rate'] = (df_affluence_tag_and_tip['tip_orders']*100/df_affluence_tag_and_tip['total_orders'])
df_affluence_tag_and_tip['tip_amout_per_tipped_orders'] = (df_affluence_tag_and_tip['tip_amount']/df_affluence_tag_and_tip['tip_orders'])

df_affluence_tag_and_tip.head(2)

Unnamed: 0,pickup_hex_8,affluence_tag_x,affluence_tag_y,cluster,pickup_location_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tip_amout_per_tipped_orders
0,88618920a3fffff,High,,AECS Layout Brookefield,88618920a3fffff,11233.0,555.0,17489.0,4.94,31.51
1,8861892581fffff,High,,Adugodi,8861892581fffff,4183.0,267.0,8065.0,6.38,30.21


In [111]:
df_affluence_tag_and_tip[df_affluence_tag_and_tip['affluence_tag_x'] == 'High'].describe()

Unnamed: 0,total_orders,tip_orders,tip_amount,tip_orders_rate,tip_amout_per_tipped_orders
count,975.0,1137.0,975.0,975.0,833.0
mean,2219.69,93.77,3418.91,6.43,inf
std,3611.33,199.59,6850.79,12.13,
min,1.0,0.0,0.0,0.0,10.0
25%,87.0,0.0,105.0,1.74,26.0
50%,996.0,19.0,840.0,3.79,29.83
75%,2706.5,98.0,3840.0,6.59,32.5
max,31993.0,2126.0,66100.0,100.0,inf


In [112]:
df_affluence_tag_and_tip[df_affluence_tag_and_tip['affluence_tag_x'] == 'Less'].describe()

Unnamed: 0,total_orders,tip_orders,tip_amount,tip_orders_rate,tip_amout_per_tipped_orders
count,963.0,1548.0,963.0,963.0,526.0
mean,219.71,5.37,237.81,4.71,inf
std,817.83,28.9,1036.97,10.98,
min,1.0,0.0,0.0,0.0,10.0
25%,5.0,0.0,0.0,0.0,22.7
50%,18.0,0.0,30.0,1.36,27.69
75%,113.0,1.0,130.0,4.94,30.0
max,19405.0,874.0,24894.0,100.0,inf


## Hex level data

In [143]:
df_affluence_tag_and_tip.head(2)

Unnamed: 0,pickup_hex_8,affluence_tag_x,affluence_tag_y,cluster,pickup_location_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tip_amout_per_tipped_orders,tip_amount_tag,tip_orders_rate_tag
0,88618920a3fffff,High,,AECS Layout Brookefield,88618920a3fffff,11233.0,555.0,17489.0,4.94,31.51,1. High,3. Low
1,8861892581fffff,High,,Adugodi,8861892581fffff,4183.0,267.0,8065.0,6.38,30.21,2. Medium,2. Medium


In [144]:
df_affluence_tag_and_tip[['total_orders', 'tip_orders', 
                          'tip_amount', 'tip_orders_rate', 'tip_amout_per_tipped_orders']] \
.describe([0.60,0.65,0.70,0.75,0.8,0.85,0.9,0.95,0.9])

Unnamed: 0,total_orders,tip_orders,tip_amount,tip_orders_rate,tip_amout_per_tipped_orders
count,1938.0,2685.0,1938.0,1938.0,1359.0
mean,1225.89,42.81,1838.21,5.57,inf
std,2809.01,138.75,5163.82,11.6,
min,1.0,0.0,0.0,0.0,10.0
50%,101.5,1.0,120.0,2.74,29.52
60%,338.4,3.0,300.0,3.89,30.0
65%,585.1,5.0,460.0,4.58,30.0
70%,831.0,10.0,680.0,5.26,30.62
75%,1226.75,18.0,1167.5,6.05,31.65
80%,1650.2,30.0,1896.2,6.9,32.68


In [241]:
## Tip amount Tag

df_affluence_tag_and_tip['tip_amount_tag'] = \
    np.where(df_affluence_tag_and_tip['tip_amount'] >= df_affluence_tag_and_tip['tip_amount'].quantile(0.95), '1. High',
    np.where(df_affluence_tag_and_tip['tip_amount'] >= df_affluence_tag_and_tip['tip_amount'].quantile(0.75), '2. Medium',
    np.where(df_affluence_tag_and_tip['tip_amount'] >= df_affluence_tag_and_tip['tip_amount'].quantile(0.50), '3. Low',
             np.where(df_affluence_tag_and_tip['tip_amount'] > 0.0 , '4. Lowest', '5. No tip'))))


## Tip Orders Rate Tag

df_affluence_tag_and_tip['tip_orders_rate_tag'] = \
    np.where(df_affluence_tag_and_tip['tip_orders_rate'] >= df_affluence_tag_and_tip['tip_orders_rate'].quantile(0.90), '1. High',
    np.where(df_affluence_tag_and_tip['tip_orders_rate'] >= df_affluence_tag_and_tip['tip_orders_rate'].quantile(0.70), '2. Medium',
    np.where(df_affluence_tag_and_tip['tip_orders_rate'] >= df_affluence_tag_and_tip['tip_orders_rate'].quantile(0.50), '3. Low',
             np.where(df_affluence_tag_and_tip['tip_orders_rate'] > 0.0 , '4. Lowest', '5. No tip'))))

## Tip Orders 
df_affluence_tag_and_tip['tip_flag'] = np.where(df_affluence_tag_and_tip['tip_orders'] < df_affluence_tag_and_tip.tip_orders.quantile(0.9) , 'Less Tip', 'High Tip')
df_affluence_tag_and_tip.head(2)

Unnamed: 0,pickup_hex_8,affluence_tag_x,affluence_tag_y,cluster,pickup_location_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tip_amout_per_tipped_orders,tip_amount_tag,tip_orders_rate_tag,tip_flag
0,88618920a3fffff,High,,AECS Layout Brookefield,88618920a3fffff,11233.0,555.0,17489.0,4.94,31.51,1. High,3. Low,High Tip
1,8861892581fffff,High,,Adugodi,8861892581fffff,4183.0,267.0,8065.0,6.38,30.21,2. Medium,2. Medium,High Tip


In [102]:
df_affluence_tag_and_tip.columns

Index(['pickup_hex_8', 'affluence_tag_x', 'affluence_tag_y', 'cluster',
       'pickup_location_hex_8', 'total_orders', 'tip_orders', 'tip_amount',
       'tip_orders_rate', 'tip_amout_per_tipped_orders', 'tip_amount_tag',
       'tip_orders_rate_tag'],
      dtype='object')

## High vs Low tips analysis

In [242]:
df_affluence_tag_and_tip_analysis_1 = df_affluence_tag_and_tip\
                                        .groupby(['tip_flag' ])\
                                        .agg(
                                            pickup_hex_8 = pd.NamedAgg('pickup_hex_8', 'nunique'),
                                            total_orders = pd.NamedAgg('total_orders', 'sum'),
                                            tip_orders = pd.NamedAgg('tip_orders', 'sum')
                                            ,tip_amount = pd.NamedAgg('tip_amount', 'sum')
                                            ) \
                                        .reset_index()

df_affluence_tag_and_tip_analysis_1['tip_orders_rate'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']*100.0/df_affluence_tag_and_tip_analysis_1['total_orders'])
df_affluence_tag_and_tip_analysis_1['tipped_orders_pre_hex'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']/df_affluence_tag_and_tip_analysis_1['pickup_hex_8'])

df_affluence_tag_and_tip_analysis_1['hex_distribution'] = (df_affluence_tag_and_tip_analysis_1['pickup_hex_8']*100.0/df_affluence_tag_and_tip_analysis_1.pickup_hex_8.sum())
df_affluence_tag_and_tip_analysis_1['tip_orders_distribution'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']*100.0/df_affluence_tag_and_tip_analysis_1.tip_orders.sum())

df_affluence_tag_and_tip_analysis_1['tipped_amount_pre_tipped_orders'] = (df_affluence_tag_and_tip_analysis_1['tip_amount']/df_affluence_tag_and_tip_analysis_1['tip_orders']).round(2)

df_tag_tip_analysis_1 = df_affluence_tag_and_tip_analysis_1

In [243]:
df_affluence_tag_and_tip_analysis_1 = df_affluence_tag_and_tip\
                                        .groupby(['affluence_tag_x' ])\
                                        .agg(
                                            pickup_hex_8 = pd.NamedAgg('pickup_hex_8', 'nunique'),
                                            total_orders = pd.NamedAgg('total_orders', 'sum'),
                                            tip_orders = pd.NamedAgg('tip_orders', 'sum')
                                            ,tip_amount = pd.NamedAgg('tip_amount', 'sum')
                                            ) \
                                        .reset_index()

df_affluence_tag_and_tip_analysis_1['tip_orders_rate'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']*100.0/df_affluence_tag_and_tip_analysis_1['total_orders'])
df_affluence_tag_and_tip_analysis_1['tipped_orders_pre_hex'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']/df_affluence_tag_and_tip_analysis_1['pickup_hex_8'])

df_affluence_tag_and_tip_analysis_1['hex_distribution'] = (df_affluence_tag_and_tip_analysis_1['pickup_hex_8']*100.0/df_affluence_tag_and_tip_analysis_1.pickup_hex_8.sum())
df_affluence_tag_and_tip_analysis_1['tip_orders_distribution'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']*100.0/df_affluence_tag_and_tip_analysis_1.tip_orders.sum())

df_affluence_tag_and_tip_analysis_1['tipped_amount_pre_tipped_orders'] = (df_affluence_tag_and_tip_analysis_1['tip_amount']/df_affluence_tag_and_tip_analysis_1['tip_orders']).round(2)

df_tag_tip_analysis_2 = df_affluence_tag_and_tip_analysis_1

In [244]:
df_affluence_tag_and_tip_analysis_1 = df_affluence_tag_and_tip\
                                        .groupby(['affluence_tag_x', 'tip_flag' ])\
                                        .agg(
                                            pickup_hex_8 = pd.NamedAgg('pickup_hex_8', 'nunique'),
                                            total_orders = pd.NamedAgg('total_orders', 'sum'),
                                            tip_orders = pd.NamedAgg('tip_orders', 'sum')
                                            ,tip_amount = pd.NamedAgg('tip_amount', 'sum')
                                            ) \
                                        .reset_index()

df_affluence_tag_and_tip_analysis_1['tip_orders_rate'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']*100.0/df_affluence_tag_and_tip_analysis_1['total_orders'])
df_affluence_tag_and_tip_analysis_1['tipped_orders_pre_hex'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']/df_affluence_tag_and_tip_analysis_1['pickup_hex_8'])

df_affluence_tag_and_tip_analysis_1['hex_distribution'] = (df_affluence_tag_and_tip_analysis_1['pickup_hex_8']*100.0/df_affluence_tag_and_tip_analysis_1.pickup_hex_8.sum())
df_affluence_tag_and_tip_analysis_1['tip_orders_distribution'] = (df_affluence_tag_and_tip_analysis_1['tip_orders']*100.0/df_affluence_tag_and_tip_analysis_1.tip_orders.sum())

df_affluence_tag_and_tip_analysis_1['tipped_amount_pre_tipped_orders'] = (df_affluence_tag_and_tip_analysis_1['tip_amount']/df_affluence_tag_and_tip_analysis_1['tip_orders']).round(2)

df_tag_tip_analysis_3 = df_affluence_tag_and_tip_analysis_1

In [248]:
df_affluence_tag_and_tip.pickup_hex_8.nunique()

2685

In [245]:
df_tag_tip_analysis_1

Unnamed: 0,tip_flag,pickup_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tipped_orders_pre_hex,hex_distribution,tip_orders_distribution,tipped_amount_pre_tipped_orders
0,High Tip,269,1604451.0,90422.0,2867426.0,5.64,336.14,10.02,78.67,31.71
1,Less Tip,2416,771326.0,24510.0,695022.0,3.18,10.14,89.98,21.33,28.36


In [246]:
df_tag_tip_analysis_2

Unnamed: 0,affluence_tag_x,pickup_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tipped_orders_pre_hex,hex_distribution,tip_orders_distribution,tipped_amount_pre_tipped_orders
0,High,1137,2164197.0,106612.0,3333440.0,4.93,93.77,42.35,92.76,31.27
1,Less,1548,211580.0,8320.0,229008.0,3.93,5.37,57.65,7.24,27.52


In [247]:
df_tag_tip_analysis_3

Unnamed: 0,affluence_tag_x,tip_flag,pickup_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tipped_orders_pre_hex,hex_distribution,tip_orders_distribution,tipped_amount_pre_tipped_orders
0,High,High Tip,258,1551236.0,87758.0,2790758.0,5.66,340.15,9.61,76.36,31.8
1,High,Less Tip,879,612961.0,18854.0,542682.0,3.08,21.45,32.74,16.4,28.78
2,Less,High Tip,11,53215.0,2664.0,76668.0,5.01,242.18,0.41,2.32,28.78
3,Less,Less Tip,1537,158365.0,5656.0,152340.0,3.57,3.68,57.24,4.92,26.93


In [282]:
ha = round(258*100/1137,2)
la = round(1537*100/1548,2)
print('High affluence with Tip signal overlap percentage',ha)
print('Less affluence with Tip signal overlap percentage',la)

High affluence with Tip signal overlap percentage 22.69
Less affluence with Tip signal overlap percentage 99.29


## Outliers 

In [261]:
exclude_values = ['8861892eb1fffff','8860145b51fffff','8860145b59fffff','8861892e93fffff',
                '8860145b53fffff','8861892ccbfffff',
                '88618925a9fffff','8860145b67fffff','8861892ca3fffff']

# ['8860145b51fffff', '8860145b59fffff', '8860145ad1fffff', '8860145943fffff', 
#                   '8860145a05fffff', '8861892c95fffff', '886014584dfffff', '8861892db7fffff', 
#                   '886014594bfffff']

# ['8861892eb1fffff','8860145b51fffff','8860145b59fffff','8861892e93fffff',
# '8860145ad1fffff','8860145b53fffff','8860145a05fffff','8861892ccbfffff',
# '88618925a9fffff','8860145b67fffff','8861892ca3fffff', '886014594bfffff']

# df_affluence_tag_and_tip[df_affluence_tag_and_tip['pickup_location_hex_8'].isin(exclude_values)]

df_affluence_tag_and_tip[df_affluence_tag_and_tip['affluence_tag_x'].isin(['Less'])][['tip_orders']].describe([0.60,0.65,0.70,0.75,0.8,0.85,0.9,0.95,0.96,0.97,0.98,0.99])

Unnamed: 0,tip_orders
count,1548.0
mean,5.37
std,28.9
min,0.0
50%,0.0
60%,0.0
65%,0.0
70%,1.0
75%,1.0
80%,3.0


In [262]:
df_affluence_tag_and_tip_of = df_affluence_tag_and_tip[df_affluence_tag_and_tip['total_orders'] >= df_affluence_tag_and_tip.total_orders.quantile(0.5)]
df_affluence_tag_and_tip_of = df_affluence_tag_and_tip_of[~df_affluence_tag_and_tip_of['pickup_location_hex_8'].isin(exclude_values)]
df_affluence_tag_and_tip_of.head(2)

Unnamed: 0,pickup_hex_8,affluence_tag_x,affluence_tag_y,cluster,pickup_location_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tip_amout_per_tipped_orders,tip_amount_tag,tip_orders_rate_tag,tip_flag
0,88618920a3fffff,High,,AECS Layout Brookefield,88618920a3fffff,11233.0,555.0,17489.0,4.94,31.51,1. High,3. Low,High Tip
1,8861892581fffff,High,,Adugodi,8861892581fffff,4183.0,267.0,8065.0,6.38,30.21,2. Medium,2. Medium,High Tip


In [274]:
df_analysis_1_of = df_affluence_tag_and_tip_of\
                                        .groupby(['tip_flag' ])\
                                        .agg(
                                            pickup_hex_8 = pd.NamedAgg('pickup_hex_8', 'nunique'),
                                            total_orders = pd.NamedAgg('total_orders', 'sum'),
                                            tip_orders = pd.NamedAgg('tip_orders', 'sum')
                                            ,tip_amount = pd.NamedAgg('tip_amount', 'sum')
                                            ) \
                                        .reset_index()

df_analysis_1_of['tip_orders_rate'] = (df_analysis_1_of['tip_orders']*100.0/df_analysis_1_of['total_orders'])
df_analysis_1_of['tipped_orders_pre_hex'] = (df_analysis_1_of['tip_orders']/df_analysis_1_of['pickup_hex_8'])

df_analysis_1_of['hex_distribution'] = (df_analysis_1_of['pickup_hex_8']*100.0/df_high_low_aff_agg_of.pickup_hex_8.sum())
df_analysis_1_of['tip_orders_distribution'] = (df_analysis_1_of['tip_orders']*100.0/df_high_low_aff_agg_of.tip_orders.sum())

df_analysis_1_of['tipped_amount_pre_tipped_orders'] = (df_analysis_1_of['tip_amount']/df_analysis_1_of['tip_orders']).round(2)

df_analysis_of_1 = df_analysis_1_of

In [275]:
df_analysis_1_of = df_affluence_tag_and_tip_of\
                                        .groupby(['affluence_tag_y' ])\
                                        .agg(
                                            pickup_hex_8 = pd.NamedAgg('pickup_hex_8', 'nunique'),
                                            total_orders = pd.NamedAgg('total_orders', 'sum'),
                                            tip_orders = pd.NamedAgg('tip_orders', 'sum')
                                            ,tip_amount = pd.NamedAgg('tip_amount', 'sum')
                                            ) \
                                        .reset_index()

df_analysis_1_of['tip_orders_rate'] = (df_analysis_1_of['tip_orders']*100.0/df_analysis_1_of['total_orders'])
df_analysis_1_of['tipped_orders_pre_hex'] = (df_analysis_1_of['tip_orders']/df_analysis_1_of['pickup_hex_8'])

df_analysis_1_of['hex_distribution'] = (df_analysis_1_of['pickup_hex_8']*100.0/df_high_low_aff_agg_of.pickup_hex_8.sum())
df_analysis_1_of['tip_orders_distribution'] = (df_analysis_1_of['tip_orders']*100.0/df_high_low_aff_agg_of.tip_orders.sum())

df_analysis_1_of['tipped_amount_pre_tipped_orders'] = (df_analysis_1_of['tip_amount']/df_analysis_1_of['tip_orders']).round(2)

df_analysis_of_2 = df_analysis_1_of

In [276]:
df_analysis_1_of = df_affluence_tag_and_tip_of\
                                        .groupby(['affluence_tag_y', 'tip_flag' ])\
                                        .agg(
                                            pickup_hex_8 = pd.NamedAgg('pickup_hex_8', 'nunique'),
                                            total_orders = pd.NamedAgg('total_orders', 'sum'),
                                            tip_orders = pd.NamedAgg('tip_orders', 'sum')
                                            ,tip_amount = pd.NamedAgg('tip_amount', 'sum')
                                            ) \
                                        .reset_index()

df_analysis_1_of['tip_orders_rate'] = (df_analysis_1_of['tip_orders']*100.0/df_analysis_1_of['total_orders'])
df_analysis_1_of['tipped_orders_pre_hex'] = (df_analysis_1_of['tip_orders']/df_analysis_1_of['pickup_hex_8'])

df_analysis_1_of['hex_distribution'] = (df_analysis_1_of['pickup_hex_8']*100.0/df_high_low_aff_agg_of.pickup_hex_8.sum())
df_analysis_1_of['tip_orders_distribution'] = (df_analysis_1_of['tip_orders']*100.0/df_high_low_aff_agg_of.tip_orders.sum())

df_analysis_1_of['tipped_amount_pre_tipped_orders'] = (df_analysis_1_of['tip_amount']/df_analysis_1_of['tip_orders']).round(2)

df_analysis_of_3 = df_analysis_1_of

In [277]:
df_analysis_of_1

Unnamed: 0,tip_flag,pickup_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tipped_orders_pre_hex,hex_distribution,tip_orders_distribution,tipped_amount_pre_tipped_orders
0,High Tip,264,1587778.0,89484.0,2840715.0,5.64,338.95,27.5,79.44,31.75
1,Less Tip,696,747639.0,23162.0,659930.0,3.1,33.28,72.5,20.56,28.49


In [283]:
df_analysis_of_3

Unnamed: 0,affluence_tag_y,tip_flag,pickup_hex_8,total_orders,tip_orders,tip_amount,tip_orders_rate,tipped_orders_pre_hex,hex_distribution,tip_orders_distribution,tipped_amount_pre_tipped_orders
0,High Affluence,High Tip,13,75114.0,4172.0,134436.0,5.55,320.92,1.35,3.7,32.22
1,High Affluence,Less Tip,12,21569.0,713.0,20830.0,3.31,59.42,1.25,0.63,29.21
2,Low Affluence,High Tip,2,22886.0,1087.0,31305.0,4.75,543.5,0.21,0.96,28.8
3,Low Affluence,Less Tip,34,53512.0,1877.0,51750.0,3.51,55.21,3.54,1.67,27.57


In [281]:
ha = round(23*100/25,2)
la = round(34*100/35,2)
print('High affluence with Tip signal overlap percentage',ha)
print('Less affluence with Tip signal overlap percentage',la)

High affluence with Tip signal overlap percentage 92.0
Less affluence with Tip signal overlap percentage 97.14


### Summary


- Bangalore city all hex<br>
    High affluence with Tip signal overlap percentage 22.69<br>
    less affluence with Tip signal overlap percentage 99.29<br><br>
- Bangalore city 70 hex<br>
    High affluence with Tip signal overlap percentage 92.0<br>
    Less affluence with Tip signal overlap percentage 97.14<br><br>