## <center> Packages & Connection </center>

In [1]:
import os
import h3 as h3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from pyhive import presto
from keplergl import KeplerGl
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Widen the notebook's DataFrame rendering limits so wide/long frames are not elided.
for display_option, display_limit in (('display.max_columns', 100),
                                      ('display.max_rows', 500)):
    pd.set_option(display_option, display_limit)

In [3]:
## Connection
# Presto connection used by every `pd.read_sql` call in this notebook.
# Host and user identity can be overridden through the PRESTO_HOST / PRESTO_USERNAME
# environment variables so the notebook is not tied to one person's account; the
# original values remain the defaults, so behaviour is unchanged when they are unset.
connection = presto.connect(
        host=os.environ.get('PRESTO_HOST', 'presto-gateway.serving.data.production.internal'),
        port=80,
        protocol='http',
        catalog='hive',
        username=os.environ.get('PRESTO_USERNAME', 'manoj.ravirajan@rapido.bike')
)

## Parameters

In [4]:
# Scope of the analysis: these values are interpolated into the SQL filter and the CSV file names.
city, service = 'Bangalore', 'Link'

In [5]:
# Inclusive yyyymmdd bounds of the order window (compared as strings against `yyyymmdd` in the query).
start_date, end_date = '20230717', '20230813'

In [7]:
## experiments.blr_use_case_hex12
# Presto query (f-string; interpolates `start_date`, `end_date`, `city`, `service`) that assigns
# one use case to every pickup hex-8, once per tagging method reported in the `flag` column:
#   * 'geo_level_hex_tag'   - the use case ranked first by number of distinct hex-12 cells
#   * 'gross_level_hex_tag' - the use case ranked first by gross orders
#
# CTE pipeline:
#   base_data  - orders from orders.order_logs_snapshot in the date/city/service window, LEFT JOINed
#                to experiments.blr_use_case_hex12 on the hex-12 id; a missing use case becomes 'Unknown'.
#   hex_8_agg  - per (city, hex-8, usecase): distinct hex-12 count, gross orders (distinct order ids) and
#                net orders (status 'dropped' and fraud flag false or NULL).
#   view       - adds ROW_NUMBER ranks (asc and desc) and LEAD(…, 1) "runner-up" values, both by
#                hex-12 count and by gross orders, partitioned by city and hex-8.
#   hex_agg    - per (flag, hex-8) totals over rows whose `filter` is 'yes': every non-'Unknown' row,
#                plus the 'Unknown' row only when it is the sole row of its hex-8. Other 'Unknown'
#                rows get 'no' or NULL and are excluded.
#
# Final SELECT: keeps the top-ranked row per hex-8 for each flag. When that row is 'Unknown' but
# other use cases exist, the runner-up (LEAD) use case and its counts are reported instead.
# `accuracy` = usecase_pickup_hex_12 * 100 / total_pickup_hex_12 (0 when the TRY expression yields NULL).
#
# NOTE(review): ROW_NUMBER/LEAD order only by a count column, so ties between use cases within a
# hex-8 appear to be broken arbitrarily - confirm this is acceptable.

blr_use_case = f"""

       WITH base_data AS (

        SELECT  
            yyyymmdd,
            city_name,
            service_obj_service_name,
            COALESCE(merged_usecase_accuracy, 'Unknown') usecase,
            pickup_location_hex_8,
            pickup_location_hex_12,
            order_id,
            order_status,
            spd_fraud_flag
        
        FROM
            orders.order_logs_snapshot ols 
        
        LEFT JOIN 
            experiments.blr_use_case_hex12
            ON pickup_location_hex_12 = hex_12
        
        WHERE 
            yyyymmdd >= '{start_date}'
            AND yyyymmdd <= '{end_date}'
            AND city_name = '{city}'
            AND service_obj_service_name = '{service}'
    ),
    
    hex_8_agg AS (

        SELECT 
            city_name,
            pickup_location_hex_8,
            usecase,
            COUNT(DISTINCT pickup_location_hex_12) total_pickup_hex_12,
            COUNT(DISTINCT order_id) gross_orders,
            COUNT(DISTINCT CASE WHEN order_status = 'dropped' AND (spd_fraud_flag = false OR spd_fraud_flag IS NULL) THEN order_id END) net_orders
        FROM
            base_data
        -- WHERE 
            -- pickup_location_hex_8 = '8860145a0dfffff'
        GROUP BY 1,2,3
    ),

    view AS (
    
    SELECT
        *,
        LEAD(total_pickup_hex_12,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY total_pickup_hex_12 DESC) next_hex_count_geo,
        LEAD(usecase,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY total_pickup_hex_12 DESC) next_use_case_geo,
        LEAD(gross_orders,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY total_pickup_hex_12 DESC) next_gross_orders_geo,
        LEAD(net_orders,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY total_pickup_hex_12 DESC) next_net_orders_geo,

        LEAD(total_pickup_hex_12,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY gross_orders DESC) next_hex_count_gross,
        LEAD(usecase,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY gross_orders DESC) next_use_case_gross,
        LEAD(gross_orders,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY gross_orders DESC) next_gross_orders_gross,
        LEAD(net_orders,1) OVER (PARTITION BY city_name,pickup_location_hex_8 ORDER BY gross_orders DESC) next_net_orders_gross,

        ROW_NUMBER() OVER(PARTITION BY city_name,pickup_location_hex_8 ORDER BY total_pickup_hex_12 DESC) geo_hexes_rn_desc,
        ROW_NUMBER() OVER(PARTITION BY city_name,pickup_location_hex_8 ORDER BY total_pickup_hex_12 ASC) geo_hexes_rn_asc,
        ROW_NUMBER() OVER(PARTITION BY city_name,pickup_location_hex_8 ORDER BY gross_orders DESC) gross_orders_rn_desc,
        ROW_NUMBER() OVER(PARTITION BY city_name,pickup_location_hex_8 ORDER BY gross_orders ASC) gross_orders_rn_asc
    
    FROM 
        hex_8_agg
    ),
    
    hex_agg AS (
    
    SELECT 
        flag,
        pickup_location_hex_8,
        SUM(total_pickup_hex_12) total_pickup_hex_12,
        SUM(gross_orders) total_gross_orders,
        SUM(net_orders) total_net_orders
    FROM 
    (
        SELECT 
            'geo_level_hex_tag' AS flag,
            pickup_location_hex_8,
            usecase,
            total_pickup_hex_12,
            gross_orders,
            net_orders,
            CASE 
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_asc = 1 THEN 'yes'
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_desc != geo_hexes_rn_asc THEN 'no'
            WHEN usecase != 'Unknown' THEN 'yes'
            END filter 
        FROM 
            view 
            
        UNION ALL 
        
        SELECT 
            'gross_level_hex_tag' AS flag,
            pickup_location_hex_8,
            usecase,
            total_pickup_hex_12,
            gross_orders,
            net_orders,
            CASE 
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_asc = 1 THEN 'yes'
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_desc != gross_orders_rn_asc THEN 'no'
            WHEN usecase != 'Unknown' THEN 'yes'
            END filter 
        FROM
            view 
    ) 
    WHERE filter = 'yes'
    GROUP BY 1,2
    )
    
    SELECT 
        a.flag,
        a.city_name,
        a.pickup_location_hex_8,
        a.usecase,
        a.usecase_pickup_hex_12,
        hex_agg.total_pickup_hex_12,
        COALESCE(TRY(a.usecase_pickup_hex_12*100.00/hex_agg.total_pickup_hex_12),0)  accuracy,
        a.usecase_gross_orders,
        hex_agg.total_gross_orders,
        a.usecase_net_orders,
        hex_agg.total_net_orders
    
    FROM 
    (
        SELECT 
            'geo_level_hex_tag' AS flag,
            city_name,
            pickup_location_hex_8,
            CASE 
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_asc = 1 THEN usecase
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_desc != geo_hexes_rn_asc THEN next_use_case_geo
            WHEN usecase != 'Unknown' THEN usecase
            END usecase,
            CASE 
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_asc = 1 THEN total_pickup_hex_12
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_desc != geo_hexes_rn_asc THEN next_hex_count_geo
            WHEN usecase != 'Unknown' THEN total_pickup_hex_12
            END usecase_pickup_hex_12,
            CASE 
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_asc = 1 THEN gross_orders
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_desc != geo_hexes_rn_asc THEN next_gross_orders_geo
            WHEN usecase != 'Unknown' THEN gross_orders
            END usecase_gross_orders,
            
            CASE 
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_asc = 1 THEN net_orders
            WHEN usecase = 'Unknown' AND geo_hexes_rn_desc = 1 AND geo_hexes_rn_desc != geo_hexes_rn_asc THEN next_net_orders_geo
            WHEN usecase != 'Unknown' THEN net_orders
            END usecase_net_orders

        FROM 
            view
        WHERE 
            geo_hexes_rn_desc = 1
            
        UNION ALL 
        
        SELECT 
            'gross_level_hex_tag' AS flag,
            city_name,
            pickup_location_hex_8,
            CASE 
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_asc = 1 THEN usecase
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_desc != gross_orders_rn_asc THEN next_use_case_gross
            WHEN usecase != 'Unknown' THEN usecase
            END usecase,
            CASE 
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_asc = 1 THEN total_pickup_hex_12
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_desc != gross_orders_rn_asc THEN next_hex_count_gross
            WHEN usecase != 'Unknown' THEN total_pickup_hex_12
            END usecase_pickup_hex_12,
            CASE 
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_asc = 1 THEN gross_orders
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_desc != gross_orders_rn_asc THEN next_gross_orders_gross
            WHEN usecase != 'Unknown' THEN gross_orders
            END usecase_gross_orders,
            
            CASE 
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_asc = 1 THEN net_orders
            WHEN usecase = 'Unknown' AND gross_orders_rn_desc = 1 AND gross_orders_rn_desc != gross_orders_rn_asc THEN next_net_orders_gross
            WHEN usecase != 'Unknown' THEN net_orders
            END usecase_net_orders


        FROM 
            view
        WHERE 
            gross_orders_rn_desc = 1
    ) AS a 
    
    LEFT JOIN 
        hex_agg
        ON a.flag = hex_agg.flag
        AND a.pickup_location_hex_8 = hex_agg.pickup_location_hex_8

"""

In [100]:
# Execute the hex-8 use-case query on Presto and preview the first rows.
df_blr_use_case = pd.read_sql(sql=blr_use_case, con=connection)
df_blr_use_case.head(n=3)

Unnamed: 0,flag,city_name,pickup_location_hex_8,usecase,usecase_pickup_hex_12,total_pickup_hex_12,accuracy,usecase_gross_orders,total_gross_orders,usecase_net_orders,total_net_orders
0,gross_level_hex_tag,Bangalore,88601459b7fffff,Unknown,41,41,100.0,56,56,12,12
1,gross_level_hex_tag,Bangalore,88601696b9fffff,Unknown,3,3,100.0,5,5,0,0
2,gross_level_hex_tag,Bangalore,88618920b1fffff,residential,564,1063,53.06,6971,22588,2988,10274


In [None]:
# Persist the raw query result locally; the file name encodes city, service and date window.
raw_export_template = '/Users/rapido/local-datasets/affluence/raw/hex12_residential_raw_data_{}_{}_{}_to_{}.csv'
raw_export_path = raw_export_template.format(city, service, start_date, end_date)
df_blr_use_case.to_csv(raw_export_path, index=False)

In [6]:
# Reload the cached raw query result for the configured city / service / date window.
raw_import_template = '/Users/rapido/local-datasets/affluence/raw/hex12_residential_raw_data_{}_{}_{}_to_{}.csv'
raw_import_path = raw_import_template.format(city, service, start_date, end_date)
df_blr_use_case = pd.read_csv(raw_import_path)

In [7]:
# Work on an independent (deep) copy so the raw frame stays untouched.
df_use_case = df_blr_use_case.copy()
print(df_use_case.shape)

(4800, 11)


In [8]:
# Hex-8 level affluence classification; only the join key and the tag are needed.
df_hex_affluence_tag = pd.read_csv('/Users/rapido/local-datasets/affluence/main/hex_affluence_tag.csv')
df_hex_affluence_tag = df_hex_affluence_tag[['pickup_hex_8', 'affluence_tag']]

# Left join keeps every use-case row; hexes without an affluence entry get a NaN tag.
df_aff_use_case = pd.merge(df_use_case,
                       df_hex_affluence_tag,
                       how = 'left',
                       left_on = ['pickup_location_hex_8'],
                       right_on = ['pickup_hex_8']
                       )

# Reorder columns (affluence_tag first) and drop the redundant `pickup_hex_8` join key.
# The explicit .copy() makes this an independent frame: a later cell adds a column to it, and
# without the copy pandas treats that as assignment on a slice (SettingWithCopyWarning, which
# the blanket warnings filter at the top of the notebook would otherwise hide).
df_aff_use_case = df_aff_use_case[['affluence_tag', 'flag', 'city_name', 'pickup_location_hex_8', 'usecase',
                                   'usecase_pickup_hex_12', 'total_pickup_hex_12', 'accuracy',
                                   'usecase_gross_orders', 'total_gross_orders', 'usecase_net_orders',
                                   'total_net_orders']].copy()
df_aff_use_case.head()

Unnamed: 0,affluence_tag,flag,city_name,pickup_location_hex_8,usecase,usecase_pickup_hex_12,total_pickup_hex_12,accuracy,usecase_gross_orders,total_gross_orders,usecase_net_orders,total_net_orders
0,Less,gross_level_hex_tag,Bangalore,88601459b7fffff,Unknown,41,41,100.0,56,56,12,12
1,High,gross_level_hex_tag,Bangalore,88601696b9fffff,Unknown,3,3,100.0,5,5,0,0
2,High,gross_level_hex_tag,Bangalore,88618920b1fffff,residential,564,1063,53.06,6971,22588,2988,10274
3,Less,gross_level_hex_tag,Bangalore,8861892707fffff,Unknown,226,226,100.0,593,593,266,266
4,Less,geo_level_hex_tag,Bangalore,88601459b7fffff,Unknown,41,41,100.0,56,56,12,12


In [9]:
# Collapse use cases into three buckets: 'residential' and 'Unknown' keep their own label,
# everything else becomes 'non-residential'.
keeps_own_label = df_aff_use_case['usecase'].isin(['residential', 'Unknown'])
df_aff_use_case['residential'] = np.where(keeps_own_label,
                                          df_aff_use_case['usecase'],
                                          'non-residential')

In [10]:
# Sanity check: list the tagging methods present in the data.
df_aff_use_case['flag'].unique()

array(['gross_level_hex_tag', 'geo_level_hex_tag'], dtype=object)

In [11]:
# Spot check one hex-8 to compare the use case chosen by each tagging method.
df_aff_use_case.loc[df_aff_use_case['pickup_location_hex_8'] == '8860145865fffff']

Unnamed: 0,affluence_tag,flag,city_name,pickup_location_hex_8,usecase,usecase_pickup_hex_12,total_pickup_hex_12,accuracy,usecase_gross_orders,total_gross_orders,usecase_net_orders,total_net_orders,residential
1574,High,gross_level_hex_tag,Bangalore,8860145865fffff,leisure,63,252,25.0,206,881,133,586,non-residential
1656,High,geo_level_hex_tag,Bangalore,8860145865fffff,residential,70,252,27.78,200,881,131,586,residential


In [13]:
# NOTE(review): this table is pasted from the system clipboard, so the cell cannot be reproduced
# with Restart & Run All - consider persisting the list to a file and reading that instead.
df_home_hex_8 = pd.read_clipboard()

# Fail fast when the clipboard does not hold the expected table: the merge that follows joins
# on `hex_8` and would otherwise fail later with a less helpful KeyError.
if 'hex_8' not in df_home_hex_8.columns:
    raise ValueError(
        "Clipboard content has no 'hex_8' column; got columns: {}".format(list(df_home_hex_8.columns))
    )
df_home_hex_8

Unnamed: 0,hex_8,residence_tag
0,8861892743fffff,Home
1,88618920d7fffff,Home
2,8860169157fffff,Home
3,886016974bfffff,Home
4,8861892509fffff,Home
...,...,...
1661,88618925c7fffff,Home
1662,8861892f59fffff,Home
1663,8860145827fffff,Home
1664,8861892417fffff,Home


In [14]:
# Attach the home-hex columns to every use-case row (left join: unmatched hexes get NaN).
#
# This cell overwrites its own input, so a second run used to merge onto the already-merged
# frame and produce suffixed duplicates (hex_8_x / hex_8_y, ...). Dropping any columns left
# behind by a previous run first makes the cell idempotent; on a first run nothing is dropped.
home_cols_from_prior_run = [
    col for col in df_home_hex_8.columns
    if col in df_aff_use_case.columns and col != 'pickup_location_hex_8'
]
df_aff_use_case = pd.merge(df_aff_use_case.drop(columns=home_cols_from_prior_run),
                           df_home_hex_8,
                           how = 'left',
                           left_on = ['pickup_location_hex_8'],
                           right_on = ['hex_8']
                          )
df_aff_use_case

Unnamed: 0,affluence_tag,flag,city_name,pickup_location_hex_8,usecase,usecase_pickup_hex_12,total_pickup_hex_12,accuracy,usecase_gross_orders,total_gross_orders,usecase_net_orders,total_net_orders,residential,hex_8,residence_tag
0,Less,gross_level_hex_tag,Bangalore,88601459b7fffff,Unknown,41,41,100.00,56,56,12,12,Unknown,88601459b7fffff,Home
1,High,gross_level_hex_tag,Bangalore,88601696b9fffff,Unknown,3,3,100.00,5,5,0,0,Unknown,88601696b9fffff,Home
2,High,gross_level_hex_tag,Bangalore,88618920b1fffff,residential,564,1063,53.06,6971,22588,2988,10274,residential,88618920b1fffff,Home
3,Less,gross_level_hex_tag,Bangalore,8861892707fffff,Unknown,226,226,100.00,593,593,266,266,Unknown,8861892707fffff,Home
4,Less,geo_level_hex_tag,Bangalore,88601459b7fffff,Unknown,41,41,100.00,56,56,12,12,Unknown,88601459b7fffff,Home
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4795,Less,gross_level_hex_tag,Bangalore,88618922cbfffff,Unknown,44,44,100.00,76,76,22,22,Unknown,,
4796,High,gross_level_hex_tag,Bangalore,8861892439fffff,residential,499,863,57.82,3922,7511,1509,3066,residential,8861892439fffff,Home
4797,High,geo_level_hex_tag,Bangalore,8860144ae5fffff,Unknown,2,2,100.00,2,2,0,0,Unknown,,
4798,High,geo_level_hex_tag,Bangalore,88618920d3fffff,residential,11,11,100.00,27,27,11,11,residential,88618920d3fffff,Home


In [20]:
# Export the enriched hex-8 use-case table (without the index column).
df_aff_use_case.to_csv(
    path_or_buf='/Users/rapido/local-datasets/affluence/final/geo_use_case_all_hex_8.csv',
    index=False,
)

In [15]:
# Export the enriched hex-8 use-case table to the `_hex_81` file (without the index column).
df_aff_use_case.to_csv(
    path_or_buf='/Users/rapido/local-datasets/affluence/final/geo_use_case_all_hex_81.csv',
    index=False,
)

## Analysis

### Approach 1 
    -- Tag each hex 8 with its primary hex_12 use case, i.e. the mode (most frequently occurring use case) among its hex_12 cells
        - Disadvantage -> Geo-based tagging
        
### Approach 2
    -- Tag each hex 8 with the hex_12 use case that contributes the most orders within that hex 8
        - Disadvantage -> Orders based tagging

In [16]:
# Totals per affluence tag and tagging method: distinct hex-8 count plus gross / net order sums.
# These serve as the per-affluence denominators in the analysis cells that follow.
df_aff_agg = (
    df_aff_use_case
    .groupby(['affluence_tag', 'flag'])
    .agg(
        aff_hex_count=('pickup_location_hex_8', 'nunique'),
        aff_gross_orders=('total_gross_orders', 'sum'),
        aff_net_orders=('total_net_orders', 'sum'),
    )
    .reset_index()
)
df_aff_agg

Unnamed: 0,affluence_tag,flag,aff_hex_count,aff_gross_orders,aff_net_orders
0,High,geo_level_hex_tag,1045,2981551,1514059
1,High,gross_level_hex_tag,1045,2981551,1514059
2,Less,geo_level_hex_tag,1205,341655,159593
3,Less,gross_level_hex_tag,1205,341655,159593


In [17]:
## Approach 1
# Keep only the geo-level tags (use case ranked first by number of hex-12 cells).
# The explicit .copy() makes this an independent frame: a later cell adds `city_geo_flag` and
# `density_flag` columns to it, and without the copy pandas treats that as assignment on a
# filtered slice (SettingWithCopyWarning, hidden here by the blanket warnings filter).
df_aff_use_case_type1 = df_aff_use_case[df_aff_use_case['flag'] == 'geo_level_hex_tag'].copy()
df_aff_use_case_type1.head(2)

Unnamed: 0,affluence_tag,flag,city_name,pickup_location_hex_8,usecase,usecase_pickup_hex_12,total_pickup_hex_12,accuracy,usecase_gross_orders,total_gross_orders,usecase_net_orders,total_net_orders,residential,hex_8,residence_tag
4,Less,geo_level_hex_tag,Bangalore,88601459b7fffff,Unknown,41,41,100.0,56,56,12,12,Unknown,88601459b7fffff,Home
5,High,geo_level_hex_tag,Bangalore,886014435bfffff,Unknown,12,12,100.0,18,18,0,0,Unknown,,


In [18]:
# Approach 1 summary: one row per (affluence_tag, usecase) with hex and order totals.
df_type1_analysis = (
    df_aff_use_case_type1
    .groupby(['affluence_tag', 'usecase'])
    .agg(
        no_hexes=('pickup_location_hex_8', 'nunique'),
        gross_orders=('total_gross_orders', 'sum'),
        net_orders=('total_net_orders', 'sum'),
    )
    .reset_index()
)

# City-level shares: each group's percentage of all hexes / gross orders in this table.
all_hexes = df_type1_analysis['no_hexes'].sum()
all_gross = df_type1_analysis['gross_orders'].sum()
df_type1_analysis['city_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / all_hexes).round(2)
df_type1_analysis['city_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / all_gross).round(2)

# Affluence-level denominators from df_aff_agg (geo-level rows only).
# Rows tagged 'High' get the 'High' totals; every other row gets the 'Less' totals.
geo_agg = df_aff_agg[df_aff_agg['flag'] == 'geo_level_hex_tag']
high_agg = geo_agg[geo_agg['affluence_tag'] == 'High']
less_agg = geo_agg[geo_agg['affluence_tag'] == 'Less']
is_high = df_type1_analysis['affluence_tag'] == 'High'
df_type1_analysis['agg_hexes'] = np.where(is_high,
                                          high_agg['aff_hex_count'].sum(),
                                          less_agg['aff_hex_count'].sum())
df_type1_analysis['agg_gross'] = np.where(is_high,
                                          high_agg['aff_gross_orders'].sum(),
                                          less_agg['aff_gross_orders'].sum())

# Shares within the row's own affluence tag.
df_type1_analysis['affluence_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / df_type1_analysis['agg_hexes']).round(2)
df_type1_analysis['affluence_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / df_type1_analysis['agg_gross']).round(2)

report_cols = ['affluence_tag', 'usecase',
               'city_hex_distr', 'affluence_hex_distr',
               'city_gross_distr', 'affluence_gross_distr']
df_type1_analysis[report_cols].sort_values(['affluence_tag', 'affluence_gross_distr'], ascending=False)

Unnamed: 0,affluence_tag,usecase,city_hex_distr,affluence_hex_distr,city_gross_distr,affluence_gross_distr
20,Less,residential,7.11,13.28,3.55,34.5
11,Less,Unknown,41.64,77.76,2.71,26.39
21,Less,transit_station,0.58,1.08,1.42,13.86
12,Less,educational,1.42,2.66,0.64,6.21
18,Less,office,0.76,1.41,0.6,5.83
17,Less,leisure,0.89,1.66,0.46,4.44
16,Less,household_needs,0.4,0.75,0.43,4.16
15,Less,health_and_personal,0.31,0.58,0.42,4.08
13,Less,food,0.27,0.5,0.03,0.32
19,Less,place_of_worship,0.13,0.25,0.02,0.16


Use Kepler to view the 55% of hex 8s whose use case is Unknown.

### Insights 

####  Less affluence
           
           residential - 13% of hexes contribute 34.5% of gross orders
           unknown - 77.76% of hexes contribute 26% of gross orders
           transit_station - 1.08% of hexes contribute 13% of gross orders
            
####    High affluence
            
            residential - 46.89% of hexes contribute 63.48% of gross orders
            office - 4.78% of hexes contribute 12% of gross orders
            leisure - 7.27% of hexes contribute 11% of gross orders

In [19]:
# Approach 1 summary by residential bucket: one row per (affluence_tag, residential).
df_type1_analysis = (
    df_aff_use_case_type1
    .groupby(['affluence_tag', 'residential'])
    .agg(
        no_hexes=('pickup_location_hex_8', 'nunique'),
        gross_orders=('total_gross_orders', 'sum'),
        net_orders=('total_net_orders', 'sum'),
    )
    .reset_index()
)

# City-level shares: each group's percentage of all hexes / gross orders in this table.
all_hexes = df_type1_analysis['no_hexes'].sum()
all_gross = df_type1_analysis['gross_orders'].sum()
df_type1_analysis['city_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / all_hexes).round(2)
df_type1_analysis['city_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / all_gross).round(2)

# Affluence-level denominators from df_aff_agg (geo-level rows only).
# Rows tagged 'High' get the 'High' totals; every other row gets the 'Less' totals.
geo_agg = df_aff_agg[df_aff_agg['flag'] == 'geo_level_hex_tag']
high_agg = geo_agg[geo_agg['affluence_tag'] == 'High']
less_agg = geo_agg[geo_agg['affluence_tag'] == 'Less']
is_high = df_type1_analysis['affluence_tag'] == 'High'
df_type1_analysis['agg_hexes'] = np.where(is_high,
                                          high_agg['aff_hex_count'].sum(),
                                          less_agg['aff_hex_count'].sum())
df_type1_analysis['agg_gross'] = np.where(is_high,
                                          high_agg['aff_gross_orders'].sum(),
                                          less_agg['aff_gross_orders'].sum())

# Shares within the row's own affluence tag.
df_type1_analysis['affluence_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / df_type1_analysis['agg_hexes']).round(2)
df_type1_analysis['affluence_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / df_type1_analysis['agg_gross']).round(2)

report_cols = ['affluence_tag', 'residential', 'no_hexes',
               'city_hex_distr', 'affluence_hex_distr',
               'city_gross_distr', 'affluence_gross_distr']
df_type1_analysis[report_cols].sort_values(['affluence_tag', 'residential'], ascending=False)

Unnamed: 0,affluence_tag,residential,no_hexes,city_hex_distr,affluence_hex_distr,city_gross_distr,affluence_gross_distr
5,Less,residential,160,7.11,13.28,3.55,34.5
4,Less,non-residential,108,4.8,8.96,4.02,39.11
3,Less,Unknown,937,41.64,77.76,2.71,26.39
2,High,residential,490,21.78,46.89,56.96,63.48
1,High,non-residential,222,9.87,21.24,28.74,32.03
0,High,Unknown,333,14.8,31.87,4.02,4.48


In [20]:
# Approach 1 summary at affluence level only: one row per affluence_tag.
df_type1_analysis = (
    df_aff_use_case_type1
    .groupby(['affluence_tag'])
    .agg(
        no_hexes=('pickup_location_hex_8', 'nunique'),
        gross_orders=('total_gross_orders', 'sum'),
        net_orders=('total_net_orders', 'sum'),
    )
    .reset_index()
)

# City-level shares: each group's percentage of all hexes / gross orders in this table.
all_hexes = df_type1_analysis['no_hexes'].sum()
all_gross = df_type1_analysis['gross_orders'].sum()
df_type1_analysis['city_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / all_hexes).round(2)
df_type1_analysis['city_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / all_gross).round(2)

# Affluence-level denominators from df_aff_agg (geo-level rows only).
# Rows tagged 'High' get the 'High' totals; every other row gets the 'Less' totals.
geo_agg = df_aff_agg[df_aff_agg['flag'] == 'geo_level_hex_tag']
high_agg = geo_agg[geo_agg['affluence_tag'] == 'High']
less_agg = geo_agg[geo_agg['affluence_tag'] == 'Less']
is_high = df_type1_analysis['affluence_tag'] == 'High'
df_type1_analysis['agg_hexes'] = np.where(is_high,
                                          high_agg['aff_hex_count'].sum(),
                                          less_agg['aff_hex_count'].sum())
df_type1_analysis['agg_gross'] = np.where(is_high,
                                          high_agg['aff_gross_orders'].sum(),
                                          less_agg['aff_gross_orders'].sum())

# Shares within the row's own affluence tag (100% by construction at this grain).
df_type1_analysis['affluence_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / df_type1_analysis['agg_hexes']).round(2)
df_type1_analysis['affluence_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / df_type1_analysis['agg_gross']).round(2)

report_cols = ['affluence_tag', 'no_hexes',
               'city_hex_distr', 'affluence_hex_distr',
               'city_gross_distr', 'affluence_gross_distr']
df_type1_analysis[report_cols].sort_values(['affluence_tag'], ascending=False)

Unnamed: 0,affluence_tag,no_hexes,city_hex_distr,affluence_hex_distr,city_gross_distr,affluence_gross_distr
1,Less,1205,53.56,100.0,10.28,100.0
0,High,1045,46.44,100.0,89.72,100.0


## Logic

In [21]:
# Hexes whose use case is 'Unknown' are labelled 'Outskirts'; all others 'Intra City'.
has_unknown_usecase = df_aff_use_case_type1['usecase'] == 'Unknown'
df_aff_use_case_type1['city_geo_flag'] = np.where(has_unknown_usecase, 'Outskirts', 'Intra City')

# 'High' density = total gross orders at or above the 0.76 quantile across these hexes.
gross_orders_cutoff = df_aff_use_case_type1['total_gross_orders'].quantile(0.76)
is_dense = df_aff_use_case_type1['total_gross_orders'] >= gross_orders_cutoff
df_aff_use_case_type1['density_flag'] = np.where(is_dense, 'High', 'Less')

df_aff_use_case_type1

Unnamed: 0,affluence_tag,flag,city_name,pickup_location_hex_8,usecase,usecase_pickup_hex_12,total_pickup_hex_12,accuracy,usecase_gross_orders,total_gross_orders,usecase_net_orders,total_net_orders,residential,hex_8,residence_tag,city_geo_flag,density_flag
4,Less,geo_level_hex_tag,Bangalore,88601459b7fffff,Unknown,41,41,100.00,56,56,12,12,Unknown,88601459b7fffff,Home,Outskirts,Less
5,High,geo_level_hex_tag,Bangalore,886014435bfffff,Unknown,12,12,100.00,18,18,0,0,Unknown,,,Outskirts,Less
6,Less,geo_level_hex_tag,Bangalore,8860144a59fffff,Unknown,1,1,100.00,2,2,0,0,Unknown,,,Outskirts,Less
7,High,geo_level_hex_tag,Bangalore,88601451c1fffff,Unknown,3,3,100.00,5,5,0,0,Unknown,,,Outskirts,Less
8,Less,geo_level_hex_tag,Bangalore,88601451d7fffff,Unknown,77,77,100.00,235,235,55,55,Unknown,88601451d7fffff,Home,Outskirts,Less
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4775,High,geo_level_hex_tag,Bangalore,8860145b05fffff,leisure,372,677,54.95,1427,3529,881,2129,non-residential,8860145b05fffff,Home,Intra City,High
4776,High,geo_level_hex_tag,Bangalore,886189273dfffff,residential,168,252,66.67,764,1328,324,606,residential,886189273dfffff,Home,Intra City,High
4797,High,geo_level_hex_tag,Bangalore,8860144ae5fffff,Unknown,2,2,100.00,2,2,0,0,Unknown,,,Outskirts,Less
4798,High,geo_level_hex_tag,Bangalore,88618920d3fffff,residential,11,11,100.00,27,27,11,11,residential,88618920d3fffff,Home,Intra City,Less


In [22]:
# Approach 1 summary by order density: one row per (affluence_tag, density_flag).
df_type1_analysis = (
    df_aff_use_case_type1
    .groupby(['affluence_tag', 'density_flag'])
    .agg(
        no_hexes=('pickup_location_hex_8', 'nunique'),
        gross_orders=('total_gross_orders', 'sum'),
        net_orders=('total_net_orders', 'sum'),
    )
    .reset_index()
)

# City-level shares: each group's percentage of all hexes / gross orders in this table.
all_hexes = df_type1_analysis['no_hexes'].sum()
all_gross = df_type1_analysis['gross_orders'].sum()
df_type1_analysis['city_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / all_hexes).round(2)
df_type1_analysis['city_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / all_gross).round(2)

# Affluence-level denominators from df_aff_agg (geo-level rows only).
# Rows tagged 'High' get the 'High' totals; every other row gets the 'Less' totals.
geo_agg = df_aff_agg[df_aff_agg['flag'] == 'geo_level_hex_tag']
high_agg = geo_agg[geo_agg['affluence_tag'] == 'High']
less_agg = geo_agg[geo_agg['affluence_tag'] == 'Less']
is_high = df_type1_analysis['affluence_tag'] == 'High'
df_type1_analysis['agg_hexes'] = np.where(is_high,
                                          high_agg['aff_hex_count'].sum(),
                                          less_agg['aff_hex_count'].sum())
df_type1_analysis['agg_gross'] = np.where(is_high,
                                          high_agg['aff_gross_orders'].sum(),
                                          less_agg['aff_gross_orders'].sum())

# Shares within the row's own affluence tag.
df_type1_analysis['affluence_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / df_type1_analysis['agg_hexes']).round(2)
df_type1_analysis['affluence_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / df_type1_analysis['agg_gross']).round(2)

report_cols = ['affluence_tag', 'density_flag', 'no_hexes',
               'city_hex_distr', 'affluence_hex_distr',
               'city_gross_distr', 'affluence_gross_distr']
df_type1_analysis[report_cols].sort_values(['affluence_tag', 'density_flag'], ascending=False)

Unnamed: 0,affluence_tag,density_flag,no_hexes,city_hex_distr,affluence_hex_distr,city_gross_distr,affluence_gross_distr
3,Less,Less,1138,50.58,94.44,3.99,38.83
2,Less,High,67,2.98,5.56,6.29,61.17
1,High,Less,536,23.82,51.29,3.58,3.99
0,High,High,509,22.62,48.71,86.14,96.01


In [27]:
# Approach 1 summary by density and geography: one row per
# (affluence_tag, density_flag, city_geo_flag).
df_type1_analysis = (
    df_aff_use_case_type1
    .groupby(['affluence_tag', 'density_flag', 'city_geo_flag'])
    .agg(
        no_hexes=('pickup_location_hex_8', 'nunique'),
        gross_orders=('total_gross_orders', 'sum'),
        net_orders=('total_net_orders', 'sum'),
    )
    .reset_index()
)

# City-level shares: each group's percentage of all hexes / gross orders in this table.
all_hexes = df_type1_analysis['no_hexes'].sum()
all_gross = df_type1_analysis['gross_orders'].sum()
df_type1_analysis['city_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / all_hexes).round(2)
df_type1_analysis['city_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / all_gross).round(2)

# Affluence-level denominators from df_aff_agg (geo-level rows only).
# Rows tagged 'High' get the 'High' totals; every other row gets the 'Less' totals.
geo_agg = df_aff_agg[df_aff_agg['flag'] == 'geo_level_hex_tag']
high_agg = geo_agg[geo_agg['affluence_tag'] == 'High']
less_agg = geo_agg[geo_agg['affluence_tag'] == 'Less']
is_high = df_type1_analysis['affluence_tag'] == 'High'
df_type1_analysis['agg_hexes'] = np.where(is_high,
                                          high_agg['aff_hex_count'].sum(),
                                          less_agg['aff_hex_count'].sum())
df_type1_analysis['agg_gross'] = np.where(is_high,
                                          high_agg['aff_gross_orders'].sum(),
                                          less_agg['aff_gross_orders'].sum())

# Shares within the row's own affluence tag.
df_type1_analysis['affluence_hex_distr'] = (df_type1_analysis['no_hexes'] * 100.0 / df_type1_analysis['agg_hexes']).round(2)
df_type1_analysis['affluence_gross_distr'] = (df_type1_analysis['gross_orders'] * 100.0 / df_type1_analysis['agg_gross']).round(2)

report_cols = ['affluence_tag', 'density_flag', 'city_geo_flag', 'no_hexes',
               'city_hex_distr', 'affluence_hex_distr',
               'city_gross_distr', 'affluence_gross_distr']
df_type1_analysis[report_cols].sort_values(['affluence_tag', 'city_geo_flag'], ascending=False)

Unnamed: 0,affluence_tag,density_flag,city_geo_flag,no_hexes,city_hex_distr,affluence_hex_distr,city_gross_distr,affluence_gross_distr
5,Less,High,Outskirts,10,0.44,0.83,0.49,4.78
7,Less,Less,Outskirts,927,41.2,76.93,2.22,21.61
4,Less,High,Intra City,57,2.53,4.73,5.8,56.39
6,Less,Less,Intra City,211,9.38,17.51,1.77,17.22
1,High,High,Outskirts,31,1.38,2.97,3.23,3.6
3,High,Less,Outskirts,302,13.42,28.9,0.79,0.88
0,High,High,Intra City,478,21.24,45.74,82.91,92.41
2,High,Less,Intra City,234,10.4,22.39,2.79,3.11


In [33]:
# Candidate hexes: less affluent, high order density, inside the city.
is_less_affluent = df_aff_use_case_type1['affluence_tag'] == 'Less'
is_high_density = df_aff_use_case_type1['density_flag'] == 'High'
is_intra_city = df_aff_use_case_type1['city_geo_flag'] == 'Intra City'

df_exp_final = df_aff_use_case_type1[is_less_affluent & is_high_density & is_intra_city]
df_exp_final[['city_name', 'affluence_tag', 'pickup_location_hex_8', 'usecase', 'residential']]

Unnamed: 0,city_name,affluence_tag,pickup_location_hex_8,usecase,residential
91,Bangalore,Less,8861892e2dfffff,residential,residential
195,Bangalore,Less,8860145a55fffff,health_and_personal,non-residential
229,Bangalore,Less,8861892dd9fffff,residential,residential
302,Bangalore,Less,8861892dd5fffff,residential,residential
303,Bangalore,Less,8861892eb7fffff,residential,residential
395,Bangalore,Less,886016966dfffff,residential,residential
450,Bangalore,Less,8860169669fffff,office,non-residential
479,Bangalore,Less,8860145a2bfffff,transit_station,non-residential
482,Bangalore,Less,8861892cd9fffff,residential,residential
606,Bangalore,Less,8861892e3bfffff,residential,residential
