## <center> Packages & Connection </center>

In [1]:
import h3 as h3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from pyhive import presto
from keplergl import KeplerGl
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Widen pandas' display limits so wide/long frames are not truncated too early.
for option_name, option_value in {
    'display.max_columns': 100,
    'display.max_rows': 500,
}.items():
    pd.set_option(option_name, option_value)

## <center> Locally extracted files </center>

In [3]:
# Paths of the two local CSV extracts (relative to the notebook's working directory).
all_data_csv = 'hex_8_all_data_2023_07_10_to_16.csv'
hsr_yesh_csv = 'hex_8_data_2023_07_10_to_16.csv'

# Load each extract into its own DataFrame.
refined_hex_8_all_data = pd.read_csv(all_data_csv)
refined_hex_8_hsr_yesh = pd.read_csv(hsr_yesh_csv)

In [4]:
# Report (rows, columns) of both extracts, one per line.
print(
    refined_hex_8_all_data.shape,
    refined_hex_8_hsr_yesh.shape,
    sep='\n',
)

(4032, 29)
(30, 29)


In [5]:
# Keep only the rows whose service is 'Link'.
# The explicit .copy() gives the filtered frame its own data: new columns are
# assigned onto hex_8_all_data later, and without the copy those assignments
# are writes to a slice of refined_hex_8_all_data (SettingWithCopyWarning,
# which the global warnings.filterwarnings('ignore') would silently hide).
hex_8_all_data = refined_hex_8_all_data[
    refined_hex_8_all_data['service_name'].isin(['Link'])
].copy()
hex_8_all_data.columns

Index(['city', 'service_name', 'pickup_cluster', 'pickup_hex_8',
       'fe_cus_count', 'fe_count', 'requested_orders', 'cobrm',
       'expiry_mapped', 'cobra', 'accepted_orders', 'ocara', 'net_orders',
       'aor', 'fe2rr', 'fe2net', 'taxi_high_income', 'taxi_medium_income',
       'taxi_low_income', 'link_only_service', 'auto_only_service',
       'both_service', 'link_ps', 'link_nps', 'auto_ps', 'auto_nps',
       'fe_intent_stable', 'fe_intent_increasing', 'fe_intent_declining'],
      dtype='object')

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the Link-only frame.
hex_8_all_data.describe()

Unnamed: 0,fe_cus_count,fe_count,requested_orders,cobrm,expiry_mapped,cobra,accepted_orders,ocara,net_orders,aor,fe2rr,fe2net,taxi_high_income,taxi_medium_income,taxi_low_income,link_only_service,auto_only_service,both_service,link_ps,link_nps,auto_ps,auto_nps,fe_intent_stable,fe_intent_increasing,fe_intent_declining
count,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1998.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0,1999.0
mean,1025.482741,3759.492246,675.464232,2.057529,40.968984,131.326663,502.478739,111.851426,271.029515,62.354354,18.061531,5.417709,404.630815,272.687844,51.189095,98.815408,144.536268,645.574287,139.452726,245.38019,221.26013,363.391696,337.763882,403.431216,170.871436
std,2103.330224,8212.575206,1432.721151,3.915677,120.463644,314.321455,1026.933039,232.362412,557.164357,29.021819,8.953372,3.944067,894.232561,537.595619,96.525761,189.521964,292.914341,1383.512775,281.119674,520.950135,486.036235,773.491357,678.963273,848.749936,377.831765
min,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,22.0,62.5,8.0,0.0,1.0,1.0,4.0,1.0,1.0,50.0,14.0,2.0,5.0,5.0,1.0,3.0,2.0,8.0,2.0,3.0,2.0,4.0,5.0,6.0,2.0
50%,143.0,453.0,78.0,1.0,5.0,13.0,57.0,16.0,26.0,73.0,18.0,6.0,43.0,39.0,8.0,19.0,18.0,70.0,18.0,28.0,21.0,40.0,43.0,49.0,18.0
75%,1090.5,3643.0,675.5,3.0,21.0,101.0,537.0,118.0,287.0,82.0,21.0,8.0,391.0,308.5,61.0,118.0,152.5,655.5,151.0,244.5,211.0,364.0,367.5,415.0,161.0
max,25922.0,100666.0,14122.0,77.0,1368.0,2937.0,10310.0,2898.0,5596.0,100.0,100.0,50.0,12122.0,6695.0,1232.0,2966.0,3669.0,17808.0,3152.0,6463.0,6398.0,10067.0,8480.0,10943.0,4981.0


## <center> Adding new required columns </center>

In [7]:
# Denominator shared by all percentage columns: customers per hexagon row.
customer_count = hex_8_all_data['fe_cus_count']

# Share of high-income customers (rounded %), plus the median of that share
# broadcast to every row so it can be compared row-wise below.
hex_8_all_data['high_income_%'] = (hex_8_all_data['taxi_high_income'] * 100 / customer_count).round()
hex_8_all_data['high_income_thrshld'] = hex_8_all_data['high_income_%'].median()

# Share of customers using only Link / only Auto (rounded %).
hex_8_all_data['link_only_service_%'] = (hex_8_all_data['link_only_service'] * 100 / customer_count).round()
hex_8_all_data['auto_only_service_%'] = (hex_8_all_data['auto_only_service'] * 100 / customer_count).round()

# 'Less' when the high-income share is at or below the median threshold, else 'High'.
at_or_below_threshold = hex_8_all_data['high_income_%'] <= hex_8_all_data['high_income_thrshld']
hex_8_all_data['affluence_hi_tag'] = np.where(at_or_below_threshold, 'Less', 'High')

# 'Yes' when the Link-only share strictly exceeds the Auto-only share, else 'No'.
link_only_dominates = hex_8_all_data['link_only_service_%'] > hex_8_all_data['auto_only_service_%']
hex_8_all_data['service_aff_tag'] = np.where(link_only_dominates, 'Yes', 'No')

In [8]:
# Preview the first five rows, including the derived percentage and tag columns.
hex_8_all_data.head(5)

Unnamed: 0,city,service_name,pickup_cluster,pickup_hex_8,fe_cus_count,fe_count,requested_orders,cobrm,expiry_mapped,cobra,accepted_orders,ocara,net_orders,aor,fe2rr,fe2net,taxi_high_income,taxi_medium_income,taxi_low_income,link_only_service,auto_only_service,both_service,link_ps,link_nps,auto_ps,auto_nps,fe_intent_stable,fe_intent_increasing,fe_intent_declining,high_income_%,high_income_thrshld,link_only_service_%,auto_only_service_%,affluence_hi_tag,service_aff_tag
0,Bangalore,Link,Hoskote_BLR,8861893893fffff,40,48,2,0,0,0,0,0,0,0.0,4.0,0.0,10,8,0,5,7,15,4,6,3,7,10,11,4,25.0,32.0,12.0,18.0,Less,No
2,Bangalore,Link,Attibele,8861893713fffff,112,268,21,2,1,10,8,4,2,38.0,8.0,1.0,23,20,5,12,7,35,7,9,13,13,25,24,4,21.0,32.0,11.0,6.0,Less,Yes
3,Bangalore,Link,Attibele,88618936a7fffff,4,22,2,0,0,2,0,0,0,0.0,9.0,0.0,1,1,0,1,0,2,0,0,0,1,1,2,0,25.0,32.0,25.0,0.0,Less,Yes
5,Bangalore,Link,Sarjapur,88618935ebfffff,11,57,9,4,0,0,4,2,2,44.0,16.0,4.0,5,1,2,1,1,7,1,3,0,2,2,4,3,45.0,32.0,9.0,9.0,High,No
7,Bangalore,Link,Sarjapur,88618935e9fffff,1,1,1,0,1,0,1,0,0,100.0,100.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,32.0,0.0,0.0,Less,No


In [16]:
## Filtering required columns

# .copy() detaches the column subset from hex_8_all_data, so adding 'flag'
# below is a plain column assignment on an independent frame rather than a
# write to a slice (SettingWithCopyWarning, hidden by the global warning filter).
df_hex_8_all_data = hex_8_all_data[[
    'city', 'pickup_cluster', 'pickup_hex_8',
    'fe_count', 'requested_orders', 'net_orders',
    'high_income_%', 'high_income_thrshld',
    'link_only_service_%', 'auto_only_service_%',
    'affluence_hi_tag', 'service_aff_tag',
]].copy()

# 'Low' only when the row is tagged affluence 'Less' AND service affinity 'Yes';
# every other combination is flagged 'High'.
df_hex_8_all_data['flag'] = np.where(
    df_hex_8_all_data['affluence_hi_tag'].isin(['Less'])
    & df_hex_8_all_data['service_aff_tag'].isin(['Yes']),
    'Low',
    'High',
)

In [17]:
## Final data: the selected columns plus the derived 'flag' column
## (pandas truncates the rendered output according to display.max_rows)

df_hex_8_all_data

Unnamed: 0,city,pickup_cluster,pickup_hex_8,fe_count,requested_orders,net_orders,high_income_%,high_income_thrshld,link_only_service_%,auto_only_service_%,affluence_hi_tag,service_aff_tag,flag
0,Bangalore,Hoskote_BLR,8861893893fffff,48,2,0,25.0,32.0,12.0,18.0,Less,No,High
2,Bangalore,Attibele,8861893713fffff,268,21,2,21.0,32.0,11.0,6.0,Less,Yes,Low
3,Bangalore,Attibele,88618936a7fffff,22,2,0,25.0,32.0,25.0,0.0,Less,Yes,Low
5,Bangalore,Sarjapur,88618935ebfffff,57,9,2,45.0,32.0,9.0,9.0,High,No,High
7,Bangalore,Sarjapur,88618935e9fffff,1,1,0,0.0,32.0,0.0,0.0,Less,No,High
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4017,Bangalore,Bidadi,886014413dfffff,12,1,0,60.0,32.0,0.0,20.0,High,No,High
4021,Bangalore,Bidadi,8860144135fffff,60,1,0,34.0,32.0,7.0,76.0,High,No,High
4022,Bangalore,Bidadi,8860144125fffff,15,2,0,0.0,32.0,0.0,0.0,Less,No,High
4024,Bangalore,Bidadi,8860144113fffff,3,2,0,0.0,32.0,0.0,0.0,Less,No,High


In [18]:
# Number of distinct hexagons per (cluster, affluence tag, service-affinity tag).
tag_keys = ['pickup_cluster', 'affluence_hi_tag', 'service_aff_tag']
cluster_high_low = (
    df_hex_8_all_data
    .groupby(tag_keys)['pickup_hex_8']
    .nunique()
    .reset_index()
)
cluster_high_low

Unnamed: 0,pickup_cluster,affluence_hi_tag,service_aff_tag,pickup_hex_8
0,AECS Layout Brookefield,High,No,1
1,Adugodi,High,No,2
2,Adugodi Traffic Station,High,No,1
3,Agara Lake,High,No,2
4,Akshaynagar,High,No,4
5,Akshaynagar Lake,High,No,1
6,Anepalya,High,No,1
7,Anepalya,Less,No,1
8,Anjanapura,High,No,6
9,Anjanapura,High,Yes,1


In [19]:
# Per-flag totals: distinct hexagons, and summed fe_count / requested / net orders.
(
    df_hex_8_all_data
    .groupby(['flag'])
    .agg(
        pickup_hex_8=('pickup_hex_8', 'nunique'),
        fe_count=('fe_count', 'sum'),
        requested_orders=('requested_orders', 'sum'),
        net_orders=('net_orders', 'sum'),
    )
    .reset_index()
)

Unnamed: 0,flag,pickup_hex_8,fe_count,requested_orders,net_orders
0,High,1469,7080046,1275902,513905
1,Low,530,435179,74351,27883
