In [1]:
import os
import warnings
from datetime import datetime, timedelta

import h3 as h3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keplergl import KeplerGl
from pyhive import presto
from scipy import stats

warnings.filterwarnings('ignore')

In [2]:
# Widen pandas' display limits so wide/long frames are not truncated early.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 500

In [3]:
## Connection
# Presto connection used by the pd.read_sql calls below.
# Host and username can be overridden through the PRESTO_HOST and
# PRESTO_USERNAME environment variables so the notebook runs for other users
# without editing code; when they are unset the original values are used.
connection = presto.connect(
    host=os.environ.get('PRESTO_HOST', 'presto-gateway.serving.data.production.internal'),
    port=80,
    protocol='http',
    catalog='hive',
    username=os.environ.get('PRESTO_USERNAME', 'manoj.ravirajan@rapido.bike'),
)

In [8]:
## Analysis window and city
# Inclusive yyyymmdd bounds and the city filter interpolated into the queries below.
start_date, end_date = '20230703', '20230730'
city = 'Bangalore'

In [9]:
## Home-tagged hexes from canonical.clevertap_customer_fare_estimate
# A hex is kept when, inside the analysis window and city, it appears as a drop
# or a pickup whose type mentions home/house/residence and whose selection mode
# mentions favourites. UNION combines the drop-side and pickup-side hexes into
# one de-duplicated list.
home_tag_hex_8 = f"""
       select 
           drop_location_hex_8 hex_8 
        from 
            canonical.clevertap_customer_fare_estimate
        where 
            yyyymmdd>='{start_date}' and yyyymmdd<='{end_date}'
            and current_city='{city}'
            and ((lower(drop_type) like '%home%') or (lower(drop_type) like '%house%') or (lower(drop_type) like '%residence%'))
            and lower(eventprops_dropselectionmode) like '%favourites%'
        group by 1

        union

        select 
            pickup_location_hex_8 hex_8 
        from 
            canonical.clevertap_customer_fare_estimate
        where 
            yyyymmdd>='{start_date}' and yyyymmdd<='{end_date}'
            and current_city='{city}'
            and ((lower(pickup_type) like '%home%') or (lower(pickup_type) like '%house%') or (lower(pickup_type) like '%residence%'))
            and lower(event_props_pickup_selection_mode) like '%favourites%'
        group by 1
"""

# Run the query over the Presto connection and display the resulting hex list.
df_home_tag_hex_8 = pd.read_sql(sql=home_tag_hex_8, con=connection)
df_home_tag_hex_8

Unnamed: 0,hex_8
0,8861892c6dfffff
1,8860144b4bfffff
2,8861892e95fffff
3,886014511dfffff
4,886016955bfffff
...,...
1695,8861892433fffff
1696,8861892437fffff
1697,8861892dc9fffff
1698,8860145169fffff


In [30]:
# Label every home-tagged hex as 'Home' and copy the table (without its index)
# to the system clipboard.
# NOTE(review): this cell was recorded as In [30], i.e. it ran after the merge
# cells further down (In [18]-[21]); their recorded outputs therefore have no
# residence_tag column, whereas a top-to-bottom run will carry it into them.
df_home_tag_hex_8 = df_home_tag_hex_8.assign(residence_tag='Home')
df_home_tag_hex_8.to_clipboard(index=False)

In [14]:
## Link fare-estimate counts per pickup hex from pricing.fare_estimates_enriched
# Each pickup hex is labelled with its cluster through a LEFT JOIN on
# datasets.city_cluster_hex; distinct fare_estimate_id values are then counted
# per (city, cluster, hex) for the analysis window and city.
city_hex_8 = f"""
       SELECT 
            city_name,
            pickup_location,
            pickup_location_hex_8,
            COUNT(DISTINCT fare_estimate_id) fe_count
        FROM 
        (
        SELECT
            fe_ench.yyyymmdd AS yyyymmdd,
            fe_ench.city AS city_name,
            fe_ench.service_name AS service_name,
            fe_ench.service_detail_id AS service_detail_id,
            pic.cluster AS pickup_location,
            fe_ench.pickup_location_hex_8 AS pickup_location_hex_8,
            fe_ench.fare_estimate_id AS fare_estimate_id
        FROM
            pricing.fare_estimates_enriched fe_ench
            
        LEFT JOIN
                datasets.city_cluster_hex AS pic
                ON fe_ench.pickup_location_hex_8 = pic.hex_id    
            
        WHERE
            fe_ench.yyyymmdd >= '{start_date}'
            AND fe_ench.yyyymmdd <= '{end_date}'
            AND fe_ench.service_name IN ('Link') 
            AND fe_ench.city = '{city}'
        )

        GROUP BY 1,2,3
       
"""

# Run the query over the Presto connection and display the per-hex counts.
df_city_hex_8 = pd.read_sql(sql=city_hex_8, con=connection)
df_city_hex_8

Unnamed: 0,city_name,pickup_location,pickup_location_hex_8,fe_count
0,Bangalore,Mallasandra,8860145149fffff,3689
1,Bangalore,Attibele,88618934e5fffff,2714
2,Bangalore,Doddaballapura,8860169707fffff,283
3,Bangalore,Nagasandra,88601459d1fffff,761
4,Bangalore,Chikkabanavara,88601459bdfffff,658
...,...,...,...,...
3075,Bangalore,Majestic,8860145b55fffff,280560
3076,Bangalore,Chikkagobbi,8861892d11fffff,195
3077,Bangalore,Hoskote_BLR,8861892149fffff,141
3078,Bangalore,Bidadi,8860144155fffff,18


In [16]:
df_city_hex_8.fe_count.describe()

count      3080.000000
mean       8887.715260
std       24398.512898
min           1.000000
25%          26.000000
50%         234.500000
75%        5327.500000
max      343881.000000
Name: fe_count, dtype: float64

In [18]:
# Left-join the home-tagged hex list onto the city hexes; hex_8 stays NaN for
# city hexes that have no entry in the home-tagged list.
df_home_tag_merge = df_city_hex_8.merge(
    df_home_tag_hex_8,
    how='left',
    left_on='pickup_location_hex_8',
    right_on='hex_8',
)
df_home_tag_merge

Unnamed: 0,city_name,pickup_location,pickup_location_hex_8,fe_count,hex_8
0,Bangalore,Mallasandra,8860145149fffff,3689,8860145149fffff
1,Bangalore,Attibele,88618934e5fffff,2714,88618934e5fffff
2,Bangalore,Doddaballapura,8860169707fffff,283,8860169707fffff
3,Bangalore,Nagasandra,88601459d1fffff,761,88601459d1fffff
4,Bangalore,Chikkabanavara,88601459bdfffff,658,88601459bdfffff
...,...,...,...,...,...
3075,Bangalore,Majestic,8860145b55fffff,280560,8860145b55fffff
3076,Bangalore,Chikkagobbi,8861892d11fffff,195,
3077,Bangalore,Hoskote_BLR,8861892149fffff,141,
3078,Bangalore,Bidadi,8860144155fffff,18,


In [19]:
# Flag each city hex: 'yes' when the joined hex_8 equals the pickup hex (the
# left join found a home-tagged match), 'no' otherwise (hex_8 is NaN).
df_home_tag_merge['home_tag'] = np.where(
    df_home_tag_merge['pickup_location_hex_8'].eq(df_home_tag_merge['hex_8']),
    'yes',
    'no',
)
df_home_tag_merge

Unnamed: 0,city_name,pickup_location,pickup_location_hex_8,fe_count,hex_8,home_tag
0,Bangalore,Mallasandra,8860145149fffff,3689,8860145149fffff,yes
1,Bangalore,Attibele,88618934e5fffff,2714,88618934e5fffff,yes
2,Bangalore,Doddaballapura,8860169707fffff,283,8860169707fffff,yes
3,Bangalore,Nagasandra,88601459d1fffff,761,88601459d1fffff,yes
4,Bangalore,Chikkabanavara,88601459bdfffff,658,88601459bdfffff,yes
...,...,...,...,...,...,...
3075,Bangalore,Majestic,8860145b55fffff,280560,8860145b55fffff,yes
3076,Bangalore,Chikkagobbi,8861892d11fffff,195,,no
3077,Bangalore,Hoskote_BLR,8861892149fffff,141,,no
3078,Bangalore,Bidadi,8860144155fffff,18,,no


In [20]:
# Hex-level affluence tags loaded from a local CSV. The location can be
# overridden with the HEX_AFFLUENCE_TAG_CSV environment variable so the cell is
# not tied to one machine; when it is unset the original path is used.
hex_affluence_tag_path = os.environ.get(
    'HEX_AFFLUENCE_TAG_CSV',
    '/Users/rapido/local-datasets/affluence/main/hex_affluence_tag.csv',
)
df_hex_affluence_tag = pd.read_csv(hex_affluence_tag_path)
# Keep only the city, hex and affluence-tag columns.
df_hex_affluence_tag = df_hex_affluence_tag[['city_name', 'pickup_hex_8', 'affluence_tag']]
df_hex_affluence_tag

Unnamed: 0,city_name,pickup_hex_8,affluence_tag
0,Bangalore,88618920a3fffff,High
1,Bangalore,8861892581fffff,High
2,Bangalore,886189258bfffff,High
3,Bangalore,886189258dfffff,High
4,Bangalore,8861892425fffff,High
...,...,...,...
2680,Bangalore,8860145955fffff,High
2681,Bangalore,8860145957fffff,High
2682,Bangalore,8860145959fffff,High
2683,Bangalore,886014595bfffff,High


In [21]:
# Left-join the affluence tags onto the home-flagged city hexes. Both frames
# carry a city_name column, so pandas suffixes them as city_name_x/city_name_y;
# hexes without an affluence row keep NaN in the right-hand columns.
df_home_aff_merge = df_home_tag_merge.merge(
    df_hex_affluence_tag,
    how='left',
    left_on='pickup_location_hex_8',
    right_on='pickup_hex_8',
)
df_home_aff_merge

Unnamed: 0,city_name_x,pickup_location,pickup_location_hex_8,fe_count,hex_8,home_tag,city_name_y,pickup_hex_8,affluence_tag
0,Bangalore,Mallasandra,8860145149fffff,3689,8860145149fffff,yes,Bangalore,8860145149fffff,High
1,Bangalore,Attibele,88618934e5fffff,2714,88618934e5fffff,yes,Bangalore,88618934e5fffff,Less
2,Bangalore,Doddaballapura,8860169707fffff,283,8860169707fffff,yes,Bangalore,8860169707fffff,Less
3,Bangalore,Nagasandra,88601459d1fffff,761,88601459d1fffff,yes,Bangalore,88601459d1fffff,High
4,Bangalore,Chikkabanavara,88601459bdfffff,658,88601459bdfffff,yes,Bangalore,88601459bdfffff,Less
...,...,...,...,...,...,...,...,...,...
3075,Bangalore,Majestic,8860145b55fffff,280560,8860145b55fffff,yes,Bangalore,8860145b55fffff,Less
3076,Bangalore,Chikkagobbi,8861892d11fffff,195,,no,Bangalore,8861892d11fffff,Less
3077,Bangalore,Hoskote_BLR,8861892149fffff,141,,no,Bangalore,8861892149fffff,Less
3078,Bangalore,Bidadi,8860144155fffff,18,,no,,,


In [27]:
# Summarise by (affluence_tag, home_tag): number of distinct pickup hexes and
# total fare estimates per group.
# Rows whose affluence_tag is missing fall out here because groupby drops NaN
# keys by default, so the percentages below are over tagged hexes only.
df_home_tag = (
    df_home_aff_merge
    .groupby(['affluence_tag', 'home_tag'])
    .agg(
        hex_count=('pickup_location_hex_8', 'nunique'),
        fe_count=('fe_count', 'sum'),
    )
    .reset_index()
)

# Express each group's hex and fare-estimate totals as a percentage of the
# respective column total.
df_home_tag['hex_distribution'] = df_home_tag['hex_count'].mul(100.0).div(df_home_tag['hex_count'].sum())
df_home_tag['fe_distribution'] = df_home_tag['fe_count'].mul(100.0).div(df_home_tag['fe_count'].sum())

df_home_tag.round(2)

Unnamed: 0,affluence_tag,home_tag,hex_count,fe_count,hex_distribution,fe_distribution
0,High,no,255,16994,9.5,0.06
1,High,yes,882,24072796,32.85,88.01
2,Less,no,790,76757,29.42,0.28
3,Less,yes,758,3186155,28.23,11.65


##### fe_contribution of home-tagged hexes (home_tag = yes): 99.66 % (88.01 % High + 11.65 % Less)