In [1]:
import os
import warnings
from datetime import datetime, timedelta

import h3 as h3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keplergl import KeplerGl
from pyhive import presto
from scipy import stats

warnings.filterwarnings('ignore')

In [2]:
# Raise the DataFrame display limits: show up to 100 columns and 500 rows
# before pandas truncates the rendered output.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 500

In [3]:
## Connection
# Open a Presto DB-API connection (hive catalog) that the queries in this
# notebook pass to pd.read_sql.
# The username is taken from the PRESTO_USERNAME environment variable so the
# notebook can be run by other users without editing this cell; when the
# variable is unset, the original hard-coded account is used, so existing
# behaviour is unchanged.
connection = presto.connect(
    host='presto-gateway.serving.data.production.internal',
    port=80,
    protocol='http',
    catalog='hive',
    username=os.environ.get('PRESTO_USERNAME', 'manoj.ravirajan@rapido.bike'),
)

In [4]:
## Analysis parameters
# Inclusive yyyymmdd window (the queries compare with >= and <=) and the city
# that the SQL queries in this notebook interpolate into their WHERE clauses.
start_date, end_date = '20230703', '20230730'
city = 'Bangalore'

In [5]:
## Source table: canonical.clevertap_customer_fare_estimate
# Build the list of distinct "home" hexes for `city` within the inclusive
# [start_date, end_date] window:
#   * first SELECT  - drop_location_hex_8 of rows whose drop_type contains
#                     'home', 'house' or 'residence' (case-insensitive) and
#                     whose eventprops_dropselectionmode contains 'favourites'
#   * second SELECT - pickup_location_hex_8 of rows whose pickup_type matches
#                     the same keywords and whose
#                     event_props_pickup_selection_mode contains 'favourites'
# UNION (rather than UNION ALL) removes hexes that appear in both branches.
# NOTE(review): the *_hex_8 columns are presumably H3 resolution-8 cell ids - confirm.
home_tag_hex_8 = f"""
       select 
           drop_location_hex_8 hex_8 
        from 
            canonical.clevertap_customer_fare_estimate
        where 
            yyyymmdd>='{start_date}' and yyyymmdd<='{end_date}'
            and current_city='{city}'
            and ((lower(drop_type) like '%home%') or (lower(drop_type) like '%house%') or (lower(drop_type) like '%residence%'))
            and lower(eventprops_dropselectionmode) like '%favourites%'
        group by 1

        union

        select 
            pickup_location_hex_8 hex_8 
        from 
            canonical.clevertap_customer_fare_estimate
        where 
            yyyymmdd>='{start_date}' and yyyymmdd<='{end_date}'
            and current_city='{city}'
            and ((lower(pickup_type) like '%home%') or (lower(pickup_type) like '%house%') or (lower(pickup_type) like '%residence%'))
            and lower(event_props_pickup_selection_mode) like '%favourites%'
        group by 1
"""

# Run the query over the Presto connection; the result has a single hex_8 column.
df_home_tag_hex_8 = pd.read_sql(home_tag_hex_8, connection)
df_home_tag_hex_8

Unnamed: 0,hex_8
0,886189272bfffff
1,8861892f09fffff
2,886014c915fffff
3,8860145857fffff
4,8860169469fffff
...,...
1695,8861892c33fffff
1696,8861892507fffff
1697,8860145817fffff
1698,8861893737fffff


In [6]:
# Every hex returned by the home-tag query gets the constant label 'Home'.
df_home_tag_hex_8 = df_home_tag_hex_8.assign(residence_tag='Home')

# Copy the labelled table (without the index) to the system clipboard.
df_home_tag_hex_8.to_clipboard(excel=True, index=False)

In [7]:
## Source tables: pricing.fare_estimates_enriched (fe_ench) LEFT JOIN datasets.city_cluster_hex (pic)
# Count distinct fare estimates per (city_name, pickup_location, pickup_location_hex_8)
# for the 'Link' service in `city` within the inclusive [start_date, end_date] window.
# pickup_location is the cluster looked up by pickup hex; because the join is a
# LEFT JOIN, hexes with no row in city_cluster_hex are kept with a NULL cluster.
# The inner SELECT also projects yyyymmdd, service_name and service_detail_id,
# which the outer query does not use.
# NOTE(review): if city_cluster_hex can hold more than one row per hex_id, the
# join would produce one output row per cluster for that hex - verify uniqueness.
city_hex_8 = f"""
       SELECT 
            city_name,
            pickup_location,
            pickup_location_hex_8,
            COUNT(DISTINCT fare_estimate_id) fe_count
        FROM 
        (
        SELECT
            fe_ench.yyyymmdd AS yyyymmdd,
            fe_ench.city AS city_name,
            fe_ench.service_name AS service_name,
            fe_ench.service_detail_id AS service_detail_id,
            pic.cluster AS pickup_location,
            fe_ench.pickup_location_hex_8 AS pickup_location_hex_8,
            fe_ench.fare_estimate_id AS fare_estimate_id
        FROM
            pricing.fare_estimates_enriched fe_ench
            
        LEFT JOIN
                datasets.city_cluster_hex AS pic
                ON fe_ench.pickup_location_hex_8 = pic.hex_id    
            
        WHERE
            fe_ench.yyyymmdd >= '{start_date}'
            AND fe_ench.yyyymmdd <= '{end_date}'
            AND fe_ench.service_name IN ('Link') 
            AND fe_ench.city = '{city}'
        )

        GROUP BY 1,2,3
       
"""

# Run the query over the Presto connection and display the per-hex counts.
df_city_hex_8 = pd.read_sql(city_hex_8, connection)
df_city_hex_8

Unnamed: 0,city_name,pickup_location,pickup_location_hex_8,fe_count
0,Bangalore,South Bidadi,88601450e1fffff,12
1,Bangalore,Bidadi,886014420dfffff,71
2,Bangalore,Nelamangala,8860144a09fffff,3
3,Bangalore,Huskur,8861892743fffff,903
4,Bangalore,Bannerghatta Zoo,886014c911fffff,71
...,...,...,...,...
3075,Bangalore,Kurudusonnehalli,8861892f59fffff,14978
3076,Bangalore,Peenya,8860145827fffff,5989
3077,Bangalore,Arekere Extension,8861892417fffff,40064
3078,Bangalore,Mallasandra,8860145321fffff,1644


In [8]:
# Summary statistics of the per-hex fare-estimate counts.
df_city_hex_8['fe_count'].describe()

count      3080.000000
mean       8887.715260
std       24398.512898
min           1.000000
25%          26.000000
50%         234.500000
75%        5327.500000
max      343881.000000
Name: fe_count, dtype: float64

In [9]:
# Attach the home-tag columns to every city hex. A left join keeps hexes that
# have no home tag; their hex_8 / residence_tag values come back as NaN.
df_home_tag_merge = df_city_hex_8.merge(
    df_home_tag_hex_8,
    how='left',
    left_on=['pickup_location_hex_8'],
    right_on=['hex_8'],
)
df_home_tag_merge

Unnamed: 0,city_name,pickup_location,pickup_location_hex_8,fe_count,hex_8,residence_tag
0,Bangalore,South Bidadi,88601450e1fffff,12,,
1,Bangalore,Bidadi,886014420dfffff,71,,
2,Bangalore,Nelamangala,8860144a09fffff,3,,
3,Bangalore,Huskur,8861892743fffff,903,8861892743fffff,Home
4,Bangalore,Bannerghatta Zoo,886014c911fffff,71,,
...,...,...,...,...,...,...
3075,Bangalore,Kurudusonnehalli,8861892f59fffff,14978,8861892f59fffff,Home
3076,Bangalore,Peenya,8860145827fffff,5989,8860145827fffff,Home
3077,Bangalore,Arekere Extension,8861892417fffff,40064,8861892417fffff,Home
3078,Bangalore,Mallasandra,8860145321fffff,1644,8860145321fffff,Home


In [16]:
# Flag each row 'yes' when the merged hex_8 equals the pickup hex, else 'no'.
# A NaN hex_8 (no match in the left join) never compares equal, so it yields 'no'.
df_home_tag_merge['home_tag'] = np.where(
    df_home_tag_merge['hex_8'].eq(df_home_tag_merge['pickup_location_hex_8']),
    'yes',
    'no',
)
df_home_tag_merge

Unnamed: 0,city_name,pickup_location,pickup_location_hex_8,fe_count,hex_8,residence_tag,home_tag
0,Bangalore,South Bidadi,88601450e1fffff,12,,,no
1,Bangalore,Bidadi,886014420dfffff,71,,,no
2,Bangalore,Nelamangala,8860144a09fffff,3,,,no
3,Bangalore,Huskur,8861892743fffff,903,8861892743fffff,Home,yes
4,Bangalore,Bannerghatta Zoo,886014c911fffff,71,,,no
...,...,...,...,...,...,...,...
3075,Bangalore,Kurudusonnehalli,8861892f59fffff,14978,8861892f59fffff,Home,yes
3076,Bangalore,Peenya,8860145827fffff,5989,8860145827fffff,Home,yes
3077,Bangalore,Arekere Extension,8861892417fffff,40064,8861892417fffff,Home,yes
3078,Bangalore,Mallasandra,8860145321fffff,1644,8860145321fffff,Home,yes


In [17]:
# Load the per-hex affluence tags from the local CSV and keep only the three
# columns used by the later merge.
df_hex_affluence_tag = pd.read_csv(
    '/Users/rapido/local-datasets/affluence/main/hex_affluence_tag.csv'
).loc[:, ['city_name', 'pickup_hex_8', 'affluence_tag']]
df_hex_affluence_tag

Unnamed: 0,city_name,pickup_hex_8,affluence_tag
0,Bangalore,88618920a3fffff,High
1,Bangalore,8861892581fffff,High
2,Bangalore,886189258bfffff,High
3,Bangalore,886189258dfffff,High
4,Bangalore,8861892425fffff,High
...,...,...,...
2680,Bangalore,8860145955fffff,High
2681,Bangalore,8860145957fffff,High
2682,Bangalore,8860145959fffff,High
2683,Bangalore,886014595bfffff,High


In [12]:
# Add the affluence tag to each city hex. A left join keeps hexes that are
# missing from the affluence file (affluence_tag is NaN for those rows).
# Both inputs carry a city_name column, so pandas suffixes them _x / _y.
df_home_aff_merge = df_home_tag_merge.merge(
    df_hex_affluence_tag,
    how='left',
    left_on=['pickup_location_hex_8'],
    right_on=['pickup_hex_8'],
)
df_home_aff_merge

Unnamed: 0,city_name_x,pickup_location,pickup_location_hex_8,fe_count,hex_8,residence_tag,home_tag,city_name_y,pickup_hex_8,affluence_tag
0,Bangalore,South Bidadi,88601450e1fffff,12,,,no,Bangalore,88601450e1fffff,Less
1,Bangalore,Bidadi,886014420dfffff,71,,,no,Bangalore,886014420dfffff,Less
2,Bangalore,Nelamangala,8860144a09fffff,3,,,no,,,
3,Bangalore,Huskur,8861892743fffff,903,8861892743fffff,Home,yes,Bangalore,8861892743fffff,Less
4,Bangalore,Bannerghatta Zoo,886014c911fffff,71,,,no,Bangalore,886014c911fffff,Less
...,...,...,...,...,...,...,...,...,...,...
3075,Bangalore,Kurudusonnehalli,8861892f59fffff,14978,8861892f59fffff,Home,yes,Bangalore,8861892f59fffff,High
3076,Bangalore,Peenya,8860145827fffff,5989,8860145827fffff,Home,yes,Bangalore,8860145827fffff,Less
3077,Bangalore,Arekere Extension,8861892417fffff,40064,8861892417fffff,Home,yes,Bangalore,8861892417fffff,High
3078,Bangalore,Mallasandra,8860145321fffff,1644,8860145321fffff,Home,yes,Bangalore,8860145321fffff,Less


In [19]:
# Show only the rows that matched a home-tagged hex (hex_8 is not null).
df_home_aff_merge.loc[df_home_aff_merge['hex_8'].notnull(), ['hex_8', 'residence_tag']]

Unnamed: 0,hex_8,residence_tag
3,8861892743fffff,Home
7,88618920d7fffff,Home
9,8860169157fffff,Home
10,886016974bfffff,Home
12,8861892509fffff,Home
...,...,...
3074,88618925c7fffff,Home
3075,8861892f59fffff,Home
3076,8860145827fffff,Home
3077,8861892417fffff,Home


In [13]:
# Distinct hexes and total fare estimates per (affluence_tag, home_tag) bucket.
# groupby runs with its default dropna=True, so rows whose affluence_tag is NaN
# are excluded from this table and from the totals used below.
df_home_tag = (
    df_home_aff_merge
    .groupby(['affluence_tag', 'home_tag'])
    .agg(
        hex_count=('pickup_location_hex_8', 'nunique'),
        fe_count=('fe_count', 'sum'),
    )
    .reset_index()
)

# Each bucket's share of the column total, in percent.
df_home_tag['hex_distribution'] = (
    df_home_tag['hex_count'].mul(100.0).div(df_home_tag['hex_count'].sum())
)
df_home_tag['fe_distribution'] = (
    df_home_tag['fe_count'].mul(100.0).div(df_home_tag['fe_count'].sum())
)

df_home_tag.round(2)

Unnamed: 0,affluence_tag,home_tag,hex_count,fe_count,hex_distribution,fe_distribution
0,High,no,255,16994,9.5,0.06
1,High,yes,882,24072796,32.85,88.01
2,Less,no,790,76757,29.42,0.28
3,Less,yes,758,3186155,28.23,11.65


##### fe_contribution of home-tagged hexes: 99.66 % of fare estimates (88.01 % High + 11.65 % Less affluence)

In [14]:
## Source table: canonical.clevertap_customer_fare_estimate
# One row per distinct (city, customer_id, home_tag_customers) for `city` within
# the inclusive [start_date, end_date] window.
# home_tag_customers is the user_id when the row's drop (first SELECT) or pickup
# (second SELECT) type contains 'home', 'house' or 'residence' and the matching
# selection-mode column contains 'favourites'; the CASE has no ELSE, so every
# other row yields NULL.
# A customer can therefore appear twice in the result: once with a NULL marker
# and once with their own id.
# UNION (rather than UNION ALL) removes rows duplicated across the two branches.
home_tag_customers = f"""
        select 
            current_city city,
            user_id customer_id, 
            case 
            when ((lower(drop_type) like '%home%') or (lower(drop_type) like '%house%') or (lower(drop_type) like '%residence%')) and lower(eventprops_dropselectionmode) like '%favourites%' 
            then user_id end as home_tag_customers
        from 
            canonical.clevertap_customer_fare_estimate
        where 
            yyyymmdd>='{start_date}' and yyyymmdd<='{end_date}'
            and current_city='{city}'
        group by 1,2,3

        union

        select 
            current_city city,
            user_id customer_id,
            case 
            when ((lower(pickup_type) like '%home%') or (lower(pickup_type) like '%house%') or (lower(pickup_type) like '%residence%')) and lower(event_props_pickup_selection_mode) like '%favourites%'
            then user_id end as home_tag_customers

        from 
            canonical.clevertap_customer_fare_estimate
        where 
            yyyymmdd>='{start_date}' and yyyymmdd<='{end_date}'
            and current_city='{city}'
        group by 1,2,3
"""

# Run the query over the Presto connection and display the customer-level rows.
df_home_tag_customers = pd.read_sql(home_tag_customers, connection)
df_home_tag_customers

Unnamed: 0,city,customer_id,home_tag_customers
0,Bangalore,6406e43501c25d0a382dfbec,6406e43501c25d0a382dfbec
1,Bangalore,630a025fe3fa3c5db753fca3,
2,Bangalore,63b65ff38ec1984c2293c44b,
3,Bangalore,64b7883bd7d8b46802b93d81,
4,Bangalore,6235f6508689dbb7b1804cc9,6235f6508689dbb7b1804cc9
...,...,...,...
2178068,Bangalore,62916f7c0b92511f02bb8a65,
2178069,Bangalore,62b31278d7ecef3ec60f0ade,
2178070,Bangalore,5d6772b6d0286d106d6a94bf,
2178071,Bangalore,5d637f90d0286d106d64d03a,


In [15]:
# Constant label so the groupby below collapses the frame into one summary row.
# NOTE: the label is hard-coded; update it when start_date / end_date change.
df_home_tag_customers['date_range'] = '2023-July'

# nunique ignores NaN, so home_tag_customers counts only customers whose
# marker column is populated, while total_fe_customers counts every customer_id.
df_home_tag_customers_coverage = (
    df_home_tag_customers
    .groupby(['date_range'])
    .agg(
        total_fe_customers=('customer_id', 'nunique'),
        home_tag_customers=('home_tag_customers', 'nunique'),
    )
    .reset_index()
)

# Home-tagged customers as a percentage of all fare-estimate customers.
df_home_tag_customers_coverage['coverage_percentage'] = (
    df_home_tag_customers_coverage['home_tag_customers']
    .mul(100.0)
    .div(df_home_tag_customers_coverage['total_fe_customers'])
)
df_home_tag_customers_coverage.round(2)

Unnamed: 0,date_range,total_fe_customers,home_tag_customers,coverage_percentage
0,2023-July,1987664,190447,9.58
