In [1]:
import os
import warnings
from datetime import datetime, timedelta

import h3 as h3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keplergl import KeplerGl
from pyhive import presto
from scipy import stats

warnings.filterwarnings('ignore')

In [2]:
# Raise pandas' display limits so wide / long frames are rendered with less truncation.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000

## Connection

In [3]:
## Connection
# Presto connection used by every query cell below.
# Host and username fall back to the values this notebook was written with, but
# can be overridden through the PRESTO_HOST / PRESTO_USERNAME environment
# variables so other people can re-run the notebook under their own identity.
connection = presto.connect(
        host=os.environ.get('PRESTO_HOST', 'presto-gateway.serving.data.production.internal'),
        port=80,
        protocol='http',
        catalog='hive',
        username=os.environ.get('PRESTO_USERNAME', 'manoj.ravirajan@rapido.bike')
)

## Dataset

In [4]:
## Generate date range

# Analysis window (both ends inclusive) plus the city / service under study.
start_date = datetime(2023, 10, 1)
end_date = datetime(2023, 10, 5)
city = 'Lucknow'
service = 'Link'

# yyyymmdd strings used in the CSV file names. They are derived from the
# datetimes above (instead of being typed a second time) so the two
# representations cannot drift apart when the window is edited.
startdate = start_date.strftime('%Y%m%d')
enddate = end_date.strftime('%Y%m%d')

# One datetime per calendar day in the window; the query cells loop over it.
date_range = [start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)]

In [5]:
## datasets.service_mapping

# Look up the service_detail_id for the configured city / service pair.
service_mapping = f"""
        SELECT 
            city_display_name AS city,
            service_level AS service_name,
            service_detail_id,
            city_id,
            service_id
        FROM 
            datasets.service_mapping
        WHERE 
            city_display_name = '{city}'
            AND service_level = '{service}'
"""

df_service_mapping = pd.read_sql(service_mapping, connection)

# Fail with a readable message instead of an opaque KeyError when the
# city / service combination does not exist in the mapping table.
if df_service_mapping.empty:
    raise ValueError(f"No service mapping found for city={city!r}, service={service!r}")

# First matching row is used, as before.
service_detail_id = df_service_mapping['service_detail_id'].iloc[0]
service_detail_id

'5c1248c10241913903a42fd6'

In [None]:
## pricing.fare_estimates_enriched & Order_logs_snapshot
#
# For every day in `date_range`, run one Presto query that
#   * takes that day's fare estimates for two hard-coded service_detail_ids,
#   * left-joins the resolution-8 cluster name of the pickup hex,
#   * left-joins that day's order log rows on (yyyymmdd, service_detail_id, fare_estimate_id),
#   * aggregates to one row per day / city / service / cluster / hex / customer
#     with distinct counts of fare estimates and of orders per outcome.
# The per-day frames are collected in `fe_orders_data` and concatenated at the end.
#
# NOTE(review): the service_detail_ids are literals inside the SQL rather than
# coming from `service_detail_id`; the first literal equals the value shown by the
# service-mapping cell, the second shows up as 'Bike Lite' in the previewed output.
# NOTE(review): the `fare_estimates` CTE selects quarter_hour / time_period / hour and
# the `orders` CTE selects customer_id / discount / sub_total / rate_card_amount, none
# of which are referenced by the final SELECT.
# NOTE(review): `requested_orders`, `net_orders` and `accepted_orders` are counted
# without the `row_number = 1` filter that the other order-state counts use —
# confirm this asymmetry is intended.

fe_orders_data = []

for date in date_range:
    # Partition key of the day being pulled, e.g. '20231001'.
    date_value = date.strftime('%Y%m%d')
    query = f""" 
                WITH city_cluster_hex AS (

                        SELECT
                            cch.hex_id AS hex_id,
                            cch.cluster AS cluster
                        FROM
                            datasets.city_cluster_hex cch
                        WHERE
                            cch.resolution = 8
                            AND cch.cluster != ''
                    ),

                    fare_estimates AS (

                        SELECT
                            fe_ench.yyyymmdd AS yyyymmdd,
                            fe_ench.quarter_hour AS quarter_hour,
                            CASE 
                            WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) >= 8 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) <= 11 THEN '1.Morning Peak'
                            WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) >= 17 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) <= 21 THEN '3.Evening Peak'
                            WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) > 11 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) < 17 THEN '2.Afternoon'
                            ELSE '4.Rest' END AS time_period,
                            CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) AS hour,
                            fe_ench.city AS city_name,
                            fe_ench.service_name AS service_name,
                            fe_ench.service_detail_id AS service_detail_id,
                            fe_ench.pickup_location_hex_8 AS pickup_location_hex_8,
                            fe_ench.fare_estimate_id AS fare_estimate_id,
                            fe_ench.user_id  AS customer_id

                        FROM
                            pricing.fare_estimates_enriched fe_ench
                        WHERE
                            fe_ench.yyyymmdd = '{date_value}'
                            AND fe_ench.service_detail_id IN
                                ('5c1248c10241913903a42fd6', '649564c80b573a42c10a4a44')
                    ),

                    orders AS (

                        SELECT
                            ols.yyyymmdd AS yyyymmdd,
                            ols.service_detail_id AS service_detail_id,
                            ols.customer_id AS customer_id,
                            ols.estimate_id AS fare_estimate_id,
                            ols.order_status AS order_status,
                            ols.order_id AS order_id,
                            ols.spd_fraud_flag AS spd_fraud_flag,
                            ols.discount AS discount,
                            ols.sub_total AS sub_total,
                            ols.rate_card_amount AS rate_card_amount,
                            ols.accept_to_pickup_distance  AS accept_to_pickup_distance,
                            ROW_NUMBER() OVER (PARTITION BY ols.order_id ORDER BY ols.updated_epoch DESC) AS row_number,
                            CASE 
                            WHEN ols.order_status IN ('dropped') AND ols.spd_fraud_flag != True THEN 'net_orders'
                            WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason IN ('order cancelled before rider accepted') THEN 'cobra'
                            WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason IN ('Order cancelled before rider was mapped') THEN 'cobrm'
                            WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason NOT IN ('order cancelled before rider accepted', 'Order cancelled before rider was mapped') THEN 'ocara'
                            WHEN ols.order_status IN ('expired') AND length(ols.map_riders) < 28 THEN 'stockout'
                            WHEN ols.order_status IN ('expired') AND length(ols.map_riders) >= 28 THEN 'expiry_mapped' 
                            ELSE 'Other' 
                            END AS order_state

                        FROM
                            orders.order_logs_snapshot ols
                        WHERE
                            ols.yyyymmdd = '{date_value}'
                            AND ols.service_detail_id IN
                                ('5c1248c10241913903a42fd6', '649564c80b573a42c10a4a44')
                    )


                        SELECT 
                            fe.yyyymmdd AS yyyymmdd,
                            fe.city_name AS city_name,
                            fe.service_name AS service_name,
                            fe.service_detail_id AS service_detail_id,
                            pic.cluster AS pickup_location,
                            fe.pickup_location_hex_8 AS pickup_hex_8,
                            fe.customer_id AS fe_customer_id,
                            COUNT(DISTINCT fe.fare_estimate_id) AS fe_count,
                            COUNT(DISTINCT order_id) AS requested_orders,
                            COUNT(DISTINCT CASE WHEN order_status = 'dropped' AND spd_fraud_flag != true THEN order_id END) net_orders,

                            COUNT(DISTINCT CASE WHEN order_state IN ('cobra') AND row_number = 1 THEN order_id END) AS cobra,
                            COUNT(DISTINCT CASE WHEN order_state IN ('ocara') AND row_number = 1 THEN order_id END) AS ocara,
                            COUNT(DISTINCT CASE WHEN accept_to_pickup_distance > 0 THEN order_id END) AS accepted_orders,
                            COUNT(DISTINCT CASE WHEN order_state IN ('cobrm') AND row_number = 1 THEN order_id END) AS cobrm,
                            COUNT(DISTINCT CASE WHEN order_state IN ('stockout') AND row_number = 1 THEN order_id END) AS stockout,
                            COUNT(DISTINCT CASE WHEN order_state IN ('expiry_mapped') AND row_number = 1 THEN order_id END) AS expiry_mapped

                        FROM
                            fare_estimates fe
                        LEFT JOIN
                            city_cluster_hex pic
                            ON fe.pickup_location_hex_8 = pic.hex_id

                        LEFT JOIN
                            orders ord
                            ON fe.yyyymmdd = ord.yyyymmdd
                            AND fe.service_detail_id = ord.service_detail_id
                            AND fe.fare_estimate_id = ord.fare_estimate_id

                        GROUP BY 1,2,3,4,5,6,7

            """
    # One query per day; each day's result is kept as its own frame.
    df_temp = pd.read_sql(query, connection)
    fe_orders_data.append(df_temp)

## Concatenate all the results into a single DataFrame
# Each day's frame keeps its own index, so index labels repeat across days.
raw_fe_orders_data = pd.concat(fe_orders_data)

In [None]:
# Preview the first rows of the concatenated fare-estimate / order pull.
raw_fe_orders_data.head()

In [None]:
# Save the raw FE / orders pull locally (without the index) so later cells can
# reload it instead of querying Presto again.
fe_orders_csv_path = '/Users/rapido/local-datasets/affluence/lucknow/raw_fe_orders_data_{}_{}_{}_{}.csv'.format(
    city, service, startdate, enddate
)
raw_fe_orders_data.to_csv(fe_orders_csv_path, index=False)

In [None]:
## datasets.iallocator_customer_segments
#
# For every day in `date_range`, pull one row per customer from the segments table.
# Each segment column carries the customer_id when the customer belongs to that
# segment and NULL otherwise, so a later `nunique` counts distinct customers per segment.
#
# The original city filter listed `link_lifetime_last_ride_city = '{city}'` twice;
# the duplicate is removed here, which does not change the rows returned.
# TODO(review): the second copy was probably meant to be an auto-specific
# last-ride-city column — confirm against the table schema before adding it.

iallocator_customer_segments = []

for date in date_range:
    # run_date value of the day being pulled, formatted as yyyymmdd.
    date_value = date.strftime('%Y%m%d')
    cs_query = f""" 
                SELECT 
                    DATE_FORMAT(cast(run_date as date), '%Y%m%d') yyyymmdd,
                    customer_id,
                    CASE WHEN taxi_income_segment = 'HIGH_INCOME' THEN customer_id END AS taxi_high_income,
                    CASE WHEN taxi_income_segment = 'MEDIUM_INCOME' THEN customer_id END AS taxi_medium_income,
                    CASE WHEN taxi_income_segment = 'LOW_INCOME' THEN customer_id END AS taxi_low_income,

                    CASE WHEN customer_service_segments = 'LINK_ONLY' THEN customer_id END AS link_only_service,
                    CASE WHEN customer_service_segments = 'AUTO_ONLY' THEN customer_id END AS auto_only_service,
                    CASE WHEN customer_service_segments = 'BOTH' THEN customer_id END AS both_service,

                    CASE WHEN ps_tag_link = 'PS' THEN customer_id END AS link_ps,
                    CASE WHEN ps_tag_link = 'NPS' THEN customer_id END AS link_nps,

                    CASE WHEN ps_tag_auto = 'PS' THEN customer_id END AS auto_ps,
                    CASE WHEN ps_tag_auto = 'NPS' THEN customer_id END AS auto_nps

                    -- CASE WHEN fe_intent_trend_type = 'Stable' THEN customer_id END AS fe_intent_stable,
                    -- CASE WHEN fe_intent_trend_type = 'Increasing' THEN customer_id END AS fe_intent_increasing,
                    -- CASE WHEN fe_intent_trend_type = 'Declining' THEN customer_id END AS fe_intent_declining

                FROM 
                    datasets.iallocator_customer_segments
                WHERE
                    DATE_FORMAT(cast(run_date as date), '%Y%m%d') = '{date_value}'
                    AND taxi_recency_segment != 'INACTIVE'
                    AND (taxi_lifetime_last_ride_city = '{city}' 
                        OR 
                        link_lifetime_last_ride_city = '{city}')
            """
    df_temp = pd.read_sql(cs_query, connection)
    iallocator_customer_segments.append(df_temp)

# Concatenate all the results into a single DataFrame.
# ignore_index gives the combined frame unique row labels instead of repeating
# each day's 0..n-1 index.
raw_iallocator_customer_segments = pd.concat(iallocator_customer_segments, ignore_index=True)

In [None]:
# Preview the first rows of the concatenated customer-segment pull.
raw_iallocator_customer_segments.head()

In [None]:
# Save the raw customer-segment pull locally (without the index) so later cells
# can reload it instead of querying Presto again.
customer_segments_csv_path = '/Users/rapido/local-datasets/affluence/lucknow/raw_iallocator_customer_segments_{}_{}_{}_{}.csv'.format(
    city, service, startdate, enddate
)
raw_iallocator_customer_segments.to_csv(customer_segments_csv_path, index=False)

In [6]:
## Read back csv

# Reload both raw extracts from local disk; the file names are built from the
# same city / service / date parameters that were used when writing them.
fe_orders_csv = '/Users/rapido/local-datasets/affluence/lucknow/raw_fe_orders_data_{}_{}_{}_{}.csv'.format(
    city, service, startdate, enddate
)
customer_segments_csv = '/Users/rapido/local-datasets/affluence/lucknow/raw_iallocator_customer_segments_{}_{}_{}_{}.csv'.format(
    city, service, startdate, enddate
)

raw_fe_orders_data = pd.read_csv(fe_orders_csv)
raw_iallocator_customer_segments = pd.read_csv(customer_segments_csv)

In [7]:
# Work on deep copies (the default for .copy()) so the raw frames stay untouched.
df_fe_orders_data = raw_fe_orders_data.copy()
df_affinity_data = raw_iallocator_customer_segments.copy()

# Report (rows, columns) of each working frame, one per line.
print(df_fe_orders_data.shape, df_affinity_data.shape, sep='\n')

(356177, 16)
(1271825, 12)


In [8]:
# Peek at two rows of the FE / orders working frame.
df_fe_orders_data.head(2)

Unnamed: 0,yyyymmdd,city_name,service_name,service_detail_id,pickup_location,pickup_hex_8,fe_customer_id,fe_count,requested_orders,net_orders,cobra,ocara,accepted_orders,cobrm,stockout,expiry_mapped
0,20231001,Lucknow,Link,5c1248c10241913903a42fd6,Golf City,883d8d5283fffff,5cecccb525ee3218d4e64c55,1,0,0,0,0,0,0,0,0
1,20231001,Lucknow,Bike Lite,649564c80b573a42c10a4a44,Preeti Nagar,883d8dcf53fffff,5d122807d6cbfc6a9db2c9ef,1,0,0,0,0,0,0,0,0


In [9]:
# Peek at two rows of the customer-segment working frame; a segment column holds
# the customer_id when the customer is in that segment and is empty otherwise.
df_affinity_data.head(2)

Unnamed: 0,yyyymmdd,customer_id,taxi_high_income,taxi_medium_income,taxi_low_income,link_only_service,auto_only_service,both_service,link_ps,link_nps,auto_ps,auto_nps
0,20231001,63f234a6dfeed31003e4ce3b,,63f234a6dfeed31003e4ce3b,,,,63f234a6dfeed31003e4ce3b,,,,
1,20231001,63f37f54ba42560cb2c34c0f,,63f37f54ba42560cb2c34c0f,,63f37f54ba42560cb2c34c0f,,,63f37f54ba42560cb2c34c0f,,,


## Merge FE/Orders and Customer Segments 

In [10]:
# Attach each customer's same-day segment flags to the FE / order rows.
# A left join keeps every FE row even when no segment record exists for the customer.
df_raw_merge = df_fe_orders_data.merge(
    df_affinity_data,
    how='left',
    left_on=['yyyymmdd', 'fe_customer_id'],
    right_on=['yyyymmdd', 'customer_id'],
)
df_raw_merge.shape

(356177, 27)

In [11]:
# Roll the customer-level rows up to one row per service x cluster x hex.
group_keys = ['city_name', 'service_name', 'service_detail_id', 'pickup_location', 'pickup_hex_8']

# Volume metrics are additive across customers and days, so they are summed.
summed_cols = [
    'fe_count', 'requested_orders', 'net_orders', 'cobra', 'ocara',
    'accepted_orders', 'cobrm', 'stockout', 'expiry_mapped',
]

# Segment columns contain the customer_id only for customers in that segment,
# so nunique yields the number of distinct customers per segment.
segment_cols = [
    'taxi_high_income', 'taxi_medium_income', 'taxi_low_income',
    'link_only_service', 'auto_only_service', 'both_service',
    'link_ps', 'link_nps', 'auto_ps', 'auto_nps',
]

# Output column -> (input column, aggregation); insertion order fixes the column order.
agg_spec = {'fe_cus_count': ('fe_customer_id', 'nunique')}
agg_spec.update({col: (col, 'sum') for col in summed_cols})
agg_spec.update({col: (col, 'nunique') for col in segment_cols})

df_combained = df_raw_merge.groupby(group_keys).agg(**agg_spec).reset_index()
df_combained.shape

(1642, 25)

In [12]:
# Display the aggregated frame (one row per service x cluster x hex).
df_combained

Unnamed: 0,city_name,service_name,service_detail_id,pickup_location,pickup_hex_8,fe_cus_count,fe_count,requested_orders,net_orders,cobra,ocara,accepted_orders,cobrm,stockout,expiry_mapped,taxi_high_income,taxi_medium_income,taxi_low_income,link_only_service,auto_only_service,both_service,link_ps,link_nps,auto_ps,auto_nps
0,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc21fffff,519,1265,244,176,18,47,223,0,2,0,167,157,42,199,9,187,60,102,14,47
1,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc25fffff,534,1309,205,133,12,60,194,0,0,0,179,175,39,197,6,212,77,102,20,54
2,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc27fffff,284,820,130,101,6,22,126,0,1,0,67,101,25,90,10,106,36,64,15,32
3,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc2dfffff,267,730,129,92,8,29,123,0,0,0,76,96,31,98,7,111,33,64,21,29
4,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcd11fffff,28,71,9,7,2,0,7,0,0,0,3,14,2,11,1,8,5,4,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1637,Lucknow,Link,5c1248c10241913903a42fd6,Vrindavan,883d8d52e7fffff,193,463,88,57,4,26,85,0,0,0,48,65,11,81,3,51,17,24,3,12
1638,Lucknow,Link,5c1248c10241913903a42fd6,Yahiyaganj,883d8dcf21fffff,362,1083,248,135,19,93,233,0,1,0,105,122,25,171,8,98,36,68,8,22
1639,Lucknow,Link,5c1248c10241913903a42fd6,Yahiyaganj,883d8dcf23fffff,598,1737,335,234,16,82,320,3,0,0,171,195,44,267,10,162,60,102,21,47
1640,Lucknow,Link,5c1248c10241913903a42fd6,Yahiyaganj,883d8dcf27fffff,396,1206,278,166,26,75,256,1,9,0,115,145,33,170,10,134,48,71,16,40


In [13]:
# Number of distinct pickup hexes present in the aggregated frame.
df_combained['pickup_hex_8'].nunique()

821

In [14]:
# Distinct pickup hexes per service.
df_combained.groupby(['service_name'])[['pickup_hex_8']].nunique()

Unnamed: 0_level_0,pickup_hex_8
service_name,Unnamed: 1_level_1
Bike Lite,821
Link,821


In [15]:
# Distinct pickup hexes per service.
# NOTE(review): this cell repeats the previous cell verbatim and can be removed.
df_combained.groupby(['service_name']).agg({'pickup_hex_8':'nunique'})

Unnamed: 0_level_0,pickup_hex_8
service_name,Unnamed: 1_level_1
Bike Lite,821
Link,821


In [17]:
# Tag every pickup hex by the service that received more requested orders:
#   'outskirts' - no requested orders for either service
#   'bike_lite' - Bike Lite has strictly more requested orders than Link
#   'link'      - otherwise (Link has more, or a tie)
#
# The pivot is taken on a single value column so its columns are already flat.
# (The previous version flattened a MultiIndex by writing the frame to the system
# clipboard and reading it back, which fails on headless machines, overwrites
# the user's clipboard and is not reproducible.)
test = (
    df_combained
    .groupby(['service_name', 'pickup_hex_8'])
    .agg({'requested_orders': 'sum'})
    .reset_index()
    .pivot(index='pickup_hex_8', columns='service_name', values='requested_orders')
    # Guarantee both service columns exist even if one service has no rows.
    .reindex(columns=['Bike Lite', 'Link'])
    # A hex seen for only one service has no orders for the other one.
    .fillna(0)
    .astype(int)
    .rename(columns={'Bike Lite': 'bike_lite_rr', 'Link': 'link_rr'})
    .rename_axis(columns=None)
    .reset_index()
)

# Conditions are evaluated in order; the first match wins, the default covers the rest.
test['service_tag'] = np.select(
    [
        (test['bike_lite_rr'] == 0) & (test['link_rr'] == 0),
        test['bike_lite_rr'] > test['link_rr'],
    ],
    ['outskirts', 'bike_lite'],
    default='link',
)

# Only the hex -> tag mapping is needed downstream.
test = test[['pickup_hex_8', 'service_tag']]
test

Unnamed: 0,pickup_hex_8,service_tag
1,883d8c2693fffff,link
2,883d8c2697fffff,bike_lite
3,883d8c269bfffff,outskirts
4,883d8c26b3fffff,link
5,883d8c26bbfffff,link
6,883d8d1813fffff,bike_lite
7,883d8d1815fffff,outskirts
8,883d8d1823fffff,outskirts
9,883d8d1825fffff,link
10,883d8d1827fffff,link


In [18]:
# Fare-estimate counts per hex, with one column per service for side-by-side comparison.
fe_by_service_hex = (
    df_combained
    .groupby(['service_name', 'pickup_hex_8'])
    .agg({'fe_count': 'sum'})
    .reset_index()
)
fe_by_service_hex.pivot(index='pickup_hex_8', columns='service_name', values=['fe_count'])

Unnamed: 0_level_0,fe_count,fe_count
service_name,Bike Lite,Link
pickup_hex_8,Unnamed: 1_level_2,Unnamed: 2_level_2
883d8c2693fffff,15,16
883d8c2697fffff,29,28
883d8c269bfffff,3,3
883d8c26b3fffff,12,13
883d8c26bbfffff,28,28
883d8d1813fffff,3,3
883d8d1815fffff,1,1
883d8d1823fffff,3,3
883d8d1825fffff,35,37
883d8d1827fffff,43,43


## Analysing affluence_hi_tag - Less 


      - Threshold: the median of the high-income customer %
      - Demand should be high: at least above the Q3 level
      

In [19]:
# Share of FE customers in the high-income segment, as a whole-number percentage.
high_income_share = (df_combained['taxi_high_income'] * 100 / df_combained['fe_cus_count']).round()
df_combained['high_income_%'] = high_income_share

# One global cut-off: the median share over all rows of the frame.
df_combained['high_income_thrshld'] = high_income_share.median()

# Rows at or below the cut-off are 'Less' affluent, everything else is 'High'.
# The tag is assigned per row (service x hex), so a hex can get different tags
# for different services.
is_less_affluent = high_income_share <= df_combained['high_income_thrshld']
df_combained['affluence_tag'] = np.where(is_less_affluent, 'Less', 'High')

df_combained.head(2)

Unnamed: 0,city_name,service_name,service_detail_id,pickup_location,pickup_hex_8,fe_cus_count,fe_count,requested_orders,net_orders,cobra,ocara,accepted_orders,cobrm,stockout,expiry_mapped,taxi_high_income,taxi_medium_income,taxi_low_income,link_only_service,auto_only_service,both_service,link_ps,link_nps,auto_ps,auto_nps,high_income_%,high_income_thrshld,affluence_tag
0,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc21fffff,519,1265,244,176,18,47,223,0,2,0,167,157,42,199,9,187,60,102,14,47,32.0,25.0,High
1,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc25fffff,534,1309,205,133,12,60,194,0,0,0,179,175,39,197,6,212,77,102,20,54,34.0,25.0,High


In [20]:
# Attach the per-hex service_tag to every service x hex row.
# Any service_tag left over from a previous run of this cell is dropped first;
# without that, re-running the cell would produce service_tag_x / service_tag_y
# columns instead of a single service_tag.
df_combained = pd.merge(
    df_combained.drop(columns=['service_tag'], errors='ignore'),
    test,
    how='inner',
    on='pickup_hex_8',
)
df_combained

Unnamed: 0,city_name,service_name,service_detail_id,pickup_location,pickup_hex_8,fe_cus_count,fe_count,requested_orders,net_orders,cobra,ocara,accepted_orders,cobrm,stockout,expiry_mapped,taxi_high_income,taxi_medium_income,taxi_low_income,link_only_service,auto_only_service,both_service,link_ps,link_nps,auto_ps,auto_nps,high_income_%,high_income_thrshld,affluence_tag,service_tag
0,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc21fffff,519,1265,244,176,18,47,223,0,2,0,167,157,42,199,9,187,60,102,14,47,32.0,25.0,High,bike_lite
1,Lucknow,Link,5c1248c10241913903a42fd6,1090 Area,883d8dcc21fffff,525,1262,192,143,8,39,185,0,1,0,175,155,46,203,9,193,58,107,14,46,33.0,25.0,High,bike_lite
2,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc25fffff,534,1309,205,133,12,60,194,0,0,0,179,175,39,197,6,212,77,102,20,54,34.0,25.0,High,link
3,Lucknow,Link,5c1248c10241913903a42fd6,1090 Area,883d8dcc25fffff,536,1328,229,155,15,55,211,1,3,0,183,174,37,200,6,212,77,99,21,53,34.0,25.0,High,link
4,Lucknow,Bike Lite,649564c80b573a42c10a4a44,1090 Area,883d8dcc27fffff,284,820,130,101,6,22,126,0,1,0,67,101,25,90,10,106,36,64,15,32,24.0,25.0,Less,link
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1637,Lucknow,Link,5c1248c10241913903a42fd6,Yahiyaganj,883d8dcf23fffff,598,1737,335,234,16,82,320,3,0,0,171,195,44,267,10,162,60,102,21,47,29.0,25.0,High,link
1638,Lucknow,Bike Lite,649564c80b573a42c10a4a44,Yahiyaganj,883d8dcf27fffff,379,1160,137,79,22,35,116,0,1,0,109,136,30,156,10,128,44,71,16,40,29.0,25.0,High,link
1639,Lucknow,Link,5c1248c10241913903a42fd6,Yahiyaganj,883d8dcf27fffff,396,1206,278,166,26,75,256,1,9,0,115,145,33,170,10,134,48,71,16,40,29.0,25.0,High,link
1640,Lucknow,Bike Lite,649564c80b573a42c10a4a44,Yahiyaganj,883d8dcf3dfffff,401,918,94,61,11,22,86,0,0,0,89,149,21,154,1,117,47,53,14,30,22.0,25.0,Less,link


In [21]:
# Distribution of fe_count across rows; used to pick the demand-bucket cut-offs below.
df_combained.fe_count.quantile([ 0.25, 0.50, 0.75, 0.80, 0.85, 0.90, 0.95])

0.25      16.00
0.50     110.00
0.75     574.50
0.80     770.80
0.85    1023.55
0.90    1340.00
0.95    1929.50
Name: fe_count, dtype: float64

In [22]:
# Bucket each row by where its fe_count falls in the overall distribution:
#   <= median -> 'Lowest', <= 75th pct -> 'Low', <= 85th pct -> 'High', above -> 'Highest'.
fe_q50 = df_combained['fe_count'].quantile(0.5)
fe_q75 = df_combained['fe_count'].quantile(0.75)
fe_q85 = df_combained['fe_count'].quantile(0.85)

# Conditions are checked in order and the first match wins.
df_combained['demand_bucket'] = np.select(
    [
        df_combained['fe_count'] <= fe_q50,
        df_combained['fe_count'] <= fe_q75,
        df_combained['fe_count'] <= fe_q85,
    ],
    ['Lowest', 'Low', 'High'],
    default='Highest',
)

In [23]:
# Distinct hexes per affluence tag.
df_combained.groupby(['affluence_tag'])['pickup_hex_8'].nunique()

affluence_tag
High    423
Less    437
Name: pickup_hex_8, dtype: int64

In [24]:
# Distinct hexes per affluence tag and demand bucket.
df_combained.groupby(['affluence_tag', 'demand_bucket'])['pickup_hex_8'].nunique()

affluence_tag  demand_bucket
High           High              64
               Highest          107
               Low              121
               Lowest           133
Less           High              27
               Highest           21
               Low              103
               Lowest           294
Name: pickup_hex_8, dtype: int64

In [25]:
# Totals per affluence tag and service: hex coverage, FE customers, FEs and orders.
# fe_cus_count is a sum of per-hex distinct counts.
df1_analysis = (
    df_combained
    .groupby(['affluence_tag', 'service_name'])
    .agg(
        hex_8_count=('pickup_hex_8', 'nunique'),
        fe_cus_count=('fe_cus_count', 'sum'),
        fe_count=('fe_count', 'sum'),
        requested_orders=('requested_orders', 'sum'),
        net_orders=('net_orders', 'sum'),
    )
    .reset_index()
)
df1_analysis.sort_values('service_name')

Unnamed: 0,affluence_tag,service_name,hex_8_count,fe_cus_count,fe_count,requested_orders,net_orders
0,High,Bike Lite,389,108337,279581,40430,26201
2,Less,Bike Lite,432,38181,96520,10448,5796
1,High,Link,418,114508,295908,50997,35571
3,Less,Link,403,34969,88226,13702,8220


In [26]:
# FE customer total per affluence tag (both services combined); used as the
# denominator for the customer distribution in the next cell.
df2_analysis = df1_analysis.groupby(['affluence_tag'])[['fe_cus_count']].sum().reset_index()
df2_analysis

Unnamed: 0,affluence_tag,fe_cus_count
0,High,222845
1,Less,73150


In [27]:
# Per affluence tag x service summary with distribution and funnel ratios.
# After the merge, fe_cus_count_x is the tag x service total and fe_cus_count_y
# is the total for the whole affluence tag (both services).
df3_analysis = pd.merge(df1_analysis, df2_analysis, how = 'left', on = ['affluence_tag'])

# Distinct hexes in the city-level frame; computed once and reused below.
city_hex_count = df_combained.pickup_hex_8.nunique()
df3_analysis['city_hex'] = city_hex_count

# Percentages (0-100).
df3_analysis['fe_cust_distr'] = df3_analysis['fe_cus_count_x']*100/df3_analysis['fe_cus_count_y']  # service share of the tag's FE customers
df3_analysis['hex_distr'] = df3_analysis['hex_8_count']*100/city_hex_count                          # share of city hexes
df3_analysis['fe-rr'] = df3_analysis['requested_orders']*100.0/df3_analysis['fe_count']            # FE -> requested order
df3_analysis['g2n'] = df3_analysis['net_orders']*100.0/df3_analysis['requested_orders']            # requested -> net order
df3_analysis['fe-net'] = df3_analysis['net_orders']*100.0/df3_analysis['fe_count']                 # FE -> net order

# Average FE count per hex. This is a plain ratio, not a percentage, so it is
# no longer multiplied by 100 (the factor had been copied from the lines above).
df3_analysis['fe_count/hex'] = df3_analysis['fe_count']/df3_analysis['hex_8_count']

df3_analysis[['affluence_tag', 'service_name', 'city_hex',
              'hex_8_count', 'hex_distr', 'fe_cus_count_x', #'fe_cus_count_y', 
              'fe_cust_distr',
              'fe_count', 'requested_orders', 'net_orders', 
              'fe-rr', 'g2n', 'fe-net', #'fe_count/hex'
             ]].round(1)

Unnamed: 0,affluence_tag,service_name,city_hex,hex_8_count,hex_distr,fe_cus_count_x,fe_cust_distr,fe_count,requested_orders,net_orders,fe-rr,g2n,fe-net
0,High,Bike Lite,821,389,47.4,108337,48.6,279581,40430,26201,14.5,64.8,9.4
1,High,Link,821,418,50.9,114508,51.4,295908,50997,35571,17.2,69.8,12.0
2,Less,Bike Lite,821,432,52.6,38181,52.2,96520,10448,5796,10.8,55.5,6.0
3,Less,Link,821,403,49.1,34969,47.8,88226,13702,8220,15.5,60.0,9.3


In [28]:
# Distinct hexes per affluence tag.
# NOTE(review): this repeats an earlier cell verbatim (same expression, same output).
df_combained.groupby(['affluence_tag']).pickup_hex_8.nunique()

affluence_tag
High    423
Less    437
Name: pickup_hex_8, dtype: int64

Insights
    
   1. The proportion of Bike Lite FE customers is higher in Less-affluence areas.<br>
      (The proportion of Link FE customers is higher in High-affluence areas.)
   2. The proportion of Bike Lite hexes is higher in Less-affluence areas.<br>
      (The proportion of Link hexes is higher in High-affluence areas.)