In [1]:
import os
import h3 as h3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from pyhive import presto
from keplergl import KeplerGl
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

In [2]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 300)

In [3]:
## Connection
connection = presto.connect(
        host='presto-gateway.serving.data.production.internal',
        port=80,
        protocol='http',
        catalog='hive',
        username='manoj.ravirajan@rapido.bike'
)

## Parameter 

In [4]:
city = 'Bangalore'
service = 'Link'

In [5]:
## datasets.service_mapping

service_mapping = f"""
        SELECT 
            city_display_name AS city,
            service_level AS service_name,
            service_detail_id,
            city_id,
            service_id
        FROM 
            datasets.service_mapping
        WHERE 
            city_display_name = '{city}'
            AND service_level = '{service}'
"""

df_service_mapping = pd.read_sql(service_mapping, connection)
df_service_mapping.head()

Unnamed: 0,city,service_name,service_detail_id,city_id,service_id
0,Bangalore,Link,57370b61a6855d70057417d1,572ca7ff116b5db3057bd814,572e29b0116b5db3057bd821


In [6]:
service_detail_id = df_service_mapping.service_detail_id.loc[0]
service_detail_id

'57370b61a6855d70057417d1'

## Dataset

In [7]:
pre_start_date = '20230724'
pre_end_date = '20230813'
post_start_date = '20230824'
post_end_date = '20230910'

In [8]:
## Experimental Hex's

experimental_hex = pd.read_csv('/Users/rapido/local-datasets/affluence/experiment/exp_affluence_bangalore_link_circuit_break_hex_8_list_v1.csv')
experimental_hex_list = experimental_hex[['pickup_cluster', 'pickup_hex_8','income_signal', 'affluence_tag','group_tc']]

In [9]:
experimental_hex_list.groupby(['affluence_tag', 'group_tc']).pickup_hex_8.count()

affluence_tag   group_tc
High Affluence  control     12
                test        13
Low Affluence   control     22
                test        23
Name: pickup_hex_8, dtype: int64

In [10]:
exp_hex_list = experimental_hex_list['pickup_hex_8'].values.tolist()
len(exp_hex_list)

70

AND fe_ench.pickup_location_hex_8 IN ('88618921d3fffff','88618921c7fffff','8861892665fffff','8860145a33fffff','8861892c97fffff','886189219bfffff','88618920b3fffff','8861892e37fffff','8861892ed9fffff','8861892c1dfffff','8861892c11fffff','8861892ea5fffff','8861892c55fffff','8861892639fffff','886189246dfffff','8861892cbdfffff','8861892ee5fffff','88618925c9fffff','88618925bbfffff','8860145a61fffff','8861892431fffff','88618926adfffff','8860145b43fffff','8861892ed3fffff','88618925a5fffff','886014594bfffff','8860145863fffff','8861892ca3fffff','8860145829fffff','8860145a05fffff','8860169669fffff','8860145943fffff','8861892dd5fffff','886014584dfffff','886189244dfffff','88618925a9fffff','8860145913fffff','8861892c0bfffff','8861892cc1fffff','8860145ad1fffff','88618925e3fffff','886016966bfffff','88618920e9fffff','8861892645fffff','8861892521fffff','8861892ccbfffff','8861892e63fffff','8861892f13fffff','8860145ad9fffff','88618921e3fffff','8860145b51fffff','8861892e2bfffff','8861892c8bfffff','8860145b67fffff','8861892e2dfffff','8861892db5fffff','8860145a21fffff','8861892db7fffff','8860145a29fffff','8861892c95fffff','8861892eb1fffff','8861892eb7fffff','8861892eb5fffff','8860145b53fffff','8861892e93fffff','8860145b55fffff','886014595dfffff','8861892e85fffff','8860145b59fffff','8860169663fffff')


In [11]:
## pricing.fare_estimates_enriched

raw_dataset = f"""

        WITH city_cluster_hex AS (

                SELECT
                    cch.hex_id AS hex_id,
                    cch.cluster AS cluster
                FROM
                    datasets.city_cluster_hex cch
                WHERE
                    cch.resolution = 8
                    AND cch.cluster != ''
            ),

            fare_estimates AS (

                SELECT
                    fe_ench.yyyymmdd AS yyyymmdd,
                    CAST(DAY_OF_WEEK(DATE_PARSE(fe_ench.yyyymmdd, '%Y%m%d')) AS VARCHAR ) || '. ' || DATE_FORMAT(DATE_PARSE(fe_ench.yyyymmdd, '%Y%m%d'), '%W') weekday,
                    -- DATE_FORMAT(DATE_PARSE(fe_ench.yyyymmdd, '%Y%m%d'), '%W') weekday,
                    fe_ench.quarter_hour AS quarter_hour,
                    CASE 
                    WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) >= 8 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) <= 11 THEN '1.Morning Peak'
                    WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) >= 17 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) <= 21 THEN '3.Evening Peak'
                    WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) > 11 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) < 17 THEN '2.Afternoon'
                    ELSE '4.Rest' END AS time_period,
                    CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) AS hour,
                    fe_ench.city AS city_name,
                    fe_ench.service_name AS service_name,
                    fe_ench.service_detail_id AS service_detail_id,
                    fe_ench.pickup_location_hex_8 AS pickup_location_hex_8,
                    fe_ench.drop_location_hex_8 AS drop_location_hex_8,
                    COALESCE(fe_ench.price_increase_strategy,'surge_not_applied') AS surge_strategy,
                    COALESCE(fe_ench.is_gradient_applied,false) AS gradient_applied,
                    fe_ench.fare_estimate_id AS fare_estimate_id,
                    fe_ench.user_id AS customer_id,
                    -- fe_ench.sub_total,
                    fe_ench.final_amount,
                    fe_ench.dynamic_surge AS dynamic_surge,
                    fe_ench.dynamic_fare AS dynamic_fare,
                    fe_ench.ride_distance AS ride_distance

                FROM
                    pricing.fare_estimates_enriched fe_ench
                WHERE
                    -- fe_ench.yyyymmdd BETWEEN '{pre_start_date}' AND '{pre_end_date}'
                    fe_ench.yyyymmdd BETWEEN '{post_start_date}' AND '{post_end_date}'
                    AND fe_ench.service_detail_id = '{service_detail_id}'
                    AND fe_ench.pickup_location_hex_8 IN ('88618921d3fffff','88618921c7fffff','8861892665fffff','8860145a33fffff','8861892c97fffff','886189219bfffff','88618920b3fffff','8861892e37fffff','8861892ed9fffff','8861892c1dfffff','8861892c11fffff','8861892ea5fffff','8861892c55fffff','8861892639fffff','886189246dfffff','8861892cbdfffff','8861892ee5fffff','88618925c9fffff','88618925bbfffff','8860145a61fffff','8861892431fffff','88618926adfffff','8860145b43fffff','8861892ed3fffff','88618925a5fffff','886014594bfffff','8860145863fffff','8861892ca3fffff','8860145829fffff','8860145a05fffff','8860169669fffff','8860145943fffff','8861892dd5fffff','886014584dfffff','886189244dfffff','88618925a9fffff','8860145913fffff','8861892c0bfffff','8861892cc1fffff','8860145ad1fffff','88618925e3fffff','886016966bfffff','88618920e9fffff','8861892645fffff','8861892521fffff','8861892ccbfffff','8861892e63fffff','8861892f13fffff','8860145ad9fffff','88618921e3fffff','8860145b51fffff','8861892e2bfffff','8861892c8bfffff','8860145b67fffff','8861892e2dfffff','8861892db5fffff','8860145a21fffff','8861892db7fffff','8860145a29fffff','8861892c95fffff','8861892eb1fffff','8861892eb7fffff','8861892eb5fffff','8860145b53fffff','8861892e93fffff','8860145b55fffff','886014595dfffff','8861892e85fffff','8860145b59fffff','8860169663fffff')
            ),

            rate_card AS (

                SELECT 
                    city,
                    yyyymmdd,
                    fare_estimate_id,
                    ROUND(CAST(rate_card_amount AS DOUBLE),2) AS rate_card_amount
                FROM 
                    experiments.iprice_cleaned_responses_v2 
                WHERE 
                    -- yyyymmdd BETWEEN '{pre_start_date}' AND '{pre_end_date}'
                    yyyymmdd BETWEEN '{post_start_date}' AND '{post_end_date}'
                    AND service_detail_id = '{service_detail_id}'
            ),

            orders AS (

                SELECT
                    ols.yyyymmdd AS yyyymmdd,
                    ols.service_detail_id AS service_detail_id,
                    ols.customer_id AS customer_id,
                    ols.estimate_id AS fare_estimate_id,
                    ols.order_status AS order_status,
                    ols.order_id AS order_id,
                    ols.spd_fraud_flag AS spd_fraud_flag,
                    ols.discount AS discount, 
                    ols.sub_total AS sub_total,
                    ols.rate_card_amount AS rate_card_amount,
                    ols.accept_to_pickup_distance  AS accept_to_pickup_distance,
                    ROW_NUMBER() OVER (PARTITION BY ols.order_id ORDER BY ols.updated_epoch DESC) AS row_number,
                    CASE 
                    WHEN ols.order_status IN ('dropped') AND ols.spd_fraud_flag != True THEN 'net_orders'
                    WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason IN ('order cancelled before rider accepted') THEN 'cobra'
                    WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason IN ('Order cancelled before rider was mapped') THEN 'cobrm'
                    WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason NOT IN ('order cancelled before rider accepted', 'Order cancelled before rider was mapped') THEN 'ocara'
                    WHEN ols.order_status IN ('expired') AND length(ols.map_riders) < 28 THEN 'stockout'
                    WHEN ols.order_status IN ('expired') AND length(ols.map_riders) >= 28 THEN 'expiry_mapped' 
                    ELSE 'Other' 
                    END AS order_state

                FROM
                    orders.order_logs_snapshot ols
                WHERE
                    -- yyyymmdd BETWEEN '{pre_start_date}' AND '{pre_end_date}'
                    yyyymmdd BETWEEN '{post_start_date}' AND '{post_end_date}'
                    AND service_detail_id = '{service_detail_id}'
            )
        
        SELECT 
            fe.yyyymmdd AS yyyymmdd,
            fe.weekday AS weekday,
            fe.quarter_hour AS quarter_hour,
            fe.hour AS hour,
            fe.time_period,
            CASE 
            WHEN hour IN (8,9) THEN '0800-0959'
            WHEN hour IN (10,11) THEN '1000-1159'
            WHEN hour IN (17,18) THEN '1700-1859'
            WHEN hour IN (19,20,21) THEN '1900-2159'
            WHEN hour IN (12,13) THEN '1200-1359'
            WHEN hour IN (14,15,16) THEN '1400-1659'
            ELSE 'Rest'
            END AS hour_bucket,
            fe.city_name AS city_name,
            fe.service_name AS service_name,
            fe.service_detail_id AS service_detail_id,
            fe.customer_id AS fe_customer_id,
            pic.cluster AS pickup_location,
            pic.hex_id AS pickup_hex_id,
            
            CASE 
            WHEN pic.hex_id IN ('88618921d3fffff', '88618921c7fffff', '8861892665fffff', '8860145a33fffff', '8861892c97fffff', '886189219bfffff', '88618920b3fffff', '8861892e37fffff', '8861892ed9fffff',
                                '8861892c1dfffff', '8861892c11fffff', '8861892ea5fffff', '8861892c55fffff', '8861892639fffff', '886189246dfffff', '8861892cbdfffff', '8861892ee5fffff', '88618925c9fffff',
                                '88618925bbfffff', '8860145a61fffff', '8861892431fffff', '88618926adfffff', '8860145b43fffff', '8861892ed3fffff', '88618925a5fffff')
            THEN 'High Affluence'
            WHEN pic.hex_id IN ('886014594bfffff', '8860145863fffff', '8861892ca3fffff', '8860145829fffff', '8860145a05fffff', '8860169669fffff', '8860145943fffff', '8861892dd5fffff', '886014584dfffff',
                                '886189244dfffff', '88618925a9fffff', '8860145913fffff', '8861892c0bfffff', '8861892cc1fffff', '8860145ad1fffff', '88618925e3fffff', '886016966bfffff', '88618920e9fffff',
                                '8861892645fffff', '8861892521fffff', '8861892ccbfffff', '8861892e63fffff', '8861892f13fffff', '8860145ad9fffff', '88618921e3fffff', '8860145b51fffff', '8861892e2bfffff', 
                                '8861892c8bfffff', '8860145b67fffff', '8861892e2dfffff', '8861892db5fffff', '8860145a21fffff', '8861892db7fffff', '8860145a29fffff', '8861892c95fffff', '8861892eb1fffff', 
                                '8861892eb7fffff', '8861892eb5fffff', '8860145b53fffff', '8861892e93fffff', '8860145b55fffff', '886014595dfffff', '8861892e85fffff', '8860145b59fffff', '8860169663fffff')
            THEN 'Low Affluence'
            ELSE 'OTHER' END affluence,
            
            CASE 
            WHEN pic.hex_id IN ('88618921d3fffff', '8861892665fffff', '8861892c97fffff', '88618920b3fffff', '8861892ed9fffff', '8861892c11fffff', '8861892c55fffff', '886189246dfffff', '8861892ee5fffff',
                                '88618925bbfffff', '8861892431fffff', '8860145b43fffff', '88618925a5fffff', '886014594bfffff', '8861892ca3fffff', '8860145a05fffff', '8860145943fffff', '886014584dfffff',
                                '88618925a9fffff', '8861892c0bfffff', '8860145ad1fffff', '886016966bfffff', '8861892645fffff', '8861892ccbfffff', '8861892f13fffff', '88618921e3fffff', '8861892e2bfffff',
                                '8860145b67fffff', '8861892db5fffff', '8861892db7fffff', '8861892c95fffff', '8861892eb7fffff', '8860145b53fffff', '8860145b55fffff', '8861892e85fffff', '8860169663fffff')
            THEN 'TEST'
            WHEN pic.hex_id IN ('88618921c7fffff', '8860145a33fffff', '886189219bfffff', '8861892e37fffff', '8861892c1dfffff', '8861892ea5fffff', '8861892639fffff', '8861892cbdfffff', '88618925c9fffff',
                                '8860145a61fffff', '88618926adfffff', '8861892ed3fffff', '8860145863fffff', '8860145829fffff', '8860169669fffff', '8861892dd5fffff', '886189244dfffff', '8860145913fffff',
                                '8861892cc1fffff', '88618925e3fffff', '88618920e9fffff', '8861892521fffff', '8861892e63fffff', '8860145ad9fffff', '8860145b51fffff', '8861892c8bfffff', '8861892e2dfffff',
                                '8860145a21fffff', '8860145a29fffff', '8861892eb1fffff', '8861892eb5fffff', '8861892e93fffff', '886014595dfffff', '8860145b59fffff')
            THEN 'CONTROL'
            ELSE 'OTHER' END group_tc,
            
            fe.surge_strategy AS surge_strategy,
            fe.fare_estimate_id AS fare_estimate_id,
            -- fe.sub_total,
            fe.final_amount,
            
            CASE WHEN surge_strategy NOT IN ('surge_not_applied', 'circuit_broken') THEN fe.dynamic_surge END dynamic_surge,
            CASE WHEN surge_strategy NOT IN ('surge_not_applied', 'circuit_broken') THEN fe.dynamic_fare END dynamic_fare,
            CASE WHEN surge_strategy NOT IN ('surge_not_applied', 'circuit_broken') THEN COALESCE(rc.rate_card_amount, ord.rate_card_amount) END rate_card_amount,
            
            fe.ride_distance AS ride_distance,
            ord.customer_id AS gross_customer_id,
            ord.order_id,
            ord.order_status,
            ord.spd_fraud_flag,
            ord.discount,
            ord.sub_total,
            ord.order_state,
            ord.row_number,
            ord.accept_to_pickup_distance,
            
            case 
            when fe.ride_distance <= 2 then '0-2'
            when fe.ride_distance > 2 and fe.ride_distance <= 5 then '2-5'
            when fe.ride_distance > 5 and fe.ride_distance <= 10 then '5-10'
            when fe.ride_distance > 10 then 'Rest'
            else 'check'
            end as distance_tag
            
            
        FROM
            fare_estimates fe
            
        LEFT JOIN
            city_cluster_hex pic
            ON fe.pickup_location_hex_8 = pic.hex_id
            
        LEFT JOIN
            rate_card rc
            ON fe.yyyymmdd = rc.yyyymmdd
            AND fe.city_name = rc.city
            AND fe.fare_estimate_id = rc.fare_estimate_id
        
        LEFT JOIN
            orders ord
            ON fe.yyyymmdd = ord.yyyymmdd
            AND fe.service_detail_id = ord.service_detail_id
            AND fe.fare_estimate_id = ord.fare_estimate_id
"""

In [12]:
# df_raw_dataset = pd.read_sql(raw_dataset, connection)
# df_raw_dataset.head(3)

In [13]:
# df_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/pre_raw_data_{}_to_{}.csv' \
#                               .format(pre_start_date, pre_end_date)
#                               , index = False)

'''
df_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/post_raw_data_{}_to_{}.csv' \
                                .format(post_start_date, post_end_date)
                               , index = False)
# '''

"\ndf_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/post_raw_data_{}_to_{}.csv'                                 .format(post_start_date, post_end_date)\n                               , index = False)\n# "

In [14]:
df_pre_raw_dataset = pd.read_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/pre_raw_data_{}_to_{}.csv' \
                               .format(pre_start_date, pre_end_date))
df_post_raw_dataset = pd.read_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/post_raw_data_{}_to_{}.csv' \
                               .format(post_start_date, post_end_date))

In [15]:
df_pre_raw = df_pre_raw_dataset.copy(deep=True)
print(df_pre_raw.shape)
df_post_raw = df_post_raw_dataset.copy(deep=True)
print(df_post_raw.shape)

(1986622, 31)
(1786196, 31)


In [23]:
print('--------------PRE----------------')
print(df_pre_raw.yyyymmdd.nunique())
print('---------------------------------')
print(df_pre_raw.groupby(['yyyymmdd','weekday']).fare_estimate_id.nunique())

print('--------------POST----------------')
print(df_post_raw.yyyymmdd.nunique())
print('---------------------------------')
print(df_post_raw.groupby(['yyyymmdd','weekday']).fare_estimate_id.nunique())

--------------PRE----------------
21
---------------------------------
yyyymmdd  weekday     
20230724  1. Monday       104854
20230725  2. Tuesday      100889
20230726  3. Wednesday     92766
20230727  4. Thursday      78652
20230728  5. Friday        83816
20230729  6. Saturday      90119
20230730  7. Sunday        86521
20230731  1. Monday        96735
20230801  2. Tuesday       89888
20230802  3. Wednesday     85797
20230803  4. Thursday      81642
20230804  5. Friday        84636
20230805  6. Saturday     111360
20230806  7. Sunday       110586
20230807  1. Monday       104166
20230808  2. Tuesday       93159
20230809  3. Wednesday     92333
20230810  4. Thursday      96785
20230811  5. Friday        99987
20230812  6. Saturday     103514
20230813  7. Sunday        94173
Name: fare_estimate_id, dtype: int64
--------------POST----------------
18
---------------------------------
yyyymmdd  weekday     
20230824  4. Thursday      91159
20230825  5. Friday        82374
20230826  6. Sa

## DE

In [24]:
df_pre_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag
0,20230725,2. Tuesday,2045,20,3.Evening Peak,1900-2159,Bangalore,Link,57370b61a6855d70057417d1,6222d0e7c1a4e430d4643577,Venkatapura,88618925c9fffff,High Affluence,CONTROL,surge_not_applied,64bfe8a5c41fe9d38ab98d5c,67.0,,,,5.076,,,,,,,,,,5-10
1,20230729,6. Saturday,1930,19,3.Evening Peak,1900-2159,Bangalore,Link,57370b61a6855d70057417d1,5c8a460c8c352421eae9c5dc,Domlur,8861892ed9fffff,High Affluence,TEST,surge_not_applied,64c51d53376ccd6820082d42,68.0,,,,5.29,,,,,,,,,,5-10


In [25]:
df_post_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag
0,20230830,3. Wednesday,815,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,62a74e0a7d7967549ee7a7e6,Electronic City,8861892639fffff,High Affluence,CONTROL,surge_not_applied,64eeb08f1bd630639fa939ae,41.0,,,,0.09,,,,,,,,,,0-2
1,20230829,2. Tuesday,800,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,64eb0efd5a6ec79d09c3504c,Majestic,8860145b55fffff,Low Affluence,TEST,circuit_broken,64ed5ba81ed3d92ced7ef496,117.0,,,,10.836,,,,,,,,,,Rest


In [27]:
## Feature engineering 

def feature_engineering():

    def get_dropped_order(row):
        return 1 if row['order_status'] == 'dropped' and not row['spd_fraud_flag'] else None

    def get_surged_fe(row):
        return row['fare_estimate_id'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_fe_rate_card(row):
        return row['rate_card_amount'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_fe_dynamic_surge(row):
        return row['dynamic_surge'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_fe_dynamic_fare(row):
        return row['dynamic_fare'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_net(row):
        return row['fare_estimate_id'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' and row['order_status'] == 'dropped' and not row['spd_fraud_flag'] else None

    def get_cobra(row):
        return row['order_id'] if row['order_state'] == 'cobra' and row['row_number'] == 1 else None

    def get_ocara(row):
        return row['order_id'] if row['order_state'] == 'ocara' and row['row_number'] == 1 else None

    def get_cobrm(row):
        return row['order_id'] if row['order_state'] == 'cobrm' and row['row_number'] == 1 else None

    def get_stockout(row):
        return row['order_id'] if row['order_state'] == 'stockout' and row['row_number'] == 1 else None

    def get_expiry_mapped(row):
        return row['order_id'] if row['order_state'] == 'expiry_mapped' and row['row_number'] == 1 else None

    def get_accepted_orders(row):
        return row['order_id'] if row['accept_to_pickup_distance'] > 0 else None

    ## Pre
    df_pre_raw['net_orders'] = df_pre_raw.apply(get_dropped_order, axis=1)
    df_pre_raw['surged_fe'] = df_pre_raw.apply(get_surged_fe, axis=1)
    df_pre_raw['surged_net'] = df_pre_raw.apply(get_surged_net, axis=1)
    df_pre_raw['cobra'] = df_pre_raw.apply(get_cobra, axis=1)
    df_pre_raw['ocara'] = df_pre_raw.apply(get_ocara, axis=1)
    df_pre_raw['cobrm'] = df_pre_raw.apply(get_cobrm, axis=1)
    df_pre_raw['stockout'] = df_pre_raw.apply(get_stockout, axis=1)
    df_pre_raw['expiry_mapped'] = df_pre_raw.apply(get_expiry_mapped, axis=1)
    df_pre_raw['accepted_orders'] = df_pre_raw.apply(get_accepted_orders, axis=1)
    df_pre_raw['surge_rate_card_amount'] = df_pre_raw.apply(get_surged_fe_rate_card, axis=1)
    df_pre_raw['surge_dynamic_surge'] = df_pre_raw.apply(get_surged_fe_dynamic_surge, axis=1)
    df_pre_raw['surge_dynamic_fare'] = df_pre_raw.apply(get_surged_fe_dynamic_fare, axis=1)
    
    ## Post
    df_post_raw['net_orders'] = df_post_raw.apply(get_dropped_order, axis=1)
    df_post_raw['surged_fe'] = df_post_raw.apply(get_surged_fe, axis=1)
    df_post_raw['surged_net'] = df_post_raw.apply(get_surged_net, axis=1)
    df_post_raw['cobra'] = df_post_raw.apply(get_cobra, axis=1)
    df_post_raw['ocara'] = df_post_raw.apply(get_ocara, axis=1)
    df_post_raw['cobrm'] = df_post_raw.apply(get_cobrm, axis=1)
    df_post_raw['stockout'] = df_post_raw.apply(get_stockout, axis=1)
    df_post_raw['expiry_mapped'] = df_post_raw.apply(get_expiry_mapped, axis=1)
    df_post_raw['accepted_orders'] = df_post_raw.apply(get_accepted_orders, axis=1)
    df_post_raw['surge_rate_card_amount'] = df_post_raw.apply(get_surged_fe_rate_card, axis=1)
    df_post_raw['surge_dynamic_surge'] = df_post_raw.apply(get_surged_fe_dynamic_surge, axis=1)
    df_post_raw['surge_dynamic_fare'] = df_post_raw.apply(get_surged_fe_dynamic_fare, axis=1)
    
feature_engineering()

In [28]:
df_pre_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag,net_orders,surged_fe,surged_net,cobra,ocara,cobrm,stockout,expiry_mapped,accepted_orders,surge_rate_card_amount,surge_dynamic_surge,surge_dynamic_fare
0,20230725,2. Tuesday,2045,20,3.Evening Peak,1900-2159,Bangalore,Link,57370b61a6855d70057417d1,6222d0e7c1a4e430d4643577,Venkatapura,88618925c9fffff,High Affluence,CONTROL,surge_not_applied,64bfe8a5c41fe9d38ab98d5c,67.0,,,,5.076,,,,,,,,,,5-10,,,,,,,,,,,,
1,20230729,6. Saturday,1930,19,3.Evening Peak,1900-2159,Bangalore,Link,57370b61a6855d70057417d1,5c8a460c8c352421eae9c5dc,Domlur,8861892ed9fffff,High Affluence,TEST,surge_not_applied,64c51d53376ccd6820082d42,68.0,,,,5.29,,,,,,,,,,5-10,,,,,,,,,,,,


In [29]:
df_post_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag,net_orders,surged_fe,surged_net,cobra,ocara,cobrm,stockout,expiry_mapped,accepted_orders,surge_rate_card_amount,surge_dynamic_surge,surge_dynamic_fare
0,20230830,3. Wednesday,815,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,62a74e0a7d7967549ee7a7e6,Electronic City,8861892639fffff,High Affluence,CONTROL,surge_not_applied,64eeb08f1bd630639fa939ae,41.0,,,,0.09,,,,,,,,,,0-2,,,,,,,,,,,,
1,20230829,2. Tuesday,800,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,64eb0efd5a6ec79d09c3504c,Majestic,8860145b55fffff,Low Affluence,TEST,circuit_broken,64ed5ba81ed3d92ced7ef496,117.0,,,,10.836,,,,,,,,,,Rest,,,,,,,,,,,,


## Analysis view 1

In [30]:
## Pre

df_analysis_pre_group_v1 = df_pre_raw \
                            .groupby(['affluence', 'group_tc']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v1['fe2rr'] = (df_analysis_pre_group_v1['gross_orders']*100.0/df_analysis_pre_group_v1['fe_count']).round(2)
df_analysis_pre_group_v1['g2n'] = (df_analysis_pre_group_v1['net_orders']*100.0/df_analysis_pre_group_v1['gross_orders']).round(2)
df_analysis_pre_group_v1['fe2net'] = (df_analysis_pre_group_v1['net_orders']*100.0/df_analysis_pre_group_v1['fe_count']).round(2)
df_analysis_pre_group_v1['aor'] = (df_analysis_pre_group_v1['accepted_orders']*100.0/df_analysis_pre_group_v1['gross_orders']).round(2)
df_analysis_pre_group_v1['surged_fe_percentage'] = (df_analysis_pre_group_v1['surged_fe']*100.0/df_analysis_pre_group_v1['fe_count']).round(2)
df_analysis_pre_group_v1['surge_percentage'] = ((df_analysis_pre_group_v1['surge_dynamic_surge']+df_analysis_pre_group_v1['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v1['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v1['ppkm'] = (df_analysis_pre_group_v1['final_amount']/df_analysis_pre_group_v1['ride_distance']).round(2)
df_analysis_pre_group_v1['fe_hex'] = (df_analysis_pre_group_v1['fe_count']/df_analysis_pre_group_v1['hex_count']).round(2)
df_analysis_pre_group_v1['discount_percentage'] = (df_analysis_pre_group_v1['discount']*100.0/df_analysis_pre_group_v1['sub_total']).round(2)

In [31]:
## Post

df_analysis_post_group_v1 = df_post_raw \
                            .groupby(['affluence', 'group_tc']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v1['fe2rr'] = (df_analysis_post_group_v1['gross_orders']*100.0/df_analysis_post_group_v1['fe_count']).round(2)
df_analysis_post_group_v1['g2n'] = (df_analysis_post_group_v1['net_orders']*100.0/df_analysis_post_group_v1['gross_orders']).round(2)
df_analysis_post_group_v1['fe2net'] = (df_analysis_post_group_v1['net_orders']*100.0/df_analysis_post_group_v1['fe_count']).round(2)
df_analysis_post_group_v1['aor'] = (df_analysis_post_group_v1['accepted_orders']*100.0/df_analysis_post_group_v1['gross_orders']).round(2)
df_analysis_post_group_v1['surged_fe_percentage'] = (df_analysis_post_group_v1['surged_fe']*100.0/df_analysis_post_group_v1['fe_count']).round(2)
df_analysis_post_group_v1['surge_percentage'] = ((df_analysis_post_group_v1['surge_dynamic_surge']+df_analysis_post_group_v1['surge_dynamic_fare'])*100.0/df_analysis_post_group_v1['surge_rate_card_amount']).round(2)
df_analysis_post_group_v1['ppkm'] = (df_analysis_post_group_v1['final_amount']/df_analysis_post_group_v1['ride_distance']).round(2)
df_analysis_post_group_v1['fe_hex'] = (df_analysis_post_group_v1['fe_count']/df_analysis_post_group_v1['hex_count']).round(2)
df_analysis_post_group_v1['discount_percentage'] = (df_analysis_post_group_v1['discount']*100.0/df_analysis_post_group_v1['sub_total']).round(2)

In [32]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v1.columns]
    df_analysis_pre_group_v1.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v1.columns]
    df_analysis_post_group_v1.columns = post_new_column_names
    
    
add_pre_post()

In [33]:
view_1  = pd.merge(df_analysis_pre_group_v1,
             df_analysis_post_group_v1,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre'],
             right_on = ['affluence_post', 'group_tc_post']
            )

view_1.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_1['fe2rr_delta'] = view_1['fe2rr_post'] - view_1['fe2rr_pre']
view_1['g2n_delta'] = view_1['g2n_post'] - view_1['g2n_pre']
view_1['fe2net_delta'] = view_1['fe2net_post'] - view_1['fe2net_pre']
view_1['aor_delta'] = view_1['aor_post'] - view_1['aor_pre']
view_1['surged_fe_delta'] = view_1['surged_fe_percentage_post'] - view_1['surged_fe_percentage_pre']
view_1['ppkm_delta'] = view_1['ppkm_pre'] - view_1['ppkm_post']
view_1['surge_percentage_delta'] = view_1['surge_percentage_post'] - view_1['surge_percentage_pre']
view_1['discount_percentage_delta'] = view_1['discount_percentage_post'] - view_1['discount_percentage_pre']

In [34]:
view_1.head()

Unnamed: 0,affluence,group_tc,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,surge_dynamic_fare_pre,...,dynamic_surge_post,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,12,462431,78911,38873.0,65161,3303,20441,15795,122,24,3439,55409,885890.0,7383204.0,44386270.0,1640508.08,0.0,5366955.68,7.105756,3292218.0,5366955.68,1640508.08,0.0,...,1569881.29,0.0,4783747.18,7.014024,2800700.0,4783747.18,1569881.29,0.0,17.36,50.32,8.74,72.24,14.71,32.82,13.6,33218.33,6.9,0.3,1.06,0.33,2.02,0.62,-0.12,2.25,-5.1
1,High Affluence,TEST,13,436251,74688,36058.0,66733,3036,19972,13147,71,20,5228,50107,581334.0,6609380.0,41262312.0,1877038.6,0.0,5808129.93,6.955629,3043964.0,5808129.93,1877038.6,0.0,...,494433.02,0.0,1977538.12,6.869172,2497308.0,1977538.12,494433.02,0.0,18.27,48.91,8.94,68.97,6.61,25.0,13.13,27884.38,5.45,1.15,0.63,0.67,1.88,-8.69,0.43,-7.32,-3.35
2,Low Affluence,CONTROL,22,537269,85912,40883.0,78849,3371,20447,19550,303,98,4279,61768,891749.0,8551720.0,57471553.0,2354952.66,0.0,7261887.56,7.973606,4293691.0,7261887.56,2354952.66,0.0,...,3384911.62,0.0,8141760.9,7.816882,3975213.0,8141760.9,3384911.62,0.0,15.63,47.91,7.49,73.24,17.56,41.57,13.73,23067.18,6.03,-0.36,0.32,-0.12,1.34,2.88,-0.34,9.14,-4.4
3,Low Affluence,TEST,23,546427,79837,39071.0,103884,4529,16922,20383,164,52,2922,60695,911183.0,8627389.0,66581837.0,3798349.21,0.0,11074066.23,8.943403,4893750.0,11074066.23,3798349.21,0.0,...,685829.54,0.0,2382586.67,8.903842,4583707.0,2382586.67,685829.54,0.0,15.96,46.18,7.37,72.5,4.63,28.79,13.0,22336.96,6.1,1.35,-2.76,0.22,-3.52,-14.38,0.61,-5.51,-4.46


In [35]:
df_analysis_pre_post_v1_dsm = pd.read_clipboard()
df_analysis_pre_post_v1_dsm

Unnamed: 0,affluence,group_tc,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,3372.67,3508.83,2102.71,1929.06,461.19,524.61,136.16,63.42
1,High Affluence,TEST,3237.05,3261.78,2090.95,1845.67,507.76,561.22,24.73,53.46
2,Low Affluence,CONTROL,3805.52,4117.44,2515.19,2270.83,842.57,934.72,311.92,92.15
3,Low Affluence,TEST,3549.67,4211.61,2871.43,2681.06,645.24,772.67,661.94,127.43


In [36]:
df_analysis_pre_post_v1 = view_1.merge(df_analysis_pre_post_v1_dsm,
                                                              how = 'inner',
                                                              on = ['affluence','group_tc']
                                                             )
df_analysis_pre_post_v1

Unnamed: 0,affluence,group_tc,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,surge_dynamic_fare_pre,...,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,12,462431,78911,38873.0,65161,3303,20441,15795,122,24,3439,55409,885890.0,7383204.0,44386270.0,1640508.08,0.0,5366955.68,7.105756,3292218.0,5366955.68,1640508.08,0.0,...,17.36,50.32,8.74,72.24,14.71,32.82,13.6,33218.33,6.9,0.3,1.06,0.33,2.02,0.62,-0.12,2.25,-5.1,3372.67,3508.83,2102.71,1929.06,461.19,524.61,136.16,63.42
1,High Affluence,TEST,13,436251,74688,36058.0,66733,3036,19972,13147,71,20,5228,50107,581334.0,6609380.0,41262312.0,1877038.6,0.0,5808129.93,6.955629,3043964.0,5808129.93,1877038.6,0.0,...,18.27,48.91,8.94,68.97,6.61,25.0,13.13,27884.38,5.45,1.15,0.63,0.67,1.88,-8.69,0.43,-7.32,-3.35,3237.05,3261.78,2090.95,1845.67,507.76,561.22,24.73,53.46
2,Low Affluence,CONTROL,22,537269,85912,40883.0,78849,3371,20447,19550,303,98,4279,61768,891749.0,8551720.0,57471553.0,2354952.66,0.0,7261887.56,7.973606,4293691.0,7261887.56,2354952.66,0.0,...,15.63,47.91,7.49,73.24,17.56,41.57,13.73,23067.18,6.03,-0.36,0.32,-0.12,1.34,2.88,-0.34,9.14,-4.4,3805.52,4117.44,2515.19,2270.83,842.57,934.72,311.92,92.15
3,Low Affluence,TEST,23,546427,79837,39071.0,103884,4529,16922,20383,164,52,2922,60695,911183.0,8627389.0,66581837.0,3798349.21,0.0,11074066.23,8.943403,4893750.0,11074066.23,3798349.21,0.0,...,15.96,46.18,7.37,72.5,4.63,28.79,13.0,22336.96,6.1,1.35,-2.76,0.22,-3.52,-14.38,0.61,-5.51,-4.46,3549.67,4211.61,2871.43,2681.06,645.24,772.67,661.94,127.43


In [37]:
sorted_list = sorted(df_analysis_pre_post_v1.columns)
sorted_list

['accepted_orders_post',
 'accepted_orders_pre',
 'affluence',
 'affluence_post',
 'aor_delta',
 'aor_post',
 'aor_pre',
 'cobra_post',
 'cobra_pre',
 'cobrm_post',
 'cobrm_pre',
 'demand_pd_delta',
 'demand_per_day_post',
 'demand_per_day_pre',
 'discount_percentage_delta',
 'discount_percentage_post',
 'discount_percentage_pre',
 'discount_post',
 'discount_pre',
 'dynamic_fare_post',
 'dynamic_fare_pre',
 'dynamic_surge_post',
 'dynamic_surge_pre',
 'expiry_mapped_post',
 'expiry_mapped_pre',
 'fe2net_delta',
 'fe2net_post',
 'fe2net_pre',
 'fe2rr_delta',
 'fe2rr_post',
 'fe2rr_pre',
 'fe_count_post',
 'fe_count_pre',
 'fe_hex_post',
 'fe_hex_pre',
 'final_amount_post',
 'final_amount_pre',
 'g2n_delta',
 'g2n_post',
 'g2n_pre',
 'gross_orders_post',
 'gross_orders_pre',
 'group_tc',
 'group_tc_post',
 'hex_count',
 'hex_count_post',
 'mean_ride_distance_post',
 'mean_ride_distance_pre',
 'mismatch_qr_pd_delta',
 'mismatch_qr_per_day_post',
 'mismatch_qr_per_day_pre',
 'net_orders_p

In [38]:
df_analysis_pre_post_v1_final = df_analysis_pre_post_v1[['affluence', 'group_tc', 'hex_count',
                                  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]

In [39]:
df_analysis_pre_post_v1_final

Unnamed: 0,affluence,group_tc,hex_count,fe2rr_pre,fe2rr_post,fe2rr_delta,g2n_pre,g2n_post,g2n_delta,fe2net_pre,fe2net_post,fe2net_delta,aor_pre,aor_post,aor_delta,demand_per_day_pre,demand_per_day_post,demand_pd_delta,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,mismatch_qr_pd_delta,ppkm_pre,ppkm_post,...,fe_count_post,gross_orders_pre,gross_orders_post,net_orders_pre,net_orders_post,surged_fe_pre,surged_fe_post,surged_net_pre,surged_net_post,discount_pre,discount_post,sub_total_pre,sub_total_post,cobra_pre,cobra_post,ocara_pre,ocara_post,cobrm_pre,cobrm_post,expiry_mapped_pre,expiry_mapped_post,stockout_pre,stockout_post,accepted_orders_pre,accepted_orders_post
0,High Affluence,CONTROL,12,17.06,17.36,0.3,49.26,50.32,1.06,8.41,8.74,0.33,70.22,72.24,2.02,3372.67,3508.83,136.16,2102.71,1929.06,461.19,524.61,63.42,13.48,13.6,...,398620,78911,69209,38873.0,34827.0,65161,58650,3303,2705,885890.0,419344.0,7383204.0,6074632.0,20441,17026,15795,14230,122,144,3439,2765,24,17,55409,49998
1,High Affluence,TEST,13,17.12,18.27,1.15,48.28,48.91,0.63,8.27,8.94,0.67,67.09,68.97,1.88,3237.05,3261.78,24.73,2090.95,1845.67,507.76,561.22,53.46,13.56,13.13,...,362497,74688,66236,36058.0,32396.0,66733,23973,3036,747,581334.0,301842.0,6609380.0,5539784.0,19972,17189,13147,12303,71,108,5228,4049,20,31,50107,45686
2,Low Affluence,CONTROL,22,15.99,15.63,-0.36,47.59,47.91,0.32,7.61,7.49,-0.12,71.9,73.24,1.34,3805.52,4117.44,311.92,2515.19,2270.83,842.57,934.72,92.15,13.39,13.73,...,507478,85912,79299,40883.0,37990.0,78849,89132,3371,3294,891749.0,454114.0,8551720.0,7531303.0,20447,18190,19550,18465,303,361,4279,3869,98,99,61768,58079
3,Low Affluence,TEST,23,14.61,15.96,1.35,48.94,46.18,-2.76,7.15,7.37,0.22,76.02,72.5,-3.52,3549.67,4211.61,661.94,2871.43,2681.06,645.24,772.67,127.43,13.61,13.0,...,513750,79837,81977,39071.0,37854.0,103884,23784,4529,771,911183.0,506478.0,8627389.0,8302617.0,16922,19606,20383,20228,164,170,2922,3756,52,42,60695,59433


In [40]:
df_analysis_pre_post_v1_final.to_clipboard(index=False)

## Analysis view 2

In [41]:
## Pre

df_analysis_pre_group_v2 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v2['fe2rr'] = (df_analysis_pre_group_v2['gross_orders']*100.0/df_analysis_pre_group_v2['fe_count']).round(2)
df_analysis_pre_group_v2['g2n'] = (df_analysis_pre_group_v2['net_orders']*100.0/df_analysis_pre_group_v2['gross_orders']).round(2)
df_analysis_pre_group_v2['fe2net'] = (df_analysis_pre_group_v2['net_orders']*100.0/df_analysis_pre_group_v2['fe_count']).round(2)
df_analysis_pre_group_v2['aor'] = (df_analysis_pre_group_v2['accepted_orders']*100.0/df_analysis_pre_group_v2['gross_orders']).round(2)
df_analysis_pre_group_v2['surged_fe_percentage'] = (df_analysis_pre_group_v2['surged_fe']*100.0/df_analysis_pre_group_v2['fe_count']).round(2)
df_analysis_pre_group_v2['surge_percentage'] = ((df_analysis_pre_group_v2['surge_dynamic_surge']+df_analysis_pre_group_v2['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v2['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v2['ppkm'] = (df_analysis_pre_group_v2['final_amount']/df_analysis_pre_group_v2['ride_distance']).round(2)
df_analysis_pre_group_v2['fe_hex'] = (df_analysis_pre_group_v2['fe_count']/df_analysis_pre_group_v2['hex_count']).round(2)
df_analysis_pre_group_v2['discount_percentage'] = (df_analysis_pre_group_v2['discount']*100.0/df_analysis_pre_group_v2['sub_total']).round(2)

In [42]:
## Post

df_analysis_post_group_v2 = df_post_raw \
                            .groupby(['affluence', 'group_tc','time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v2['fe2rr'] = (df_analysis_post_group_v2['gross_orders']*100.0/df_analysis_post_group_v2['fe_count']).round(2)
df_analysis_post_group_v2['g2n'] = (df_analysis_post_group_v2['net_orders']*100.0/df_analysis_post_group_v2['gross_orders']).round(2)
df_analysis_post_group_v2['fe2net'] = (df_analysis_post_group_v2['net_orders']*100.0/df_analysis_post_group_v2['fe_count']).round(2)
df_analysis_post_group_v2['aor'] = (df_analysis_post_group_v2['accepted_orders']*100.0/df_analysis_post_group_v2['gross_orders']).round(2)
df_analysis_post_group_v2['surged_fe_percentage'] = (df_analysis_post_group_v2['surged_fe']*100.0/df_analysis_post_group_v2['fe_count']).round(2)
df_analysis_post_group_v2['surge_percentage'] = ((df_analysis_post_group_v2['surge_dynamic_surge']+df_analysis_post_group_v2['surge_dynamic_fare'])*100.0/df_analysis_post_group_v2['surge_rate_card_amount']).round(2)
df_analysis_post_group_v2['ppkm'] = (df_analysis_post_group_v2['final_amount']/df_analysis_post_group_v2['ride_distance']).round(2)
df_analysis_post_group_v2['fe_hex'] = (df_analysis_post_group_v2['fe_count']/df_analysis_post_group_v2['hex_count']).round(2)
df_analysis_post_group_v2['discount_percentage'] = (df_analysis_post_group_v2['discount']*100.0/df_analysis_post_group_v2['sub_total']).round(2)

In [43]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v2.columns]
    df_analysis_pre_group_v2.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v2.columns]
    df_analysis_post_group_v2.columns = post_new_column_names
    
    
add_pre_post()

In [44]:
view_2  = pd.merge(df_analysis_pre_group_v2,
             df_analysis_post_group_v2,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'time_period_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'time_period_post']
            )

view_2.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'time_period_pre' : 'time_period',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_2['fe2rr_delta'] = view_2['fe2rr_post'] - view_2['fe2rr_pre']
view_2['g2n_delta'] = view_2['g2n_post'] - view_2['g2n_pre']
view_2['fe2net_delta'] = view_2['fe2net_post'] - view_2['fe2net_pre']
view_2['aor_delta'] = view_2['aor_post'] - view_2['aor_pre']
view_2['surged_fe_delta'] = view_2['surged_fe_percentage_post'] - view_2['surged_fe_percentage_pre']
view_2['ppkm_delta'] = view_2['ppkm_pre'] - view_2['ppkm_post']
view_2['surge_percentage_delta'] = view_2['surge_percentage_post'] - view_2['surge_percentage_pre']
view_2['discount_percentage_delta'] = view_2['discount_percentage_post'] - view_2['discount_percentage_pre']

In [45]:
view_2.head(3)

Unnamed: 0,affluence,group_tc,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,...,dynamic_surge_post,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,1.Morning Peak,12,108367,20179,9391.0,24227,1377,6202,3315,35,3,1201,12638,301304.0,1940324.0,10562289.0,809196.1,0.0,2035547.53,7.174321,779662.1,2035547.53,809196.1,...,791438.98,0.0,1875163.33,7.177725,625768.4,1875163.33,791438.98,0.0,18.54,50.03,9.28,68.11,25.06,42.21,13.77,7247.83,8.72,-0.08,3.49,0.61,5.48,2.7,-0.22,2.46,-6.81
1,High Affluence,CONTROL,2.Afternoon,12,106811,16580,9872.0,6271,282,2897,3464,33,3,273,13455,262304.0,1569055.0,9870026.0,119700.44,0.0,540302.35,7.273567,777449.8,540302.35,119700.44,...,58968.21,0.0,289831.95,7.204902,652158.9,289831.95,58968.21,0.0,16.97,58.01,9.84,81.33,3.73,20.35,12.66,7537.25,9.2,1.45,-1.53,0.6,0.18,-2.14,0.04,-1.8,-7.52
2,High Affluence,CONTROL,3.Evening Peak,12,176032,29122,13199.0,33902,1581,8654,5581,17,2,1586,19271,255566.0,2571018.0,15919410.0,686709.17,0.0,2743795.14,6.76629,1193749.0,2743795.14,686709.17,...,685830.96,0.0,2528746.37,6.62344,1025580.0,2528746.37,685830.96,0.0,16.75,46.46,7.78,67.45,20.95,27.12,13.47,12878.42,6.09,0.21,1.14,0.28,1.28,1.69,-0.13,2.09,-3.85


In [46]:
df_analysis_pre_post_v2_dsm = pd.read_clipboard()
df_analysis_pre_post_v2_dsm.head(3)

Unnamed: 0,affluence,group_tc,time_period,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,1.Morning Peak,853.33,795.22,324.43,308.39,135.19,137.61,-58.11,2.42
1,High Affluence,TEST,1.Morning Peak,994.33,924.39,244.57,213.22,157.67,160.11,-69.94,2.44
2,Low Affluence,CONTROL,1.Morning Peak,1136.76,1125.83,355.29,308.06,244.71,254.28,-10.93,9.57


In [47]:
df_analysis_pre_post_v2 = view_2.merge(df_analysis_pre_post_v2_dsm,
                                                              how = 'inner',
                                                              on = ['affluence','group_tc','time_period']
                                                             )
df_analysis_pre_post_v2.head(3)

Unnamed: 0,affluence,group_tc,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,...,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,1.Morning Peak,12,108367,20179,9391.0,24227,1377,6202,3315,35,3,1201,12638,301304.0,1940324.0,10562289.0,809196.1,0.0,2035547.53,7.174321,779662.1,2035547.53,809196.1,...,18.54,50.03,9.28,68.11,25.06,42.21,13.77,7247.83,8.72,-0.08,3.49,0.61,5.48,2.7,-0.22,2.46,-6.81,853.33,795.22,324.43,308.39,135.19,137.61,-58.11,2.42
1,High Affluence,CONTROL,2.Afternoon,12,106811,16580,9872.0,6271,282,2897,3464,33,3,273,13455,262304.0,1569055.0,9870026.0,119700.44,0.0,540302.35,7.273567,777449.8,540302.35,119700.44,...,16.97,58.01,9.84,81.33,3.73,20.35,12.66,7537.25,9.2,1.45,-1.53,0.6,0.18,-2.14,0.04,-1.8,-7.52,714.14,775.78,613.48,492.61,108.95,138.5,61.64,29.55
2,High Affluence,CONTROL,3.Evening Peak,12,176032,29122,13199.0,33902,1581,8654,5581,17,2,1586,19271,255566.0,2571018.0,15919410.0,686709.17,0.0,2743795.14,6.76629,1193749.0,2743795.14,686709.17,...,16.75,46.46,7.78,67.45,20.95,27.12,13.47,12878.42,6.09,0.21,1.14,0.28,1.28,1.69,-0.13,2.09,-3.85,1220.9,1298.33,602.57,550.94,123.62,141.5,77.43,17.88


In [48]:
df_analysis_pre_post_v2_final = df_analysis_pre_post_v2[['affluence', 'group_tc', 'time_period', 'hex_count',
                                  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v2_final = df_analysis_pre_post_v2_final.sort_values(['time_period','affluence','group_tc'])

In [49]:
df_analysis_pre_post_v2_final

Unnamed: 0,affluence,group_tc,time_period,hex_count,fe2rr_pre,fe2rr_post,fe2rr_delta,g2n_pre,g2n_post,g2n_delta,fe2net_pre,fe2net_post,fe2net_delta,aor_pre,aor_post,aor_delta,demand_per_day_pre,demand_per_day_post,demand_pd_delta,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,mismatch_qr_pd_delta,ppkm_pre,...,fe_count_post,gross_orders_pre,gross_orders_post,net_orders_pre,net_orders_post,surged_fe_pre,surged_fe_post,surged_net_pre,surged_net_post,discount_pre,discount_post,sub_total_pre,sub_total_post,cobra_pre,cobra_post,ocara_pre,ocara_post,cobrm_pre,cobrm_post,expiry_mapped_pre,expiry_mapped_post,stockout_pre,stockout_post,accepted_orders_pre,accepted_orders_post
0,High Affluence,CONTROL,1.Morning Peak,12,18.62,18.54,-0.08,46.54,50.03,3.49,8.67,9.28,0.61,62.63,68.11,5.48,853.33,795.22,-58.11,324.43,308.39,135.19,137.61,2.42,13.55,...,86974,20179,16129,9391.0,8070.0,24227,21799,1377,1152,301304.0,124308.0,1940324.0,1425747.0,6202,4443,3315,2815,35,35,1201,721,3,1,12638,10985
4,High Affluence,TEST,1.Morning Peak,13,21.37,23.29,1.92,46.54,51.97,5.43,9.95,12.11,2.16,60.03,67.84,7.81,994.33,924.39,-69.94,244.57,213.22,157.67,160.11,2.44,13.56,...,82971,23467,19326,10922.0,10044.0,19777,236,1206,35,190160.0,94982.0,2018007.0,1549871.0,7071,5174,2993,2830,11,10,2441,1235,2,5,14087,13111
8,Low Affluence,CONTROL,1.Morning Peak,22,20.23,19.53,-0.7,44.59,48.64,4.05,9.02,9.5,0.48,63.28,69.35,6.07,1136.76,1125.83,-10.93,355.29,308.06,244.71,254.28,9.57,12.89,...,115252,27018,22513,12047.0,10950.0,17413,21733,1028,1220,321716.0,146349.0,2604112.0,2027520.0,8061,5883,4815,4251,42,84,1967,1267,16,15,17097,15613
12,Low Affluence,TEST,1.Morning Peak,23,16.95,18.67,1.72,49.22,48.52,-0.7,8.34,9.06,0.72,71.11,70.5,-0.61,1006.24,1128.11,121.87,478.67,425.83,208.9,224.72,15.82,13.08,...,120445,22951,22485,11296.0,10909.0,26038,453,1354,63,322583.0,169580.0,2386515.0,2135675.0,5874,5954,4754,4569,27,24,939,971,8,1,16320,15853
1,High Affluence,CONTROL,2.Afternoon,12,15.52,16.97,1.45,59.54,58.01,-1.53,9.24,9.84,0.6,81.15,81.33,0.18,714.14,775.78,61.64,613.48,492.61,108.95,138.5,29.55,12.7,...,90447,16580,15349,9872.0,8904.0,6271,3378,282,207,262304.0,121338.0,1569055.0,1318571.0,2897,2703,3464,3379,33,39,273,282,3,3,13455,12483
5,High Affluence,TEST,2.Afternoon,13,15.38,17.07,1.69,58.83,56.51,-2.32,9.05,9.64,0.59,80.17,77.84,-2.33,722.81,753.28,30.47,632.29,493.78,123.48,148.44,24.96,12.92,...,88665,16144,15132,9498.0,8551.0,10551,4696,441,255,169465.0,82129.0,1422483.0,1234081.0,2916,3032,3280,3000,6,20,393,485,1,2,12943,11779
9,Low Affluence,CONTROL,2.Afternoon,22,15.21,15.37,0.16,56.32,55.02,-1.3,8.57,8.45,-0.12,81.98,81.32,-0.66,995.62,1109.67,114.05,785.38,567.94,201.95,253.5,51.55,12.86,...,135454,21569,20814,12147.0,11451.0,16348,19770,741,806,307131.0,153793.0,2156202.0,1939320.0,3543,3547,5181,5074,54,70,547,585,9,11,17683,16925
13,Low Affluence,TEST,2.Afternoon,23,14.26,15.48,1.22,56.31,52.55,-3.76,8.03,8.14,0.11,84.14,80.35,-3.79,868.9,1009.89,140.99,876.71,646.39,148.1,205.94,57.84,12.93,...,124120,19191,19218,10806.0,10099.0,21596,5324,1030,238,299787.0,157087.0,2025199.0,1870644.0,2989,3526,4967,5044,15,19,345,452,3,3,16147,15441
2,High Affluence,CONTROL,3.Evening Peak,12,16.54,16.75,0.21,45.32,46.46,1.14,7.5,7.78,0.28,66.17,67.45,1.28,1220.9,1298.33,77.43,602.57,550.94,123.62,141.5,17.88,13.34,...,154541,29122,25886,13199.0,12027.0,33902,32379,1581,1267,255566.0,134096.0,2571018.0,2200549.0,8654,7371,5581,5027,17,27,1586,1368,2,1,19271,17460
6,High Affluence,TEST,3.Evening Peak,13,15.04,15.97,0.93,42.85,42.06,-0.79,6.44,6.72,0.28,61.82,62.01,0.19,1103.0,1143.89,40.89,722.0,640.5,134.95,150.28,15.33,13.51,...,145206,25446,23195,10903.0,9756.0,35902,19032,1350,448,190421.0,103503.0,2245611.0,1948377.0,7916,7134,4490,4298,17,33,2043,1904,2,6,15730,14383


In [50]:
df_analysis_pre_post_v2_final.to_clipboard(index=False)

## Analysis view 3 - On Hold

In [None]:
## Pre

df_analysis_pre_group_v3 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','weekday']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v3['fe2rr'] = (df_analysis_pre_group_v3['gross_orders']*100.0/df_analysis_pre_group_v3['fe_count']).round(2)
df_analysis_pre_group_v3['g2n'] = (df_analysis_pre_group_v3['net_orders']*100.0/df_analysis_pre_group_v3['gross_orders']).round(2)
df_analysis_pre_group_v3['fe2net'] = (df_analysis_pre_group_v3['net_orders']*100.0/df_analysis_pre_group_v3['fe_count']).round(2)
df_analysis_pre_group_v3['aor'] = (df_analysis_pre_group_v3['accepted_orders']*100.0/df_analysis_pre_group_v3['gross_orders']).round(2)
df_analysis_pre_group_v3['surged_fe_percentage'] = (df_analysis_pre_group_v3['surged_fe']*100.0/df_analysis_pre_group_v3['fe_count']).round(2)
df_analysis_pre_group_v3['surge_percentage'] = ((df_analysis_pre_group_v3['surge_dynamic_surge']+df_analysis_pre_group_v3['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v3['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v3['ppkm'] = (df_analysis_pre_group_v3['final_amount']/df_analysis_pre_group_v3['ride_distance']).round(2)
df_analysis_pre_group_v3['fe_hex'] = (df_analysis_pre_group_v3['fe_count']/df_analysis_pre_group_v3['hex_count']).round(2)
df_analysis_pre_group_v3['discount_percentage'] = (df_analysis_pre_group_v3['discount']*100.0/df_analysis_pre_group_v3['sub_total']).round(2)

In [None]:
## Post

df_analysis_post_group_v3 = df_post_raw \
                            .groupby(['affluence', 'group_tc','weekday']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v3['fe2rr'] = (df_analysis_post_group_v3['gross_orders']*100.0/df_analysis_post_group_v3['fe_count']).round(2)
df_analysis_post_group_v3['g2n'] = (df_analysis_post_group_v3['net_orders']*100.0/df_analysis_post_group_v3['gross_orders']).round(2)
df_analysis_post_group_v3['fe2net'] = (df_analysis_post_group_v3['net_orders']*100.0/df_analysis_post_group_v3['fe_count']).round(2)
df_analysis_post_group_v3['aor'] = (df_analysis_post_group_v3['accepted_orders']*100.0/df_analysis_post_group_v3['gross_orders']).round(2)
df_analysis_post_group_v3['surged_fe_percentage'] = (df_analysis_post_group_v3['surged_fe']*100.0/df_analysis_post_group_v3['fe_count']).round(2)
df_analysis_post_group_v3['surge_percentage'] = ((df_analysis_post_group_v3['surge_dynamic_surge']+df_analysis_post_group_v3['surge_dynamic_fare'])*100.0/df_analysis_post_group_v3['surge_rate_card_amount']).round(2)
df_analysis_post_group_v3['ppkm'] = (df_analysis_post_group_v3['final_amount']/df_analysis_post_group_v3['ride_distance']).round(2)
df_analysis_post_group_v3['fe_hex'] = (df_analysis_post_group_v3['fe_count']/df_analysis_post_group_v3['hex_count']).round(2)
df_analysis_post_group_v3['discount_percentage'] = (df_analysis_post_group_v3['discount']*100.0/df_analysis_post_group_v3['sub_total']).round(2)

In [None]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v3.columns]
    df_analysis_pre_group_v3.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v3.columns]
    df_analysis_post_group_v3.columns = post_new_column_names
    
    
add_pre_post()

In [None]:
view_3  = pd.merge(df_analysis_pre_group_v3,
             df_analysis_post_group_v3,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'weekday_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'weekday_post']
            )

view_3.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'weekday_pre' : 'weekday',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_3['fe2rr_delta'] = view_3['fe2rr_post'] - view_3['fe2rr_pre']
view_3['g2n_delta'] = view_3['g2n_post'] - view_3['g2n_pre']
view_3['fe2net_delta'] = view_3['fe2net_post'] - view_3['fe2net_pre']
view_3['aor_delta'] = view_3['aor_post'] - view_3['aor_pre']
view_3['surged_fe_delta'] = view_3['surged_fe_percentage_post'] - view_3['surged_fe_percentage_pre']
view_3['ppkm_delta'] = view_3['ppkm_pre'] - view_3['ppkm_post']
view_3['surge_percentage_delta'] = view_3['surge_percentage_post'] - view_3['surge_percentage_pre']
view_3['discount_percentage_delta'] = view_3['discount_percentage_post'] - view_3['discount_percentage_pre']

In [None]:
view_3.head(3)

In [None]:
df_analysis_pre_post_v3_dsm = pd.read_clipboard()
df_analysis_pre_post_v3_dsm.head(3)

In [None]:
df_analysis_pre_post_v3 = view_3.merge(df_analysis_pre_post_v3_dsm,
                                      how = 'inner',
                                      on = ['affluence','group_tc','weekday']
                                     )
df_analysis_pre_post_v3.head(3)

In [None]:
df_analysis_pre_post_v3_final = df_analysis_pre_post_v3[['affluence', 'group_tc', 'weekday', 'hex_count',
                                  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v3_final = df_analysis_pre_post_v3_final.sort_values(['weekday','affluence','group_tc'])

In [None]:
df_analysis_pre_post_v3_final

In [None]:
df_analysis_pre_post_v3_final.to_clipboard(index=False)

## Analysis view 4

In [51]:
## Pre

df_analysis_pre_group_v4 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','pickup_location','pickup_hex_id']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v4['fe2rr'] = (df_analysis_pre_group_v4['gross_orders']*100.0/df_analysis_pre_group_v4['fe_count']).round(2)
df_analysis_pre_group_v4['g2n'] = (df_analysis_pre_group_v4['net_orders']*100.0/df_analysis_pre_group_v4['gross_orders']).round(2)
df_analysis_pre_group_v4['fe2net'] = (df_analysis_pre_group_v4['net_orders']*100.0/df_analysis_pre_group_v4['fe_count']).round(2)
df_analysis_pre_group_v4['aor'] = (df_analysis_pre_group_v4['accepted_orders']*100.0/df_analysis_pre_group_v4['gross_orders']).round(2)
df_analysis_pre_group_v4['surged_fe_percentage'] = (df_analysis_pre_group_v4['surged_fe']*100.0/df_analysis_pre_group_v4['fe_count']).round(2)
df_analysis_pre_group_v4['surge_percentage'] = ((df_analysis_pre_group_v4['surge_dynamic_surge']+df_analysis_pre_group_v4['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v4['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v4['ppkm'] = (df_analysis_pre_group_v4['final_amount']/df_analysis_pre_group_v4['ride_distance']).round(2)
df_analysis_pre_group_v4['fe_hex'] = (df_analysis_pre_group_v4['fe_count']/df_analysis_pre_group_v4['hex_count']).round(2)
df_analysis_pre_group_v4['discount_percentage'] = (df_analysis_pre_group_v4['discount']*100.0/df_analysis_pre_group_v4['sub_total']).round(2)

In [52]:
## Post

df_analysis_post_group_v4 = df_post_raw \
                            .groupby(['affluence', 'group_tc','pickup_location', 'pickup_hex_id']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v4['fe2rr'] = (df_analysis_post_group_v4['gross_orders']*100.0/df_analysis_post_group_v4['fe_count']).round(2)
df_analysis_post_group_v4['g2n'] = (df_analysis_post_group_v4['net_orders']*100.0/df_analysis_post_group_v4['gross_orders']).round(2)
df_analysis_post_group_v4['fe2net'] = (df_analysis_post_group_v4['net_orders']*100.0/df_analysis_post_group_v4['fe_count']).round(2)
df_analysis_post_group_v4['aor'] = (df_analysis_post_group_v4['accepted_orders']*100.0/df_analysis_post_group_v4['gross_orders']).round(2)
df_analysis_post_group_v4['surged_fe_percentage'] = (df_analysis_post_group_v4['surged_fe']*100.0/df_analysis_post_group_v4['fe_count']).round(2)
df_analysis_post_group_v4['surge_percentage'] = ((df_analysis_post_group_v4['surge_dynamic_surge']+df_analysis_post_group_v4['surge_dynamic_fare'])*100.0/df_analysis_post_group_v4['surge_rate_card_amount']).round(2)
df_analysis_post_group_v4['ppkm'] = (df_analysis_post_group_v4['final_amount']/df_analysis_post_group_v4['ride_distance']).round(2)
df_analysis_post_group_v4['fe_hex'] = (df_analysis_post_group_v4['fe_count']/df_analysis_post_group_v4['hex_count']).round(2)
df_analysis_post_group_v4['discount_percentage'] = (df_analysis_post_group_v4['discount']*100.0/df_analysis_post_group_v4['sub_total']).round(2)

In [53]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v4.columns]
    df_analysis_pre_group_v4.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v4.columns]
    df_analysis_post_group_v4.columns = post_new_column_names
    
    
add_pre_post()

In [54]:
view_4  = pd.merge(df_analysis_pre_group_v4,
             df_analysis_post_group_v4,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'pickup_location_pre', 'pickup_hex_id_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'pickup_location_post', 'pickup_hex_id_post']
            )

view_4.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'pickup_location_pre' : 'pickup_location',
                         'pickup_hex_id_pre' : 'pickup_hex_id',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_4['fe2rr_delta'] = view_4['fe2rr_post'] - view_4['fe2rr_pre']
view_4['g2n_delta'] = view_4['g2n_post'] - view_4['g2n_pre']
view_4['fe2net_delta'] = view_4['fe2net_post'] - view_4['fe2net_pre']
view_4['aor_delta'] = view_4['aor_post'] - view_4['aor_pre']
view_4['surged_fe_delta'] = view_4['surged_fe_percentage_post'] - view_4['surged_fe_percentage_pre']
view_4['ppkm_delta'] = view_4['ppkm_pre'] - view_4['ppkm_post']
view_4['surge_percentage_delta'] = view_4['surge_percentage_post'] - view_4['surge_percentage_pre']
view_4['surged_fe_percentage_delta'] = view_4['surged_fe_percentage_post'] - view_4['surged_fe_percentage_pre']
view_4['discount_percentage_delta'] = view_4['discount_percentage_post'] - view_4['discount_percentage_pre']

In [55]:
view_4.head(5)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,...,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1,13990,2360,948.0,356,12,747,404,1,3,252,1389,17499.0,253997.0,1588049.0,19305.24,0.0,39800.57,8.997581,126469.995364,39800.57,...,0.0,112694.76,8.781928,99964.691416,112694.76,54485.09,0.0,17.13,48.12,8.24,67.66,8.83,48.35,13.1,11320.0,4.07,0.26,7.95,1.46,8.8,6.29,-0.54,-0.15,6.29,-2.82
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,1,16203,2868,1410.0,1198,57,755,555,9,7,123,2020,37634.0,250107.0,1282964.0,18797.24,0.0,89256.84,5.905646,95925.41388,89256.84,...,0.0,120031.68,5.900307,98729.84362,120031.68,28476.63,0.0,17.16,46.33,7.95,66.48,10.5,23.72,13.47,16689.0,8.04,-0.54,-2.83,-0.75,-3.95,3.11,-0.1,2.66,3.11,-7.01
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,1,14365,2859,1625.0,612,36,567,530,11,4,112,2170,40966.0,276433.0,1585374.0,15542.96,0.0,63462.37,8.385995,120749.936076,63462.37,...,0.0,66273.73,8.83313,108797.656423,66273.73,17414.21,0.0,19.51,58.79,11.47,81.08,5.42,26.28,13.24,12302.0,8.09,-0.39,1.95,0.16,5.18,1.16,-0.11,1.79,1.16,-6.73
3,High Affluence,CONTROL,Electronic City,8861892639fffff,1,55967,7858,4468.0,12337,610,1561,1700,3,2,106,6218,112651.0,842124.0,6171311.0,360152.95,0.0,1003929.82,8.063542,451590.630292,1003929.82,...,0.0,766300.02,8.046372,374880.488841,766300.02,271450.54,0.0,14.46,62.43,9.03,85.87,20.77,35.42,13.7,46567.0,7.55,0.42,5.57,1.05,6.74,-1.27,-0.03,-0.45,-1.27,-5.83
4,High Affluence,CONTROL,Horamavu,8861892c1dfffff,1,14199,2444,1342.0,3839,251,535,467,6,0,90,1825,37926.0,254285.0,1443652.0,139788.84,0.0,353032.35,7.219595,102677.074845,353032.35,...,0.0,284041.34,6.939077,81374.552544,284041.34,97096.6,0.0,17.18,54.55,9.37,75.56,27.34,34.18,14.07,11719.0,8.66,-0.03,-0.36,-0.08,0.89,0.3,-0.01,-5.42,0.3,-6.25


In [56]:
df_analysis_pre_post_v4_dsm = pd.read_clipboard()
df_analysis_pre_post_v4_dsm.head(3)

Unnamed: 0,affluence,group_tc,cluster,hex_id,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,96.57,97.89,69.9,67.56,24.19,26.89,1.32,2.7
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,125.81,146.22,44.29,24.28,46.81,54.67,20.41,7.86
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,125.76,124.89,113.19,121.22,27.24,29.5,-0.87,2.26


In [57]:
df_analysis_pre_post_v4 = view_4.merge(df_analysis_pre_post_v4_dsm,
                                      how = 'inner',
                                      left_on = ['affluence','group_tc','pickup_location','pickup_hex_id'],
                                      right_on = ['affluence','group_tc','cluster','hex_id']
                                     )
df_analysis_pre_post_v4.head(3)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,...,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta,cluster,hex_id,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1,13990,2360,948.0,356,12,747,404,1,3,252,1389,17499.0,253997.0,1588049.0,19305.24,0.0,39800.57,8.997581,126469.995364,39800.57,...,67.66,8.83,48.35,13.1,11320.0,4.07,0.26,7.95,1.46,8.8,6.29,-0.54,-0.15,6.29,-2.82,Akshaynagar,88618926adfffff,96.57,97.89,69.9,67.56,24.19,26.89,1.32,2.7
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,1,16203,2868,1410.0,1198,57,755,555,9,7,123,2020,37634.0,250107.0,1282964.0,18797.24,0.0,89256.84,5.905646,95925.41388,89256.84,...,66.48,10.5,23.72,13.47,16689.0,8.04,-0.54,-2.83,-0.75,-3.95,3.11,-0.1,2.66,3.11,-7.01,Banashankri North,8860145a61fffff,125.81,146.22,44.29,24.28,46.81,54.67,20.41,7.86
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,1,14365,2859,1625.0,612,36,567,530,11,4,112,2170,40966.0,276433.0,1585374.0,15542.96,0.0,63462.37,8.385995,120749.936076,63462.37,...,81.08,5.42,26.28,13.24,12302.0,8.09,-0.39,1.95,0.16,5.18,1.16,-0.11,1.79,1.16,-6.73,Chanasandra,88618921c7fffff,125.76,124.89,113.19,121.22,27.24,29.5,-0.87,2.26


In [58]:
df_analysis_pre_post_v4_final = df_analysis_pre_post_v4[['affluence', 'group_tc', 'pickup_location', 'pickup_hex_id',
                                                        'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v4_final = df_analysis_pre_post_v4_final.sort_values(['affluence','group_tc', 'pickup_location', 'pickup_hex_id'])

In [59]:
df_analysis_pre_post_v4_final

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,fe2rr_pre,fe2rr_post,fe2rr_delta,g2n_pre,g2n_post,g2n_delta,fe2net_pre,fe2net_post,fe2net_delta,aor_pre,aor_post,aor_delta,demand_per_day_pre,demand_per_day_post,demand_pd_delta,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,mismatch_qr_pd_delta,ppkm_pre,...,fe_count_post,gross_orders_pre,gross_orders_post,net_orders_pre,net_orders_post,surged_fe_pre,surged_fe_post,surged_net_pre,surged_net_post,discount_pre,discount_post,sub_total_pre,sub_total_post,cobra_pre,cobra_post,ocara_pre,ocara_post,cobrm_pre,cobrm_post,expiry_mapped_pre,expiry_mapped_post,stockout_pre,stockout_post,accepted_orders_pre,accepted_orders_post
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,16.87,17.13,0.26,40.17,48.12,7.95,6.78,8.24,1.46,58.86,67.66,8.8,96.57,97.89,1.32,69.9,67.56,24.19,26.89,2.7,12.56,...,11320,2360,1939,948.0,933.0,356,1000,12,35,17499.0,8320.0,253997.0,204219.0,747,508,404,323,1,0,252,165,3,3,1389,1312
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,17.7,17.16,-0.54,49.16,46.33,-2.83,8.7,7.95,-0.75,70.43,66.48,-3.95,125.81,146.22,20.41,44.29,24.28,46.81,54.67,7.86,13.37,...,16689,2868,2864,1410.0,1327.0,1198,1753,57,63,37634.0,18955.0,250107.0,235778.0,755,808,555,518,9,11,123,189,7,2,2020,1904
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,19.9,19.51,-0.39,56.84,58.79,1.95,11.31,11.47,0.16,75.9,81.08,5.18,125.76,124.89,-0.87,113.19,121.22,27.24,29.5,2.26,13.13,...,12302,2859,2400,1625.0,1411.0,612,667,36,28,40966.0,18311.0,276433.0,226343.0,567,395,530,516,11,19,112,48,4,3,2170,1946
3,High Affluence,CONTROL,Electronic City,8861892639fffff,14.04,14.46,0.42,56.86,62.43,5.57,7.98,9.03,1.05,79.13,85.87,6.74,345.33,356.83,11.5,289.33,293.83,32.24,38.5,6.26,13.67,...,46567,7858,6732,4468.0,4203.0,12337,9670,610,482,112651.0,50460.0,842124.0,667911.0,1561,985,1700,1458,3,2,106,66,2,0,6218,5781
4,High Affluence,CONTROL,Horamavu,8861892c1dfffff,17.21,17.18,-0.03,54.91,54.55,-0.36,9.45,9.37,-0.08,74.67,75.56,0.89,107.05,101.67,-5.38,114.81,89.39,28.62,30.28,1.66,14.06,...,11719,2444,2013,1342.0,1098.0,3839,3204,251,250,37926.0,15917.0,254285.0,183729.0,535,435,467,399,6,9,90,67,0,1,1825,1521
5,High Affluence,CONTROL,Kammanahalli HRBR Layout,8861892ea5fffff,16.64,17.04,0.4,54.98,60.24,5.26,9.15,10.26,1.11,75.48,81.59,6.11,346.14,356.17,10.03,264.14,203.44,47.14,55.72,8.58,13.59,...,40139,7727,6838,4248.0,4119.0,7291,5385,338,288,108766.0,49074.0,756204.0,624812.0,1735,1203,1511,1356,27,17,186,116,2,1,5832,5579
6,High Affluence,CONTROL,Mysore rd,8860145a33fffff,19.03,19.81,0.78,57.19,54.2,-2.99,10.88,10.74,-0.14,80.2,77.02,-3.18,82.43,99.11,16.68,82.14,66.39,20.19,31.33,11.14,12.79,...,9555,1864,1893,1066.0,1026.0,288,342,12,16,26979.0,13929.0,178382.0,181901.0,311,373,421,404,19,19,40,57,2,3,1495,1458
7,High Affluence,CONTROL,Ramamurthy Nagar,8861892e37fffff,18.25,17.34,-0.91,49.3,53.96,4.66,9.0,9.36,0.36,66.79,72.42,5.63,188.38,181.72,-6.66,83.24,61.56,44.9,50.0,5.1,13.25,...,20809,4529,3608,2233.0,1947.0,2936,2488,168,138,65002.0,23136.0,438355.0,322698.0,1305,881,743,625,2,3,232,141,0,0,3025,2613
8,High Affluence,CONTROL,Thanisandra,8861892cbdfffff,18.13,18.63,0.5,49.61,50.56,0.95,8.99,9.42,0.43,76.2,74.25,-1.95,144.62,157.17,12.55,136.19,112.56,32.76,38.22,5.46,13.08,...,15879,3193,2959,1584.0,1496.0,2403,1827,123,95,46852.0,22633.0,340847.0,284650.0,709,691,769,637,8,7,107,117,2,2,2433,2197
9,High Affluence,CONTROL,Venkatapura,88618925c9fffff,16.62,16.69,0.07,46.02,45.43,-0.59,7.65,7.58,-0.07,69.81,70.87,1.06,992.19,1047.5,55.31,425.71,390.22,69.33,74.89,5.56,13.78,...,122358,22704,20422,10449.0,9277.0,19482,20180,937,802,198404.0,103943.0,2003447.0,1708543.0,6138,5526,5255,4944,2,9,782,613,0,1,15849,14473


In [60]:
df_analysis_pre_post_v4_final.to_clipboard(index=False)

In [None]:
#df_analysis_pre_post_v4_final.to_csv('/Users/rapido/local-datasets/affluence/final/kepler_hex_view_all_exp_data.csv', index=False)

## Analysis view 5

In [71]:
## Pre

df_analysis_pre_group_v5 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','pickup_location','pickup_hex_id','time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v5['fe2rr'] = (df_analysis_pre_group_v5['gross_orders']*100.0/df_analysis_pre_group_v5['fe_count']).round(2)
df_analysis_pre_group_v5['g2n'] = (df_analysis_pre_group_v5['net_orders']*100.0/df_analysis_pre_group_v5['gross_orders']).round(2)
df_analysis_pre_group_v5['fe2net'] = (df_analysis_pre_group_v5['net_orders']*100.0/df_analysis_pre_group_v5['fe_count']).round(2)
df_analysis_pre_group_v5['aor'] = (df_analysis_pre_group_v5['accepted_orders']*100.0/df_analysis_pre_group_v5['gross_orders']).round(2)
df_analysis_pre_group_v5['surged_fe_percentage'] = (df_analysis_pre_group_v5['surged_fe']*100.0/df_analysis_pre_group_v5['fe_count']).round(2)
df_analysis_pre_group_v5['surge_percentage'] = ((df_analysis_pre_group_v5['surge_dynamic_surge']+df_analysis_pre_group_v5['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v5['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v5['ppkm'] = (df_analysis_pre_group_v5['final_amount']/df_analysis_pre_group_v5['ride_distance']).round(2)
df_analysis_pre_group_v5['fe_hex'] = (df_analysis_pre_group_v5['fe_count']/df_analysis_pre_group_v5['hex_count']).round(2)
df_analysis_pre_group_v5['discount_percentage'] = (df_analysis_pre_group_v5['discount']*100.0/df_analysis_pre_group_v5['sub_total']).round(2)

In [72]:
## Post

df_analysis_post_group_v5 = df_post_raw \
                            .groupby(['affluence', 'group_tc','pickup_location', 'pickup_hex_id', 'time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v5['fe2rr'] = (df_analysis_post_group_v5['gross_orders']*100.0/df_analysis_post_group_v5['fe_count']).round(2)
df_analysis_post_group_v5['g2n'] = (df_analysis_post_group_v5['net_orders']*100.0/df_analysis_post_group_v5['gross_orders']).round(2)
df_analysis_post_group_v5['fe2net'] = (df_analysis_post_group_v5['net_orders']*100.0/df_analysis_post_group_v5['fe_count']).round(2)
df_analysis_post_group_v5['aor'] = (df_analysis_post_group_v5['accepted_orders']*100.0/df_analysis_post_group_v5['gross_orders']).round(2)
df_analysis_post_group_v5['surged_fe_percentage'] = (df_analysis_post_group_v5['surged_fe']*100.0/df_analysis_post_group_v5['fe_count']).round(2)
df_analysis_post_group_v5['surge_percentage'] = ((df_analysis_post_group_v5['surge_dynamic_surge']+df_analysis_post_group_v5['surge_dynamic_fare'])*100.0/df_analysis_post_group_v5['surge_rate_card_amount']).round(2)
df_analysis_post_group_v5['ppkm'] = (df_analysis_post_group_v5['final_amount']/df_analysis_post_group_v5['ride_distance']).round(2)
df_analysis_post_group_v5['fe_hex'] = (df_analysis_post_group_v5['fe_count']/df_analysis_post_group_v5['hex_count']).round(2)
df_analysis_post_group_v5['discount_percentage'] = (df_analysis_post_group_v5['discount']*100.0/df_analysis_post_group_v5['sub_total']).round(2)

In [73]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v5.columns]
    df_analysis_pre_group_v5.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v5.columns]
    df_analysis_post_group_v5.columns = post_new_column_names
    
    
add_pre_post()

In [74]:
view_5  = pd.merge(df_analysis_pre_group_v5,
             df_analysis_post_group_v5,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'pickup_location_pre', 'pickup_hex_id_pre', 'time_period_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'pickup_location_post', 'pickup_hex_id_post', 'time_period_post']
            )

view_5.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'pickup_location_pre' : 'pickup_location',
                         'pickup_hex_id_pre' : 'pickup_hex_id',
                         'time_period_pre' : 'time_period',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_5['fe2rr_delta'] = view_5['fe2rr_post'] - view_5['fe2rr_pre']
view_5['g2n_delta'] = view_5['g2n_post'] - view_5['g2n_pre']
view_5['fe2net_delta'] = view_5['fe2net_post'] - view_5['fe2net_pre']
view_5['aor_delta'] = view_5['aor_post'] - view_5['aor_pre']
view_5['surged_fe_delta'] = view_5['surged_fe_percentage_post'] - view_5['surged_fe_percentage_pre']
view_5['ppkm_delta'] = view_5['ppkm_pre'] - view_5['ppkm_post']
view_5['surge_percentage_delta'] = view_5['surge_percentage_post'] - view_5['surge_percentage_pre']
view_5['surged_fe_percentage_delta'] = view_5['surged_fe_percentage_post'] - view_5['surged_fe_percentage_pre']
view_5['discount_percentage_delta'] = view_5['discount_percentage_post'] - view_5['discount_percentage_pre']

In [75]:
view_5.head(3)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,...,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1.Morning Peak,1,6057,1068,296.0,337,10,447,130,1,0,193,432,1559.0,115638.0,709852.0,18955.89,0.0,38113.21,9.301302,56830.953254,...,0.0,111012.52,9.327199,43707.2522,111012.52,54148.62,0.0,16.62,35.93,5.97,52.79,20.94,48.78,13.5,4638.0,1.33,-1.01,8.21,1.08,12.34,15.38,-1.01,-0.96,15.38,-0.02
1,High Affluence,CONTROL,Akshaynagar,88618926adfffff,2.Afternoon,1,3394,450,254.0,10,1,88,93,0,0,15,360,7402.0,48334.0,366277.0,188.76,0.0,943.81,8.788995,29838.638882,...,0.0,0.0,8.165223,19784.336079,0.0,0.0,0.0,16.87,60.05,10.13,79.66,0.0,,12.43,2419.0,7.64,3.61,3.61,2.65,-0.34,-0.29,-0.15,,-0.29,-7.67
2,High Affluence,CONTROL,Akshaynagar,88618926adfffff,3.Evening Peak,1,2807,484,275.0,8,0,112,89,0,0,6,371,8103.0,53708.0,300326.0,140.7,0.0,703.55,8.563327,24037.25991,...,0.0,1682.24,8.280135,23142.978138,1682.24,336.47,0.0,17.59,59.67,10.49,81.47,1.04,20.0,12.54,2792.0,7.64,0.35,2.85,0.69,4.82,0.75,-0.05,0.0,0.75,-7.45


In [76]:
df_analysis_pre_post_v5_dsm = pd.read_clipboard()
df_analysis_pre_post_v5_dsm.head(3)

Unnamed: 0,affluence,group_tc,cluster,hex_id,time_period,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1.Morning Peak,41.9,36.78,6.62,3.89,11.38,11.83,-5.12,0.45
1,High Affluence,CONTROL,Akshaynagar,88618926adfffff,2.Afternoon,21.0,20.67,19.48,17.11,6.24,6.61,-0.33,0.37
2,High Affluence,CONTROL,Akshaynagar,88618926adfffff,3.Evening Peak,21.57,27.83,28.14,30.0,3.86,5.33,6.26,1.47


In [78]:
df_analysis_pre_post_v5 = view_5.merge(df_analysis_pre_post_v5_dsm,
                                      how = 'inner',
                                      left_on = ['affluence','group_tc','pickup_location','pickup_hex_id', 'time_period'],
                                      right_on = ['affluence','group_tc','cluster','hex_id', 'time_period']
                                     )
df_analysis_pre_post_v5.head(3)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,...,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta,cluster,hex_id,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1.Morning Peak,1,6057,1068,296.0,337,10,447,130,1,0,193,432,1559.0,115638.0,709852.0,18955.89,0.0,38113.21,9.301302,56830.953254,...,52.79,20.94,48.78,13.5,4638.0,1.33,-1.01,8.21,1.08,12.34,15.38,-1.01,-0.96,15.38,-0.02,Akshaynagar,88618926adfffff,41.9,36.78,6.62,3.89,11.38,11.83,-5.12,0.45
1,High Affluence,CONTROL,Akshaynagar,88618926adfffff,2.Afternoon,1,3394,450,254.0,10,1,88,93,0,0,15,360,7402.0,48334.0,366277.0,188.76,0.0,943.81,8.788995,29838.638882,...,79.66,0.0,,12.43,2419.0,7.64,3.61,3.61,2.65,-0.34,-0.29,-0.15,,-0.29,-7.67,Akshaynagar,88618926adfffff,21.0,20.67,19.48,17.11,6.24,6.61,-0.33,0.37
2,High Affluence,CONTROL,Akshaynagar,88618926adfffff,3.Evening Peak,1,2807,484,275.0,8,0,112,89,0,0,6,371,8103.0,53708.0,300326.0,140.7,0.0,703.55,8.563327,24037.25991,...,81.47,1.04,20.0,12.54,2792.0,7.64,0.35,2.85,0.69,4.82,0.75,-0.05,0.0,0.75,-7.45,Akshaynagar,88618926adfffff,21.57,27.83,28.14,30.0,3.86,5.33,6.26,1.47


In [79]:
df_analysis_pre_post_v5_final = df_analysis_pre_post_v5[['affluence', 'group_tc', 'pickup_location', 'pickup_hex_id',
                                                         'time_period',  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v5_final = df_analysis_pre_post_v5_final.sort_values(['affluence','group_tc', 'pickup_location', 'pickup_hex_id', 'time_period'])

In [80]:
df_analysis_pre_post_v5_final.to_clipboard(index=False)