In [1]:
import os
import h3 as h3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from pyhive import presto
from keplergl import KeplerGl
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

In [2]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 300)

In [3]:
## Connection
connection = presto.connect(
        host='presto-gateway.serving.data.production.internal',
        port=80,
        protocol='http',
        catalog='hive',
        username='manoj.ravirajan@rapido.bike'
)

## Parameter 

In [4]:
city = 'Bangalore'
service = 'Link'

In [5]:
## datasets.service_mapping

service_mapping = f"""
        SELECT 
            city_display_name AS city,
            service_level AS service_name,
            service_detail_id,
            city_id,
            service_id
        FROM 
            datasets.service_mapping
        WHERE 
            city_display_name = '{city}'
            AND service_level = '{service}'
"""

df_service_mapping = pd.read_sql(service_mapping, connection)
df_service_mapping.head()

Unnamed: 0,city,service_name,service_detail_id,city_id,service_id
0,Bangalore,Link,57370b61a6855d70057417d1,572ca7ff116b5db3057bd814,572e29b0116b5db3057bd821


In [6]:
service_detail_id = df_service_mapping.service_detail_id.loc[0]
service_detail_id

'57370b61a6855d70057417d1'

## Dataset

In [7]:
pre_start_date = '20230724'
pre_end_date = '20230813'
post_start_date = '20230824'
post_end_date = '20230910'

In [8]:
## Experimental Hex's

experimental_hex = pd.read_csv('/Users/rapido/local-datasets/affluence/experiment/exp_affluence_bangalore_link_circuit_break_hex_8_list_v1.csv')
experimental_hex_list = experimental_hex[['pickup_cluster', 'pickup_hex_8','income_signal', 'affluence_tag','group_tc']]

In [9]:
experimental_hex_list.groupby(['affluence_tag', 'group_tc']).pickup_hex_8.count()

affluence_tag   group_tc
High Affluence  control     12
                test        13
Low Affluence   control     22
                test        23
Name: pickup_hex_8, dtype: int64

In [10]:
exp_hex_list = experimental_hex_list['pickup_hex_8'].values.tolist()
len(exp_hex_list)

70

AND fe_ench.pickup_location_hex_8 IN ('88618921d3fffff','88618921c7fffff','8861892665fffff','8860145a33fffff','8861892c97fffff','886189219bfffff','88618920b3fffff','8861892e37fffff','8861892ed9fffff','8861892c1dfffff','8861892c11fffff','8861892ea5fffff','8861892c55fffff','8861892639fffff','886189246dfffff','8861892cbdfffff','8861892ee5fffff','88618925c9fffff','88618925bbfffff','8860145a61fffff','8861892431fffff','88618926adfffff','8860145b43fffff','8861892ed3fffff','88618925a5fffff','886014594bfffff','8860145863fffff','8861892ca3fffff','8860145829fffff','8860145a05fffff','8860169669fffff','8860145943fffff','8861892dd5fffff','886014584dfffff','886189244dfffff','88618925a9fffff','8860145913fffff','8861892c0bfffff','8861892cc1fffff','8860145ad1fffff','88618925e3fffff','886016966bfffff','88618920e9fffff','8861892645fffff','8861892521fffff','8861892ccbfffff','8861892e63fffff','8861892f13fffff','8860145ad9fffff','88618921e3fffff','8860145b51fffff','8861892e2bfffff','8861892c8bfffff','8860145b67fffff','8861892e2dfffff','8861892db5fffff','8860145a21fffff','8861892db7fffff','8860145a29fffff','8861892c95fffff','8861892eb1fffff','8861892eb7fffff','8861892eb5fffff','8860145b53fffff','8861892e93fffff','8860145b55fffff','886014595dfffff','8861892e85fffff','8860145b59fffff','8860169663fffff')


In [11]:
## pricing.fare_estimates_enriched

raw_dataset = f"""

        WITH city_cluster_hex AS (

                SELECT
                    cch.hex_id AS hex_id,
                    cch.cluster AS cluster
                FROM
                    datasets.city_cluster_hex cch
                WHERE
                    cch.resolution = 8
                    AND cch.cluster != ''
            ),

            fare_estimates AS (

                SELECT
                    fe_ench.yyyymmdd AS yyyymmdd,
                    CAST(DAY_OF_WEEK(DATE_PARSE(fe_ench.yyyymmdd, '%Y%m%d')) AS VARCHAR ) || '. ' || DATE_FORMAT(DATE_PARSE(fe_ench.yyyymmdd, '%Y%m%d'), '%W') weekday,
                    -- DATE_FORMAT(DATE_PARSE(fe_ench.yyyymmdd, '%Y%m%d'), '%W') weekday,
                    fe_ench.quarter_hour AS quarter_hour,
                    CASE 
                    WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) >= 8 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) <= 11 THEN '1.Morning Peak'
                    WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) >= 17 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) <= 21 THEN '3.Evening Peak'
                    WHEN CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) > 11 AND CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) < 17 THEN '2.Afternoon'
                    ELSE '4.Rest' END AS time_period,
                    CAST(SUBSTR(fe_ench.quarter_hour, 1,2) AS INT) AS hour,
                    fe_ench.city AS city_name,
                    fe_ench.service_name AS service_name,
                    fe_ench.service_detail_id AS service_detail_id,
                    fe_ench.pickup_location_hex_8 AS pickup_location_hex_8,
                    fe_ench.drop_location_hex_8 AS drop_location_hex_8,
                    COALESCE(fe_ench.price_increase_strategy,'surge_not_applied') AS surge_strategy,
                    COALESCE(fe_ench.is_gradient_applied,false) AS gradient_applied,
                    fe_ench.fare_estimate_id AS fare_estimate_id,
                    fe_ench.user_id AS customer_id,
                    -- fe_ench.sub_total,
                    fe_ench.final_amount,
                    fe_ench.dynamic_surge AS dynamic_surge,
                    fe_ench.dynamic_fare AS dynamic_fare,
                    fe_ench.ride_distance AS ride_distance

                FROM
                    pricing.fare_estimates_enriched fe_ench
                WHERE
                    -- fe_ench.yyyymmdd BETWEEN '{pre_start_date}' AND '{pre_end_date}'
                    fe_ench.yyyymmdd BETWEEN '{post_start_date}' AND '{post_end_date}'
                    AND fe_ench.service_detail_id = '{service_detail_id}'
                    AND fe_ench.pickup_location_hex_8 IN ('88618921d3fffff','88618921c7fffff','8861892665fffff','8860145a33fffff','8861892c97fffff','886189219bfffff','88618920b3fffff','8861892e37fffff','8861892ed9fffff','8861892c1dfffff','8861892c11fffff','8861892ea5fffff','8861892c55fffff','8861892639fffff','886189246dfffff','8861892cbdfffff','8861892ee5fffff','88618925c9fffff','88618925bbfffff','8860145a61fffff','8861892431fffff','88618926adfffff','8860145b43fffff','8861892ed3fffff','88618925a5fffff','886014594bfffff','8860145863fffff','8861892ca3fffff','8860145829fffff','8860145a05fffff','8860169669fffff','8860145943fffff','8861892dd5fffff','886014584dfffff','886189244dfffff','88618925a9fffff','8860145913fffff','8861892c0bfffff','8861892cc1fffff','8860145ad1fffff','88618925e3fffff','886016966bfffff','88618920e9fffff','8861892645fffff','8861892521fffff','8861892ccbfffff','8861892e63fffff','8861892f13fffff','8860145ad9fffff','88618921e3fffff','8860145b51fffff','8861892e2bfffff','8861892c8bfffff','8860145b67fffff','8861892e2dfffff','8861892db5fffff','8860145a21fffff','8861892db7fffff','8860145a29fffff','8861892c95fffff','8861892eb1fffff','8861892eb7fffff','8861892eb5fffff','8860145b53fffff','8861892e93fffff','8860145b55fffff','886014595dfffff','8861892e85fffff','8860145b59fffff','8860169663fffff')
            ),

            rate_card AS (

                SELECT 
                    city,
                    yyyymmdd,
                    fare_estimate_id,
                    ROUND(CAST(rate_card_amount AS DOUBLE),2) AS rate_card_amount
                FROM 
                    experiments.iprice_cleaned_responses_v2 
                WHERE 
                    -- yyyymmdd BETWEEN '{pre_start_date}' AND '{pre_end_date}'
                    yyyymmdd BETWEEN '{post_start_date}' AND '{post_end_date}'
                    AND service_detail_id = '{service_detail_id}'
            ),

            orders AS (

                SELECT
                    ols.yyyymmdd AS yyyymmdd,
                    ols.service_detail_id AS service_detail_id,
                    ols.customer_id AS customer_id,
                    ols.estimate_id AS fare_estimate_id,
                    ols.order_status AS order_status,
                    ols.order_id AS order_id,
                    ols.spd_fraud_flag AS spd_fraud_flag,
                    ols.discount AS discount, 
                    ols.sub_total AS sub_total,
                    ols.rate_card_amount AS rate_card_amount,
                    ols.accept_to_pickup_distance  AS accept_to_pickup_distance,
                    ROW_NUMBER() OVER (PARTITION BY ols.order_id ORDER BY ols.updated_epoch DESC) AS row_number,
                    CASE 
                    WHEN ols.order_status IN ('dropped') AND ols.spd_fraud_flag != True THEN 'net_orders'
                    WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason IN ('order cancelled before rider accepted') THEN 'cobra'
                    WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason IN ('Order cancelled before rider was mapped') THEN 'cobrm'
                    WHEN ols.order_status IN ('customerCancelled') AND ols.cancel_reason NOT IN ('order cancelled before rider accepted', 'Order cancelled before rider was mapped') THEN 'ocara'
                    WHEN ols.order_status IN ('expired') AND length(ols.map_riders) < 28 THEN 'stockout'
                    WHEN ols.order_status IN ('expired') AND length(ols.map_riders) >= 28 THEN 'expiry_mapped' 
                    ELSE 'Other' 
                    END AS order_state

                FROM
                    orders.order_logs_snapshot ols
                WHERE
                    -- yyyymmdd BETWEEN '{pre_start_date}' AND '{pre_end_date}'
                    yyyymmdd BETWEEN '{post_start_date}' AND '{post_end_date}'
                    AND service_detail_id = '{service_detail_id}'
            )
        
        SELECT 
            fe.yyyymmdd AS yyyymmdd,
            fe.weekday AS weekday,
            fe.quarter_hour AS quarter_hour,
            fe.hour AS hour,
            fe.time_period,
            CASE 
            WHEN hour IN (8,9) THEN '0800-0959'
            WHEN hour IN (10,11) THEN '1000-1159'
            WHEN hour IN (17,18) THEN '1700-1859'
            WHEN hour IN (19,20,21) THEN '1900-2159'
            WHEN hour IN (12,13) THEN '1200-1359'
            WHEN hour IN (14,15,16) THEN '1400-1659'
            ELSE 'Rest'
            END AS hour_bucket,
            fe.city_name AS city_name,
            fe.service_name AS service_name,
            fe.service_detail_id AS service_detail_id,
            fe.customer_id AS fe_customer_id,
            pic.cluster AS pickup_location,
            pic.hex_id AS pickup_hex_id,
            
            CASE 
            WHEN pic.hex_id IN ('88618921d3fffff', '88618921c7fffff', '8861892665fffff', '8860145a33fffff', '8861892c97fffff', '886189219bfffff', '88618920b3fffff', '8861892e37fffff', '8861892ed9fffff',
                                '8861892c1dfffff', '8861892c11fffff', '8861892ea5fffff', '8861892c55fffff', '8861892639fffff', '886189246dfffff', '8861892cbdfffff', '8861892ee5fffff', '88618925c9fffff',
                                '88618925bbfffff', '8860145a61fffff', '8861892431fffff', '88618926adfffff', '8860145b43fffff', '8861892ed3fffff', '88618925a5fffff')
            THEN 'High Affluence'
            WHEN pic.hex_id IN ('886014594bfffff', '8860145863fffff', '8861892ca3fffff', '8860145829fffff', '8860145a05fffff', '8860169669fffff', '8860145943fffff', '8861892dd5fffff', '886014584dfffff',
                                '886189244dfffff', '88618925a9fffff', '8860145913fffff', '8861892c0bfffff', '8861892cc1fffff', '8860145ad1fffff', '88618925e3fffff', '886016966bfffff', '88618920e9fffff',
                                '8861892645fffff', '8861892521fffff', '8861892ccbfffff', '8861892e63fffff', '8861892f13fffff', '8860145ad9fffff', '88618921e3fffff', '8860145b51fffff', '8861892e2bfffff', 
                                '8861892c8bfffff', '8860145b67fffff', '8861892e2dfffff', '8861892db5fffff', '8860145a21fffff', '8861892db7fffff', '8860145a29fffff', '8861892c95fffff', '8861892eb1fffff', 
                                '8861892eb7fffff', '8861892eb5fffff', '8860145b53fffff', '8861892e93fffff', '8860145b55fffff', '886014595dfffff', '8861892e85fffff', '8860145b59fffff', '8860169663fffff')
            THEN 'Low Affluence'
            ELSE 'OTHER' END affluence,
            
            CASE 
            WHEN pic.hex_id IN ('88618921d3fffff', '8861892665fffff', '8861892c97fffff', '88618920b3fffff', '8861892ed9fffff', '8861892c11fffff', '8861892c55fffff', '886189246dfffff', '8861892ee5fffff',
                                '88618925bbfffff', '8861892431fffff', '8860145b43fffff', '88618925a5fffff', '886014594bfffff', '8861892ca3fffff', '8860145a05fffff', '8860145943fffff', '886014584dfffff',
                                '88618925a9fffff', '8861892c0bfffff', '8860145ad1fffff', '886016966bfffff', '8861892645fffff', '8861892ccbfffff', '8861892f13fffff', '88618921e3fffff', '8861892e2bfffff',
                                '8860145b67fffff', '8861892db5fffff', '8861892db7fffff', '8861892c95fffff', '8861892eb7fffff', '8860145b53fffff', '8860145b55fffff', '8861892e85fffff', '8860169663fffff')
            THEN 'TEST'
            WHEN pic.hex_id IN ('88618921c7fffff', '8860145a33fffff', '886189219bfffff', '8861892e37fffff', '8861892c1dfffff', '8861892ea5fffff', '8861892639fffff', '8861892cbdfffff', '88618925c9fffff',
                                '8860145a61fffff', '88618926adfffff', '8861892ed3fffff', '8860145863fffff', '8860145829fffff', '8860169669fffff', '8861892dd5fffff', '886189244dfffff', '8860145913fffff',
                                '8861892cc1fffff', '88618925e3fffff', '88618920e9fffff', '8861892521fffff', '8861892e63fffff', '8860145ad9fffff', '8860145b51fffff', '8861892c8bfffff', '8861892e2dfffff',
                                '8860145a21fffff', '8860145a29fffff', '8861892eb1fffff', '8861892eb5fffff', '8861892e93fffff', '886014595dfffff', '8860145b59fffff')
            THEN 'CONTROL'
            ELSE 'OTHER' END group_tc,
            
            fe.surge_strategy AS surge_strategy,
            fe.fare_estimate_id AS fare_estimate_id,
            -- fe.sub_total,
            fe.final_amount,
            
            CASE WHEN surge_strategy NOT IN ('surge_not_applied', 'circuit_broken') THEN fe.dynamic_surge END dynamic_surge,
            CASE WHEN surge_strategy NOT IN ('surge_not_applied', 'circuit_broken') THEN fe.dynamic_fare END dynamic_fare,
            CASE WHEN surge_strategy NOT IN ('surge_not_applied', 'circuit_broken') THEN COALESCE(rc.rate_card_amount, ord.rate_card_amount) END rate_card_amount,
            
            fe.ride_distance AS ride_distance,
            ord.customer_id AS gross_customer_id,
            ord.order_id,
            ord.order_status,
            ord.spd_fraud_flag,
            ord.discount,
            ord.sub_total,
            ord.order_state,
            ord.row_number,
            ord.accept_to_pickup_distance,
            
            case 
            when fe.ride_distance <= 2 then '0-2'
            when fe.ride_distance > 2 and fe.ride_distance <= 5 then '2-5'
            when fe.ride_distance > 5 and fe.ride_distance <= 10 then '5-10'
            when fe.ride_distance > 10 then 'Rest'
            else 'check'
            end as distance_tag
            
            
        FROM
            fare_estimates fe
            
        LEFT JOIN
            city_cluster_hex pic
            ON fe.pickup_location_hex_8 = pic.hex_id
            
        LEFT JOIN
            rate_card rc
            ON fe.yyyymmdd = rc.yyyymmdd
            AND fe.city_name = rc.city
            AND fe.fare_estimate_id = rc.fare_estimate_id
        
        LEFT JOIN
            orders ord
            ON fe.yyyymmdd = ord.yyyymmdd
            AND fe.service_detail_id = ord.service_detail_id
            AND fe.fare_estimate_id = ord.fare_estimate_id
"""

In [12]:
# df_raw_dataset = pd.read_sql(raw_dataset, connection)
# df_raw_dataset.head(3)

In [13]:
# df_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/pre_raw_data_{}_to_{}.csv' \
#                               .format(pre_start_date, pre_end_date)
#                               , index = False)

'''
df_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/post_raw_data_{}_to_{}.csv' \
                                .format(post_start_date, post_end_date)
                               , index = False)
# '''

"\ndf_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/post_raw_data_{}_to_{}.csv'                                 .format(post_start_date, post_end_date)\n                               , index = False)\n# "

In [14]:
df_pre_raw_dataset = pd.read_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/pre_raw_data_{}_to_{}.csv' \
                               .format(pre_start_date, pre_end_date))
df_post_raw_dataset = pd.read_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/post_raw_data_{}_to_{}.csv' \
                               .format(post_start_date, post_end_date))

In [15]:
print(df_pre_raw_dataset.yyyymmdd.nunique())
print(df_post_raw_dataset.yyyymmdd.nunique())

21
18


In [16]:
df_pre_raw_dataset = df_pre_raw_dataset[df_pre_raw_dataset['weekday'].isin(['1. Monday', 
                                                                            '2. Tuesday',
                                                                            '3. Wednesday',
                                                                            '4. Thursday',
                                                                            '5. Friday'
                                                                           ])]

df_post_raw_dataset = df_post_raw_dataset[df_post_raw_dataset['weekday'].isin(['1. Monday', 
                                                                            '2. Tuesday',
                                                                            '3. Wednesday',
                                                                            '4. Thursday',
                                                                            '5. Friday'
                                                                           ])]

In [17]:
df_pre_raw = df_pre_raw_dataset.copy(deep=True)
print(df_pre_raw.shape)
df_post_raw = df_post_raw_dataset.copy(deep=True)
print(df_post_raw.shape)

(1389841, 31)
(1190311, 31)


In [18]:
print('--------------PRE----------------')
print(df_pre_raw.yyyymmdd.nunique())
print('---------------------------------')
print(df_pre_raw.groupby(['yyyymmdd','weekday']).fare_estimate_id.nunique())

print('--------------POST----------------')
print(df_post_raw.yyyymmdd.nunique())
print('---------------------------------')
print(df_post_raw.groupby(['yyyymmdd','weekday']).fare_estimate_id.nunique())

--------------PRE----------------
15
---------------------------------
yyyymmdd  weekday     
20230724  1. Monday       104854
20230725  2. Tuesday      100889
20230726  3. Wednesday     92766
20230727  4. Thursday      78652
20230728  5. Friday        83816
20230731  1. Monday        96735
20230801  2. Tuesday       89888
20230802  3. Wednesday     85797
20230803  4. Thursday      81642
20230804  5. Friday        84636
20230807  1. Monday       104166
20230808  2. Tuesday       93159
20230809  3. Wednesday     92333
20230810  4. Thursday      96785
20230811  5. Friday        99987
Name: fare_estimate_id, dtype: int64
--------------POST----------------
12
---------------------------------
yyyymmdd  weekday     
20230824  4. Thursday      91159
20230825  5. Friday        82374
20230828  1. Monday        97221
20230829  2. Tuesday       81080
20230830  3. Wednesday     86651
20230831  4. Thursday     103850
20230901  5. Friday        99628
20230904  1. Monday       113619
20230905  2. Tu

## DE

In [19]:
df_pre_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag
0,20230725,2. Tuesday,2045,20,3.Evening Peak,1900-2159,Bangalore,Link,57370b61a6855d70057417d1,6222d0e7c1a4e430d4643577,Venkatapura,88618925c9fffff,High Affluence,CONTROL,surge_not_applied,64bfe8a5c41fe9d38ab98d5c,67.0,,,,5.076,,,,,,,,,,5-10
2,20230802,3. Wednesday,1200,12,2.Afternoon,1200-1359,Bangalore,Link,57370b61a6855d70057417d1,62fdd68404cb1b0d2809bb87,Mathikere Lake,8860145943fffff,Low Affluence,TEST,surge_not_applied,64c9f99ecb0a938404995b74,186.0,,,,16.428,,,,,,,,,,Rest


In [20]:
df_post_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag
0,20230830,3. Wednesday,815,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,62a74e0a7d7967549ee7a7e6,Electronic City,8861892639fffff,High Affluence,CONTROL,surge_not_applied,64eeb08f1bd630639fa939ae,41.0,,,,0.09,,,,,,,,,,0-2
1,20230829,2. Tuesday,800,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,64eb0efd5a6ec79d09c3504c,Majestic,8860145b55fffff,Low Affluence,TEST,circuit_broken,64ed5ba81ed3d92ced7ef496,117.0,,,,10.836,,,,,,,,,,Rest


In [21]:
## Feature engineering 

def feature_engineering():

    def get_dropped_order(row):
        return 1 if row['order_status'] == 'dropped' and not row['spd_fraud_flag'] else None

    def get_surged_fe(row):
        return row['fare_estimate_id'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_fe_rate_card(row):
        return row['rate_card_amount'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_fe_dynamic_surge(row):
        return row['dynamic_surge'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_fe_dynamic_fare(row):
        return row['dynamic_fare'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' else None

    def get_surged_net(row):
        return row['fare_estimate_id'] if row['surge_strategy'] != 'surge_not_applied' and row['surge_strategy'] != 'circuit_broken' and row['order_status'] == 'dropped' and not row['spd_fraud_flag'] else None

    def get_cobra(row):
        return row['order_id'] if row['order_state'] == 'cobra' and row['row_number'] == 1 else None

    def get_ocara(row):
        return row['order_id'] if row['order_state'] == 'ocara' and row['row_number'] == 1 else None

    def get_cobrm(row):
        return row['order_id'] if row['order_state'] == 'cobrm' and row['row_number'] == 1 else None

    def get_stockout(row):
        return row['order_id'] if row['order_state'] == 'stockout' and row['row_number'] == 1 else None

    def get_expiry_mapped(row):
        return row['order_id'] if row['order_state'] == 'expiry_mapped' and row['row_number'] == 1 else None

    def get_accepted_orders(row):
        return row['order_id'] if row['accept_to_pickup_distance'] > 0 else None

    ## Pre
    df_pre_raw['net_orders'] = df_pre_raw.apply(get_dropped_order, axis=1)
    df_pre_raw['surged_fe'] = df_pre_raw.apply(get_surged_fe, axis=1)
    df_pre_raw['surged_net'] = df_pre_raw.apply(get_surged_net, axis=1)
    df_pre_raw['cobra'] = df_pre_raw.apply(get_cobra, axis=1)
    df_pre_raw['ocara'] = df_pre_raw.apply(get_ocara, axis=1)
    df_pre_raw['cobrm'] = df_pre_raw.apply(get_cobrm, axis=1)
    df_pre_raw['stockout'] = df_pre_raw.apply(get_stockout, axis=1)
    df_pre_raw['expiry_mapped'] = df_pre_raw.apply(get_expiry_mapped, axis=1)
    df_pre_raw['accepted_orders'] = df_pre_raw.apply(get_accepted_orders, axis=1)
    df_pre_raw['surge_rate_card_amount'] = df_pre_raw.apply(get_surged_fe_rate_card, axis=1)
    df_pre_raw['surge_dynamic_surge'] = df_pre_raw.apply(get_surged_fe_dynamic_surge, axis=1)
    df_pre_raw['surge_dynamic_fare'] = df_pre_raw.apply(get_surged_fe_dynamic_fare, axis=1)
    
    ## Post
    df_post_raw['net_orders'] = df_post_raw.apply(get_dropped_order, axis=1)
    df_post_raw['surged_fe'] = df_post_raw.apply(get_surged_fe, axis=1)
    df_post_raw['surged_net'] = df_post_raw.apply(get_surged_net, axis=1)
    df_post_raw['cobra'] = df_post_raw.apply(get_cobra, axis=1)
    df_post_raw['ocara'] = df_post_raw.apply(get_ocara, axis=1)
    df_post_raw['cobrm'] = df_post_raw.apply(get_cobrm, axis=1)
    df_post_raw['stockout'] = df_post_raw.apply(get_stockout, axis=1)
    df_post_raw['expiry_mapped'] = df_post_raw.apply(get_expiry_mapped, axis=1)
    df_post_raw['accepted_orders'] = df_post_raw.apply(get_accepted_orders, axis=1)
    df_post_raw['surge_rate_card_amount'] = df_post_raw.apply(get_surged_fe_rate_card, axis=1)
    df_post_raw['surge_dynamic_surge'] = df_post_raw.apply(get_surged_fe_dynamic_surge, axis=1)
    df_post_raw['surge_dynamic_fare'] = df_post_raw.apply(get_surged_fe_dynamic_fare, axis=1)
    
feature_engineering()

In [22]:
df_pre_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag,net_orders,surged_fe,surged_net,cobra,ocara,cobrm,stockout,expiry_mapped,accepted_orders,surge_rate_card_amount,surge_dynamic_surge,surge_dynamic_fare
0,20230725,2. Tuesday,2045,20,3.Evening Peak,1900-2159,Bangalore,Link,57370b61a6855d70057417d1,6222d0e7c1a4e430d4643577,Venkatapura,88618925c9fffff,High Affluence,CONTROL,surge_not_applied,64bfe8a5c41fe9d38ab98d5c,67.0,,,,5.076,,,,,,,,,,5-10,,,,,,,,,,,,
2,20230802,3. Wednesday,1200,12,2.Afternoon,1200-1359,Bangalore,Link,57370b61a6855d70057417d1,62fdd68404cb1b0d2809bb87,Mathikere Lake,8860145943fffff,Low Affluence,TEST,surge_not_applied,64c9f99ecb0a938404995b74,186.0,,,,16.428,,,,,,,,,,Rest,,,,,,,,,,,,


In [23]:
df_post_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,quarter_hour,hour,time_period,hour_bucket,city_name,service_name,service_detail_id,fe_customer_id,pickup_location,pickup_hex_id,affluence,group_tc,surge_strategy,fare_estimate_id,final_amount,dynamic_surge,dynamic_fare,rate_card_amount,ride_distance,gross_customer_id,order_id,order_status,spd_fraud_flag,discount,sub_total,order_state,row_number,accept_to_pickup_distance,distance_tag,net_orders,surged_fe,surged_net,cobra,ocara,cobrm,stockout,expiry_mapped,accepted_orders,surge_rate_card_amount,surge_dynamic_surge,surge_dynamic_fare
0,20230830,3. Wednesday,815,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,62a74e0a7d7967549ee7a7e6,Electronic City,8861892639fffff,High Affluence,CONTROL,surge_not_applied,64eeb08f1bd630639fa939ae,41.0,,,,0.09,,,,,,,,,,0-2,,,,,,,,,,,,
1,20230829,2. Tuesday,800,8,1.Morning Peak,0800-0959,Bangalore,Link,57370b61a6855d70057417d1,64eb0efd5a6ec79d09c3504c,Majestic,8860145b55fffff,Low Affluence,TEST,circuit_broken,64ed5ba81ed3d92ced7ef496,117.0,,,,10.836,,,,,,,,,,Rest,,,,,,,,,,,,


## Analysis view 1

In [24]:
## Pre

df_analysis_pre_group_v1 = df_pre_raw \
                            .groupby(['affluence', 'group_tc']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v1['fe2rr'] = (df_analysis_pre_group_v1['gross_orders']*100.0/df_analysis_pre_group_v1['fe_count']).round(2)
df_analysis_pre_group_v1['g2n'] = (df_analysis_pre_group_v1['net_orders']*100.0/df_analysis_pre_group_v1['gross_orders']).round(2)
df_analysis_pre_group_v1['fe2net'] = (df_analysis_pre_group_v1['net_orders']*100.0/df_analysis_pre_group_v1['fe_count']).round(2)
df_analysis_pre_group_v1['aor'] = (df_analysis_pre_group_v1['accepted_orders']*100.0/df_analysis_pre_group_v1['gross_orders']).round(2)
df_analysis_pre_group_v1['surged_fe_percentage'] = (df_analysis_pre_group_v1['surged_fe']*100.0/df_analysis_pre_group_v1['fe_count']).round(2)
df_analysis_pre_group_v1['surge_percentage'] = ((df_analysis_pre_group_v1['surge_dynamic_surge']+df_analysis_pre_group_v1['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v1['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v1['ppkm'] = (df_analysis_pre_group_v1['final_amount']/df_analysis_pre_group_v1['ride_distance']).round(2)
df_analysis_pre_group_v1['fe_hex'] = (df_analysis_pre_group_v1['fe_count']/df_analysis_pre_group_v1['hex_count']).round(2)
df_analysis_pre_group_v1['discount_percentage'] = (df_analysis_pre_group_v1['discount']*100.0/df_analysis_pre_group_v1['sub_total']).round(2)

In [25]:
## Post

df_analysis_post_group_v1 = df_post_raw \
                            .groupby(['affluence', 'group_tc']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v1['fe2rr'] = (df_analysis_post_group_v1['gross_orders']*100.0/df_analysis_post_group_v1['fe_count']).round(2)
df_analysis_post_group_v1['g2n'] = (df_analysis_post_group_v1['net_orders']*100.0/df_analysis_post_group_v1['gross_orders']).round(2)
df_analysis_post_group_v1['fe2net'] = (df_analysis_post_group_v1['net_orders']*100.0/df_analysis_post_group_v1['fe_count']).round(2)
df_analysis_post_group_v1['aor'] = (df_analysis_post_group_v1['accepted_orders']*100.0/df_analysis_post_group_v1['gross_orders']).round(2)
df_analysis_post_group_v1['surged_fe_percentage'] = (df_analysis_post_group_v1['surged_fe']*100.0/df_analysis_post_group_v1['fe_count']).round(2)
df_analysis_post_group_v1['surge_percentage'] = ((df_analysis_post_group_v1['surge_dynamic_surge']+df_analysis_post_group_v1['surge_dynamic_fare'])*100.0/df_analysis_post_group_v1['surge_rate_card_amount']).round(2)
df_analysis_post_group_v1['ppkm'] = (df_analysis_post_group_v1['final_amount']/df_analysis_post_group_v1['ride_distance']).round(2)
df_analysis_post_group_v1['fe_hex'] = (df_analysis_post_group_v1['fe_count']/df_analysis_post_group_v1['hex_count']).round(2)
df_analysis_post_group_v1['discount_percentage'] = (df_analysis_post_group_v1['discount']*100.0/df_analysis_post_group_v1['sub_total']).round(2)

In [26]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v1.columns]
    df_analysis_pre_group_v1.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v1.columns]
    df_analysis_post_group_v1.columns = post_new_column_names
    
    
add_pre_post()

In [27]:
view_1  = pd.merge(df_analysis_pre_group_v1,
             df_analysis_post_group_v1,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre'],
             right_on = ['affluence_post', 'group_tc_post']
            )

view_1.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_1['fe2rr_delta'] = view_1['fe2rr_post'] - view_1['fe2rr_pre']
view_1['g2n_delta'] = view_1['g2n_post'] - view_1['g2n_pre']
view_1['fe2net_delta'] = view_1['fe2net_post'] - view_1['fe2net_pre']
view_1['aor_delta'] = view_1['aor_post'] - view_1['aor_pre']
view_1['surged_fe_delta'] = view_1['surged_fe_percentage_post'] - view_1['surged_fe_percentage_pre']
view_1['ppkm_delta'] = view_1['ppkm_pre'] - view_1['ppkm_post']
view_1['surge_percentage_delta'] = view_1['surge_percentage_post'] - view_1['surge_percentage_pre']
view_1['discount_percentage_delta'] = view_1['discount_percentage_post'] - view_1['discount_percentage_pre']

In [28]:
view_1.head()

Unnamed: 0,affluence,group_tc,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,surge_dynamic_fare_pre,...,dynamic_surge_post,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,12,335972,60352,28675.0,56952,2940,16717,11530,104,18,3145,40740,682844.0,5604419.0,32172088.0,1509804.24,0.0,4713712.56,7.012191,2361664.0,4713712.56,1509804.24,0.0,...,1522126.14,0.0,4566668.15,6.873567,1931685.0,4566668.15,1522126.14,0.0,17.95,47.94,8.61,69.08,19.99,33.33,13.85,23369.75,7.71,-0.01,0.43,0.08,1.58,3.04,-0.23,1.3,-4.47
1,High Affluence,TEST,13,309792,57028,26542.0,57448,2615,16158,9408,57,15,4712,36578,434046.0,4969727.0,29211502.0,1690682.82,0.0,4989249.55,6.829907,2124395.0,4989249.55,1690682.82,0.0,...,448613.88,0.0,1758829.16,6.726483,1681628.0,1758829.16,448613.88,0.0,19.26,46.57,8.97,65.37,8.64,25.51,13.22,19159.46,5.98,0.85,0.03,0.4,1.23,-9.9,0.53,-8.38,-2.75
2,Low Affluence,CONTROL,22,365020,62089,28535.0,61747,2763,15852,13574,227,64,3587,43033,646005.0,6147165.0,39184038.0,1891837.52,0.0,5666845.44,7.937846,2905712.0,5666845.44,1891837.52,0.0,...,2713304.87,0.0,6450483.12,7.7154,2526169.0,6450483.12,2713304.87,0.0,16.41,46.6,7.65,71.34,21.69,42.06,13.98,14846.0,6.73,-0.6,0.64,-0.17,2.03,4.77,-0.49,8.68,-3.78
3,Low Affluence,TEST,23,375321,57399,27314.0,81783,3633,13074,14163,141,41,2430,42357,661779.0,6149004.0,45886373.0,3050452.29,0.0,8679852.36,8.87254,3335596.0,8679852.36,3050452.29,0.0,...,576524.68,0.0,1898105.7,8.853844,2938237.0,1898105.7,576524.68,0.0,16.77,44.57,7.47,70.13,5.84,30.37,13.06,14393.0,6.85,1.48,-3.02,0.19,-3.66,-15.95,0.7,-4.77,-3.91


In [29]:
df_analysis_pre_post_v1_dsm = pd.read_clipboard()
df_analysis_pre_post_v1_dsm

Unnamed: 0,affluence,group_tc,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,3557.47,3786.25,1869.93,1785.25,487.27,552.33,228.78,65.06
1,High Affluence,TEST,3436.2,3499.67,1735.27,1511.67,530.53,588.33,63.47,57.8
2,Low Affluence,CONTROL,3816.53,4137.0,2175.4,1949.0,867.13,964.67,320.47,97.54
3,Low Affluence,TEST,3554.93,4235.17,2415.33,2213.08,682.53,809.83,680.24,127.3


In [30]:
df_analysis_pre_post_v1 = view_1.merge(df_analysis_pre_post_v1_dsm,
                                                              how = 'inner',
                                                              on = ['affluence','group_tc']
                                                             )
df_analysis_pre_post_v1

Unnamed: 0,affluence,group_tc,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,surge_dynamic_fare_pre,...,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,12,335972,60352,28675.0,56952,2940,16717,11530,104,18,3145,40740,682844.0,5604419.0,32172088.0,1509804.24,0.0,4713712.56,7.012191,2361664.0,4713712.56,1509804.24,0.0,...,17.95,47.94,8.61,69.08,19.99,33.33,13.85,23369.75,7.71,-0.01,0.43,0.08,1.58,3.04,-0.23,1.3,-4.47,3557.47,3786.25,1869.93,1785.25,487.27,552.33,228.78,65.06
1,High Affluence,TEST,13,309792,57028,26542.0,57448,2615,16158,9408,57,15,4712,36578,434046.0,4969727.0,29211502.0,1690682.82,0.0,4989249.55,6.829907,2124395.0,4989249.55,1690682.82,0.0,...,19.26,46.57,8.97,65.37,8.64,25.51,13.22,19159.46,5.98,0.85,0.03,0.4,1.23,-9.9,0.53,-8.38,-2.75,3436.2,3499.67,1735.27,1511.67,530.53,588.33,63.47,57.8
2,Low Affluence,CONTROL,22,365020,62089,28535.0,61747,2763,15852,13574,227,64,3587,43033,646005.0,6147165.0,39184038.0,1891837.52,0.0,5666845.44,7.937846,2905712.0,5666845.44,1891837.52,0.0,...,16.41,46.6,7.65,71.34,21.69,42.06,13.98,14846.0,6.73,-0.6,0.64,-0.17,2.03,4.77,-0.49,8.68,-3.78,3816.53,4137.0,2175.4,1949.0,867.13,964.67,320.47,97.54
3,Low Affluence,TEST,23,375321,57399,27314.0,81783,3633,13074,14163,141,41,2430,42357,661779.0,6149004.0,45886373.0,3050452.29,0.0,8679852.36,8.87254,3335596.0,8679852.36,3050452.29,0.0,...,16.77,44.57,7.47,70.13,5.84,30.37,13.06,14393.0,6.85,1.48,-3.02,0.19,-3.66,-15.95,0.7,-4.77,-3.91,3554.93,4235.17,2415.33,2213.08,682.53,809.83,680.24,127.3


In [31]:
sorted_list = sorted(df_analysis_pre_post_v1.columns)
sorted_list

['accepted_orders_post',
 'accepted_orders_pre',
 'affluence',
 'affluence_post',
 'aor_delta',
 'aor_post',
 'aor_pre',
 'cobra_post',
 'cobra_pre',
 'cobrm_post',
 'cobrm_pre',
 'demand_pd_delta',
 'demand_per_day_post',
 'demand_per_day_pre',
 'discount_percentage_delta',
 'discount_percentage_post',
 'discount_percentage_pre',
 'discount_post',
 'discount_pre',
 'dynamic_fare_post',
 'dynamic_fare_pre',
 'dynamic_surge_post',
 'dynamic_surge_pre',
 'expiry_mapped_post',
 'expiry_mapped_pre',
 'fe2net_delta',
 'fe2net_post',
 'fe2net_pre',
 'fe2rr_delta',
 'fe2rr_post',
 'fe2rr_pre',
 'fe_count_post',
 'fe_count_pre',
 'fe_hex_post',
 'fe_hex_pre',
 'final_amount_post',
 'final_amount_pre',
 'g2n_delta',
 'g2n_post',
 'g2n_pre',
 'gross_orders_post',
 'gross_orders_pre',
 'group_tc',
 'group_tc_post',
 'hex_count',
 'hex_count_post',
 'mean_ride_distance_post',
 'mean_ride_distance_pre',
 'mismatch_qr_pd_delta',
 'mismatch_qr_per_day_post',
 'mismatch_qr_per_day_pre',
 'net_orders_p

In [32]:
df_analysis_pre_post_v1_final = df_analysis_pre_post_v1[['affluence', 'group_tc', 'hex_count',
                                  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]

In [33]:
df_analysis_pre_post_v1_final

Unnamed: 0,affluence,group_tc,hex_count,fe2rr_pre,fe2rr_post,fe2rr_delta,g2n_pre,g2n_post,g2n_delta,fe2net_pre,fe2net_post,fe2net_delta,aor_pre,aor_post,aor_delta,demand_per_day_pre,demand_per_day_post,demand_pd_delta,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,mismatch_qr_pd_delta,ppkm_pre,ppkm_post,...,fe_count_post,gross_orders_pre,gross_orders_post,net_orders_pre,net_orders_post,surged_fe_pre,surged_fe_post,surged_net_pre,surged_net_post,discount_pre,discount_post,sub_total_pre,sub_total_post,cobra_pre,cobra_post,ocara_pre,ocara_post,cobrm_pre,cobrm_post,expiry_mapped_pre,expiry_mapped_post,stockout_pre,stockout_post,accepted_orders_pre,accepted_orders_post
0,High Affluence,CONTROL,12,17.96,17.95,-0.01,47.51,47.94,0.43,8.53,8.61,0.08,67.5,69.08,1.58,3557.47,3786.25,228.78,1869.93,1785.25,487.27,552.33,65.06,13.62,13.85,...,280437,60352,50347,28675.0,24138.0,56952,56071,2940,2514,682844.0,339989.0,5604419.0,4409327.0,16717,13499,11530,10034,104,91,3145,2433,18,16,40740,34782
1,High Affluence,TEST,13,18.41,19.26,0.85,46.54,46.57,0.03,8.57,8.97,0.4,64.14,65.37,1.23,3436.2,3499.67,63.47,1735.27,1511.67,530.53,588.33,57.8,13.75,13.22,...,249073,57028,47982,26542.0,22344.0,57448,21522,2615,609,434046.0,237060.0,4969727.0,3962192.0,16158,13489,9408,8388,57,70,4712,3568,15,19,36578,31368
2,Low Affluence,CONTROL,22,17.01,16.41,-0.6,45.96,46.6,0.64,7.82,7.65,-0.17,69.31,71.34,2.03,3816.53,4137.0,320.47,2175.4,1949.0,867.13,964.67,97.54,13.49,13.98,...,326612,62089,53601,28535.0,24979.0,61747,70840,2763,2634,646005.0,343375.0,6147165.0,5105659.0,15852,13001,13574,12159,227,243,3587,2945,64,68,43033,38239
3,Low Affluence,TEST,23,15.29,16.77,1.48,47.59,44.57,-3.02,7.28,7.47,0.19,73.79,70.13,-3.66,3554.93,4235.17,680.24,2415.33,2213.08,682.53,809.83,127.3,13.76,13.06,...,331039,57399,55517,27314.0,24744.0,81783,19340,3633,548,661779.0,384108.0,6149004.0,5610003.0,13074,14196,14163,13260,141,121,2430,2977,41,29,42357,38935


In [34]:
df_analysis_pre_post_v1_final.to_clipboard(index=False)

## Analysis view 2

In [35]:
## Pre

df_analysis_pre_group_v2 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v2['fe2rr'] = (df_analysis_pre_group_v2['gross_orders']*100.0/df_analysis_pre_group_v2['fe_count']).round(2)
df_analysis_pre_group_v2['g2n'] = (df_analysis_pre_group_v2['net_orders']*100.0/df_analysis_pre_group_v2['gross_orders']).round(2)
df_analysis_pre_group_v2['fe2net'] = (df_analysis_pre_group_v2['net_orders']*100.0/df_analysis_pre_group_v2['fe_count']).round(2)
df_analysis_pre_group_v2['aor'] = (df_analysis_pre_group_v2['accepted_orders']*100.0/df_analysis_pre_group_v2['gross_orders']).round(2)
df_analysis_pre_group_v2['surged_fe_percentage'] = (df_analysis_pre_group_v2['surged_fe']*100.0/df_analysis_pre_group_v2['fe_count']).round(2)
df_analysis_pre_group_v2['surge_percentage'] = ((df_analysis_pre_group_v2['surge_dynamic_surge']+df_analysis_pre_group_v2['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v2['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v2['ppkm'] = (df_analysis_pre_group_v2['final_amount']/df_analysis_pre_group_v2['ride_distance']).round(2)
df_analysis_pre_group_v2['fe_hex'] = (df_analysis_pre_group_v2['fe_count']/df_analysis_pre_group_v2['hex_count']).round(2)
df_analysis_pre_group_v2['discount_percentage'] = (df_analysis_pre_group_v2['discount']*100.0/df_analysis_pre_group_v2['sub_total']).round(2)

In [36]:
## Post

df_analysis_post_group_v2 = df_post_raw \
                            .groupby(['affluence', 'group_tc','time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v2['fe2rr'] = (df_analysis_post_group_v2['gross_orders']*100.0/df_analysis_post_group_v2['fe_count']).round(2)
df_analysis_post_group_v2['g2n'] = (df_analysis_post_group_v2['net_orders']*100.0/df_analysis_post_group_v2['gross_orders']).round(2)
df_analysis_post_group_v2['fe2net'] = (df_analysis_post_group_v2['net_orders']*100.0/df_analysis_post_group_v2['fe_count']).round(2)
df_analysis_post_group_v2['aor'] = (df_analysis_post_group_v2['accepted_orders']*100.0/df_analysis_post_group_v2['gross_orders']).round(2)
df_analysis_post_group_v2['surged_fe_percentage'] = (df_analysis_post_group_v2['surged_fe']*100.0/df_analysis_post_group_v2['fe_count']).round(2)
df_analysis_post_group_v2['surge_percentage'] = ((df_analysis_post_group_v2['surge_dynamic_surge']+df_analysis_post_group_v2['surge_dynamic_fare'])*100.0/df_analysis_post_group_v2['surge_rate_card_amount']).round(2)
df_analysis_post_group_v2['ppkm'] = (df_analysis_post_group_v2['final_amount']/df_analysis_post_group_v2['ride_distance']).round(2)
df_analysis_post_group_v2['fe_hex'] = (df_analysis_post_group_v2['fe_count']/df_analysis_post_group_v2['hex_count']).round(2)
df_analysis_post_group_v2['discount_percentage'] = (df_analysis_post_group_v2['discount']*100.0/df_analysis_post_group_v2['sub_total']).round(2)

In [37]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v2.columns]
    df_analysis_pre_group_v2.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v2.columns]
    df_analysis_post_group_v2.columns = post_new_column_names
    
    
add_pre_post()

In [38]:
view_2  = pd.merge(df_analysis_pre_group_v2,
             df_analysis_post_group_v2,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'time_period_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'time_period_post']
            )

view_2.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'time_period_pre' : 'time_period',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_2['fe2rr_delta'] = view_2['fe2rr_post'] - view_2['fe2rr_pre']
view_2['g2n_delta'] = view_2['g2n_post'] - view_2['g2n_pre']
view_2['fe2net_delta'] = view_2['fe2net_post'] - view_2['fe2net_pre']
view_2['aor_delta'] = view_2['aor_post'] - view_2['aor_pre']
view_2['surged_fe_delta'] = view_2['surged_fe_percentage_post'] - view_2['surged_fe_percentage_pre']
view_2['ppkm_delta'] = view_2['ppkm_pre'] - view_2['ppkm_post']
view_2['surge_percentage_delta'] = view_2['surge_percentage_post'] - view_2['surge_percentage_pre']
view_2['discount_percentage_delta'] = view_2['discount_percentage_post'] - view_2['discount_percentage_pre']

In [39]:
view_2.head(3)

Unnamed: 0,affluence,group_tc,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,...,dynamic_surge_post,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,1.Morning Peak,12,90268,17013,7366.0,24142,1364,5715,2700,27,2,1178,9975,253978.0,1638509.0,8748384.0,807897.54,0.0,2027704.64,6.977048,631911.224934,2027704.64,807897.54,...,789657.94,0.0,1865366.18,6.940617,476029.170064,1865366.18,789657.94,0.0,18.67,46.88,8.75,64.39,31.72,42.33,14.23,5699.67,9.24,-0.18,3.58,0.59,5.76,4.98,-0.39,2.49,-6.26
1,High Affluence,CONTROL,2.Afternoon,12,68539,11544,6936.0,5201,228,2001,2320,31,3,223,9368,181627.0,1082269.0,6289278.0,101468.43,0.0,448395.63,7.178052,492421.565271,448395.63,101468.43,...,46598.36,0.0,229339.22,7.186581,398230.035539,229339.22,46598.36,0.0,18.6,58.11,10.81,80.93,4.82,20.32,12.66,4613.58,10.46,1.76,-1.97,0.69,-0.22,-2.77,0.11,-2.31,-6.32
2,High Affluence,CONTROL,3.Evening Peak,12,127429,22394,9820.0,26866,1300,6962,4100,13,2,1435,14286,194773.0,1957094.0,11592446.0,575997.11,0.0,2192560.5,6.739496,861300.809475,2192560.5,575997.11,...,653133.42,0.0,2386479.01,6.513645,721197.259378,2386479.01,653133.42,0.0,17.27,43.31,7.48,63.55,27.76,27.37,13.77,9204.0,6.79,-0.3,-0.54,-0.23,-0.24,6.68,-0.31,1.1,-3.16


In [40]:
df_analysis_pre_post_v2_dsm = pd.read_clipboard()
df_analysis_pre_post_v2_dsm.head(3)

Unnamed: 0,affluence,group_tc,time_period,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,1.Morning Peak,995.93,937.58,375.4,381.75,143.53,143.75,-58.35,0.22
1,High Affluence,TEST,1.Morning Peak,1194.4,1118.17,240.4,210.92,168.27,168.83,-76.23,0.56
2,Low Affluence,CONTROL,1.Morning Peak,1252.8,1227.17,355.8,310.5,255.4,265.17,-25.63,9.77


In [41]:
df_analysis_pre_post_v2 = view_2.merge(df_analysis_pre_post_v2_dsm,
                                                              how = 'inner',
                                                              on = ['affluence','group_tc','time_period']
                                                             )
df_analysis_pre_post_v2.head(3)

Unnamed: 0,affluence,group_tc,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,surge_dynamic_surge_pre,...,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,discount_percentage_delta,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,1.Morning Peak,12,90268,17013,7366.0,24142,1364,5715,2700,27,2,1178,9975,253978.0,1638509.0,8748384.0,807897.54,0.0,2027704.64,6.977048,631911.224934,2027704.64,807897.54,...,18.67,46.88,8.75,64.39,31.72,42.33,14.23,5699.67,9.24,-0.18,3.58,0.59,5.76,4.98,-0.39,2.49,-6.26,995.93,937.58,375.4,381.75,143.53,143.75,-58.35,0.22
1,High Affluence,CONTROL,2.Afternoon,12,68539,11544,6936.0,5201,228,2001,2320,31,3,223,9368,181627.0,1082269.0,6289278.0,101468.43,0.0,448395.63,7.178052,492421.565271,448395.63,101468.43,...,18.6,58.11,10.81,80.93,4.82,20.32,12.66,4613.58,10.46,1.76,-1.97,0.69,-0.22,-2.77,0.11,-2.31,-6.32,687.13,766.08,502.33,426.17,115.87,145.67,78.95,29.8
2,High Affluence,CONTROL,3.Evening Peak,12,127429,22394,9820.0,26866,1300,6962,4100,13,2,1435,14286,194773.0,1957094.0,11592446.0,575997.11,0.0,2192560.5,6.739496,861300.809475,2192560.5,575997.11,...,17.27,43.31,7.48,63.55,27.76,27.37,13.77,9204.0,6.79,-0.3,-0.54,-0.23,-0.24,6.68,-0.31,1.1,-3.16,1294.53,1424.0,506.07,489.0,130.6,149.0,129.47,18.4


In [42]:
df_analysis_pre_post_v2_final = df_analysis_pre_post_v2[['affluence', 'group_tc', 'time_period', 'hex_count',
                                  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v2_final = df_analysis_pre_post_v2_final.sort_values(['time_period','affluence','group_tc'])

In [43]:
df_analysis_pre_post_v2_final

Unnamed: 0,affluence,group_tc,time_period,hex_count,fe2rr_pre,fe2rr_post,fe2rr_delta,g2n_pre,g2n_post,g2n_delta,fe2net_pre,fe2net_post,fe2net_delta,aor_pre,aor_post,aor_delta,demand_per_day_pre,demand_per_day_post,demand_pd_delta,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,mismatch_qr_pd_delta,ppkm_pre,...,fe_count_post,gross_orders_pre,gross_orders_post,net_orders_pre,net_orders_post,surged_fe_pre,surged_fe_post,surged_net_pre,surged_net_post,discount_pre,discount_post,sub_total_pre,sub_total_post,cobra_pre,cobra_post,ocara_pre,ocara_post,cobrm_pre,cobrm_post,expiry_mapped_pre,expiry_mapped_post,stockout_pre,stockout_post,accepted_orders_pre,accepted_orders_post
0,High Affluence,CONTROL,1.Morning Peak,12,18.85,18.67,-0.18,43.3,46.88,3.58,8.16,8.75,0.59,58.63,64.39,5.76,995.93,937.58,-58.35,375.4,381.75,143.53,143.75,0.22,13.84,...,68396,17013,12770,7366.0,5986.0,24142,21695,1364,1137,253978.0,105319.0,1638509.0,1140431.0,5715,3887,2700,2173,27,27,1178,663,2,1,9975,8223
4,High Affluence,TEST,1.Morning Peak,13,22.1,24.3,2.2,43.94,49.37,5.43,9.71,12.0,2.29,56.53,64.29,7.76,1194.4,1118.17,-76.23,240.4,210.92,168.27,168.83,0.56,13.84,...,64854,20316,15759,8926.0,7780.0,19454,236,1176,35,165234.0,83117.0,1740414.0,1254134.0,6526,4603,2439,2164,10,9,2390,1180,2,4,11485,10132
8,Low Affluence,CONTROL,1.Morning Peak,22,20.82,19.95,-0.87,40.89,46.01,5.12,8.51,9.18,0.67,58.65,66.2,7.55,1252.8,1227.17,-25.63,355.8,310.5,255.4,265.17,9.77,13.09,...,82882,21582,16535,8824.0,7607.0,16982,20276,988,1139,252537.0,115516.0,2088877.0,1512296.0,7093,4699,3696,3038,39,61,1858,1076,13,8,12657,10946
12,Low Affluence,TEST,1.Morning Peak,23,17.47,19.67,2.2,46.41,46.12,-0.29,8.11,9.07,0.96,67.76,67.51,-0.25,1094.87,1240.42,145.55,464.4,411.92,223.93,238.58,14.65,13.4,...,84807,17991,16679,8349.0,7692.0,24807,447,1261,57,251308.0,136001.0,1867918.0,1580543.0,5075,4804,3655,3289,22,16,845,840,3,0,12191,11260
1,High Affluence,CONTROL,2.Afternoon,12,16.84,18.6,1.76,60.08,58.11,-1.97,10.12,10.81,0.69,81.15,80.93,-0.22,687.13,766.08,78.95,502.33,426.17,115.87,145.67,29.8,12.77,...,55363,11544,10299,6936.0,5985.0,5201,2666,228,166,181627.0,92774.0,1082269.0,887023.0,2001,1825,2320,2225,31,24,223,212,3,3,9368,8335
5,High Affluence,TEST,2.Afternoon,13,16.84,18.73,1.89,59.45,56.06,-3.39,10.01,10.5,0.49,80.33,76.72,-3.61,697.13,739.67,42.54,484.0,383.67,129.53,153.67,24.14,13.1,...,53156,11147,9957,6627.0,5582.0,9489,3064,387,177,112072.0,58887.0,961419.0,802181.0,1993,2048,2211,1913,4,13,275,370,1,2,8954,7639
9,Low Affluence,CONTROL,2.Afternoon,22,16.31,16.67,0.36,55.88,54.62,-1.26,9.11,9.1,-0.01,80.95,80.62,-0.33,950.2,1077.75,127.55,632.2,464.75,211.0,264.67,53.67,12.99,...,81523,14782,13589,8260.0,7422.0,14353,13998,637,588,209192.0,113544.0,1474667.0,1274667.0,2533,2376,3468,3275,41,46,415,422,8,6,11966,10955
13,Low Affluence,TEST,2.Afternoon,23,15.13,16.89,1.76,55.81,51.76,-4.05,8.44,8.74,0.3,83.75,79.24,-4.51,836.4,978.33,141.93,700.8,514.75,157.2,215.83,58.63,13.11,...,74102,13213,12516,7374.0,6478.0,19090,3173,917,159,206645.0,114939.0,1394289.0,1224316.0,2119,2405,3385,3236,11,7,269,342,3,3,11066,9918
2,High Affluence,CONTROL,3.Evening Peak,12,17.57,17.27,-0.3,43.85,43.31,-0.54,7.71,7.48,-0.23,63.79,63.55,-0.24,1294.53,1424.0,129.47,506.07,489.0,130.6,149.0,18.4,13.46,...,110448,22394,19073,9820.0,8260.0,26866,30656,1300,1141,194773.0,109824.0,1957094.0,1617826.0,6962,5911,4100,3591,13,16,1435,1249,2,0,14286,12120
6,High Affluence,TEST,3.Evening Peak,13,16.2,16.49,0.29,40.36,38.11,-2.25,6.54,6.28,-0.26,58.38,57.09,-1.29,1150.67,1215.08,64.41,573.53,485.17,140.6,160.17,19.57,13.73,...,100919,18954,16637,7649.0,6341.0,28034,18216,1027,391,132082.0,78252.0,1648109.0,1386780.0,6251,5600,3169,2945,13,23,1817,1684,2,6,11066,9498


In [44]:
df_analysis_pre_post_v2_final.to_clipboard(index=False)

## Analysis view 3 - On Hold

In [None]:
## Pre

df_analysis_pre_group_v3 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','weekday']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'), 
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v3['fe2rr'] = (df_analysis_pre_group_v3['gross_orders']*100.0/df_analysis_pre_group_v3['fe_count']).round(2)
df_analysis_pre_group_v3['g2n'] = (df_analysis_pre_group_v3['net_orders']*100.0/df_analysis_pre_group_v3['gross_orders']).round(2)
df_analysis_pre_group_v3['fe2net'] = (df_analysis_pre_group_v3['net_orders']*100.0/df_analysis_pre_group_v3['fe_count']).round(2)
df_analysis_pre_group_v3['aor'] = (df_analysis_pre_group_v3['accepted_orders']*100.0/df_analysis_pre_group_v3['gross_orders']).round(2)
df_analysis_pre_group_v3['surged_fe_percentage'] = (df_analysis_pre_group_v3['surged_fe']*100.0/df_analysis_pre_group_v3['fe_count']).round(2)
df_analysis_pre_group_v3['surge_percentage'] = ((df_analysis_pre_group_v3['surge_dynamic_surge']+df_analysis_pre_group_v3['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v3['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v3['ppkm'] = (df_analysis_pre_group_v3['final_amount']/df_analysis_pre_group_v3['ride_distance']).round(2)
df_analysis_pre_group_v3['fe_hex'] = (df_analysis_pre_group_v3['fe_count']/df_analysis_pre_group_v3['hex_count']).round(2)
df_analysis_pre_group_v3['discount_percentage'] = (df_analysis_pre_group_v3['discount']*100.0/df_analysis_pre_group_v3['sub_total']).round(2)

In [None]:
## Post

df_analysis_post_group_v3 = df_post_raw \
                            .groupby(['affluence', 'group_tc','weekday']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v3['fe2rr'] = (df_analysis_post_group_v3['gross_orders']*100.0/df_analysis_post_group_v3['fe_count']).round(2)
df_analysis_post_group_v3['g2n'] = (df_analysis_post_group_v3['net_orders']*100.0/df_analysis_post_group_v3['gross_orders']).round(2)
df_analysis_post_group_v3['fe2net'] = (df_analysis_post_group_v3['net_orders']*100.0/df_analysis_post_group_v3['fe_count']).round(2)
df_analysis_post_group_v3['aor'] = (df_analysis_post_group_v3['accepted_orders']*100.0/df_analysis_post_group_v3['gross_orders']).round(2)
df_analysis_post_group_v3['surged_fe_percentage'] = (df_analysis_post_group_v3['surged_fe']*100.0/df_analysis_post_group_v3['fe_count']).round(2)
df_analysis_post_group_v3['surge_percentage'] = ((df_analysis_post_group_v3['surge_dynamic_surge']+df_analysis_post_group_v3['surge_dynamic_fare'])*100.0/df_analysis_post_group_v3['surge_rate_card_amount']).round(2)
df_analysis_post_group_v3['ppkm'] = (df_analysis_post_group_v3['final_amount']/df_analysis_post_group_v3['ride_distance']).round(2)
df_analysis_post_group_v3['fe_hex'] = (df_analysis_post_group_v3['fe_count']/df_analysis_post_group_v3['hex_count']).round(2)
df_analysis_post_group_v3['discount_percentage'] = (df_analysis_post_group_v3['discount']*100.0/df_analysis_post_group_v3['sub_total']).round(2)

In [None]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v3.columns]
    df_analysis_pre_group_v3.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v3.columns]
    df_analysis_post_group_v3.columns = post_new_column_names
    
    
add_pre_post()

In [None]:
view_3  = pd.merge(df_analysis_pre_group_v3,
             df_analysis_post_group_v3,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'weekday_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'weekday_post']
            )

view_3.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'weekday_pre' : 'weekday',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_3['fe2rr_delta'] = view_3['fe2rr_post'] - view_3['fe2rr_pre']
view_3['g2n_delta'] = view_3['g2n_post'] - view_3['g2n_pre']
view_3['fe2net_delta'] = view_3['fe2net_post'] - view_3['fe2net_pre']
view_3['aor_delta'] = view_3['aor_post'] - view_3['aor_pre']
view_3['surged_fe_delta'] = view_3['surged_fe_percentage_post'] - view_3['surged_fe_percentage_pre']
view_3['ppkm_delta'] = view_3['ppkm_pre'] - view_3['ppkm_post']
view_3['surge_percentage_delta'] = view_3['surge_percentage_post'] - view_3['surge_percentage_pre']
view_3['discount_percentage_delta'] = view_3['discount_percentage_post'] - view_3['discount_percentage_pre']

In [None]:
view_3.head(3)

In [None]:
df_analysis_pre_post_v3_dsm = pd.read_clipboard()
df_analysis_pre_post_v3_dsm.head(3)

In [None]:
df_analysis_pre_post_v3 = view_3.merge(df_analysis_pre_post_v3_dsm,
                                      how = 'inner',
                                      on = ['affluence','group_tc','weekday']
                                     )
df_analysis_pre_post_v3.head(3)

In [None]:
df_analysis_pre_post_v3_final = df_analysis_pre_post_v3[['affluence', 'group_tc', 'weekday', 'hex_count',
                                  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v3_final = df_analysis_pre_post_v3_final.sort_values(['weekday','affluence','group_tc'])

In [None]:
df_analysis_pre_post_v3_final

In [None]:
df_analysis_pre_post_v3_final.to_clipboard(index=False)

## Analysis view 4

In [89]:
## Pre

df_analysis_pre_group_v4 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','pickup_location','pickup_hex_id']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v4['fe2rr'] = (df_analysis_pre_group_v4['gross_orders']*100.0/df_analysis_pre_group_v4['fe_count']).round(2)
df_analysis_pre_group_v4['g2n'] = (df_analysis_pre_group_v4['net_orders']*100.0/df_analysis_pre_group_v4['gross_orders']).round(2)
df_analysis_pre_group_v4['fe2net'] = (df_analysis_pre_group_v4['net_orders']*100.0/df_analysis_pre_group_v4['fe_count']).round(2)
df_analysis_pre_group_v4['aor'] = (df_analysis_pre_group_v4['accepted_orders']*100.0/df_analysis_pre_group_v4['gross_orders']).round(2)
df_analysis_pre_group_v4['surged_fe_percentage'] = (df_analysis_pre_group_v4['surged_fe']*100.0/df_analysis_pre_group_v4['fe_count']).round(2)
df_analysis_pre_group_v4['surge_percentage'] = ((df_analysis_pre_group_v4['surge_dynamic_surge']+df_analysis_pre_group_v4['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v4['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v4['ppkm'] = (df_analysis_pre_group_v4['final_amount']/df_analysis_pre_group_v4['ride_distance']).round(2)
df_analysis_pre_group_v4['fe_hex'] = (df_analysis_pre_group_v4['fe_count']/df_analysis_pre_group_v4['hex_count']).round(2)
df_analysis_pre_group_v4['discount_percentage'] = (df_analysis_pre_group_v4['discount']*100.0/df_analysis_pre_group_v4['sub_total']).round(2)

In [90]:
## Post

df_analysis_post_group_v4 = df_post_raw \
                            .groupby(['affluence', 'group_tc','pickup_location', 'pickup_hex_id']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v4['fe2rr'] = (df_analysis_post_group_v4['gross_orders']*100.0/df_analysis_post_group_v4['fe_count']).round(2)
df_analysis_post_group_v4['g2n'] = (df_analysis_post_group_v4['net_orders']*100.0/df_analysis_post_group_v4['gross_orders']).round(2)
df_analysis_post_group_v4['fe2net'] = (df_analysis_post_group_v4['net_orders']*100.0/df_analysis_post_group_v4['fe_count']).round(2)
df_analysis_post_group_v4['aor'] = (df_analysis_post_group_v4['accepted_orders']*100.0/df_analysis_post_group_v4['gross_orders']).round(2)
df_analysis_post_group_v4['surged_fe_percentage'] = (df_analysis_post_group_v4['surged_fe']*100.0/df_analysis_post_group_v4['fe_count']).round(2)
df_analysis_post_group_v4['surge_percentage'] = ((df_analysis_post_group_v4['surge_dynamic_surge']+df_analysis_post_group_v4['surge_dynamic_fare'])*100.0/df_analysis_post_group_v4['surge_rate_card_amount']).round(2)
df_analysis_post_group_v4['ppkm'] = (df_analysis_post_group_v4['final_amount']/df_analysis_post_group_v4['ride_distance']).round(2)
df_analysis_post_group_v4['fe_hex'] = (df_analysis_post_group_v4['fe_count']/df_analysis_post_group_v4['hex_count']).round(2)
df_analysis_post_group_v4['discount_percentage'] = (df_analysis_post_group_v4['discount']*100.0/df_analysis_post_group_v4['sub_total']).round(2)

In [91]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v4.columns]
    df_analysis_pre_group_v4.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v4.columns]
    df_analysis_post_group_v4.columns = post_new_column_names
    
    
add_pre_post()

In [92]:
view_4  = pd.merge(df_analysis_pre_group_v4,
             df_analysis_post_group_v4,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'pickup_hex_id_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'pickup_hex_id_post']
            )

view_4.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'pickup_location_pre' : 'pickup_location',
                         'pickup_hex_id_pre' : 'pickup_hex_id',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_4['fe2rr_delta'] = view_4['fe2rr_post'] - view_4['fe2rr_pre']
view_4['g2n_delta'] = view_4['g2n_post'] - view_4['g2n_pre']
view_4['fe2net_delta'] = view_4['fe2net_post'] - view_4['fe2net_pre']
view_4['aor_delta'] = view_4['aor_post'] - view_4['aor_pre']
view_4['surged_fe_delta'] = view_4['surged_fe_percentage_post'] - view_4['surged_fe_percentage_pre']
view_4['ppkm_delta'] = view_4['ppkm_pre'] - view_4['ppkm_post']
view_4['surge_percentage_delta'] = view_4['surge_percentage_post'] - view_4['surge_percentage_pre']
view_4['surged_fe_percentage_delta'] = view_4['surged_fe_percentage_post'] - view_4['surged_fe_percentage_pre']
view_4['discount_percentage_delta'] = view_4['discount_percentage_post'] - view_4['discount_percentage_pre']

In [93]:
view_4.head(5)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,...,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1,10302,1799,672.0,355,11,608,273,1,3,239,972,10632.0,193343.0,1169004.0,19285.35,0.0,39760.57,8.959562,92865.863178,39760.57,...,0.0,112694.76,8.918794,71296.839015,112694.76,54485.09,0.0,17.54,45.62,8.0,63.65,12.6,48.35,13.27,7938.0,4.27,0.08,8.27,1.48,9.62,9.15,-0.68,-0.15,9.15,-1.23
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,1,10995,2074,1013.0,866,42,552,396,8,5,93,1446,27445.0,183109.0,884027.0,13841.52,0.0,64464.8,6.001053,66155.613207,64464.8,...,0.0,105916.5,5.843301,64702.87432,105916.5,25659.71,0.0,17.65,46.1,8.14,64.51,14.05,24.23,13.61,11047.0,9.48,-1.21,-2.74,-1.07,-5.21,6.17,-0.25,2.76,6.17,-5.51
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,1,10655,2235,1200.0,533,34,512,403,9,1,104,1617,32175.0,211510.0,1119770.0,13759.49,0.0,54561.2,7.949945,84953.113739,54561.2,...,0.0,66273.73,8.414653,70329.669916,66273.73,17414.21,0.0,20.05,58.82,11.79,78.06,7.99,26.28,13.35,8345.0,9.66,-0.93,5.13,0.53,5.71,2.99,-0.17,1.06,2.99,-5.55
3,High Affluence,CONTROL,Electronic City,8861892639fffff,1,39777,5927,3308.0,10881,558,1302,1203,3,2,95,4535,86343.0,611695.0,4247532.0,337628.68,0.0,891267.62,7.637189,304044.137481,891267.62,...,0.0,734900.5,7.466542,244051.391443,734900.5,265173.58,0.0,14.42,61.29,8.84,84.24,28.81,36.08,14.18,32665.0,8.64,-0.48,5.48,0.52,7.73,1.45,-0.21,-1.8,1.45,-5.48
4,High Affluence,CONTROL,Horamavu,8861892c1dfffff,1,9844,1802,981.0,3368,227,413,318,4,0,82,1311,28554.0,192128.0,1049202.0,131832.04,0.0,311213.21,7.342984,72445.882012,311213.21,...,0.0,254159.47,7.021487,53419.471835,254159.47,91017.69,0.0,18.03,52.55,9.47,72.92,37.8,35.81,14.56,7600.0,9.68,-0.28,-1.89,-0.5,0.17,3.59,-0.08,-6.55,3.59,-5.18


In [94]:
view_4.pickup_hex_id.nunique()

70

In [95]:
df_analysis_pre_post_v4_dsm = pd.read_clipboard()
df_analysis_pre_post_v4_dsm.head(3)

Unnamed: 0,affluence,group_tc,cluster,hex_id,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,101.73,103.33,65.67,64.33,25.67,27.92,1.6,2.25
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,126.0,149.0,33.33,16.08,48.47,56.42,23.0,7.95
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,137.0,133.75,106.0,117.75,28.8,31.75,-3.25,2.95


In [96]:
df_analysis_pre_post_v4 = view_4.merge(df_analysis_pre_post_v4_dsm,
                                      how = 'inner',
                                      left_on = ['affluence','group_tc','pickup_hex_id'],
                                      right_on = ['affluence','group_tc','hex_id']
                                     )
df_analysis_pre_post_v4.head(3)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,surge_rate_card_amount_pre,...,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta,cluster,hex_id,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1,10302,1799,672.0,355,11,608,273,1,3,239,972,10632.0,193343.0,1169004.0,19285.35,0.0,39760.57,8.959562,92865.863178,39760.57,...,63.65,12.6,48.35,13.27,7938.0,4.27,0.08,8.27,1.48,9.62,9.15,-0.68,-0.15,9.15,-1.23,Akshaynagar,88618926adfffff,101.73,103.33,65.67,64.33,25.67,27.92,1.6,2.25
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,1,10995,2074,1013.0,866,42,552,396,8,5,93,1446,27445.0,183109.0,884027.0,13841.52,0.0,64464.8,6.001053,66155.613207,64464.8,...,64.51,14.05,24.23,13.61,11047.0,9.48,-1.21,-2.74,-1.07,-5.21,6.17,-0.25,2.76,6.17,-5.51,Banashankri North,8860145a61fffff,126.0,149.0,33.33,16.08,48.47,56.42,23.0,7.95
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,1,10655,2235,1200.0,533,34,512,403,9,1,104,1617,32175.0,211510.0,1119770.0,13759.49,0.0,54561.2,7.949945,84953.113739,54561.2,...,78.06,7.99,26.28,13.35,8345.0,9.66,-0.93,5.13,0.53,5.71,2.99,-0.17,1.06,2.99,-5.55,Chanasandra,88618921c7fffff,137.0,133.75,106.0,117.75,28.8,31.75,-3.25,2.95


In [97]:
df_analysis_pre_post_v4_final = df_analysis_pre_post_v4[['affluence', 'group_tc', 'pickup_location', 'pickup_hex_id',
                                                        'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v4_final = df_analysis_pre_post_v4_final.sort_values(['affluence','group_tc', 'pickup_location', 'pickup_hex_id'])

In [98]:
df_analysis_pre_post_v4_final

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,fe2rr_pre,fe2rr_post,fe2rr_delta,g2n_pre,g2n_post,g2n_delta,fe2net_pre,fe2net_post,fe2net_delta,aor_pre,aor_post,aor_delta,demand_per_day_pre,demand_per_day_post,demand_pd_delta,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,mismatch_qr_pd_delta,ppkm_pre,...,fe_count_post,gross_orders_pre,gross_orders_post,net_orders_pre,net_orders_post,surged_fe_pre,surged_fe_post,surged_net_pre,surged_net_post,discount_pre,discount_post,sub_total_pre,sub_total_post,cobra_pre,cobra_post,ocara_pre,ocara_post,cobrm_pre,cobrm_post,expiry_mapped_pre,expiry_mapped_post,stockout_pre,stockout_post,accepted_orders_pre,accepted_orders_post
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,17.46,17.54,0.08,37.35,45.62,8.27,6.52,8.0,1.48,54.03,63.65,9.62,101.73,103.33,1.6,65.67,64.33,25.67,27.92,2.25,12.59,...,7938,1799,1392,672.0,635.0,355,1000,11,35,10632.0,6314.0,193343.0,147861.0,608,390,273,214,1,0,239,146,3,3,972,886
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,18.86,17.65,-1.21,48.84,46.1,-2.74,9.21,8.14,-1.07,69.72,64.51,-5.21,126.0,149.0,23.0,33.33,16.08,48.47,56.42,7.95,13.36,...,11047,2074,1950,1013.0,899.0,866,1552,42,52,27445.0,15042.0,183109.0,158589.0,552,573,396,329,8,7,93,134,5,2,1446,1258
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,20.98,20.05,-0.93,53.69,58.82,5.13,11.26,11.79,0.53,72.35,78.06,5.71,137.0,133.75,-3.25,106.0,117.75,28.8,31.75,2.95,13.18,...,8345,2235,1673,1200.0,984.0,533,667,34,28,32175.0,14770.0,211510.0,152876.0,512,317,403,312,9,9,104,42,1,3,1617,1306
3,High Affluence,CONTROL,Electronic City,8861892639fffff,14.9,14.42,-0.48,55.81,61.29,5.48,8.32,8.84,0.52,76.51,84.24,7.73,357.8,372.92,15.12,265.8,277.75,32.73,39.0,6.27,13.97,...,32665,5927,4709,3308.0,2886.0,10881,9410,558,467,86343.0,39705.0,611695.0,459377.0,1302,746,1203,1007,3,2,95,57,2,0,4535,3967
4,High Affluence,CONTROL,Horamavu,8861892c1dfffff,18.31,18.03,-0.28,54.44,52.55,-1.89,9.97,9.47,-0.5,72.75,72.92,0.17,110.8,103.58,-7.22,76.87,53.17,33.27,34.83,1.56,14.48,...,7600,1802,1370,981.0,720.0,3368,2873,227,222,28554.0,12510.0,192128.0,129206.0,413,325,318,260,4,3,82,57,0,1,1311,999
5,High Affluence,CONTROL,Kammanahalli HRBR Layout,8861892ea5fffff,17.91,17.5,-0.41,54.14,59.53,5.39,9.7,10.42,0.72,73.94,79.75,5.81,353.07,360.5,7.43,197.87,163.92,53.93,60.75,6.82,13.56,...,26365,5663,4613,3066.0,2746.0,4959,4254,241,201,80636.0,38853.0,549175.0,426195.0,1335,863,1069,873,21,10,157,103,2,1,4187,3679
6,High Affluence,CONTROL,Mysore rd,8860145a33fffff,19.58,20.42,0.84,55.86,54.29,-1.57,10.94,11.09,0.15,79.69,77.06,-2.63,80.47,99.33,18.86,80.13,65.58,19.93,31.25,11.32,12.82,...,6106,1305,1247,729.0,677.0,199,302,9,13,19202.0,10720.0,123491.0,122722.0,217,242,305,258,16,15,36,44,2,2,1040,961
7,High Affluence,CONTROL,Ramamurthy Nagar,8861892e37fffff,20.04,18.37,-1.67,45.68,52.0,6.32,9.15,9.55,0.4,62.13,68.9,6.77,202.2,192.33,-9.87,82.0,58.92,44.53,49.25,4.72,13.36,...,14043,3544,2579,1619.0,1341.0,2630,2462,159,137,51562.0,18422.0,346136.0,232981.0,1139,693,550,414,1,2,224,124,0,0,2202,1777
8,High Affluence,CONTROL,Thanisandra,8861892cbdfffff,18.59,19.05,0.46,49.23,48.68,-0.55,9.15,9.27,0.12,74.49,71.66,-2.83,149.6,174.83,25.23,106.67,93.58,36.67,44.83,8.16,13.18,...,11562,2403,2202,1183.0,1072.0,2093,1706,104,83,35397.0,18711.0,252328.0,210857.0,553,548,551,467,8,5,95,103,2,2,1790,1578
9,High Affluence,CONTROL,Venkatapura,88618925c9fffff,17.12,16.99,-0.13,43.36,41.5,-1.86,7.42,7.05,-0.37,66.53,67.19,0.66,1023.13,1112.75,89.62,372.8,354.33,71.27,77.25,5.98,14.04,...,86021,16871,14613,7315.0,6065.0,17951,19850,840,780,147936.0,82312.0,1488324.0,1225499.0,4959,4376,3802,3584,2,7,734,543,0,1,11225,9819


In [99]:
df_analysis_pre_post_v4_final.to_clipboard(index=False)

In [55]:
#df_analysis_pre_post_v4_final.to_csv('/Users/rapido/local-datasets/affluence/final/kepler_hex_view_all_exp_data.csv', index=False)

## Analysis view 5

In [100]:
## Pre

df_analysis_pre_group_v5 = df_pre_raw \
                            .groupby(['affluence', 'group_tc','pickup_location','pickup_hex_id','time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_pre_group_v5['fe2rr'] = (df_analysis_pre_group_v5['gross_orders']*100.0/df_analysis_pre_group_v5['fe_count']).round(2)
df_analysis_pre_group_v5['g2n'] = (df_analysis_pre_group_v5['net_orders']*100.0/df_analysis_pre_group_v5['gross_orders']).round(2)
df_analysis_pre_group_v5['fe2net'] = (df_analysis_pre_group_v5['net_orders']*100.0/df_analysis_pre_group_v5['fe_count']).round(2)
df_analysis_pre_group_v5['aor'] = (df_analysis_pre_group_v5['accepted_orders']*100.0/df_analysis_pre_group_v5['gross_orders']).round(2)
df_analysis_pre_group_v5['surged_fe_percentage'] = (df_analysis_pre_group_v5['surged_fe']*100.0/df_analysis_pre_group_v5['fe_count']).round(2)
df_analysis_pre_group_v5['surge_percentage'] = ((df_analysis_pre_group_v5['surge_dynamic_surge']+df_analysis_pre_group_v5['surge_dynamic_fare'])*100.0/df_analysis_pre_group_v5['surge_rate_card_amount']).round(2)
df_analysis_pre_group_v5['ppkm'] = (df_analysis_pre_group_v5['final_amount']/df_analysis_pre_group_v5['ride_distance']).round(2)
df_analysis_pre_group_v5['fe_hex'] = (df_analysis_pre_group_v5['fe_count']/df_analysis_pre_group_v5['hex_count']).round(2)
df_analysis_pre_group_v5['discount_percentage'] = (df_analysis_pre_group_v5['discount']*100.0/df_analysis_pre_group_v5['sub_total']).round(2)

In [101]:
## Post

df_analysis_post_group_v5 = df_post_raw \
                            .groupby(['affluence', 'group_tc','pickup_location', 'pickup_hex_id', 'time_period']) \
                            .agg(
                                hex_count = pd.NamedAgg('pickup_hex_id', 'nunique'),
                                fe_count = pd.NamedAgg('fare_estimate_id', 'nunique'), 
                                gross_orders = pd.NamedAgg('order_id', 'nunique'), 
                                net_orders = pd.NamedAgg('net_orders', 'sum'),
                                surged_fe = pd.NamedAgg('surged_fe', 'nunique'),
                                surged_net = pd.NamedAgg('surged_net', 'nunique'),
                                cobra = pd.NamedAgg('cobra', 'nunique'),
                                ocara = pd.NamedAgg('ocara', 'nunique'),
                                cobrm = pd.NamedAgg('cobrm', 'nunique'),
                                stockout = pd.NamedAgg('stockout', 'nunique'),
                                expiry_mapped = pd.NamedAgg('expiry_mapped', 'nunique'),
                                accepted_orders = pd.NamedAgg('accepted_orders', 'nunique'),
                                discount = pd.NamedAgg('discount', 'sum'),
                                sub_total = pd.NamedAgg('sub_total', 'sum'),
                                final_amount = pd.NamedAgg('final_amount', 'sum'),
                                dynamic_surge = pd.NamedAgg('dynamic_surge', 'sum'),
                                dynamic_fare = pd.NamedAgg('dynamic_fare', 'sum'),
                                rate_card_amount = pd.NamedAgg('rate_card_amount', 'sum'),
                                mean_ride_distance = pd.NamedAgg('ride_distance', 'mean'),
                                ride_distance = pd.NamedAgg('ride_distance', 'sum'),
                                surge_rate_card_amount = pd.NamedAgg('surge_rate_card_amount', 'sum'),
                                surge_dynamic_surge = pd.NamedAgg('surge_dynamic_surge', 'sum'),
                                surge_dynamic_fare = pd.NamedAgg('surge_dynamic_fare', 'sum')
                                ) \
                            .reset_index()

## Adding funnel 
df_analysis_post_group_v5['fe2rr'] = (df_analysis_post_group_v5['gross_orders']*100.0/df_analysis_post_group_v5['fe_count']).round(2)
df_analysis_post_group_v5['g2n'] = (df_analysis_post_group_v5['net_orders']*100.0/df_analysis_post_group_v5['gross_orders']).round(2)
df_analysis_post_group_v5['fe2net'] = (df_analysis_post_group_v5['net_orders']*100.0/df_analysis_post_group_v5['fe_count']).round(2)
df_analysis_post_group_v5['aor'] = (df_analysis_post_group_v5['accepted_orders']*100.0/df_analysis_post_group_v5['gross_orders']).round(2)
df_analysis_post_group_v5['surged_fe_percentage'] = (df_analysis_post_group_v5['surged_fe']*100.0/df_analysis_post_group_v5['fe_count']).round(2)
df_analysis_post_group_v5['surge_percentage'] = ((df_analysis_post_group_v5['surge_dynamic_surge']+df_analysis_post_group_v5['surge_dynamic_fare'])*100.0/df_analysis_post_group_v5['surge_rate_card_amount']).round(2)
df_analysis_post_group_v5['ppkm'] = (df_analysis_post_group_v5['final_amount']/df_analysis_post_group_v5['ride_distance']).round(2)
df_analysis_post_group_v5['fe_hex'] = (df_analysis_post_group_v5['fe_count']/df_analysis_post_group_v5['hex_count']).round(2)
df_analysis_post_group_v5['discount_percentage'] = (df_analysis_post_group_v5['discount']*100.0/df_analysis_post_group_v5['sub_total']).round(2)

In [102]:
## Adding pre post to column 

def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_pre_group_v5.columns]
    df_analysis_pre_group_v5.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_post_group_v5.columns]
    df_analysis_post_group_v5.columns = post_new_column_names
    
    
add_pre_post()

In [103]:
view_5  = pd.merge(df_analysis_pre_group_v5,
             df_analysis_post_group_v5,
             how = 'inner',
             left_on = ['affluence_pre', 'group_tc_pre', 'pickup_hex_id_pre', 'time_period_pre'],
             right_on = ['affluence_post', 'group_tc_post', 'pickup_hex_id_post', 'time_period_post']
            )

view_5.rename(columns = {'affluence_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc', 
                         'pickup_location_pre' : 'pickup_location',
                         'pickup_hex_id_pre' : 'pickup_hex_id',
                         'time_period_pre' : 'time_period',
                         'hex_count_pre' : 'hex_count'}, inplace = True)

view_5['fe2rr_delta'] = view_5['fe2rr_post'] - view_5['fe2rr_pre']
view_5['g2n_delta'] = view_5['g2n_post'] - view_5['g2n_pre']
view_5['fe2net_delta'] = view_5['fe2net_post'] - view_5['fe2net_pre']
view_5['aor_delta'] = view_5['aor_post'] - view_5['aor_pre']
view_5['surged_fe_delta'] = view_5['surged_fe_percentage_post'] - view_5['surged_fe_percentage_pre']
view_5['ppkm_delta'] = view_5['ppkm_pre'] - view_5['ppkm_post']
view_5['surge_percentage_delta'] = view_5['surge_percentage_post'] - view_5['surge_percentage_pre']
view_5['surged_fe_percentage_delta'] = view_5['surged_fe_percentage_post'] - view_5['surged_fe_percentage_pre']
view_5['discount_percentage_delta'] = view_5['discount_percentage_post'] - view_5['discount_percentage_pre']

In [104]:
view_5.head(3)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,...,dynamic_fare_post,rate_card_amount_post,mean_ride_distance_post,ride_distance_post,surge_rate_card_amount_post,surge_dynamic_surge_post,surge_dynamic_fare_post,fe2rr_post,g2n_post,fe2net_post,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1.Morning Peak,1,5452,980,241.0,337,10,428,117,1,0,192,359,1446.0,107542.0,641926.0,18955.89,0.0,38113.21,9.30959,51249.295354,...,0.0,111012.52,9.406745,37391.8112,111012.52,54148.62,0.0,16.73,31.66,5.3,47.18,24.72,48.78,13.72,3928.0,1.31,-1.25,7.07,0.88,10.55,18.54,-1.19,-0.96,18.54,-0.03
1,High Affluence,CONTROL,Akshaynagar,88618926adfffff,2.Afternoon,1,2004,270,159.0,10,1,56,45,0,0,10,212,4477.0,29692.0,211815.0,188.76,0.0,943.81,8.495359,17024.699807,...,0.0,0.0,8.051874,12158.329,0.0,0.0,0.0,17.57,58.11,10.21,76.98,0.0,,12.39,1508.0,8.88,4.1,-0.78,2.28,-1.54,-0.5,0.05,,-0.5,-6.2
2,High Affluence,CONTROL,Akshaynagar,88618926adfffff,3.Evening Peak,1,1546,280,180.0,8,0,50,44,0,0,5,230,4426.0,29634.0,162954.0,140.7,0.0,703.55,8.403108,12991.205698,...,0.0,1682.24,8.449988,11990.532815,1682.24,336.47,0.0,19.73,67.14,13.25,88.21,2.04,20.0,12.59,1419.0,9.72,1.62,2.85,1.61,6.07,1.52,-0.05,0.0,1.52,-5.22


In [106]:
df_analysis_pre_post_v5_dsm = pd.read_clipboard()
df_analysis_pre_post_v5_dsm.head(3)

Unnamed: 0,affluence,group_tc,cluster,hex_id,time_period,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1.Morning Peak,53.53,46.0,4.47,1.83,14.4,13.75,-7.53,-0.65
1,High Affluence,CONTROL,Akshaynagar,88618926adfffff,2.Afternoon,18.07,19.83,18.0,16.83,5.47,6.42,1.76,0.95
2,High Affluence,CONTROL,Akshaynagar,88618926adfffff,3.Evening Peak,18.0,24.17,29.13,32.0,2.87,4.0,6.17,1.13


In [107]:
df_analysis_pre_post_v5 = view_5.merge(df_analysis_pre_post_v5_dsm,
                                      how = 'inner',
                                      left_on = ['affluence','group_tc','pickup_hex_id', 'time_period'],
                                      right_on = ['affluence','group_tc','hex_id', 'time_period']
                                     )
df_analysis_pre_post_v5.head(3)

Unnamed: 0,affluence,group_tc,pickup_location,pickup_hex_id,time_period,hex_count,fe_count_pre,gross_orders_pre,net_orders_pre,surged_fe_pre,surged_net_pre,cobra_pre,ocara_pre,cobrm_pre,stockout_pre,expiry_mapped_pre,accepted_orders_pre,discount_pre,sub_total_pre,final_amount_pre,dynamic_surge_pre,dynamic_fare_pre,rate_card_amount_pre,mean_ride_distance_pre,ride_distance_pre,...,aor_post,surged_fe_percentage_post,surge_percentage_post,ppkm_post,fe_hex_post,discount_percentage_post,fe2rr_delta,g2n_delta,fe2net_delta,aor_delta,surged_fe_delta,ppkm_delta,surge_percentage_delta,surged_fe_percentage_delta,discount_percentage_delta,cluster,hex_id,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,1.Morning Peak,1,5452,980,241.0,337,10,428,117,1,0,192,359,1446.0,107542.0,641926.0,18955.89,0.0,38113.21,9.30959,51249.295354,...,47.18,24.72,48.78,13.72,3928.0,1.31,-1.25,7.07,0.88,10.55,18.54,-1.19,-0.96,18.54,-0.03,Akshaynagar,88618926adfffff,53.53,46.0,4.47,1.83,14.4,13.75,-7.53,-0.65
1,High Affluence,CONTROL,Akshaynagar,88618926adfffff,2.Afternoon,1,2004,270,159.0,10,1,56,45,0,0,10,212,4477.0,29692.0,211815.0,188.76,0.0,943.81,8.495359,17024.699807,...,76.98,0.0,,12.39,1508.0,8.88,4.1,-0.78,2.28,-1.54,-0.5,0.05,,-0.5,-6.2,Akshaynagar,88618926adfffff,18.07,19.83,18.0,16.83,5.47,6.42,1.76,0.95
2,High Affluence,CONTROL,Akshaynagar,88618926adfffff,3.Evening Peak,1,1546,280,180.0,8,0,50,44,0,0,5,230,4426.0,29634.0,162954.0,140.7,0.0,703.55,8.403108,12991.205698,...,88.21,2.04,20.0,12.59,1419.0,9.72,1.62,2.85,1.61,6.07,1.52,-0.05,0.0,1.52,-5.22,Akshaynagar,88618926adfffff,18.0,24.17,29.13,32.0,2.87,4.0,6.17,1.13


In [108]:
df_analysis_pre_post_v5_final = df_analysis_pre_post_v5[['affluence', 'group_tc', 'pickup_location', 'pickup_hex_id',
                                                         'time_period',  'fe2rr_pre', 'fe2rr_post', 'fe2rr_delta',
                                  'g2n_pre', 'g2n_post', 'g2n_delta',
                                  'fe2net_pre', 'fe2net_post', 'fe2net_delta', 'aor_pre', 'aor_post', 'aor_delta',
                                  'demand_per_day_pre', 'demand_per_day_post', 'demand_pd_delta',
                                  'supply_per_day_pre','supply_per_day_post',
                                  'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post', 'mismatch_qr_pd_delta',
                                  'ppkm_pre', 'ppkm_post', 'ppkm_delta',
                                  'fe_hex_pre', 'fe_hex_post',
                                  'surged_fe_percentage_pre', 'surged_fe_percentage_post', 'surged_fe_delta',
                                  'surge_percentage_pre', 'surge_percentage_post', 'surge_percentage_delta',
                                  'discount_percentage_pre', 'discount_percentage_post', 'discount_percentage_delta',
                                  'fe_count_pre', 'fe_count_post',
                                  'gross_orders_pre', 'gross_orders_post',
                                  'net_orders_pre', 'net_orders_post',
                                  'surged_fe_pre', 'surged_fe_post',
                                  'surged_net_pre', 'surged_net_post',
                                  'discount_pre', 'discount_post',
                                  'sub_total_pre', 'sub_total_post',
                                  'cobra_pre', 'cobra_post',
                                  'ocara_pre', 'ocara_post',
                                  'cobrm_pre', 'cobrm_post',
                                  'expiry_mapped_pre', 'expiry_mapped_post',
                                  'stockout_pre', 'stockout_post',
                                  'accepted_orders_pre', 'accepted_orders_post'
                                 ]]
df_analysis_pre_post_v5_final = df_analysis_pre_post_v5_final.sort_values(['affluence','group_tc', 'pickup_location', 'pickup_hex_id', 'time_period'])

In [109]:
df_analysis_pre_post_v5_final.to_clipboard(index=False)