In [10]:
import pandas as pd
import glob
import numpy as np

import math
from datetime import datetime

from pyhive import presto

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go

import statsmodels.api as sm

import statsmodels.formula.api as smf

In [11]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 300)

In [12]:
## Connection
connection = presto.connect(
        host='presto-gateway.serving.data.production.internal',
        port=80,
        protocol='http',
        catalog='hive',
        username='manoj.ravirajan@rapido.bike'
)

In [13]:
city = 'Chennai'
service = 'Link'
start_date = '20230710'
end_date = '20230723'

#PATH_TO_ROOT_DIR = '/Users/pallavichandra/commit_repo/latest_demand_repo/dse-demand-analysis/experiments/price_sensitivity/data/'

In [14]:
## Datasets

raw_dataset = f"""

    WITH 
    fe_merged AS (
        SELECT
            city,
            customer_id,
            fe_tbl.fare_estimate_id AS fare_estimate_id,
            pickup_cluster,
            yyyymmdd,
            orderdate,
            time_period,
            1 AS fe_count
        FROM
            (

               SELECT
                   city AS city,
                   user_id as customer_id,
                   fare_estimate_id,
                   CASE 
                        WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                        WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                        WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                        ELSE 'rest' 
                    END AS time_period,
                    pickup_cluster,
                   date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                   yyyymmdd

               FROM
                   pricing.fare_estimates_enriched
               WHERE
                   coalesce(CAST( (static_surge + dynamic_surge + dynamic_fare) AS DOUBLE), 0 ) > 0
                   AND service_name = '{service}'
                   AND city = '{city}'
                   AND yyyymmdd >= '{start_date}'
                   AND yyyymmdd <= '{end_date}'  

         ) fe_tbl
    ),

    fe_surged_non_surged_ord AS (
        SELECT
            city,
            customer_id,
            fe_tbl.fare_estimate_id AS fare_estimate_id,
            pickup_cluster,
            yyyymmdd,
            orderdate,
            time_period,
            1 AS fe_count
        FROM
            (

               SELECT
                   city AS city,
                   user_id as customer_id,
                   fare_estimate_id,
                   CASE 
                    WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                    WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                    WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                    ELSE 'rest' 
                END AS time_period,
                pickup_cluster,
                   date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                   yyyymmdd

               FROM
                   pricing.fare_estimates_enriched
               WHERE
                   -- coalesce(CAST( (static_surge + dynamic_surge + dynamic_fare) AS DOUBLE), 0 ) > 0
                   service_name = '{service}'
                   AND city = '{city}'
                   AND yyyymmdd >= '{start_date}'
                   AND yyyymmdd <= '{end_date}'  
         ) fe_tbl
    ),

    gross_net_orders_tbl as (

        SELECT * FROM
        (   select
                city_name AS city,
                estimate_id as fare_estimate_id,
                date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                yyyymmdd,
                CASE 
                    WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                    WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                    WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                    ELSE 'rest' 
                END AS time_period,
                pickup_cluster,
                1 AS gross_order_count,
                CASE WHEN order_status='dropped' and spd_fraud_flag = False THEN 1 ELSE 0 END AS net_orders_count_snapshot,
                0 AS net_order_count

            from
                orders.order_logs_snapshot A

            where
               service_obj_service_name = '{service}'
               AND city_name = '{city}'
               AND coalesce(CAST( (surge) AS DOUBLE), 0 ) > 0
               AND yyyymmdd >= '{start_date}'
               AND yyyymmdd <= '{end_date}'
            )
    ),

    gross_net_orders_tbl_all as (

        SELECT * FROM
        (   select
                city_name AS city,
                estimate_id as fare_estimate_id,
                date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                yyyymmdd,
                CASE 
                    WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                    WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                    WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                    ELSE 'rest' 
                END AS time_period,
                pickup_cluster,
                1 AS gross_order_count,
                CASE WHEN order_status='dropped' and spd_fraud_flag = False THEN 1 ELSE 0 END AS net_orders_count_snapshot,
                0 AS net_order_count

            from
                orders.order_logs_snapshot A
            where
               service_obj_service_name = '{service}'
               AND city_name = '{city}' 
               AND yyyymmdd >= '{start_date}'
               AND yyyymmdd <= '{end_date}'
            )
    ),

    surged_orders_count as (
    SELECT 
        pickup_cluster
        ,orderdate
        ,yyyymmdd
        ,time_period
        ,SUM(fe_count) AS fe_count
        ,SUM(gross_order_count) AS gross_order_count
        ,SUM(net_orders_count_snapshot) AS net_orders_count_snapshot
        --,SUM(net_order_count) AS net_order_count
        FROM
        (

            SELECT
                fe_merged.city AS city,
                fe_merged.customer_id AS customer_id,
                fe_merged.fare_estimate_id AS fare_estimate_id,
                fe_merged.orderdate AS orderdate,
                fe_merged.yyyymmdd AS yyyymmdd,
                fe_merged.time_period AS time_period,
                fe_merged.pickup_cluster AS pickup_cluster,
                fe_merged.fe_count AS fe_count,
                coalesce(gross_net_orders_tbl.gross_order_count,0) AS gross_order_count,
                coalesce(gross_net_orders_tbl.net_orders_count_snapshot ,0) AS net_orders_count_snapshot,
                coalesce(gross_net_orders_tbl.net_order_count    ,0) AS net_order_count
            FROM
                fe_merged
            LEFT JOIN 
                gross_net_orders_tbl 
            ON 
                gross_net_orders_tbl.fare_estimate_id=fe_merged.fare_estimate_id
            AND 
                fe_merged.city = gross_net_orders_tbl.city
            AND 
                fe_merged.orderdate = gross_net_orders_tbl.orderdate
            AND 
                fe_merged.time_period = gross_net_orders_tbl.time_period
            AND 
                fe_merged.pickup_cluster = gross_net_orders_tbl.pickup_cluster


        ) A
        GROUP BY 1,2,3,4
        ORDER BY 1,2,3,4
    ),

    all_orders_count as (
    SELECT 
          pickup_cluster
         ,orderdate
         ,yyyymmdd
         ,time_period
        ,SUM(fe_count) AS fe_count_all
        ,SUM(gross_order_count) AS gross_order_count_all
        ,SUM(net_orders_count_snapshot) AS net_orders_count_snapshot_all
        --,SUM(net_order_count) AS net_order_count
        FROM
        (

            SELECT

                fe_surged_non_surged_ord.city AS city,
                fe_surged_non_surged_ord.customer_id AS customer_id,
                fe_surged_non_surged_ord.fare_estimate_id AS fare_estimate_id,
                fe_surged_non_surged_ord.orderdate AS orderdate,
                fe_surged_non_surged_ord.yyyymmdd AS yyyymmdd,
                fe_surged_non_surged_ord.time_period AS time_period,
                fe_surged_non_surged_ord.pickup_cluster AS pickup_cluster,
                fe_surged_non_surged_ord.fe_count AS fe_count,
                coalesce(gross_net_orders_tbl_all.gross_order_count,0) AS gross_order_count,
                coalesce(gross_net_orders_tbl_all.net_orders_count_snapshot ,0) AS net_orders_count_snapshot,
                coalesce(gross_net_orders_tbl_all.net_order_count    ,0) AS net_order_count
            FROM
                fe_surged_non_surged_ord
            LEFT JOIN 
                gross_net_orders_tbl_all 
            ON 
                gross_net_orders_tbl_all.fare_estimate_id=fe_surged_non_surged_ord.fare_estimate_id
            AND 
                fe_surged_non_surged_ord.city = gross_net_orders_tbl_all.city
            AND 
                fe_surged_non_surged_ord.orderdate = gross_net_orders_tbl_all.orderdate
            AND 
                fe_surged_non_surged_ord.time_period = gross_net_orders_tbl_all.time_period
            AND 
                fe_surged_non_surged_ord.pickup_cluster = gross_net_orders_tbl_all.pickup_cluster
        ) A
        GROUP BY 1,2,3,4
        ORDER BY 1,2,3,4
    )

    SELECT 
        surged_ord_all.pickup_cluster,
        surged_ord_all.orderdate,
        surged_ord_all.yyyymmdd,
        surged_ord_all.time_period,
        fe_count,
        gross_order_count,
        net_orders_count_snapshot,
        fe_count_all,
        gross_order_count_all,
        net_orders_count_snapshot_all,
        ROUND((100 * CAST(surged_ord.fe_count AS DOUBLE)) / NULLIF(surged_ord_all.fe_count_all,0),4) as fe_contri_percent,
        ROUND((100 * CAST(surged_ord.gross_order_count AS DOUBLE)) / NULLIF(surged_ord_all.gross_order_count_all,0),4) as gross_contri_percent,
        ROUND((100 * CAST(surged_ord.net_orders_count_snapshot AS DOUBLE)) / NULLIF(surged_ord_all.net_orders_count_snapshot_all,0),4) as net_contri_percent
    FROM 
        surged_orders_count surged_ord
    JOIN 
        all_orders_count surged_ord_all
        ON 
            surged_ord.orderdate = surged_ord_all.orderdate
        AND 
            surged_ord.time_period = surged_ord_all.time_period
        AND 
            surged_ord.pickup_cluster = surged_ord_all.pickup_cluster
    ORDER BY 
        1,2,3,4

"""

In [15]:
df_raw_dataset = pd.read_sql(raw_dataset, connection)
df_raw_dataset.head(3)

  df_raw_dataset = pd.read_sql(raw_dataset, connection)


DatabaseError: Execution failed on sql: 

    WITH 
    fe_merged AS (
        SELECT
            city,
            customer_id,
            fe_tbl.fare_estimate_id AS fare_estimate_id,
            pickup_cluster,
            yyyymmdd,
            orderdate,
            time_period,
            1 AS fe_count
        FROM
            (

               SELECT
                   city AS city,
                   user_id as customer_id,
                   fare_estimate_id,
                   CASE 
                        WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                        WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                        WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                        ELSE 'rest' 
                    END AS time_period,
                    pickup_cluster,
                   date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                   yyyymmdd

               FROM
                   pricing.fare_estimates_enriched
               WHERE
                   coalesce(CAST( (static_surge + dynamic_surge + dynamic_fare) AS DOUBLE), 0 ) > 0
                   AND service_name = 'Link'
                   AND city = 'Chennai'
                   AND yyyymmdd >= '20230710'
                   AND yyyymmdd <= '20230723'  

         ) fe_tbl
    ),

    fe_surged_non_surged_ord AS (
        SELECT
            city,
            customer_id,
            fe_tbl.fare_estimate_id AS fare_estimate_id,
            pickup_cluster,
            yyyymmdd,
            orderdate,
            time_period,
            1 AS fe_count
        FROM
            (

               SELECT
                   city AS city,
                   user_id as customer_id,
                   fare_estimate_id,
                   CASE 
                    WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                    WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                    WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                    ELSE 'rest' 
                END AS time_period,
                pickup_cluster,
                   date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                   yyyymmdd

               FROM
                   pricing.fare_estimates_enriched
               WHERE
                   -- coalesce(CAST( (static_surge + dynamic_surge + dynamic_fare) AS DOUBLE), 0 ) > 0
                   service_name = 'Link'
                   AND city = 'Chennai'
                   AND yyyymmdd >= '20230710'
                   AND yyyymmdd <= '20230723'  
         ) fe_tbl
    ),

    gross_net_orders_tbl as (

        SELECT * FROM
        (   select
                city_name AS city,
                estimate_id as fare_estimate_id,
                date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                yyyymmdd,
                CASE 
                    WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                    WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                    WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                    ELSE 'rest' 
                END AS time_period,
                pickup_cluster,
                1 AS gross_order_count,
                CASE WHEN order_status='dropped' and spd_fraud_flag = False THEN 1 ELSE 0 END AS net_orders_count_snapshot,
                0 AS net_order_count

            from
                orders.order_logs_snapshot A

            where
               service_obj_service_name = 'Link'
               AND city_name = 'Chennai'
               AND coalesce(CAST( (surge) AS DOUBLE), 0 ) > 0
               AND yyyymmdd >= '20230710'
               AND yyyymmdd <= '20230723'
            )
    ),

    gross_net_orders_tbl_all as (

        SELECT * FROM
        (   select
                city_name AS city,
                estimate_id as fare_estimate_id,
                date_format(from_unixtime(epoch / 1000, 'Asia/Kolkata'), '%Y-%m-%d') AS orderdate,
                yyyymmdd,
                CASE 
                    WHEN substr(quarter_hour,1,2) IN ('08','09','10') THEN 'morning_peak'
                    WHEN substr(quarter_hour,1,2) IN ('11','12','13','14','15','16') THEN 'afternoon_peak'
                    WHEN substr(quarter_hour,1,2) IN ('17','18','19','20') THEN 'evening_peak'
                    ELSE 'rest' 
                END AS time_period,
                pickup_cluster,
                1 AS gross_order_count,
                CASE WHEN order_status='dropped' and spd_fraud_flag = False THEN 1 ELSE 0 END AS net_orders_count_snapshot,
                0 AS net_order_count

            from
                orders.order_logs_snapshot A
            where
               service_obj_service_name = 'Link'
               AND city_name = 'Chennai' 
               AND yyyymmdd >= '20230710'
               AND yyyymmdd <= '20230723'
            )
    ),

    surged_orders_count as (
    SELECT 
        pickup_cluster
        ,orderdate
        ,yyyymmdd
        ,time_period
        ,SUM(fe_count) AS fe_count
        ,SUM(gross_order_count) AS gross_order_count
        ,SUM(net_orders_count_snapshot) AS net_orders_count_snapshot
        --,SUM(net_order_count) AS net_order_count
        FROM
        (

            SELECT
                fe_merged.city AS city,
                fe_merged.customer_id AS customer_id,
                fe_merged.fare_estimate_id AS fare_estimate_id,
                fe_merged.orderdate AS orderdate,
                fe_merged.yyyymmdd AS yyyymmdd,
                fe_merged.time_period AS time_period,
                fe_merged.pickup_cluster AS pickup_cluster,
                fe_merged.fe_count AS fe_count,
                coalesce(gross_net_orders_tbl.gross_order_count,0) AS gross_order_count,
                coalesce(gross_net_orders_tbl.net_orders_count_snapshot ,0) AS net_orders_count_snapshot,
                coalesce(gross_net_orders_tbl.net_order_count    ,0) AS net_order_count
            FROM
                fe_merged
            LEFT JOIN 
                gross_net_orders_tbl 
            ON 
                gross_net_orders_tbl.fare_estimate_id=fe_merged.fare_estimate_id
            AND 
                fe_merged.city = gross_net_orders_tbl.city
            AND 
                fe_merged.orderdate = gross_net_orders_tbl.orderdate
            AND 
                fe_merged.time_period = gross_net_orders_tbl.time_period
            AND 
                fe_merged.pickup_cluster = gross_net_orders_tbl.pickup_cluster


        ) A
        GROUP BY 1,2,3,4
        ORDER BY 1,2,3,4
    ),

    all_orders_count as (
    SELECT 
          pickup_cluster
         ,orderdate
         ,yyyymmdd
         ,time_period
        ,SUM(fe_count) AS fe_count_all
        ,SUM(gross_order_count) AS gross_order_count_all
        ,SUM(net_orders_count_snapshot) AS net_orders_count_snapshot_all
        --,SUM(net_order_count) AS net_order_count
        FROM
        (

            SELECT

                fe_surged_non_surged_ord.city AS city,
                fe_surged_non_surged_ord.customer_id AS customer_id,
                fe_surged_non_surged_ord.fare_estimate_id AS fare_estimate_id,
                fe_surged_non_surged_ord.orderdate AS orderdate,
                fe_surged_non_surged_ord.yyyymmdd AS yyyymmdd,
                fe_surged_non_surged_ord.time_period AS time_period,
                fe_surged_non_surged_ord.pickup_cluster AS pickup_cluster,
                fe_surged_non_surged_ord.fe_count AS fe_count,
                coalesce(gross_net_orders_tbl_all.gross_order_count,0) AS gross_order_count,
                coalesce(gross_net_orders_tbl_all.net_orders_count_snapshot ,0) AS net_orders_count_snapshot,
                coalesce(gross_net_orders_tbl_all.net_order_count    ,0) AS net_order_count
            FROM
                fe_surged_non_surged_ord
            LEFT JOIN 
                gross_net_orders_tbl_all 
            ON 
                gross_net_orders_tbl_all.fare_estimate_id=fe_surged_non_surged_ord.fare_estimate_id
            AND 
                fe_surged_non_surged_ord.city = gross_net_orders_tbl_all.city
            AND 
                fe_surged_non_surged_ord.orderdate = gross_net_orders_tbl_all.orderdate
            AND 
                fe_surged_non_surged_ord.time_period = gross_net_orders_tbl_all.time_period
            AND 
                fe_surged_non_surged_ord.pickup_cluster = gross_net_orders_tbl_all.pickup_cluster
        ) A
        GROUP BY 1,2,3,4
        ORDER BY 1,2,3,4
    )

    SELECT 
        surged_ord_all.pickup_cluster,
        surged_ord_all.orderdate,
        surged_ord_all.yyyymmdd,
        surged_ord_all.time_period,
        fe_count,
        gross_order_count,
        net_orders_count_snapshot,
        fe_count_all,
        gross_order_count_all,
        net_orders_count_snapshot_all,
        ROUND((100 * CAST(surged_ord.fe_count AS DOUBLE)) / NULLIF(surged_ord_all.fe_count_all,0),4) as fe_contri_percent,
        ROUND((100 * CAST(surged_ord.gross_order_count AS DOUBLE)) / NULLIF(surged_ord_all.gross_order_count_all,0),4) as gross_contri_percent,
        ROUND((100 * CAST(surged_ord.net_orders_count_snapshot AS DOUBLE)) / NULLIF(surged_ord_all.net_orders_count_snapshot_all,0),4) as net_contri_percent
    FROM 
        surged_orders_count surged_ord
    JOIN 
        all_orders_count surged_ord_all
        ON 
            surged_ord.orderdate = surged_ord_all.orderdate
        AND 
            surged_ord.time_period = surged_ord_all.time_period
        AND 
            surged_ord.pickup_cluster = surged_ord_all.pickup_cluster
    ORDER BY 
        1,2,3,4


HTTPConnectionPool(host='presto-gateway.serving.data.production.internal', port=80): Max retries exceeded with url: /v1/statement (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f87cac3bf10>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))
unable to rollback

In [52]:
df_raw_dataset.to_csv('/Users/rapido/local-datasets/non-peak-demand/raw/raw_data_surged_orders_contri_analysis_{}_{}_{}_to_{}.csv' \
                                .format(city, service,start_date,end_date)
                               , index = False)

In [53]:
surged_orders_contri_cluster_level = pd.read_csv('/Users/rapido/local-datasets/non-peak-demand/raw/raw_data_surged_orders_contri_analysis_{}_{}_{}_to_{}.csv' \
                                                 .format(city, service,start_date,end_date))

surged_orders_contri_cluster_level.pickup_cluster.unique()

array([nan, 'Adambakkam', 'Adyar', 'Alamathi', 'Alandur', 'Allapakkam',
       'Alwarpet', 'Alwarthirunagar', 'Ambattur',
       'Ambattur Industrial Estate', 'Ambattur OT', 'Anakaputhur',
       'Anna Nagar', 'Anna Nagar East', 'Annanur', 'Arasankalani',
       'Arumbakkam 2', 'Ashok Nagar Chennai', 'Avadi', 'Ayanambakkam',
       'Ayappakkam', 'Baby Nagar Velachery', 'Balaji Nagar',
       'Basin Bridge', 'Beasant Avenue', 'Beasant Nagar',
       'CHN CHETTINAD HEALTH CITY', 'CHN GUDUVANCHERY', 'CHN KANDIGAI',
       'CHN KELAMBAKKAM', 'CHN MAMBAKKAM', 'CHN MURUGAMANGALAM',
       'CHN SRM UNIVERSITY', 'CHN URAPAKKAM', 'CHN VANDALUR', 'CMBT',
       'Chembarambakkam', 'Chennai Beach', 'Chennai Central',
       'Chennai International Airport', 'Chennai One IT park ', 'Chepauk',
       'Choolai', 'Chromepet', 'ECR Beach', 'East Tambaram', 'Egmore',
       'Egmore Metro', 'Ekkatutthangal Metro', 'Ennore', 'Guindy',
       'Guindy 3', 'Guindy Madras Race Club', 'Guindy National Park',
  

In [54]:
surged_orders_contri_cluster_level['week_period'] = surged_orders_contri_cluster_level['yyyymmdd'].map(
    lambda x: datetime.strptime(str(x), '%Y%m%d').strftime('%A')
)

surged_orders_contri_cluster_level.head()

Unnamed: 0,pickup_cluster,orderdate,yyyymmdd,time_period,fe_count,gross_order_count,net_orders_count_snapshot,fe_count_all,gross_order_count_all,net_orders_count_snapshot_all,fe_contri_percent,gross_contri_percent,net_contri_percent,week_period
0,,2023-07-10,20230710,afternoon_peak,44,8,5,263,39,15,16.73,20.5128,33.3333,Monday
1,,2023-07-10,20230710,evening_peak,13,1,0,169,16,7,7.6923,6.25,0.0,Monday
2,,2023-07-10,20230710,morning_peak,57,5,1,152,31,15,37.5,16.129,6.6667,Monday
3,,2023-07-11,20230711,afternoon_peak,16,1,0,208,35,17,7.6923,2.8571,0.0,Tuesday
4,,2023-07-11,20230711,evening_peak,11,2,1,199,19,8,5.5276,10.5263,12.5,Tuesday


In [55]:
surged_orders_contri_cluster_level.loc[
    (surged_orders_contri_cluster_level['time_period'] == 'afternoon_peak'), 'time_period' ] = 'afternoon'

In [56]:
surged_orders_contri_cluster_level.head()

Unnamed: 0,pickup_cluster,orderdate,yyyymmdd,time_period,fe_count,gross_order_count,net_orders_count_snapshot,fe_count_all,gross_order_count_all,net_orders_count_snapshot_all,fe_contri_percent,gross_contri_percent,net_contri_percent,week_period
0,,2023-07-10,20230710,afternoon,44,8,5,263,39,15,16.73,20.5128,33.3333,Monday
1,,2023-07-10,20230710,evening_peak,13,1,0,169,16,7,7.6923,6.25,0.0,Monday
2,,2023-07-10,20230710,morning_peak,57,5,1,152,31,15,37.5,16.129,6.6667,Monday
3,,2023-07-11,20230711,afternoon,16,1,0,208,35,17,7.6923,2.8571,0.0,Tuesday
4,,2023-07-11,20230711,evening_peak,11,2,1,199,19,8,5.5276,10.5263,12.5,Tuesday


In [57]:
excess_supply_all_sessions = pd.read_csv('/Users/rapido/local-datasets/non-peak-demand/raw/excess_supply_all_sessions_{}_{}_{}_{}.csv' \
                                        .format(city, service,start_date,end_date))
excess_supply_all_sessions.head()

Unnamed: 0.1,Unnamed: 0,city,pickup_cluster,service_detail_id,week_period,time_period,demand,supply,mismatch_qr_level,fe_count,rr_count,net_count,fe_count_med,rr_count_med,net_count_med,mismtach_pred_tp,excess_supply_ratio,is_excess_supply,unique_name
0,19,Chennai,Adambakkam,5bed473f1278885df4ea9d57,Wednesday,evening_peak,71.0,128.0,-6.0,1485,363,270,303.0,74.0,56.0,-57.0,1.802817,yes,Adambakkam_Wednesday
1,145,Chennai,Ambattur,5bed473f1278885df4ea9d57,Friday,afternoon,53.0,78.0,-3.5,1214,291,201,246.0,58.0,41.0,-25.0,1.471698,yes,Ambattur_Friday
2,146,Chennai,Ambattur,5bed473f1278885df4ea9d57,Friday,evening_peak,88.0,116.0,-2.0,1577,449,353,311.0,89.0,67.0,-28.0,1.318182,yes,Ambattur_Friday
3,149,Chennai,Ambattur,5bed473f1278885df4ea9d57,Monday,evening_peak,79.0,96.0,-3.0,1602,414,328,351.0,81.0,66.0,-17.0,1.21519,yes,Ambattur_Monday
4,151,Chennai,Ambattur,5bed473f1278885df4ea9d57,Saturday,afternoon,57.0,70.0,-2.0,1637,301,224,342.0,61.0,44.0,-13.0,1.22807,yes,Ambattur_Saturday


In [58]:
surged_orders_contri_cluster_level['cluster_week_time'] = surged_orders_contri_cluster_level[
    'pickup_cluster'] + '-' + surged_orders_contri_cluster_level[
    'week_period'] + '-' + surged_orders_contri_cluster_level['time_period']

excess_supply_all_sessions['cluster_week_time'] = excess_supply_all_sessions[
    'pickup_cluster'] + '-' + excess_supply_all_sessions['week_period'] + '-' + excess_supply_all_sessions[
    'time_period']

In [59]:
lh_clusters_filtered = surged_orders_contri_cluster_level[
    surged_orders_contri_cluster_level['cluster_week_time'].isin(
        excess_supply_all_sessions['cluster_week_time'].unique().tolist())]

lh_clusters_filtered.head()

Unnamed: 0,pickup_cluster,orderdate,yyyymmdd,time_period,fe_count,gross_order_count,net_orders_count_snapshot,fe_count_all,gross_order_count_all,net_orders_count_snapshot_all,fe_contri_percent,gross_contri_percent,net_contri_percent,week_period,cluster_week_time
59,Adambakkam,2023-07-12,20230712,evening_peak,147,27,19,724,160,116,20.3039,16.875,16.3793,Wednesday,Adambakkam-Wednesday-evening_peak
81,Adambakkam,2023-07-19,20230719,evening_peak,4,0,0,633,126,94,0.6319,0.0,0.0,Wednesday,Adambakkam-Wednesday-evening_peak
356,Ambattur,2023-07-10,20230710,evening_peak,35,9,9,863,187,147,4.0556,4.8128,6.1224,Monday,Ambattur-Monday-evening_peak
358,Ambattur,2023-07-11,20230711,afternoon,244,42,25,991,203,136,24.6216,20.6897,18.3824,Tuesday,Ambattur-Tuesday-afternoon
359,Ambattur,2023-07-11,20230711,evening_peak,81,26,17,720,186,140,11.25,13.9785,12.1429,Tuesday,Ambattur-Tuesday-evening_peak


## Exclusing Rest

In [60]:
funnel_coverage_calc_data_all_sessions = lh_clusters_filtered[
    lh_clusters_filtered['time_period'] != 'rest']

fe_countribution = 100*funnel_coverage_calc_data_all_sessions['fe_count'].sum()/funnel_coverage_calc_data_all_sessions[
    'fe_count_all'].sum()
rr_countribution = 100*funnel_coverage_calc_data_all_sessions['gross_order_count'].sum()/funnel_coverage_calc_data_all_sessions[
    'gross_order_count_all'].sum()
net_countribution = 100*funnel_coverage_calc_data_all_sessions['net_orders_count_snapshot'].sum()/funnel_coverage_calc_data_all_sessions[
    'net_orders_count_snapshot_all'].sum()


print("surged orders % contribution by these clusters, week period and time peirod combinations during all sessions")
print("-------------------------------------------------")
print("FE Contribution %",fe_countribution)
print("RR Contribution %",rr_countribution)
print("Net Contribution %",net_countribution)


surged orders % contribution by these clusters, week period and time peirod combinations during all sessions
-------------------------------------------------
FE Contribution % 17.33565240640157
RR Contribution % 14.251653997747749
Net Contribution % 14.475685481057699


## Evening Peak

In [61]:
funnel_coverage_calc_data_ep = lh_clusters_filtered[lh_clusters_filtered['time_period'] == 'evening_peak']

fe_countribution = 100*funnel_coverage_calc_data_ep['fe_count'].sum()/funnel_coverage_calc_data_ep[
    'fe_count_all'].sum()
rr_countribution = 100*funnel_coverage_calc_data_ep['gross_order_count'].sum()/funnel_coverage_calc_data_ep[
    'gross_order_count_all'].sum()
net_countribution = 100*funnel_coverage_calc_data_ep['net_orders_count_snapshot'].sum()/funnel_coverage_calc_data_ep[
    'net_orders_count_snapshot_all'].sum()


print("% contribution by these clusters during evening peak")
print("-------------------------------------------------")
print("FE Contribution %",fe_countribution)
print("RR Contribution %",rr_countribution)
print("Net Contribution %",net_countribution)


% contribution by these clusters during evening peak
-------------------------------------------------
FE Contribution % 15.517395641065576
RR Contribution % 12.849117809953318
Net Contribution % 12.700936741066267


## Afternoon

In [62]:
funnel_coverage_calc_data_after = lh_clusters_filtered[lh_clusters_filtered['time_period'] == 'afternoon']

fe_countribution = 100*funnel_coverage_calc_data_after['fe_count'].sum()/funnel_coverage_calc_data_after[
    'fe_count_all'].sum()
rr_countribution = 100*funnel_coverage_calc_data_after['gross_order_count'].sum()/funnel_coverage_calc_data_after[
    'gross_order_count_all'].sum()
net_countribution = 100*funnel_coverage_calc_data_after['net_orders_count_snapshot'].sum()/funnel_coverage_calc_data_after[
    'net_orders_count_snapshot_all'].sum()


print("% contribution by these clusters during afternoon")
print("-------------------------------------------------")
print("FE Contribution %",fe_countribution)
print("RR Contribution %",rr_countribution)
print("Net Contribution %",net_countribution)


% contribution by these clusters during afternoon
-------------------------------------------------
FE Contribution % 19.11816807596013
RR Contribution % 15.542613584170786
Net Contribution % 16.38359110605054


## Morning Peak

In [63]:
funnel_coverage_calc_data_mp = lh_clusters_filtered[lh_clusters_filtered['time_period'] == 'morning_peak']

fe_countribution = 100*funnel_coverage_calc_data_mp['fe_count'].sum()/funnel_coverage_calc_data_mp[
    'fe_count_all'].sum()
rr_countribution = 100*funnel_coverage_calc_data_mp['gross_order_count'].sum()/funnel_coverage_calc_data_mp[
    'gross_order_count_all'].sum()
net_countribution = 100*funnel_coverage_calc_data_mp['net_orders_count_snapshot'].sum()/funnel_coverage_calc_data_mp[
    'net_orders_count_snapshot_all'].sum()


print("% contribution by these clusters during morning peak")
print("-------------------------------------------------")
print("FE Contribution %",fe_countribution)
print("RR Contribution %",rr_countribution)
print("Net Contribution %",net_countribution)


% contribution by these clusters during morning peak
-------------------------------------------------
FE Contribution % 16.182355282730303
RR Contribution % 14.325227613230378
Net Contribution % 14.67032967032967


### Shared sheet #

In [64]:
lh_clusters_filtered.head()

Unnamed: 0,pickup_cluster,orderdate,yyyymmdd,time_period,fe_count,gross_order_count,net_orders_count_snapshot,fe_count_all,gross_order_count_all,net_orders_count_snapshot_all,fe_contri_percent,gross_contri_percent,net_contri_percent,week_period,cluster_week_time
59,Adambakkam,2023-07-12,20230712,evening_peak,147,27,19,724,160,116,20.3039,16.875,16.3793,Wednesday,Adambakkam-Wednesday-evening_peak
81,Adambakkam,2023-07-19,20230719,evening_peak,4,0,0,633,126,94,0.6319,0.0,0.0,Wednesday,Adambakkam-Wednesday-evening_peak
356,Ambattur,2023-07-10,20230710,evening_peak,35,9,9,863,187,147,4.0556,4.8128,6.1224,Monday,Ambattur-Monday-evening_peak
358,Ambattur,2023-07-11,20230711,afternoon,244,42,25,991,203,136,24.6216,20.6897,18.3824,Tuesday,Ambattur-Tuesday-afternoon
359,Ambattur,2023-07-11,20230711,evening_peak,81,26,17,720,186,140,11.25,13.9785,12.1429,Tuesday,Ambattur-Tuesday-evening_peak


In [65]:
df_numbers_filtered = lh_clusters_filtered \
                        .groupby(['orderdate','time_period'])\
                        .agg({'fe_count' : 'sum',
                               'gross_order_count' : 'sum',
                               'net_orders_count_snapshot' : 'sum',
                               'fe_count_all' : 'sum',
                               'gross_order_count_all' : 'sum',
                               'net_orders_count_snapshot_all' : 'sum'
                              }).reset_index()

df_numbers_filtered['fe_contri_percent'] = (df_numbers_filtered['fe_count']*100/df_numbers_filtered['fe_count_all']).round()
df_numbers_filtered['gross_contri_percent'] = (df_numbers_filtered['gross_order_count']*100/df_numbers_filtered['gross_order_count_all']).round()
df_numbers_filtered['net_contri_percent'] = (df_numbers_filtered['net_orders_count_snapshot']*100/df_numbers_filtered['net_orders_count_snapshot_all']).round()
df_numbers_filtered

Unnamed: 0,orderdate,time_period,fe_count,gross_order_count,net_orders_count_snapshot,fe_count_all,gross_order_count_all,net_orders_count_snapshot_all,fe_contri_percent,gross_contri_percent,net_contri_percent
0,2023-07-10,afternoon,2167,281,148,17999,3023,1546,12.0,9.0,10.0
1,2023-07-10,evening_peak,1464,197,130,17355,3128,2078,8.0,6.0,6.0
2,2023-07-10,morning_peak,213,46,19,2058,413,193,10.0,11.0,10.0
3,2023-07-11,afternoon,3008,521,340,17963,3443,1964,17.0,15.0,17.0
4,2023-07-11,evening_peak,1799,292,195,17784,3532,2416,10.0,8.0,8.0
5,2023-07-11,morning_peak,632,101,66,1853,368,259,34.0,27.0,25.0
6,2023-07-12,afternoon,2020,314,199,15214,2738,1544,13.0,11.0,13.0
7,2023-07-12,evening_peak,4255,704,506,25719,5004,3559,17.0,14.0,14.0
8,2023-07-12,morning_peak,421,74,52,3044,605,395,14.0,12.0,13.0
9,2023-07-13,afternoon,8464,1241,738,21363,3777,2148,40.0,33.0,34.0


In [41]:
df_numbers_filtered.to_clipboard(index=False)