In [1]:
import os
import h3 as h3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
from pyhive import presto
from keplergl import KeplerGl
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

In [2]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 300)

In [3]:
## Connection
connection = presto.connect(
        host='presto-gateway.serving.data.production.internal',
        port=80,
        protocol='http',
        catalog='hive',
        username='manoj.ravirajan@rapido.bike'
)## Parameter 

## Parameter 

In [4]:
city = 'Bangalore'
service = 'Link'

In [5]:
## datasets.service_mapping

service_mapping = f"""
        SELECT 
            city_display_name AS city,
            service_level AS service_name,
            service_detail_id,
            city_id,
            service_id
        FROM 
            datasets.service_mapping
        WHERE 
            city_display_name = '{city}'
            AND service_level = '{service}'
"""

df_service_mapping = pd.read_sql(service_mapping, connection)
df_service_mapping.head()

Unnamed: 0,city,service_name,service_detail_id,city_id,service_id
0,Bangalore,Link,57370b61a6855d70057417d1,572ca7ff116b5db3057bd814,572e29b0116b5db3057bd821


In [6]:
service_detail_id = df_service_mapping.service_detail_id.loc[0]
service_detail_id

'57370b61a6855d70057417d1'

## Dataset

In [7]:
pre_start_date = '20230724'
pre_end_date = '20230813'
post_start_date = '20230824'
post_end_date = '20230910'

In [8]:
## Experimental Hex's

experimental_hex = pd.read_csv('/Users/rapido/local-datasets/affluence/experiment/exp_affluence_bangalore_link_circuit_break_hex_8_list_v1.csv')
experimental_hex_list = experimental_hex[['pickup_cluster', 'pickup_hex_8','income_signal', 'affluence_tag','group_tc']]

In [9]:
experimental_hex_list.groupby(['affluence_tag', 'group_tc']).pickup_hex_8.count()

affluence_tag   group_tc
High Affluence  control     12
                test        13
Low Affluence   control     22
                test        23
Name: pickup_hex_8, dtype: int64

In [10]:
exp_hex_list = experimental_hex_list['pickup_hex_8'].values.tolist()
len(exp_hex_list)

70

In [11]:
## pricing.fare_estimates_enriched

raw_dataset = f"""

WITH city_cluster_hex AS (
        
            SELECT 
                city, 
                cluster,
                hex_id
            FROM 
                datasets.city_cluster_hex
            WHERE 
                city = '{city}'
                AND resolution = 8
        ),

        marketplace_mismatch_realtime AS (

            SELECT 
                yyyymmdd,
                quarter_hour,
                service_detail_id,
                service_level,
                hex_id,
                rr_counts as demand,
                idle_captain_counts_probabilistic as idle_captain_counts

            FROM 
                hive.datasets.marketplace_mismatch_realtime
            WHERE 
                -- yyyymmdd BETWEEN '{pre_start_date}' AND '{pre_end_date}'
                yyyymmdd BETWEEN '{post_start_date}' AND '{post_end_date}'
                AND service_detail_id = '{service_detail_id}'
        )

        SELECT
            yyyymmdd,
            CAST(DAY_OF_WEEK(DATE_PARSE(yyyymmdd, '%Y%m%d')) AS VARCHAR ) || '. ' || DATE_FORMAT(DATE_PARSE(yyyymmdd, '%Y%m%d'), '%W') weekday,
            CASE 
            WHEN CAST(SUBSTR(quarter_hour, 1,2) AS INT) >= 8 AND CAST(SUBSTR(quarter_hour, 1,2) AS INT) <= 11 THEN '1.Morning Peak'
            WHEN CAST(SUBSTR(quarter_hour, 1,2) AS INT) >= 17 AND CAST(SUBSTR(quarter_hour, 1,2) AS INT) <= 21 THEN '3.Evening Peak'
            WHEN CAST(SUBSTR(quarter_hour, 1,2) AS INT) > 11 AND CAST(SUBSTR(quarter_hour, 1,2) AS INT) < 17 THEN '2.Afternoon'
            ELSE '4.Rest' END AS time_period,
            quarter_hour,
            cch.city,
            service_level,
            service_detail_id,
            cluster,
            mm.hex_id,
            sum(demand) demand,
            sum(idle_captain_counts) supply,
            (sum(demand) - sum(idle_captain_counts)) mismatch
        FROM 
            marketplace_mismatch_realtime mm

        JOIN 
            city_cluster_hex cch 
            ON mm.hex_id = cch.hex_id

        GROUP BY 1,2,3,4,5,6,7,8,9
        
"""

In [12]:
# df_raw_dataset = pd.read_sql(raw_dataset, connection)
# df_raw_dataset.head(3)

In [13]:
#df_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/mismatch_pre_raw_data_{}_to_{}.csv' \
#                                .format(pre_start_date, pre_end_date)
#                                , index = False)

'''
df_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/mismatch_post_raw_data_{}_to_{}.csv' \
                                .format(post_start_date, post_end_date)
                               , index = False)
#'''

"\ndf_raw_dataset.to_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/mismatch_post_raw_data_{}_to_{}.csv'                                 .format(post_start_date, post_end_date)\n                               , index = False)\n#"

In [14]:
df_pre_raw_dataset = pd.read_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/mismatch_pre_raw_data_{}_to_{}.csv' \
                               .format(pre_start_date, pre_end_date))
df_post_raw_dataset = pd.read_csv('/Users/rapido/local-datasets/affluence/pre-post-analysis/mismatch_post_raw_data_{}_to_{}.csv' \
                               .format(post_start_date, post_end_date))

In [15]:
df_mismatch_pre_raw = df_pre_raw_dataset.copy(deep=True)
print(df_mismatch_pre_raw.shape)
df_mismatch_post_raw = df_post_raw_dataset.copy(deep=True)
print(df_mismatch_post_raw.shape)

(1175561, 12)
(1038702, 12)


In [16]:
df_mismatch_post_raw.head(2)

Unnamed: 0,yyyymmdd,weekday,time_period,quarter_hour,city,service_level,service_detail_id,cluster,hex_id,demand,supply,mismatch
0,20230909,6. Saturday,4.Rest,100,Bangalore,Link,57370b61a6855d70057417d1,Harlur,886189246dfffff,1.0,2.0,-1.0
1,20230909,6. Saturday,4.Rest,100,Bangalore,Link,57370b61a6855d70057417d1,Kadubeesanahalli,8861892095fffff,1.0,16.0,-15.0


In [17]:
df_mismatch_post_raw.yyyymmdd.unique()

array([20230909, 20230908, 20230910, 20230904, 20230905, 20230906,
       20230902, 20230903, 20230901, 20230907, 20230831, 20230830,
       20230829, 20230828, 20230827, 20230826, 20230825, 20230824])

In [18]:
## Experimental Hex's

experimental_hex = pd.read_csv('/Users/rapido/local-datasets/affluence/experiment/exp_affluence_bangalore_link_circuit_break_hex_8_list_v1.csv')
experimental_hex_list = experimental_hex[['pickup_hex_8', 'affluence_tag','group_tc']]
experimental_hex_list['group_tc'] = experimental_hex_list['group_tc'].str.upper()
experimental_hex_list.groupby(['affluence_tag', 'group_tc']).pickup_hex_8.count()

affluence_tag   group_tc
High Affluence  CONTROL     12
                TEST        13
Low Affluence   CONTROL     22
                TEST        23
Name: pickup_hex_8, dtype: int64

In [19]:
experimental_hex_list.head()

Unnamed: 0,pickup_hex_8,affluence_tag,group_tc
0,88618921d3fffff,High Affluence,TEST
1,88618921c7fffff,High Affluence,CONTROL
2,8861892665fffff,High Affluence,TEST
3,8860145a33fffff,High Affluence,CONTROL
4,8861892c97fffff,High Affluence,TEST


In [20]:
## Merge with exp hex's 

# pre
df_mismatch_pre = pd.merge(df_mismatch_pre_raw, \
                           experimental_hex_list, \
                           how = 'inner',
                           left_on = ['hex_id'],
                           right_on = ['pickup_hex_8']
                          )
print('pre-data')
print(df_mismatch_pre.groupby(['affluence_tag', 'group_tc']).pickup_hex_8.nunique())

print('--------------------------------')

# post
df_mismatch_post = pd.merge(df_mismatch_post_raw, \
                           experimental_hex_list, \
                           how = 'inner',
                           left_on = ['hex_id'],
                           right_on = ['pickup_hex_8']
                          )
print('post-data')
print(df_mismatch_post.groupby(['affluence_tag', 'group_tc']).pickup_hex_8.nunique())

pre-data
affluence_tag   group_tc
High Affluence  CONTROL     12
                TEST        13
Low Affluence   CONTROL     22
                TEST        23
Name: pickup_hex_8, dtype: int64
--------------------------------
post-data
affluence_tag   group_tc
High Affluence  CONTROL     12
                TEST        13
Low Affluence   CONTROL     22
                TEST        23
Name: pickup_hex_8, dtype: int64


## Refined data

In [21]:
df_mismatch_pre.head(2)

Unnamed: 0,yyyymmdd,weekday,time_period,quarter_hour,city,service_level,service_detail_id,cluster,hex_id,demand,supply,mismatch,pickup_hex_8,affluence_tag,group_tc
0,20230805,6. Saturday,1.Morning Peak,915,Bangalore,Link,57370b61a6855d70057417d1,Metro - Lalbagh,88618925bbfffff,2.0,1.0,1.0,88618925bbfffff,High Affluence,TEST
1,20230731,1. Monday,1.Morning Peak,1145,Bangalore,Link,57370b61a6855d70057417d1,Metro - Lalbagh,88618925bbfffff,0.0,3.0,-3.0,88618925bbfffff,High Affluence,TEST


In [22]:
df_mismatch_post.head(2)

Unnamed: 0,yyyymmdd,weekday,time_period,quarter_hour,city,service_level,service_detail_id,cluster,hex_id,demand,supply,mismatch,pickup_hex_8,affluence_tag,group_tc
0,20230909,6. Saturday,4.Rest,100,Bangalore,Link,57370b61a6855d70057417d1,Harlur,886189246dfffff,1.0,2.0,-1.0,886189246dfffff,High Affluence,TEST
1,20230908,5. Friday,2.Afternoon,1330,Bangalore,Link,57370b61a6855d70057417d1,Harlur,886189246dfffff,0.0,1.0,-1.0,886189246dfffff,High Affluence,TEST


In [23]:
print('yyyymmdd | hex_8 | quarter_hour | demand | supply | mismatch')

yyyymmdd | hex_8 | quarter_hour | demand | supply | mismatch


time period , day

In [24]:
df_mismatch_post['mismatch_flag'] = np.where(df_mismatch_post['mismatch'] <= 0 , 0 , 1)
df_mismatch_pre['mismatch_flag'] = np.where(df_mismatch_pre['mismatch'] <= 0 , 0 , 1)

## Analysis

In [25]:
## days 

pre_days = df_mismatch_pre.yyyymmdd.nunique()
post_days = df_mismatch_post.yyyymmdd.nunique()
print(pre_days)
print(post_days)

21
18


## Day level

In [34]:
## pre
df_analysis_3 = df_mismatch_pre \
                .groupby(['affluence_tag','group_tc']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_3['demand_per_day'] = (df_analysis_3['demand_sum']/pre_days).round(2)
df_analysis_3['supply_per_day'] = (df_analysis_3['supply_sum']/pre_days).round(2)
df_analysis_3['mismatch_qr_per_day'] = (df_analysis_3['mismatch_sum']/pre_days).round(2)

## post 
df_analysis_4 = df_mismatch_post \
                .groupby(['affluence_tag','group_tc']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_4['demand_per_day'] = (df_analysis_4['demand_sum']/post_days).round(2)
df_analysis_4['supply_per_day'] = (df_analysis_4['supply_sum']/post_days).round(2)
df_analysis_4['mismatch_qr_per_day'] = (df_analysis_4['mismatch_sum']/post_days).round(2)

## Adding pre post to column 
def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_3.columns]
    df_analysis_3.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_4.columns]
    df_analysis_4.columns = post_new_column_names
    
add_pre_post()

## merge 
df_day_level_dsm = pd.merge(df_analysis_3,
                            df_analysis_4,
                            how='inner',
                            left_on = ['affluence_tag_pre','group_tc_pre'],
                            right_on = ['affluence_tag_post','group_tc_post']
                           )

df_day_level_dsm.rename(columns = {'affluence_tag_pre' : 'affluence', 
                         'group_tc_pre' : 'group_tc'}, inplace = True)

df_day_level_dsm = df_day_level_dsm[['affluence', 'group_tc',
                                     'demand_per_day_pre', 'demand_per_day_post',
                                     'supply_per_day_pre', 'supply_per_day_post',
                                     'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post'
                                    ]]

df_day_level_dsm['demand_pd_delta'] = df_day_level_dsm['demand_per_day_post'] - df_day_level_dsm['demand_per_day_pre']
df_day_level_dsm['mismatch_qr_pd_delta'] = df_day_level_dsm['mismatch_qr_per_day_post'] - df_day_level_dsm['mismatch_qr_per_day_pre']

df_day_level_dsm

Unnamed: 0,affluence,group_tc,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,3372.67,3508.83,2102.71,1929.06,461.19,524.61,136.16,63.42
1,High Affluence,TEST,3237.05,3261.78,2090.95,1845.67,507.76,561.22,24.73,53.46
2,Low Affluence,CONTROL,3805.52,4117.44,2515.19,2270.83,842.57,934.72,311.92,92.15
3,Low Affluence,TEST,3549.67,4211.61,2871.43,2681.06,645.24,772.67,661.94,127.43


In [35]:
df_day_level_dsm.to_clipboard(index=False)

## Time period

In [36]:
## pre

df_analysis_1 = df_mismatch_pre \
                .groupby(['affluence_tag','group_tc','time_period']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_1['demand_per_day'] = (df_analysis_1['demand_sum']/pre_days).round(2)
df_analysis_1['supply_per_day'] = (df_analysis_1['supply_sum']/pre_days).round(2)
df_analysis_1['mismatch_qr_per_day'] = (df_analysis_1['mismatch_sum']/pre_days).round(2)

## post 
df_analysis_2 = df_mismatch_post \
                .groupby(['affluence_tag','group_tc', 'time_period']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_2['demand_per_day'] = (df_analysis_2['demand_sum']/post_days).round(2)
df_analysis_2['supply_per_day'] = (df_analysis_2['supply_sum']/post_days).round(2)
df_analysis_2['mismatch_qr_per_day'] = (df_analysis_2['mismatch_sum']/post_days).round(2)

## Adding pre post to column 
def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_1.columns]
    df_analysis_1.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_2.columns]
    df_analysis_2.columns = post_new_column_names
    
add_pre_post()


## merge 
df_time_period_level_dsm = pd.merge(df_analysis_1,
                                    df_analysis_2,
                                    how='inner',
                                    left_on = ['affluence_tag_pre','group_tc_pre', 'time_period_pre'],
                                    right_on = ['affluence_tag_post','group_tc_post', 'time_period_post']
                                   )

df_time_period_level_dsm.rename(columns = {'affluence_tag_pre' : 'affluence', 
                                           'group_tc_pre' : 'group_tc',
                                           'time_period_pre' : 'time_period'
                                          }, inplace = True)

df_time_period_level_dsm = df_time_period_level_dsm[['affluence', 'group_tc', 'time_period',
                                     'demand_per_day_pre', 'demand_per_day_post',
                                     'supply_per_day_pre', 'supply_per_day_post',
                                     'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post'
                                    ]]

df_time_period_level_dsm['demand_pd_delta'] = df_time_period_level_dsm['demand_per_day_post'] - df_time_period_level_dsm['demand_per_day_pre']
df_time_period_level_dsm['mismatch_qr_pd_delta'] = df_time_period_level_dsm['mismatch_qr_per_day_post'] - df_time_period_level_dsm['mismatch_qr_per_day_pre']


df_time_period_level_dsm = df_time_period_level_dsm.sort_values(['time_period','affluence','group_tc'])
df_time_period_level_dsm

Unnamed: 0,affluence,group_tc,time_period,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,1.Morning Peak,853.33,795.22,324.43,308.39,135.19,137.61,-58.11,2.42
4,High Affluence,TEST,1.Morning Peak,994.33,924.39,244.57,213.22,157.67,160.11,-69.94,2.44
8,Low Affluence,CONTROL,1.Morning Peak,1136.76,1125.83,355.29,308.06,244.71,254.28,-10.93,9.57
12,Low Affluence,TEST,1.Morning Peak,1006.24,1128.11,478.67,425.83,208.9,224.72,121.87,15.82
1,High Affluence,CONTROL,2.Afternoon,714.14,775.78,613.48,492.61,108.95,138.5,61.64,29.55
5,High Affluence,TEST,2.Afternoon,722.81,753.28,632.29,493.78,123.48,148.44,30.47,24.96
9,Low Affluence,CONTROL,2.Afternoon,995.62,1109.67,785.38,567.94,201.95,253.5,114.05,51.55
13,Low Affluence,TEST,2.Afternoon,868.9,1009.89,876.71,646.39,148.1,205.94,140.99,57.84
2,High Affluence,CONTROL,3.Evening Peak,1220.9,1298.33,602.57,550.94,123.62,141.5,77.43,17.88
6,High Affluence,TEST,3.Evening Peak,1103.0,1143.89,722.0,640.5,134.95,150.28,40.89,15.33


In [37]:
df_time_period_level_dsm.to_clipboard(index=False)

## Week day

In [30]:
## pre

df_analysis_5 = df_mismatch_pre \
                .groupby(['affluence_tag','group_tc','weekday']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_5['demand_per_day'] = (df_analysis_5['demand_sum']/pre_days).round(2)
df_analysis_5['supply_per_day'] = (df_analysis_5['supply_sum']/pre_days).round(2)
df_analysis_5['mismatch_qr_per_day'] = (df_analysis_5['mismatch_sum']/pre_days).round(2)

## post 
df_analysis_6 = df_mismatch_post \
                .groupby(['affluence_tag','group_tc', 'weekday']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_6['demand_per_day'] = (df_analysis_6['demand_sum']/post_days).round(2)
df_analysis_6['supply_per_day'] = (df_analysis_6['supply_sum']/post_days).round(2)
df_analysis_6['mismatch_qr_per_day'] = (df_analysis_6['mismatch_sum']/post_days).round(2)

## Adding pre post to column 
def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_5.columns]
    df_analysis_5.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_6.columns]
    df_analysis_6.columns = post_new_column_names
    
add_pre_post()


## merge 
df_weeday_level_dsm = pd.merge(df_analysis_5,
                                    df_analysis_6,
                                    how='inner',
                                    left_on = ['affluence_tag_pre','group_tc_pre', 'weekday_pre'],
                                    right_on = ['affluence_tag_post','group_tc_post', 'weekday_post']
                                   )

df_weeday_level_dsm.rename(columns = {'affluence_tag_pre' : 'affluence', 
                                           'group_tc_pre' : 'group_tc',
                                           'weekday_pre' : 'weekday'
                                          }, inplace = True)

df_weeday_level_dsm = df_weeday_level_dsm[['affluence', 'group_tc', 'weekday',
                                     'demand_per_day_pre', 'demand_per_day_post',
                                     'supply_per_day_pre', 'supply_per_day_post',
                                     'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post'
                                    ]]

df_weeday_level_dsm['demand_pd_delta'] = df_weeday_level_dsm['demand_per_day_post'] - df_weeday_level_dsm['demand_per_day_pre']
df_weeday_level_dsm['mismatch_qr_pd_delta'] = df_weeday_level_dsm['mismatch_qr_per_day_post'] - df_weeday_level_dsm['mismatch_qr_per_day_pre']


df_weeday_level_dsm = df_weeday_level_dsm.sort_values(['weekday','affluence','group_tc'])
df_weeday_level_dsm

Unnamed: 0,affluence,group_tc,weekday,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,1. Monday,547.29,449.5,255.1,200.56,71.57,60.56,-97.79,-11.01
7,High Affluence,TEST,1. Monday,474.52,392.22,263.76,201.39,73.62,63.56,-82.3,-10.06
14,Low Affluence,CONTROL,1. Monday,588.38,489.78,315.05,240.0,131.38,108.17,-98.6,-23.21
21,Low Affluence,TEST,1. Monday,567.81,575.39,334.1,263.83,103.14,91.94,7.58,-11.2
1,High Affluence,CONTROL,2. Tuesday,490.33,395.22,236.14,176.11,70.14,60.5,-95.11,-9.64
8,High Affluence,TEST,2. Tuesday,479.81,360.89,225.1,154.33,75.57,61.44,-118.92,-14.13
15,Low Affluence,CONTROL,2. Tuesday,529.14,432.61,289.1,198.44,120.86,104.5,-96.53,-16.36
22,Low Affluence,TEST,2. Tuesday,492.1,449.5,282.95,206.56,101.43,89.94,-42.6,-11.49
2,High Affluence,CONTROL,3. Wednesday,489.71,428.94,262.62,189.33,67.1,60.67,-60.77,-6.43
9,High Affluence,TEST,3. Wednesday,494.9,387.0,240.24,160.17,73.19,63.89,-107.9,-9.3


In [31]:
df_weeday_level_dsm.to_clipboard(index=False)

## Hex

In [38]:
## pre

df_analysis_7 = df_mismatch_pre \
                .groupby(['affluence_tag','group_tc','cluster','hex_id']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_7['demand_per_day'] = (df_analysis_7['demand_sum']/pre_days).round(2)
df_analysis_7['supply_per_day'] = (df_analysis_7['supply_sum']/pre_days).round(2)
df_analysis_7['mismatch_qr_per_day'] = (df_analysis_7['mismatch_sum']/pre_days).round(2)

## post 
df_analysis_8 = df_mismatch_post \
                .groupby(['affluence_tag','group_tc','cluster','hex_id']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_8['demand_per_day'] = (df_analysis_8['demand_sum']/post_days).round(2)
df_analysis_8['supply_per_day'] = (df_analysis_8['supply_sum']/post_days).round(2)
df_analysis_8['mismatch_qr_per_day'] = (df_analysis_8['mismatch_sum']/post_days).round(2)

## Adding pre post to column 
def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_7.columns]
    df_analysis_7.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_8.columns]
    df_analysis_8.columns = post_new_column_names
    
add_pre_post()


## merge 
df_hex_level_dsm = pd.merge(df_analysis_7,
                                    df_analysis_8,
                                    how='inner',
                                    left_on = ['affluence_tag_pre','group_tc_pre', 'cluster_pre', 'hex_id_pre'],
                                    right_on = ['affluence_tag_post','group_tc_post', 'cluster_post', 'hex_id_post']
                                   )

df_hex_level_dsm.rename(columns = {'affluence_tag_pre' : 'affluence', 
                                           'group_tc_pre' : 'group_tc',
                                           'cluster_pre' : 'cluster',
                                           'hex_id_pre' : 'hex_id'
                                          }, inplace = True)

df_hex_level_dsm = df_hex_level_dsm[['affluence', 'group_tc', 'cluster', 'hex_id',
                                     'demand_per_day_pre', 'demand_per_day_post',
                                     'supply_per_day_pre', 'supply_per_day_post',
                                     'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post'
                                    ]]

df_hex_level_dsm['demand_pd_delta'] = df_hex_level_dsm['demand_per_day_post'] - df_hex_level_dsm['demand_per_day_pre']
df_hex_level_dsm['mismatch_qr_pd_delta'] = df_hex_level_dsm['mismatch_qr_per_day_post'] - df_hex_level_dsm['mismatch_qr_per_day_pre']


df_hex_level_dsm = df_hex_level_dsm.sort_values(['affluence','group_tc','cluster','hex_id'])
df_hex_level_dsm

Unnamed: 0,affluence,group_tc,cluster,hex_id,demand_per_day_pre,demand_per_day_post,supply_per_day_pre,supply_per_day_post,mismatch_qr_per_day_pre,mismatch_qr_per_day_post,demand_pd_delta,mismatch_qr_pd_delta
0,High Affluence,CONTROL,Akshaynagar,88618926adfffff,96.57,97.89,69.9,67.56,24.19,26.89,1.32,2.7
1,High Affluence,CONTROL,Banashankri North,8860145a61fffff,125.81,146.22,44.29,24.28,46.81,54.67,20.41,7.86
2,High Affluence,CONTROL,Chanasandra,88618921c7fffff,125.76,124.89,113.19,121.22,27.24,29.5,-0.87,2.26
3,High Affluence,CONTROL,Electronic City,8861892639fffff,345.33,356.83,289.33,293.83,32.24,38.5,11.5,6.26
4,High Affluence,CONTROL,Horamavu,8861892c1dfffff,107.05,101.67,114.81,89.39,28.62,30.28,-5.38,1.66
5,High Affluence,CONTROL,Kammanahalli HRBR Layout,8861892ea5fffff,346.14,356.17,264.14,203.44,47.14,55.72,10.03,8.58
6,High Affluence,CONTROL,Mysore rd,8860145a33fffff,82.43,99.11,82.14,66.39,20.19,31.33,16.68,11.14
7,High Affluence,CONTROL,Ramamurthy Nagar,8861892e37fffff,188.38,181.72,83.24,61.56,44.9,50.0,-6.66,5.1
8,High Affluence,CONTROL,Thanisandra,8861892cbdfffff,144.62,157.17,136.19,112.56,32.76,38.22,12.55,5.46
9,High Affluence,CONTROL,Venkatapura,88618925c9fffff,992.19,1047.5,425.71,390.22,69.33,74.89,55.31,5.56


In [39]:
df_hex_level_dsm.to_clipboard(index=False)

## Hex + Time period

In [None]:
## pre

df_analysis_9 = df_mismatch_pre \
                .groupby(['affluence_tag','group_tc','cluster','hex_id', 'time_period']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_9['demand_per_day'] = (df_analysis_9['demand_sum']/pre_days).round(2)
df_analysis_9['supply_per_day'] = (df_analysis_9['supply_sum']/pre_days).round(2)
df_analysis_9['mismatch_qr_per_day'] = (df_analysis_9['mismatch_sum']/pre_days).round(2)

## post 
df_analysis_8 = df_mismatch_post \
                .groupby(['affluence_tag','group_tc','cluster','hex_id', 'time_period']) \
                .agg(
                    demand_sum = pd.NamedAgg('demand', 'sum'),
                    supply_sum = pd.NamedAgg('supply', 'sum'),
                    mismatch_sum = pd.NamedAgg('mismatch_flag', 'sum')
                    ).reset_index()


df_analysis_8['demand_per_day'] = (df_analysis_8['demand_sum']/post_days).round(2)
df_analysis_8['supply_per_day'] = (df_analysis_8['supply_sum']/post_days).round(2)
df_analysis_8['mismatch_qr_per_day'] = (df_analysis_8['mismatch_sum']/post_days).round(2)

## Adding pre post to column 
def add_pre_post():
    
    ##vpre
    pre_new_column_names = [col + "_pre" for col in df_analysis_9.columns]
    df_analysis_9.columns = pre_new_column_names
    
    ## post
    post_new_column_names = [col + "_post" for col in df_analysis_8.columns]
    df_analysis_8.columns = post_new_column_names
    
add_pre_post()


## merge 
df_hex_level_dsm = pd.merge(df_analysis_7,
                                    df_analysis_8,
                                    how='inner',
                                    left_on = ['affluence_tag_pre','group_tc_pre', 'cluster_pre', 'hex_id_pre'],
                                    right_on = ['affluence_tag_post','group_tc_post', 'cluster_post', 'hex_id_post']
                                   )

df_hex_level_dsm.rename(columns = {'affluence_tag_pre' : 'affluence', 
                                           'group_tc_pre' : 'group_tc',
                                           'cluster_pre' : 'cluster',
                                           'hex_id_pre' : 'hex_id'
                                          }, inplace = True)

df_hex_level_dsm = df_hex_level_dsm[['affluence', 'group_tc', 'cluster', 'hex_id',
                                     'demand_per_day_pre', 'demand_per_day_post',
                                     'supply_per_day_pre', 'supply_per_day_post',
                                     'mismatch_qr_per_day_pre', 'mismatch_qr_per_day_post'
                                    ]]

df_hex_level_dsm['demand_pd_delta'] = df_hex_level_dsm['demand_per_day_post'] - df_hex_level_dsm['demand_per_day_pre']
df_hex_level_dsm['mismatch_qr_pd_delta'] = df_hex_level_dsm['mismatch_qr_per_day_post'] - df_hex_level_dsm['mismatch_qr_per_day_pre']


df_hex_level_dsm = df_hex_level_dsm.sort_values(['affluence','group_tc','cluster','hex_id'])
df_hex_level_dsm