Note: Multiplier has been taken as it is. For averages and ratios it doesn't affect the analysis. To use absolute population values, use multiplier/100 instead

# Importing libraries

In [41]:
import pandas as pd
import numpy as np
import pathlib
import pickle
from functools import reduce

In [42]:
pd.set_option('display.max_columns', None)

# Importing datasets

In [43]:
# Checking paths
pathlib.Path("../../HCES 2023-24/Python implementation/Data extraction/Population based MPCE/MPCE 2023_24 2Apr25.dta").resolve().exists()

True

In [44]:
# Reading HH-wise MPCE
path_mpce = "../../HCES 2023-24/Python implementation/Data extraction/Population based MPCE/MPCE 2023_24 2Apr25.dta"
df_mpce = pd.read_stata(path_mpce)
df_mpce = df_mpce.set_index('hhid')
df_mpce = df_mpce[['hh_size', 'sector', 'mpce', 'decile']]
display(df_mpce.head())

## Level 01 file
path_lvl01 = "../../HCES 2023-24/Dta raw files//level_01.dta"
df_lvl01 = pd.read_stata(path_lvl01)
df_lvl01 = df_lvl01.set_index('hhid')
df_lvl01 = df_lvl01[['state']]
display(df_lvl01.head())

## Level 03 file
path_lvl03 = "../../HCES 2023-24/Dta raw files//level_03.dta"
df_lvl03 = pd.read_stata(path_lvl03)
df_lvl03 = df_lvl03.set_index('hhid')
df_lvl03 = df_lvl03[['source_cooking', 'multiplier']]
display(df_lvl03.head())

## Level 07 file
path_lvl07 = "../../HCES 2023-24/Dta raw files/level_07.dta"
df_lvl07 = pd.read_stata(path_lvl07)
df_lvl07 = df_lvl07.set_index('hhid')
df_lvl07 = df_lvl07[['received_subsidy_lpg', 'num_subsidized_lpg']]
display(df_lvl07.head())

## Level 08 file
path_lvl08 = "../../HCES 2023-24/Dta raw files/level_08.dta"
df_lvl08 = pd.read_stata(path_lvl08)
df_lvl08 = df_lvl08.loc[df_lvl08['item_code'].isin([331,338])]

Unnamed: 0_level_0,hh_size,sector,mpce,decile
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
22300101,4.0,2,9407.24,9
22300201,4.0,2,25398.56,10
22300202,2.0,2,11762.5,10
22300203,2.0,2,11579.24,10
22300204,2.0,2,15350.1,10


Unnamed: 0_level_0,state
hhid,Unnamed: 1_level_1
46667201,1
46667301,1
46667302,1
46667303,1
46667304,1


Unnamed: 0_level_0,source_cooking,multiplier
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1
22300101,2,57436
22300201,2,27497
22300202,2,27497
22300203,2,27497
22300204,2,27497


Unnamed: 0_level_0,received_subsidy_lpg,num_subsidized_lpg
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1
22300101,1,2.0
22300201,1,2.0
22300202,1,1.0
22300203,2,
22300204,1,1.0


# Data wrangling

In [45]:
df_lvl08.head()

Unnamed: 0,index,hhid,questionnaire_num,level,item_code,cons_home_qty,cons_home_value,cons_total_qty,cons_total_value,source,multiplier
5,5,22300101,C,8,338,,,14.2,860.0,1.0,57436
9,9,22300201,C,8,338,,,14.2,860.0,1.0,27497
16,16,22300202,C,8,338,,,9.5,640.0,1.0,27497
23,23,22300203,C,8,338,,,9.0,720.0,1.0,27497
26,26,22300204,C,8,338,,,9.4,570.0,1.0,27497


In [46]:
# Reshaping level 08 file for firewood and LPG

## Pivoting
df_lvl08 = df_lvl08.pivot_table(index = 'hhid', columns = 'item_code', values = ['cons_total_qty','cons_total_value', 'cons_home_qty', 'cons_home_value' ])

## Converting columns to single level index
df_lvl08.columns = ["_".join(map(str, x)) for x in df_lvl08.columns]

## Changing column names
df_lvl08 = df_lvl08.rename(columns = {'cons_total_qty_331': 'firewood_qty', 'cons_total_qty_338': 'lpg_qty',
                               'cons_total_value_331': 'firewood_value', 'cons_total_value_338': 'lpg_value',
                               'cons_home_qty_331': 'firewood_home_qty', 'cons_home_value_331': 'firewood_home_value',
                               'cons_home_qty_338': 'lpg_home_qty', 'cons_home_value_338': 'lpg_home_value'})
df_lvl08.head()

# Adding market quantity and value
df_lvl08 = df_lvl08.fillna(0)
df_lvl08[['firewood_mkt_qty', 'firewood_mkt_value']] = df_lvl08[['firewood_qty', 'firewood_value' ]].values - df_lvl08[['firewood_home_qty', 'firewood_home_value' ]].values
df_lvl08[['lpg_mkt_qty', 'lpg_mkt_value']] = df_lvl08[['lpg_qty', 'lpg_value' ]].values - df_lvl08[['lpg_home_qty', 'lpg_home_value' ]].values

In [47]:
df_lvl08.head()

Unnamed: 0_level_0,firewood_home_qty,lpg_home_qty,firewood_home_value,lpg_home_value,firewood_qty,lpg_qty,firewood_value,lpg_value,firewood_mkt_qty,firewood_mkt_value,lpg_mkt_qty,lpg_mkt_value
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
22300101,0.0,0.0,0.0,0.0,0.0,14.2,0.0,860.0,0.0,0.0,14.2,860.0
22300201,0.0,0.0,0.0,0.0,0.0,14.2,0.0,860.0,0.0,0.0,14.2,860.0
22300202,0.0,0.0,0.0,0.0,0.0,9.5,0.0,640.0,0.0,0.0,9.5,640.0
22300203,0.0,0.0,0.0,0.0,0.0,9.0,0.0,720.0,0.0,0.0,9.0,720.0
22300204,0.0,0.0,0.0,0.0,0.0,9.4,0.0,570.0,0.0,0.0,9.4,570.0


In [48]:
# Merging all files

## Checking shapes
print(f"Shape of MPCE file: {df_mpce.shape}")
print(f"Shape of level 01 file: {df_lvl01.shape}")
print(f"Shape of level 03 file: {df_lvl03.shape}")
print(f"Shape of level 07 file: {df_lvl07.shape}")
print(f"Shape of level 08 file: {df_lvl08.shape}")

## Defining merge function
def merge_df (left_df, right_df):
    df = pd.merge(left_df, right_df, how = 'outer', left_index=True, right_index=True, indicator=True)
    print(f"\nSummary from merging dataframes")
    display(df['_merge'].value_counts())
    df = df.drop(labels = '_merge', axis = 1)
    return df.copy()

## Merging
df = reduce(merge_df, [df_mpce, df_lvl01, df_lvl03, df_lvl07, df_lvl08])

Shape of MPCE file: (261953, 4)
Shape of level 01 file: (261953, 1)
Shape of level 03 file: (261953, 2)
Shape of level 07 file: (261953, 2)
Shape of level 08 file: (251119, 12)

Summary from merging dataframes


_merge
both          261953
left_only          0
right_only         0
Name: count, dtype: int64


Summary from merging dataframes


_merge
both          261953
left_only          0
right_only         0
Name: count, dtype: int64


Summary from merging dataframes


_merge
both          261953
left_only          0
right_only         0
Name: count, dtype: int64


Summary from merging dataframes


_merge
both          251119
left_only      10834
right_only         0
Name: count, dtype: int64

In [49]:
# Renaming column values
df['sector'] = df['sector'].replace({1: 'rural', 2: 'urban'})
df['state'] = df['state'].replace({1: 'Jammu & Kashmir', 2: 'Himachal Pradesh', 3: 'Punjab', 4: 'Chandigarh(U.T.)', 5: 'Uttrakhand', 6: 'Haryana', 7: 'Delhi', 8: 'Rajasthan', 9: 'Uttar Pradesh', 10: 'Bihar', 11: 'Sikkim', 12: 'Arunachal Pradesh', 13: 'Nagaland', 14: 'Manipur', 15: 'Mizoram', 16: 'Tripura', 17: 'Meghalaya', 18: 'Assam', 19: 'West Bengal', 20: 'Jharkhand', 21: 'Odisha', 22: 'Chattisgarh', 23: 'Madhya Pradesh', 24: 'Gujarat', 25: 'Dadra & Nagar Haveli', 27: 'Maharashtra', 28: 'Andhra Pradesh', 29: 'Karnataka', 30: 'Goa', 31: 'Lakshadweep (U.T.)', 32: 'Kerala', 33: 'Tamilnadu', 34: 'Puducherry (U.T.)', 35: 'A and N Islands (U.T.)', 36: 'Telangana', 37: 'Ladakh (U.T.)'})
df['source_cooking'] = df['source_cooking'].replace({1: "firewood and chips", 2: "LPG", 3: "other natural gas", 4: "dung cake", 5: "kerosene", 6: "coke, coal", 7: "gobar gas", 8: "other biogas", 9: "others", 10: "charcoal", 11: "electricity", 12: "no cooking arrangement"})
df['received_subsidy_lpg'] = df['received_subsidy_lpg'].replace({1: 'yes', 2: 'no'})

# Analysis

## Primary cooking source

In [50]:
# Across India

## Calculating
trial = df.groupby(['source_cooking'], as_index=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)
display(trial)

Unnamed: 0,source_cooking,popn_size,sample_size
0,LPG,175265100.0,168581.0
1,charcoal,26259.5,117.0
2,"coke, coal",884585.2,978.0
3,dung cake,4354240.0,3179.0
4,electricity,299683.6,344.0
5,firewood and chips,95070380.0,77802.0
6,gobar gas,149405.4,107.0
7,kerosene,106684.1,103.0
8,no cooking arrangement,8773421.0,5180.0
9,other biogas,9156.57,8.0


In [51]:
# Primary cooking source (Rural vs Urban)

## Calculating
trial = df.groupby(['sector','source_cooking']).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)
trial = trial.reset_index()

## Pivoting
trial = trial.pivot_table(values = 'popn_size', index = ['sector'], columns = 'source_cooking')
display(trial)

source_cooking,LPG,charcoal,"coke, coal",dung cake,electricity,firewood and chips,gobar gas,kerosene,no cooking arrangement,other biogas,other natural gas,others
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,93327942.94,8082.87,515222.95,4196454.51,138373.02,90449967.65,145158.3,10495.84,2108254.86,6982.48,205417.26,2671490.57
urban,81937108.02,18176.63,369362.26,157785.26,161310.56,4620413.39,4247.07,96188.27,6665166.33,2174.09,3561192.13,294110.59


In [52]:
# Primary cooking source (Rural vs Urban by decile)

## Calculating
trial = df.groupby(['sector','decile','source_cooking'], as_index=False, observed=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)


## Pivoting
trial = trial.pivot_table (values = 'popn_size', index = ['sector', 'source_cooking'], columns = 'decile', observed = False)
display(trial)

Unnamed: 0_level_0,decile,1,2,3,4,5,6,7,8,9,10
sector,source_cooking,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,LPG,2189765.78,3828172.08,5028726.58,6219698.07,7151531.07,8826065.03,10739811.66,12809591.86,15706122.18,20828458.63
rural,charcoal,,,,,,1302.49,908.59,3434.29,1218.0,1219.5
rural,"coke, coal",90426.03,91748.22,70877.5,71535.78,49133.07,42611.01,42708.64,19717.91,17262.71,19202.08
rural,dung cake,898728.58,634518.9,490343.12,454007.64,369850.0,384076.89,363462.39,273751.16,192191.84,135523.99
rural,electricity,209.83,3245.5,5802.49,5042.2,3188.69,5469.29,6031.89,6240.95,8586.75,94555.43
rural,firewood and chips,10870772.95,10838787.05,10466555.12,10315812.37,10054736.79,9371727.69,8654068.48,7833174.91,6554033.79,5490298.5
rural,gobar gas,1345.6,4783.1,4993.6,8361.8,14919.59,24380.26,12820.3,22002.58,34660.12,16891.35
rural,kerosene,,,,,,,,1625.18,3155.97,5714.69
rural,no cooking arrangement,864697.43,82095.14,46013.55,51174.04,36971.31,42277.05,61962.83,129620.5,199414.6,594028.41
rural,other biogas,,,,,,,,,2412.68,4569.8


## LPG subsidy - Last three months

In [53]:
# Across India
trial = df.groupby(['received_subsidy_lpg'], as_index=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)
display(trial)

Unnamed: 0,received_subsidy_lpg,popn_size,sample_size
0,no,204741100.0,182643.0
1,yes,86930010.0,79310.0


In [54]:
# LPG subsidy (Rural vs Urban)
trial = df.groupby(['sector','received_subsidy_lpg'], as_index=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)

# Pivoting 
trial = trial.pivot_table(values = 'popn_size', index = 'received_subsidy_lpg', columns = 'sector')
display(trial)

sector,rural,urban
received_subsidy_lpg,Unnamed: 1_level_1,Unnamed: 2_level_1
no,133960600.0,70780475.49
yes,59823250.0,27106759.11


In [55]:
# LPG subsidy (Rural vs Urban by decile)
trial = df.groupby(['sector','decile','received_subsidy_lpg'], as_index=False, observed=False).apply(lambda table: pd.Series(
    {'wt_sum': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)

# Pivoting
trial = trial.pivot_table(values = 'wt_sum', index = ['sector','received_subsidy_lpg'], columns = 'decile', observed = False)
display(trial)

Unnamed: 0_level_0,decile,1,2,3,4,5,6,7,8,9,10
sector,received_subsidy_lpg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,no,13135768.9,12385611.69,12156714.56,12371412.1,12400514.47,12621816.85,13117519.61,13675031.94,14367288.77,17728913.76
rural,yes,1972371.21,3368949.97,4230677.36,5046727.83,5640280.04,6400944.3,7085626.74,7717991.07,8649423.31,9710258.77
urban,no,4593144.2,4781740.27,5129570.92,5707477.17,6160735.63,6469676.06,7012139.88,7709212.94,9077767.39,14139011.03
urban,yes,2326546.99,2638357.51,2845231.38,2824514.94,2727935.83,2926260.69,2938841.31,2926048.03,2742495.73,2210526.7


## LPG susbsidised cylinder - Last 3 months

In [56]:
# Across India

## Filtering for people who recieved subsidy
trial = df.copy()
trial = trial.loc[trial['received_subsidy_lpg'] == 'yes']

# Calculation
trial = trial.groupby('num_subsidized_lpg', as_index = False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)
display(trial)

Unnamed: 0,num_subsidized_lpg,popn_size,sample_size
0,1.0,50561433.32,44578.0
1,2.0,26125750.02,24290.0
2,3.0,10242826.37,10442.0


In [57]:
# Rural vs Urban

## Filtering for people who recieved subsidy
trial = df.copy()
trial = trial.loc[trial['received_subsidy_lpg'] == 'yes']

# Calculation
trial = trial.groupby(['sector','num_subsidized_lpg']).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,popn_size,sample_size
sector,num_subsidized_lpg,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,1.0,37768724.69,28640.0
rural,2.0,16147548.68,12330.0
rural,3.0,5906977.23,4628.0
urban,1.0,12792708.63,15938.0
urban,2.0,9978201.34,11960.0
urban,3.0,4335849.14,5814.0


In [58]:
# Rural vs Urban across deciles

## Filtering for people who recieved subsidy
trial = df.copy()
trial = trial.loc[trial['received_subsidy_lpg'] == 'yes']

## Calculation
trial = trial.groupby(['sector', 'decile', 'num_subsidized_lpg'], observed = False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum()/100,
     'sample_size': table.shape[0]}
), include_groups = False)
trial = trial.reset_index()

# Pivotting
trial = trial.pivot_table(values = 'popn_size', index = ['sector', 'num_subsidized_lpg'], columns = 'decile',observed = False)
display(trial)

Unnamed: 0_level_0,decile,1,2,3,4,5,6,7,8,9,10
sector,num_subsidized_lpg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,1.0,1401071.76,2307225.55,2748154.71,3195400.48,3622143.6,4006361.55,4451812.55,4800677.95,5352931.92,5882944.62
rural,2.0,372198.76,719902.11,1026512.97,1284279.57,1437827.1,1785563.03,1958060.45,2207975.32,2521832.85,2833396.52
rural,3.0,199100.69,341822.31,456009.68,567047.78,580309.34,609019.72,675753.74,709337.8,774658.54,993917.63
urban,1.0,1082303.94,1145015.17,1280678.4,1310443.93,1296553.52,1346444.61,1450444.23,1431786.78,1385634.43,1063403.62
urban,2.0,763540.36,1004242.29,1055779.08,1042076.01,1015353.85,1146819.64,1057620.12,1128751.96,946953.98,817064.05
urban,3.0,480702.69,489100.05,508773.9,471995.0,416028.46,432996.44,430776.96,365509.29,409907.32,330059.03


## Average quantity and value of LPG and firewood consumed

* Values are reported for past 30 days

In [10]:
# Across India

## Duplicate df
trial = df.copy()

## Sumproduct of consumption
value_mult = pd.Series({
    'lpg_qty': (trial['lpg_qty'] * trial['multiplier']).sum(),
    'lpg_value':(trial['lpg_value'] * trial['multiplier']).sum(),
    'firewood_qty': (trial['firewood_qty'] * trial['multiplier']).sum(),
    'firewood_value':(trial['firewood_value'] * trial['multiplier']).sum()
}).round(2)

## Sumproduct of hh size
hh_size_mult = (trial['hh_size']*trial['multiplier']).sum()

# Weighted averages
trial = value_mult/hh_size_mult
display(trial)

lpg_qty             1.598124
lpg_value         106.070636
firewood_qty        9.937351
firewood_value     46.950929
dtype: float64

In [11]:
# Rural vs Urban

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby('sector').apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum(),
    'wt_lpg_value': (table['lpg_value']*table['multiplier']).sum(),
    'wt_firewod_qty': (table['firewood_qty']*table['multiplier']).sum(),
    'wt_firewood_value': (table['firewood_value']*table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby('sector').apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,wt_lpg_qty,wt_lpg_value,wt_firewod_qty,wt_firewood_value
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,1.270472,84.779279,13.300155,62.60823
urban,2.369823,156.216889,2.017142,10.07423


In [52]:
# Rural vs Urban across deciles

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'decile'], observed=False).apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum(),
    'wt_lpg_value': (table['lpg_value']*table['multiplier']).sum(),
    'wt_firewod_qty': (table['firewood_qty']*table['multiplier']).sum(),
    'wt_firewood_value': (table['firewood_value']*table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'decile'], observed=False).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0).round(2)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty,wt_lpg_value,wt_firewod_qty,wt_firewood_value
sector,decile,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rural,1,0.39,26.79,15.03,66.63
rural,2,0.66,44.75,15.22,69.26
rural,3,0.83,55.93,14.96,69.12
rural,4,0.99,66.54,14.43,68.61
rural,5,1.1,73.44,13.9,66.8
rural,6,1.27,84.82,13.48,64.89
rural,7,1.46,97.54,12.59,60.64
rural,8,1.66,110.09,12.01,58.06
rural,9,1.92,127.53,11.07,53.08
rural,10,2.41,160.36,10.31,48.98


In [12]:
# Rural vs Urban across states

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty'] * table['multiplier']).sum(),
    'wt_lpg_value':(table['lpg_value'] * table['multiplier']).sum(),
    'wt_firewood_qty': (table['firewood_qty'] * table['multiplier']).sum(),
    'wt_firewood_value': (table['firewood_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty,wt_lpg_value,wt_firewood_qty,wt_firewood_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rural,A and N Islands (U.T.),2.935685,193.343921,1.297966,6.060627
rural,Andhra Pradesh,1.881545,127.873048,6.425040,26.289377
rural,Arunachal Pradesh,1.257256,88.357207,18.783891,121.369622
rural,Assam,0.990500,66.470068,21.029068,98.147684
rural,Bihar,1.095967,77.998667,7.409549,53.221343
...,...,...,...,...,...
urban,Telangana,2.047571,138.812165,0.123514,0.860567
urban,Tripura,2.366207,179.032254,10.530072,55.055257
urban,Uttar Pradesh,2.279728,147.500488,1.162149,7.105410
urban,Uttrakhand,2.800773,178.980782,1.175975,5.868351


In [15]:
trial[trial.index.get_level_values(0) == 'urban']

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty,wt_lpg_value,wt_firewood_qty,wt_firewood_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
urban,A and N Islands (U.T.),3.446384,235.749064,0.214033,0.853284
urban,Andhra Pradesh,2.180879,147.770615,0.758318,3.808627
urban,Arunachal Pradesh,2.411137,173.94136,10.266982,67.898802
urban,Assam,2.604416,177.177031,2.012344,9.431749
urban,Bihar,2.236689,160.099878,2.053898,14.849673
urban,Chandigarh(U.T.),2.925051,201.796932,0.026843,0.214747
urban,Chattisgarh,2.318764,158.815367,2.579977,14.5027
urban,Dadra & Nagar Haveli,2.523876,163.694446,2.506612,12.889267
urban,Delhi,2.24933,145.565912,0.021515,0.193664
urban,Goa,3.187415,198.297461,0.736371,2.944782


## Average quantity and value of firewood consumed from market

In [110]:
# Across India

## Duplicate df
trial = df.copy()

## Sumproduct of consumption
value_mult = pd.Series({
    'firewood_mkt_qty': (trial['firewood_mkt_qty'] * trial['multiplier']).sum(),
    'firewood_mkt_value':(trial['firewood_mkt_value'] * trial['multiplier']).sum()
}).round(2)

## Sumproduct of hh size
hh_size_mult = (trial['hh_size']*trial['multiplier']).sum()

# Weighted averages
trial = value_mult/hh_size_mult
display(trial)

firewood_mkt_qty       6.965658
firewood_mkt_value    33.798832
dtype: float64

In [125]:
# Rural vs Urban

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby('sector').apply(lambda table: pd.Series({
    'wt_firewood_mkt_qty': (table['firewood_mkt_qty'] * table['multiplier']).sum(),
    'wt_firewood_mkt_value':(table['firewood_mkt_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby('sector').apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,Unnamed: 1_level_1,Unnamed: 2_level_1
rural,9.254475,44.566351
urban,1.574948,8.438745


In [127]:
# Rural vs Urban across deciles

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'decile']).apply(lambda table: pd.Series({
    'wt_firewood_mkt_qty': (table['firewood_mkt_qty'] * table['multiplier']).sum(),
    'wt_firewood_mkt_value':(table['firewood_mkt_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'decile']).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,decile,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,1,12.304579,54.413062
rural,2,11.530213,53.543743
rural,3,10.885525,51.120064
rural,4,10.227797,49.68617
rural,5,9.593851,47.209538
rural,6,8.857644,43.94227
rural,7,8.304052,41.296977
rural,8,7.827312,39.188145
rural,9,7.015588,35.049399
rural,10,5.997914,30.213107


In [27]:
# Rural vs Urban across states

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: pd.Series({
    'wt_firewood_mkt_qty': (table['firewood_mkt_qty'] * table['multiplier']).sum(),
    'wt_firewood_mkt_value':(table['firewood_mkt_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,A and N Islands (U.T.),1.297966,6.060627
rural,Andhra Pradesh,6.337395,26.043898
rural,Arunachal Pradesh,16.854789,111.634300
rural,Assam,12.851265,65.990285
rural,Bihar,5.368933,38.197365
...,...,...,...
urban,Telangana,0.123115,0.857575
urban,Tripura,8.572852,46.564274
urban,Uttar Pradesh,1.002565,6.393113
urban,Uttrakhand,1.169996,5.838457


In [28]:
trial[trial.index.get_level_values(0) == 'urban']

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1
urban,A and N Islands (U.T.),0.214033,0.853284
urban,Andhra Pradesh,0.755151,3.791601
urban,Arunachal Pradesh,9.150919,60.658396
urban,Assam,1.506175,7.37831
urban,Bihar,1.645396,11.821243
urban,Chandigarh(U.T.),0.026843,0.214747
urban,Chattisgarh,2.280275,12.776171
urban,Dadra & Nagar Haveli,2.314554,11.806258
urban,Delhi,0.021515,0.193664
urban,Goa,0.6423,2.568494


## Household average annual cylinder consumption

In [19]:
# Across India

## Duplicate df
trial = df.copy()

## Sumproduct of consumption
value_mult = pd.Series({
    'lpg_qty': (trial['lpg_qty'] * trial['multiplier']).sum()/trial['multiplier'].sum(),
})

#Converting to annual cylinder consumption
value_mult = round(value_mult*(12/14.2),2)

display(value_mult)

lpg_qty    5.64
dtype: float64

In [21]:
# Rural vs Urban

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby('sector').apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum()/table['multiplier'].sum(),
    }), include_groups = False)

#Converting to annual cylinder consumption
value_mult = round(value_mult*(12/14.2),2)

display(value_mult)

Unnamed: 0_level_0,wt_lpg_qty
sector,Unnamed: 1_level_1
rural,4.74
urban,7.42


In [22]:
# Rural vs Urban across deciles

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'decile'], observed=False).apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum()/table['multiplier'].sum(),
    }), include_groups = False).round(2)

#Converting to annual cylinder consumption
value_mult = round(value_mult*(12/14.2),2)

display(value_mult)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty
sector,decile,Unnamed: 2_level_1
rural,1,1.88
rural,2,3.04
rural,3,3.67
rural,4,4.12
rural,5,4.39
rural,6,4.83
rural,7,5.23
rural,8,5.59
rural,9,6.04
rural,10,6.35


## Share of HH expenditure on LPG

In [34]:
# Across India

## Duplicate df
trial = df.copy()

## Filtering for HHs with primary cooking source as firewood or LPG
trial = trial.loc[trial['source_cooking'].isin(['LPG','firewood and chips']),:]

## Replace NA values with 0
trial[['lpg_value', 'firewood_mkt_value']] = trial[['lpg_value', 'firewood_mkt_value']].fillna(0)

## Total expenditure
trial['monthly_totalexp'] = trial['hh_size']*trial['mpce']

## Checking groupsize
group_size = trial.groupby('source_cooking').size()
group_size.name = 'sample size'
display(pd.DataFrame(group_size))


## Sumproduct of consumption (Note: LPG and firewood consumption are recorded for last month. Therefore, no scaling required)
value_mult = trial.groupby('source_cooking').apply(lambda table: pd.Series({
    'wt_lpg_value': (table['lpg_value']*table['multiplier']).sum(),
    'wt_firewood_mkt_value': (table['firewood_mkt_value']*table['multiplier']).sum(),
    'wt_total_exp': (table['monthly_totalexp']*table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby('source_cooking').apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

# Average monthly per capita expenditure 
trial = value_mult.div(hh_size_mult, axis = 0)
trial = trial.rename(columns = {'wt_lpg_value': 'LPG MPCE','wt_firewood_mkt_value': 'Firewood mkt MPCE','wt_total_exp': 'Overall MPCE' })

display(trial)

Unnamed: 0_level_0,sample size
source_cooking,Unnamed: 1_level_1
LPG,168581
firewood and chips,77802


Unnamed: 0_level_0,LPG MPCE,Firewood mkt MPCE,Overall MPCE
source_cooking,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LPG,157.022996,10.625108,5718.144013
firewood and chips,34.421541,76.116185,3557.139368


In [35]:
# Across Rural vs Urban

## Duplicate df
trial = df.copy()

## Filtering for HHs with primary cooking source as firewood or LPG
trial = trial.loc[trial['source_cooking'].isin(['LPG','firewood and chips']),:]

## Replace NA values with 0
trial[['lpg_value', 'firewood_mkt_value']] = trial[['lpg_value', 'firewood_mkt_value']].fillna(0)

## Total expenditure
trial['monthly_totalexp'] = trial['hh_size']*trial['mpce']

## Checking groupsize
group_size = trial.groupby(['sector','source_cooking']).size()
group_size.name = 'sample size'
display(pd.DataFrame(group_size))


## Sumproduct of consumption (Note: LPG and firewood consumption are recorded for last month. Therefore, no scaling required)
value_mult = trial.groupby(['sector','source_cooking']).apply(lambda table: pd.Series({
    'wt_lpg_value': (table['lpg_value']*table['multiplier']).sum(),
    'wt_firewood_mkt_value': (table['firewood_mkt_value']*table['multiplier']).sum(),
    'wt_total_exp': (table['monthly_totalexp']*table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector','source_cooking']).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

# Average monthly per capita expenditure 
trial = value_mult.div(hh_size_mult, axis = 0)
trial = trial.rename(columns = {'wt_lpg_value': 'LPG MPCE','wt_firewood_mkt_value': 'Firewood mkt MPCE','wt_total_exp': 'Overall MPCE' })

display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,sample size
sector,source_cooking,Unnamed: 2_level_1
rural,LPG,75775
rural,firewood and chips,72246
urban,LPG,92806
urban,firewood and chips,5556


Unnamed: 0_level_0,Unnamed: 1_level_0,LPG MPCE,Firewood mkt MPCE,Overall MPCE
sector,source_cooking,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,LPG,143.632662,16.071894,4794.049507
rural,firewood and chips,33.556451,75.242904,3525.958495
urban,LPG,173.891778,3.763395,6882.293466
urban,firewood and chips,53.088018,94.95938,4229.944193


In [36]:
# Rural across deciles

## Duplicate df
trial = df.copy()

## Filtering for HHs with primary cooking source as firewood or LPG
trial = trial.loc[trial['source_cooking'].isin(['LPG','firewood and chips']),:]

## Replace NA values with 0
trial[['lpg_value', 'firewood_mkt_value']] = trial[['lpg_value', 'firewood_mkt_value']].fillna(0)

## Total expenditure
trial['monthly_totalexp'] = trial['hh_size']*trial['mpce']

## Checking groupsize
group_size = trial.groupby(['sector','source_cooking', 'decile']).size()
group_size.name = 'sample size'
display(pd.DataFrame(group_size))


## Sumproduct of consumption (Note: LPG and firewood consumption are recorded for last month. Therefore, no scaling required)
value_mult = trial.groupby(['sector','source_cooking', 'decile']).apply(lambda table: pd.Series({
    'wt_lpg_value': (table['lpg_value']*table['multiplier']).sum(),
    'wt_firewood_mkt_value': (table['firewood_mkt_value']*table['multiplier']).sum(),
    'wt_total_exp': (table['monthly_totalexp']*table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector','source_cooking', 'decile']).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

# Average monthly per capita expenditure 
trial = value_mult.div(hh_size_mult, axis = 0)
trial = trial.rename(columns = {'wt_lpg_value': 'LPG MPCE','wt_firewood_mkt_value': 'Firewood MPCE','wt_total_exp': 'Overall MPCE' })

display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sample size
sector,source_cooking,decile,Unnamed: 3_level_1
rural,LPG,1,1660
rural,LPG,2,2951
rural,LPG,3,3844
rural,LPG,4,4742
rural,LPG,5,5547
rural,LPG,6,6814
rural,LPG,7,8357
rural,LPG,8,10142
rural,LPG,9,12651
rural,LPG,10,19067


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,LPG MPCE,Firewood MPCE,Overall MPCE
sector,source_cooking,decile,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rural,LPG,1,100.771414,12.696155,1982.23102
rural,LPG,2,110.760828,13.328361,2482.350295
rural,LPG,3,117.983548,13.91535,2837.327068
rural,LPG,4,124.255527,15.164836,3164.625104
rural,LPG,5,128.809155,15.217227,3501.755586
rural,LPG,6,133.967553,15.710966,3870.788723
rural,LPG,7,140.67365,15.835159,4307.45274
rural,LPG,8,147.177307,17.25572,4892.005177
rural,LPG,9,157.369668,17.901901,5773.594502
rural,LPG,10,186.099566,17.392726,8533.131812
