Note: Multiplier has been taken as it is. For averages and ratios it doesn't affect athe analysis. To use absolute population values, use multiplier/100 instead

# Importing libraries

In [1]:
import pandas as pd
import numpy as np
import pathlib
import pickle
from functools import reduce

In [2]:
pd.set_option('display.max_columns', None)

# Importing datasets

In [3]:
# Checking paths
pathlib.Path("../../HCES 2022-23/Python implementation/Codes/Population based MPCE/MPCE_1Jan24.pkl").resolve().exists()

True

In [4]:
# Reading HH-wise MPCE
path_mpce = "../../HCES 2022-23/Python implementation/Codes/Population based MPCE/MPCE_1Jan24.pkl"
df_mpce = pd.read_pickle(path_mpce)
df_mpce = df_mpce[['hh_size', 'sector', 'mpce', 'decile']]
display(df_mpce.head())

## Level 01 file
path_lvl01 = "../../HCES 2022-23/Dta raw files/level_01.dta"
df_lvl01 = pd.read_stata(path_lvl01)
df_lvl01 = df_lvl01.set_index('hhid')
df_lvl01 = df_lvl01[['state']]
display(df_lvl01.head())

## Level 03 file
path_lvl03 = "../../HCES 2022-23/Dta raw files/level_03.dta"
df_lvl03 = pd.read_stata(path_lvl03)
df_lvl03 = df_lvl03.set_index('hhid')
df_lvl03 = df_lvl03[['source_cooking', 'multiplier']]
display(df_lvl03.head())

## Level 07 file
path_lvl07 = "../../HCES 2022-23/Dta raw files/level_07.dta"
df_lvl07 = pd.read_stata(path_lvl07)
df_lvl07 = df_lvl07.set_index('hhid')
df_lvl07 = df_lvl07[['receieved_subsidy_lpg', 'num_subsidized_lpg']]
display(df_lvl07.head())

## Level 08 file
path_lvl08 = "../../HCES 2022-23/Dta raw files/level_08.dta"
df_lvl08 = pd.read_stata(path_lvl08)
df_lvl08 = df_lvl08.loc[df_lvl08['item_code'].isin([331,338])]

Unnamed: 0_level_0,hh_size,sector,mpce,decile
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
31000301,1.0,2,19675.64,10
31000302,5.0,2,8059.52,8
31000303,5.0,2,7628.02,8
31000304,2.0,2,6522.62,7
31000305,1.0,2,6765.77,7


Unnamed: 0_level_0,state
hhid,Unnamed: 1_level_1
65556201,1
65556301,1
65556302,1
65556303,1
65556304,1


Unnamed: 0_level_0,source_cooking,multiplier
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1
31000301,2,96498
31000302,2,96498
31000303,2,96498
31000304,2,96498
31000305,2,96498


Unnamed: 0_level_0,receieved_subsidy_lpg,num_subsidized_lpg
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1
31000301,2,
31000302,1,1.0
31000303,1,1.0
31000304,1,1.0
31000305,2,


# Data wrangling

In [5]:
# Reshaping level 08 file for firewood and LPG

## Pivoting
df_lvl08 = df_lvl08.pivot_table(index = 'hhid', columns = 'item_code', values = ['cons_total_qty','cons_total_value', 'cons_home_qty', 'cons_home_value' ])

## Converting columns to single level index
df_lvl08.columns = ["_".join(map(str, x)) for x in df_lvl08.columns]

## Changing column names
df_lvl08 = df_lvl08.rename(columns = {'cons_total_qty_331': 'firewood_qty', 'cons_total_qty_338': 'lpg_qty',
                               'cons_total_value_331': 'firewood_value', 'cons_total_value_338': 'lpg_value',
                               'cons_home_qty_331': 'firewood_home_qty', 'cons_home_value_331': 'firewood_home_value',
                               'cons_home_qty_338': 'lpg_home_qty', 'cons_home_value_338': 'lpg_home_value'})
df_lvl08.head()

# Adding market quantity and value
df_lvl08 = df_lvl08.fillna(0)
df_lvl08[['firewood_mkt_qty', 'firewood_mkt_value']] = df_lvl08[['firewood_qty', 'firewood_value' ]].values - df_lvl08[['firewood_home_qty', 'firewood_home_value' ]].values
df_lvl08[['lpg_mkt_qty', 'lpg_mkt_value']] = df_lvl08[['lpg_qty', 'lpg_value' ]].values - df_lvl08[['lpg_home_qty', 'lpg_home_value' ]].values

df_lvl08.head()

Unnamed: 0_level_0,firewood_home_qty,lpg_home_qty,firewood_home_value,lpg_home_value,firewood_qty,lpg_qty,firewood_value,lpg_value,firewood_mkt_qty,firewood_mkt_value,lpg_mkt_qty,lpg_mkt_value
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
31000301,0.0,0.0,0.0,0.0,0.0,2.0,0.0,240.0,0.0,0.0,2.0,240.0
31000302,0.0,0.0,0.0,0.0,0.0,8.0,0.0,640.0,0.0,0.0,8.0,640.0
31000303,0.0,0.0,0.0,0.0,0.0,9.0,0.0,720.0,0.0,0.0,9.0,720.0
31000304,0.0,0.0,0.0,0.0,0.0,5.0,0.0,400.0,0.0,0.0,5.0,400.0
31000305,0.0,0.0,0.0,0.0,0.0,4.0,0.0,320.0,0.0,0.0,4.0,320.0


In [6]:
# Merging all files

## Checking shapes
print(f"Shape of MPCE file: {df_mpce.shape}")
print(f"Shape of level 01 file: {df_lvl01.shape}")
print(f"Shape of level 03 file: {df_lvl03.shape}")
print(f"Shape of level 07 file: {df_lvl07.shape}")
print(f"Shape of level 08 file: {df_lvl08.shape}")

## Defining merge function
def merge_df (left_df, right_df):
    df = pd.merge(left_df, right_df, how = 'outer', left_index=True, right_index=True, indicator=True)
    print(f"\nSummary from merging dataframes")
    display(df['_merge'].value_counts())
    df = df.drop(labels = '_merge', axis = 1)
    return df.copy()

## Merging
df = reduce(merge_df, [df_mpce, df_lvl01, df_lvl03, df_lvl07, df_lvl08])

Shape of MPCE file: (261746, 4)
Shape of level 01 file: (261746, 1)
Shape of level 03 file: (261746, 2)
Shape of level 07 file: (261746, 2)
Shape of level 08 file: (252955, 12)

Summary from merging dataframes


_merge
both          261746
left_only          0
right_only         0
Name: count, dtype: int64


Summary from merging dataframes


_merge
both          261746
left_only          0
right_only         0
Name: count, dtype: int64


Summary from merging dataframes


_merge
both          261746
left_only          0
right_only         0
Name: count, dtype: int64


Summary from merging dataframes


_merge
both          252955
left_only       8791
right_only         0
Name: count, dtype: int64

In [7]:
# Renaming column values
df['sector'] = df['sector'].replace({1: 'rural', 2: 'urban'})
df['state'] = df['state'].replace({1: 'Jammu & Kashmir', 2: 'Himachal Pradesh', 3: 'Punjab', 4: 'Chandigarh(U.T.)', 5: 'Uttrakhand', 6: 'Haryana', 7: 'Delhi', 8: 'Rajasthan', 9: 'Uttar Pradesh', 10: 'Bihar', 11: 'Sikkim', 12: 'Arunachal Pradesh', 13: 'Nagaland', 14: 'Manipur', 15: 'Mizoram', 16: 'Tripura', 17: 'Meghalaya', 18: 'Assam', 19: 'West Bengal', 20: 'Jharkhand', 21: 'Odisha', 22: 'Chattisgarh', 23: 'Madhya Pradesh', 24: 'Gujarat', 25: 'Dadra & Nagar Haveli', 27: 'Maharashtra', 28: 'Andhra Pradesh', 29: 'Karnataka', 30: 'Goa', 31: 'Lakshadweep (U.T.)', 32: 'Kerala', 33: 'Tamilnadu', 34: 'Puducherry (U.T.)', 35: 'A and N Islands (U.T.)', 36: 'Telangana', 37: 'Ladakh (U.T.)'})
df['source_cooking'] = df['source_cooking'].replace({1: "firewood and chips", 2: "LPG", 3: "other natural gas", 4: "dung cake", 5: "kerosene", 6: "coke, coal", 7: "gobar gas", 8: "other biogas", 9: "others", 10: "charcoal", 11: "electricity", 12: "no cooking arrangement"})
df['receieved_subsidy_lpg'] = df['receieved_subsidy_lpg'].replace({1: 'yes', 2: 'no'})

# Analysis

## Primary cooking source

In [8]:
# Across India

## Calculating
trial = df.groupby(['source_cooking'], as_index=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)
display(trial)

Unnamed: 0,source_cooking,popn_size,sample_size
0,LPG,15927952229,3206580
1,charcoal,2740225,1700
2,"coke, coal",101807090,23120
3,dung cake,695334826,101520
4,electricity,35922157,8620
5,firewood and chips,10649712107,1738940
6,gobar gas,13655523,2100
7,kerosene,19559832,4120
8,no cooking arrangement,628452832,76020
9,other biogas,2135539,300


In [9]:
# Primary cooking source (Rural vs Urban)

## Calculating
trial = df.groupby(['sector','source_cooking']).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)
trial = trial.reset_index()

## Pivoting
trial = trial.pivot_table(values = 'popn_size', index = ['sector'], columns = 'source_cooking')
display(trial)

source_cooking,LPG,charcoal,"coke, coal",dung cake,electricity,firewood and chips,gobar gas,kerosene,no cooking arrangement,other biogas,other natural gas,others
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,8366512000.0,1060130.0,55011889.0,673427775.0,9290719.0,10069220000.0,13407987.0,3408425.0,211530273.0,1654981.0,24215703.0,60796558.0
urban,7561440000.0,1680095.0,46795201.0,21907051.0,26631438.0,580488000.0,247536.0,16151407.0,416922559.0,480558.0,243365178.0,34192124.0


In [10]:
# Primary cooking source (Rural vs Urban by decile)

## Calculating
trial = df.groupby(['sector','decile','source_cooking'], as_index=False, observed=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)


## Pivoting
trial = trial.pivot_table (values = 'popn_size', index = ['sector', 'source_cooking'], columns = 'decile', observed = False)
display(trial)


Unnamed: 0_level_0,decile,1,2,3,4,5,6,7,8,9,10
sector,source_cooking,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,LPG,173990200.0,307424500.0,423708900.0,516778600.0,658287100.0,792786300.0,954567410.0,1145934000.0,1439283000.0,1953751000.0
rural,charcoal,,26717.0,116018.0,,40200.0,57609.0,242377.0,178341.0,357648.0,41220.0
rural,"coke, coal",4953282.0,6149718.0,4852760.0,5822642.0,6715582.0,7928990.0,5697399.0,4779956.0,6006616.0,2104944.0
rural,dung cake,111177600.0,86531050.0,85594750.0,65959250.0,65741650.0,58703630.0,58844928.0,57837430.0,48123710.0,34913820.0
rural,electricity,123845.0,439121.0,555382.0,549057.0,842665.0,663499.0,1367850.0,491821.0,1295385.0,2962094.0
rural,firewood and chips,1154369000.0,1174058000.0,1145904000.0,1146830000.0,1067373000.0,1048291000.0,972475364.0,919415300.0,798043700.0,642464200.0
rural,gobar gas,122912.0,565061.0,396968.0,641633.0,636406.0,1269761.0,1645060.0,2118865.0,2806974.0,3204347.0
rural,kerosene,102376.0,225316.0,585403.0,263224.0,204752.0,280635.0,470606.0,366953.0,122029.0,787131.0
rural,no cooking arrangement,22029480.0,6703582.0,5854034.0,7974438.0,11657040.0,9612868.0,10083775.0,15457580.0,7073938.0,115083500.0
rural,other biogas,,,,146183.0,,,,106267.0,309652.0,1092879.0


## LPG subsidy - Last three months

In [11]:
# Across India
trial = df.groupby(['receieved_subsidy_lpg'], as_index=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)
display(trial)

Unnamed: 0,receieved_subsidy_lpg,popn_size,sample_size
0,no,22129917998,4065540
1,yes,6309923925,1169380


In [12]:
# LPG subsidy (Rural vs Urban)
trial = df.groupby(['sector','receieved_subsidy_lpg'], as_index=False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)

# Pivoting 
trial = trial.pivot_table(values = 'popn_size', index = 'receieved_subsidy_lpg', columns = 'sector')
display(trial)

sector,rural,urban
receieved_subsidy_lpg,Unnamed: 1_level_1,Unnamed: 2_level_1
no,15330330000.0,6799583000.0
yes,4159206000.0,2150718000.0


In [13]:
# LPG subsidy (Rural vs Urban by decile)
trial = df.groupby(['sector','decile','receieved_subsidy_lpg'], as_index=False, observed=False).apply(lambda table: pd.Series(
    {'wt_sum': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)

# Pivoting
trial = trial.pivot_table(values = 'wt_sum', index = ['sector','receieved_subsidy_lpg'], columns = 'decile', observed = False)
display(trial)

Unnamed: 0_level_0,decile,1,2,3,4,5,6,7,8,9,10
sector,receieved_subsidy_lpg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,no,1345338000.0,1351555000.0,1375220000.0,1429463000.0,1447575000.0,1498177000.0,1533765000.0,1623491000.0,1679853000.0,2045896000.0
rural,yes,124928400.0,237842400.0,298340700.0,321345200.0,372568100.0,427569300.0,480403500.0,531148000.0,632624200.0,732436600.0
urban,no,442618900.0,489858200.0,528376000.0,557899900.0,597551500.0,643120100.0,694247700.0,755349900.0,845814100.0,1244747000.0
urban,yes,157828000.0,192108200.0,205929000.0,206661200.0,212076700.0,225097700.0,225161400.0,239977400.0,261306700.0,224571400.0


# LPG susbsidised cylinder - Last 3 months

In [14]:
# Across India

## Filtering for people who recieved subsidy
trial = df.copy()
trial = trial.loc[trial['receieved_subsidy_lpg'] == 'yes']

# Calculation
trial = trial.groupby('num_subsidized_lpg', as_index = False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)
display(trial)

Unnamed: 0,num_subsidized_lpg,popn_size,sample_size
0,1.0,3900515861,688320
1,2.0,1750466336,337620
2,3.0,658941728,143440


In [15]:
# Rural vs Urban

## Filtering for people who recieved subsidy
trial = df.copy()
trial = trial.loc[trial['receieved_subsidy_lpg'] == 'yes']

# Calculation
trial = trial.groupby(['sector','num_subsidized_lpg']).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,popn_size,sample_size
sector,num_subsidized_lpg,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,1.0,2831518859,410514
rural,2.0,1003612412,148789
rural,3.0,324075030,50312
urban,1.0,1068997002,243390
urban,2.0,746853924,171950
urban,3.0,334866698,85956


In [16]:
# Rural vs Urban across deciles

## Filtering for people who recieved subsidy
trial = df.copy()
trial = trial.loc[trial['receieved_subsidy_lpg'] == 'yes']

## Calculation
trial = trial.groupby(['sector', 'decile', 'num_subsidized_lpg'], observed = False).apply(lambda table: pd.Series(
    {'popn_size': table['multiplier'].sum(),
     'sample_size': table.size}
), include_groups = False)
trial = trial.reset_index()

# Pivotting
trial = trial.pivot_table(values = 'popn_size', index = ['sector', 'num_subsidized_lpg'], columns = 'decile',observed = False)
display(trial)

Unnamed: 0_level_0,decile,1,2,3,4,5,6,7,8,9,10
sector,num_subsidized_lpg,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,1.0,97305500.0,178581965.0,219297280.0,232676510.0,261046305.0,299532810.0,315431768.0,354278978.0,409245667.0,464122076.0
rural,2.0,21215479.0,45025801.0,59989360.0,64605908.0,84526979.0,95695982.0,120638223.0,138301253.0,170929653.0,202683774.0
rural,3.0,6407458.0,14234600.0,19054099.0,24062738.0,26994792.0,32340486.0,44333482.0,38567749.0,52448904.0,65630722.0
urban,1.0,77707959.0,91110693.0,104067025.0,103230337.0,104273686.0,109300637.0,109154327.0,119131765.0,135015828.0,116004745.0
urban,2.0,52677297.0,66364220.0,67085649.0,70094292.0,75858431.0,80691239.0,80106084.0,83525653.0,91618712.0,78832347.0
urban,3.0,27442744.0,34633269.0,34776332.0,33336564.0,31944616.0,35105785.0,35900946.0,37319979.0,34672132.0,29734331.0


# Average quantity and value of TOTAL LPG and firewood consumed per capita

* Values are reported for past 30 days

In [8]:
# Across India

## Duplicate df
trial = df.copy()

## Sumproduct of consumption
value_mult = pd.Series({
    'lpg_qty': (trial['lpg_qty'] * trial['multiplier']).sum(),
    'lpg_value':(trial['lpg_value'] * trial['multiplier']).sum(),
    'firewood_qty': (trial['firewood_qty'] * trial['multiplier']).sum(),
    'firewood_value':(trial['firewood_value'] * trial['multiplier']).sum()
}).round(2)

## Sumproduct of hh size
hh_size_mult = (trial['hh_size']*trial['multiplier']).sum()

# Weighted averages
trial = value_mult/hh_size_mult
display(trial)

lpg_qty             1.473315
lpg_value         117.535183
firewood_qty       11.037265
firewood_value     51.857151
dtype: float64

In [243]:
# Rural vs Urban

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby('sector').apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum(),
    'wt_lpg_value': (table['lpg_value']*table['multiplier']).sum(),
    'wt_firewod_qty': (table['firewood_qty']*table['multiplier']).sum(),
    'wt_firewood_value': (table['firewood_value']*table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby('sector').apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,wt_lpg_qty,wt_lpg_value,wt_firewod_qty,wt_firewood_value
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,1.127155,90.514766,14.474689,67.810782
urban,2.348439,185.84521,2.347152,11.524946


In [247]:
# Rural vs Urban across deciles

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'decile'], observed=False).apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum(),
    'wt_lpg_value': (table['lpg_value']*table['multiplier']).sum(),
    'wt_firewod_qty': (table['firewood_qty']*table['multiplier']).sum(),
    'wt_firewood_value': (table['firewood_value']*table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'decile'], observed=False).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0).round(2)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty,wt_lpg_value,wt_firewod_qty,wt_firewood_value
sector,decile,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rural,1,0.29,23.72,15.31,67.78
rural,2,0.52,41.9,16.06,72.38
rural,3,0.68,54.98,15.78,72.8
rural,4,0.8,64.15,15.65,73.38
rural,5,0.97,77.81,15.17,71.79
rural,6,1.13,90.41,14.99,71.43
rural,7,1.3,104.57,14.19,67.55
rural,8,1.51,120.98,13.87,67.05
rural,9,1.78,142.92,12.6,60.58
rural,10,2.29,183.72,11.13,53.38


In [11]:
df.head()

Unnamed: 0_level_0,hh_size,sector,mpce,decile,state,source_cooking,multiplier,receieved_subsidy_lpg,num_subsidized_lpg,firewood_home_qty,lpg_home_qty,firewood_home_value,lpg_home_value,firewood_qty,lpg_qty,firewood_value,lpg_value,firewood_mkt_qty,firewood_mkt_value,lpg_mkt_qty,lpg_mkt_value
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
31000301,1.0,urban,19675.64,10,Andhra Pradesh,LPG,96498,no,,0.0,0.0,0.0,0.0,0.0,2.0,0.0,240.0,0.0,0.0,2.0,240.0
31000302,5.0,urban,8059.52,8,Andhra Pradesh,LPG,96498,yes,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,640.0,0.0,0.0,8.0,640.0
31000303,5.0,urban,7628.02,8,Andhra Pradesh,LPG,96498,yes,1.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,720.0,0.0,0.0,9.0,720.0
31000304,2.0,urban,6522.62,7,Andhra Pradesh,LPG,96498,yes,1.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,400.0,0.0,0.0,5.0,400.0
31000305,1.0,urban,6765.77,7,Andhra Pradesh,LPG,96498,no,,0.0,0.0,0.0,0.0,0.0,4.0,0.0,320.0,0.0,0.0,4.0,320.0


In [12]:
# Rural vs Urban across states

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty'] * table['multiplier']).sum(),
    'wt_lpg_value':(table['lpg_value'] * table['multiplier']).sum(),
    'wt_firewood_qty': (table['firewood_qty'] * table['multiplier']).sum(),
    'wt_firewood_value': (table['firewood_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty,wt_lpg_value,wt_firewood_qty,wt_firewood_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rural,A and N Islands (U.T.),2.655244,217.624077,2.361476,14.484645
rural,Andhra Pradesh,1.895074,152.515515,6.706925,27.148969
rural,Arunachal Pradesh,0.968536,79.500800,25.564439,140.625085
rural,Assam,0.967616,78.142976,16.955429,84.168833
rural,Bihar,0.854335,72.119167,10.485264,69.381512
...,...,...,...,...,...
urban,Telangana,2.014468,168.378900,0.260950,1.722246
urban,Tripura,2.711926,239.896157,7.097235,45.793740
urban,Uttar Pradesh,2.300679,181.358964,1.678701,8.900367
urban,Uttrakhand,2.824488,221.467120,1.627196,6.784211


In [14]:
trial[trial.index.get_level_values(0) == 'rural']

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty,wt_lpg_value,wt_firewood_qty,wt_firewood_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
rural,A and N Islands (U.T.),2.655244,217.624077,2.361476,14.484645
rural,Andhra Pradesh,1.895074,152.515515,6.706925,27.148969
rural,Arunachal Pradesh,0.968536,79.5008,25.564439,140.625085
rural,Assam,0.967616,78.142976,16.955429,84.168833
rural,Bihar,0.854335,72.119167,10.485264,69.381512
rural,Chandigarh(U.T.),2.308463,179.290253,0.02206,0.671203
rural,Chattisgarh,0.506296,42.4044,21.453751,110.025868
rural,Dadra & Nagar Haveli,1.74473,145.776756,12.013179,64.89773
rural,Delhi,2.660327,203.502311,0.121473,0.999543
rural,Goa,2.734743,210.097285,2.931283,13.099684


# Average quantity and value of MARKET firewood consumed per capita

In [255]:
# Across India

## Duplicate df
trial = df.copy()

## Sumproduct of consumption
value_mult = pd.Series({
    'firewood_mkt_qty': (trial['firewood_mkt_qty'] * trial['multiplier']).sum(),
    'firewood_mkt_value':(trial['firewood_mkt_value'] * trial['multiplier']).sum()
}).round(2)

## Sumproduct of hh size
hh_size_mult = (trial['hh_size']*trial['multiplier']).sum()

# Weighted averages
trial = value_mult/hh_size_mult
display(trial)

firewood_mkt_qty       7.677946
firewood_mkt_value    36.449999
dtype: float64

In [256]:
# Rural vs Urban

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby('sector').apply(lambda table: pd.Series({
    'wt_firewood_mkt_qty': (table['firewood_mkt_qty'] * table['multiplier']).sum(),
    'wt_firewood_mkt_value':(table['firewood_mkt_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby('sector').apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,Unnamed: 1_level_1,Unnamed: 2_level_1
rural,9.986364,47.110268
urban,1.84206,9.499887


In [259]:
# Rural vs Urban across deciles

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'decile'], observed = False).apply(lambda table: pd.Series({
    'wt_firewood_mkt_qty': (table['firewood_mkt_qty'] * table['multiplier']).sum(),
    'wt_firewood_mkt_value':(table['firewood_mkt_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'decile'], observed = False).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,decile,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,1,12.019989,53.07622
rural,2,11.860949,53.813415
rural,3,11.32003,52.788773
rural,4,11.180783,52.685019
rural,5,10.659816,50.925218
rural,6,10.225955,48.933431
rural,7,9.329608,44.787533
rural,8,9.058715,44.088838
rural,9,7.701046,37.614
rural,10,6.506182,32.387946


In [40]:
# Rural vs Urban across states

# Duplicate df and replace Nan values
trial = df.copy()


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: pd.Series({
    'wt_firewood_mkt_qty': (table['firewood_mkt_qty'] * table['multiplier']).sum(),
    'wt_firewood_mkt_value':(table['firewood_mkt_value'] * table['multiplier']).sum()
    }), include_groups = False).round(2)

## Sumproduct of hh size 
hh_size_mult = trial.groupby(['sector', 'state'], observed = False).apply(lambda table: (table['multiplier']*table['hh_size']).sum(), include_groups = False)

trial = value_mult.div(hh_size_mult, axis = 0)
display(trial)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,A and N Islands (U.T.),2.286046,13.300046
rural,Andhra Pradesh,6.404175,25.909515
rural,Arunachal Pradesh,19.685340,112.403346
rural,Assam,8.646849,46.328774
rural,Bihar,7.647358,49.365340
...,...,...,...
urban,Telangana,0.247391,1.633835
urban,Tripura,5.669061,37.511397
urban,Uttar Prdesh,1.403836,7.702412
urban,Uttrakhand,1.446670,6.368216


In [41]:
trial[trial.index.get_level_values(0) == 'urban']

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_firewood_mkt_qty,wt_firewood_mkt_value
sector,state,Unnamed: 2_level_1,Unnamed: 3_level_1
urban,A and N Islands (U.T.),0.017689,0.106132
urban,Andhra Pradesh,0.691379,3.956743
urban,Arunachal Pradesh,5.820248,36.3579
urban,Assam,1.559906,9.377244
urban,Bihar,2.334052,15.699532
urban,Chandigarh(U.T.),0.0,0.0
urban,Chattisgarh,3.83599,19.932021
urban,Dadra & Nagar Haveli,1.633755,8.175905
urban,Delhi,0.009241,0.146277
urban,Goa,2.292424,10.585234


# Household average annual cylinder consumption

In [None]:
# Across India

## Duplicate df
trial = df.copy()

## Sumproduct of consumption
value_mult = pd.Series({
    'lpg_qty': (trial['lpg_qty'] * trial['multiplier']).sum()/trial['multiplier'].sum(),
})

#Converting to annual cylinder consumption
value_mult = round(value_mult*12/14.2, 2)

display(value_mult)

lpg_qty    5.31
dtype: float64

In [None]:
# Rural vs Urban

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby('sector').apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum()/table['multiplier'].sum(),
    }), include_groups = False)

#Converting to annual cylinder consumption
value_mult = round(value_mult*12/14.2, 2)

display(value_mult)

Unnamed: 0_level_0,wt_lpg_qty
sector,Unnamed: 1_level_1
rural,4.25
urban,7.63


In [34]:
# Rural vs Urban across deciles

# Duplicate df and replace Nan values
trial = df.copy()
trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']] = trial[['lpg_qty', 'lpg_value', 'firewood_qty', 'firewood_value']].fillna(0)


# Sumproduct of consumption
value_mult = trial.groupby(['sector', 'decile'], observed=False).apply(lambda table: pd.Series({
    'wt_lpg_qty': (table['lpg_qty']*table['multiplier']).sum()/table['multiplier'].sum()
    }), include_groups = False).round(2)

#Converting to annual cylinder consumption
value_mult = round(value_mult*12/14.2, 2)

display(value_mult)

Unnamed: 0_level_0,Unnamed: 1_level_0,wt_lpg_qty
sector,decile,Unnamed: 2_level_1
rural,1,1.47
rural,2,2.4
rural,3,3.0
rural,4,3.35
rural,5,3.91
rural,6,4.29
rural,7,4.75
rural,8,5.15
rural,9,5.67
rural,10,6.06
