# Gender Heterogeneous Treatment Difference

In [1]:
%pwd

'/Users/lichao/Library/CloudStorage/OneDrive-KyushuUniversity/ESG09_Article/Code'

In [2]:
%cd ..

/Users/lichao/Library/CloudStorage/OneDrive-KyushuUniversity/ESG09_Article


## Import Package

In [13]:
import numpy as np
import os 
import pandas as pd
from scipy import stats

## Compared Gender Difference

In [4]:
FemaleTreatedAsMale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbMaleModelFemalePrediction_v1.parquet'))

In [5]:
FemaleTreatedAsFemale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbFemaleModelFemalePrediction_v1.parquet'))

In [6]:
MaleTreatedAsMale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbMaleModelMalePrediction_v1.parquet'))

In [7]:
MaleTreatedAsFemale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbFemaleModelMalePrediction_v1.parquet'))

In [8]:
FemaleTreatedAsMale.columns

Index(['index', 'Real_y', 'Predict_y'], dtype='object')

In [9]:
FemaleTreatedAsMale['Predict_y'].mean()

5.384263837383596

In [10]:
FemaleTreatedAsFemale['Predict_y'].mean()

5.569484513283753

In [11]:
MaleTreatedAsMale['Predict_y'].mean()

5.466163937999297

In [12]:
MaleTreatedAsFemale['Predict_y'].mean()

5.644549766090342

### Female Group with Different Treatment

In [15]:
FemaleTreatedAsFemale['Predict_y'].mean() - FemaleTreatedAsMale['Predict_y'].mean()

0.18522067590015734

In [17]:
t_stat, p_value = stats.ttest_ind(FemaleTreatedAsMale['Predict_y'], FemaleTreatedAsFemale['Predict_y'])
print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: -89.49583348257622, P-value: 0.0


### Male Group with Different Treatment

In [18]:
MaleTreatedAsFemale['Predict_y'].mean() - MaleTreatedAsMale['Predict_y'].mean()

0.17838582809104508

In [19]:
t_stat, p_value = stats.ttest_ind(MaleTreatedAsFemale['Predict_y'], MaleTreatedAsMale['Predict_y'])
print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: 80.7528736446734, P-value: 0.0


### Female-treated Heterogeneous 

In [21]:
FemaleTreatedAsFemale['Predict_y'].mean() - MaleTreatedAsFemale['Predict_y'].mean()

-0.07506525280658849

In [22]:
t_stat, p_value = stats.ttest_ind(FemaleTreatedAsFemale['Predict_y'], MaleTreatedAsFemale['Predict_y'])
print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: -34.882110704079594, P-value: 1.685601802142487e-266


### Male-treated Hererogeneous

In [23]:
FemaleTreatedAsMale['Predict_y'].mean() - MaleTreatedAsMale['Predict_y'].mean()

-0.08190010061570074

In [24]:
t_stat, p_value = stats.ttest_ind(FemaleTreatedAsMale['Predict_y'], MaleTreatedAsMale['Predict_y'])
print(f"T-statistic: {t_stat}, P-value: {p_value}")

T-statistic: -38.47424438163812, P-value: 0.0


## Gender Difference of Each Wave 

In [81]:
FemaleTreatedAsMale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbMaleModelFemalePrediction_v1.parquet'))

In [82]:
FemaleTreatedAsFemale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbFemaleModelFemalePrediction_v1.parquet'))

In [83]:
MaleTreatedAsMale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbMaleModelMalePrediction_v1.parquet'))

In [84]:
MaleTreatedAsFemale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbFemaleModelMalePrediction_v1.parquet'))

In [85]:
Df_Filename = os.path.join("Data", "GallupWB_Ml64var1911k14wave_v1.parquet")

In [86]:
wave_df = pd.read_parquet(Df_Filename)

In [87]:
wave_df = wave_df[['wave']].reset_index()

In [88]:
wave_df.columns

Index(['index', 'wave'], dtype='object')

In [89]:
wave_df['wave'] = wave_df['wave'].astype(int)

In [90]:
set(wave_df['wave'])

{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}

### Check Each Wave Difference

In [91]:
FemaleTreatedAsMale_wave = pd.merge(FemaleTreatedAsMale, wave_df, on = 'index')
FemaleTreatedAsFemale_wave = pd.merge(FemaleTreatedAsFemale, wave_df, on = 'index')
MaleTreatedAsMale_wave = pd.merge(MaleTreatedAsMale, wave_df, on = 'index')
MaleTreatedAsFemale_wave = pd.merge(MaleTreatedAsFemale, wave_df, on = 'index')

In [92]:
wave_level_df = []

for wave in set(wave_df['wave']):
    print(f"Wave: {wave}")
    FemaleTreatedAsMale_wave_select = FemaleTreatedAsMale_wave[FemaleTreatedAsMale_wave['wave']==wave]
    FemaleTreatedAsFemale_wave_select = FemaleTreatedAsFemale_wave[FemaleTreatedAsFemale_wave['wave']==wave]
    MaleTreatedAsMale_wave_select = MaleTreatedAsMale_wave[MaleTreatedAsMale_wave['wave']==wave]
    MaleTreatedAsFemale_wave_select = MaleTreatedAsFemale_wave[MaleTreatedAsFemale_wave['wave']==wave]
    line = [wave]
    
    FemaleTreatmentDifference = FemaleTreatedAsFemale_wave_select['Predict_y'].mean() - FemaleTreatedAsMale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(FemaleTreatedAsFemale_wave_select['Predict_y'], FemaleTreatedAsMale_wave_select['Predict_y'])
    print(f"Wave: {wave} Female Treatment Difference (F - M): {FemaleTreatmentDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [FemaleTreatmentDifference, t_stat, p_value]
    
    MaleTreatmentDifference = MaleTreatedAsFemale_wave_select['Predict_y'].mean() - MaleTreatedAsMale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(MaleTreatedAsFemale_wave_select['Predict_y'], MaleTreatedAsMale_wave_select['Predict_y'])
    print(f"Wave: {wave} Male Treatment Difference (F - M): {MaleTreatmentDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [MaleTreatmentDifference, t_stat, p_value]
    
    FemaleTreatedGenderDifference = FemaleTreatedAsFemale_wave_select['Predict_y'].mean() - MaleTreatedAsFemale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(FemaleTreatedAsFemale_wave_select['Predict_y'], MaleTreatedAsFemale_wave_select['Predict_y'])
    print(f"Wave: {wave} Female Treated Gender Difference (F - M): {FemaleTreatedGenderDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [FemaleTreatedGenderDifference, t_stat, p_value]
    
    MaleTreatedGenderDifference = FemaleTreatedAsMale_wave_select['Predict_y'].mean() - MaleTreatedAsMale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(FemaleTreatedAsMale_wave_select['Predict_y'], MaleTreatedAsMale_wave_select['Predict_y'])
    print(f"Wave: {wave} Male Treated Gender Difference (F - M): {MaleTreatedGenderDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [MaleTreatedGenderDifference, t_stat, p_value]
    
    print('------------------\n')
    wave_level_df.append(line)

Wave: 4
Wave: 4 Female Treatment Difference (F - M): 0.1310 T-statistic: 14.8563, P-value: 0.0000
Wave: 4 Male Treatment Difference (F - M): 0.1243 T-statistic: 13.0555, P-value: 0.0000
Wave: 4 Female Treated Gender Difference (F - M): -0.0287 T-statistic: -3.0734, P-value: 0.0021
Wave: 4 Male Treated Gender Difference (F - M): -0.0353 T-statistic: -3.9138, P-value: 0.0001
------------------

Wave: 5
Wave: 5 Female Treatment Difference (F - M): 0.1401 T-statistic: 17.4070, P-value: 0.0000
Wave: 5 Male Treatment Difference (F - M): 0.1333 T-statistic: 15.1375, P-value: 0.0000
Wave: 5 Female Treated Gender Difference (F - M): -0.0099 T-statistic: -1.1475, P-value: 0.2512
Wave: 5 Male Treated Gender Difference (F - M): -0.0167 T-statistic: -2.0106, P-value: 0.0444
------------------

Wave: 6
Wave: 6 Female Treatment Difference (F - M): 0.1443 T-statistic: 20.9938, P-value: 0.0000
Wave: 6 Male Treatment Difference (F - M): 0.1412 T-statistic: 18.8962, P-value: 0.0000
Wave: 6 Female Treated

In [93]:
pd.DataFrame(wave_level_df).to_parquet(os.path.join('Results', 'GenderTreatmentWaveDifference_v1.parquet'))

## Gender Difference of Each Country 

In [94]:
FemaleTreatedAsMale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbMaleModelFemalePrediction_v1.parquet'))

In [95]:
FemaleTreatedAsFemale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbFemaleModelFemalePrediction_v1.parquet'))

In [96]:
MaleTreatedAsMale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbMaleModelMalePrediction_v1.parquet'))

In [97]:
MaleTreatedAsFemale = pd.read_parquet(os.path.join('Results', 'PredictionWB_XgbFemaleModelMalePrediction_v1.parquet'))

In [98]:
Df_Filename = os.path.join("Data", "GallupWB_Ml64var1911k14wave_v1.parquet")

In [99]:
country_df = pd.read_parquet(Df_Filename)

In [100]:
country_df = country_df[['COUNTRY_ISO3']].reset_index()

In [101]:
country_df.columns

Index(['index', 'COUNTRY_ISO3'], dtype='object')

In [102]:
country_df['COUNTRY_ISO3'] = country_df['COUNTRY_ISO3']

In [103]:
set(country_df['COUNTRY_ISO3'])

{'AFG',
 'AGO',
 'ALB',
 'ARE',
 'ARG',
 'ARM',
 'AUS',
 'AUT',
 'AZE',
 'BDI',
 'BEL',
 'BEN',
 'BFA',
 'BGD',
 'BGR',
 'BHR',
 'BIH',
 'BLR',
 'BLZ',
 'BOL',
 'BRA',
 'BTN',
 'BWA',
 'CAF',
 'CAN',
 'CHE',
 'CHL',
 'CHN',
 'CIV',
 'CMR',
 'COD',
 'COG',
 'COL',
 'COM',
 'CRI',
 'CYP',
 'CZE',
 'DEU',
 'DNK',
 'DOM',
 'DZA',
 'ECU',
 'EGY',
 'ESP',
 'EST',
 'ETH',
 'FIN',
 'FRA',
 'GAB',
 'GBR',
 'GEO',
 'GHA',
 'GIN',
 'GMB',
 'GRC',
 'GTM',
 'HKG',
 'HND',
 'HRV',
 'HTI',
 'HUN',
 'IDN',
 'IND',
 'IRL',
 'IRN',
 'IRQ',
 'ISL',
 'ISR',
 'ITA',
 'JAM',
 'JOR',
 'JPN',
 'KAZ',
 'KEN',
 'KGZ',
 'KHM',
 'KOR',
 'KWT',
 'LAO',
 'LBN',
 'LBR',
 'LBY',
 'LKA',
 'LSO',
 'LTU',
 'LUX',
 'LVA',
 'MAR',
 'MDA',
 'MDG',
 'MDV',
 'MEX',
 'MKD',
 'MLI',
 'MLT',
 'MMR',
 'MNE',
 'MNG',
 'MOZ',
 'MRT',
 'MUS',
 'MWI',
 'MYS',
 'NAM',
 'NER',
 'NGA',
 'NIC',
 'NLD',
 'NOR',
 'NPL',
 'NZL',
 'PAK',
 'PAN',
 'PER',
 'PHL',
 'POL',
 'PRI',
 'PRT',
 'PRY',
 'PSE',
 'QAT',
 'ROU',
 'RUS',
 'RWA',
 'SAU',


### Check Each Country Difference

In [104]:
FemaleTreatedAsMale_wave = pd.merge(FemaleTreatedAsMale, country_df, on = 'index')
FemaleTreatedAsFemale_wave = pd.merge(FemaleTreatedAsFemale, country_df, on = 'index')
MaleTreatedAsMale_wave = pd.merge(MaleTreatedAsMale, country_df, on = 'index')
MaleTreatedAsFemale_wave = pd.merge(MaleTreatedAsFemale, country_df, on = 'index')

In [105]:
country_level_df = []

for wave in sorted(set(country_df['COUNTRY_ISO3'])):
    print(f"Country: {wave}")
    FemaleTreatedAsMale_wave_select = FemaleTreatedAsMale_wave[FemaleTreatedAsMale_wave['COUNTRY_ISO3']==wave]
    FemaleTreatedAsFemale_wave_select = FemaleTreatedAsFemale_wave[FemaleTreatedAsFemale_wave['COUNTRY_ISO3']==wave]
    MaleTreatedAsMale_wave_select = MaleTreatedAsMale_wave[MaleTreatedAsMale_wave['COUNTRY_ISO3']==wave]
    MaleTreatedAsFemale_wave_select = MaleTreatedAsFemale_wave[MaleTreatedAsFemale_wave['COUNTRY_ISO3']==wave]
    line = [wave]
    
    FemaleTreatmentDifference = FemaleTreatedAsFemale_wave_select['Predict_y'].mean() - FemaleTreatedAsMale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(FemaleTreatedAsFemale_wave_select['Predict_y'], FemaleTreatedAsMale_wave_select['Predict_y'])
    print(f"Country: {wave} Female Treatment Difference (F - M): {FemaleTreatmentDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [FemaleTreatmentDifference, t_stat, p_value]
    
    MaleTreatmentDifference = MaleTreatedAsFemale_wave_select['Predict_y'].mean() - MaleTreatedAsMale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(MaleTreatedAsFemale_wave_select['Predict_y'], MaleTreatedAsMale_wave_select['Predict_y'])
    print(f"Country: {wave} Male Treatment Difference (F - M): {MaleTreatmentDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [MaleTreatmentDifference, t_stat, p_value]
    
    FemaleTreatedGenderDifference = FemaleTreatedAsFemale_wave_select['Predict_y'].mean() - MaleTreatedAsFemale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(FemaleTreatedAsFemale_wave_select['Predict_y'], MaleTreatedAsFemale_wave_select['Predict_y'])
    print(f"Country: {wave} Female Treated Gender Difference (F - M): {FemaleTreatedGenderDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [FemaleTreatedGenderDifference, t_stat, p_value]
    
    MaleTreatedGenderDifference = FemaleTreatedAsMale_wave_select['Predict_y'].mean() - MaleTreatedAsMale_wave_select['Predict_y'].mean()
    t_stat, p_value = stats.ttest_ind(FemaleTreatedAsMale_wave_select['Predict_y'], MaleTreatedAsMale_wave_select['Predict_y'])
    print(f"Country: {wave} Male Treated Gender Difference (F - M): {MaleTreatedGenderDifference:.4f} T-statistic: {t_stat:.4f}, P-value: {p_value:.4f}")
    line = line + [MaleTreatedGenderDifference, t_stat, p_value]
    
    print('------------------\n')
    country_level_df.append(line)

Country: AFG
Country: AFG Female Treatment Difference (F - M): 0.0604 T-statistic: 3.1639, P-value: 0.0016
Country: AFG Male Treatment Difference (F - M): 0.0763 T-statistic: 4.1115, P-value: 0.0000
Country: AFG Female Treated Gender Difference (F - M): -0.1712 T-statistic: -9.0727, P-value: 0.0000
Country: AFG Male Treated Gender Difference (F - M): -0.1552 T-statistic: -8.2703, P-value: 0.0000
------------------

Country: AGO
Country: AGO Female Treatment Difference (F - M): 0.1348 T-statistic: 2.1699, P-value: 0.0302
Country: AGO Male Treatment Difference (F - M): 0.1618 T-statistic: 2.8475, P-value: 0.0045
Country: AGO Female Treated Gender Difference (F - M): -0.1667 T-statistic: -2.7749, P-value: 0.0056
Country: AGO Male Treated Gender Difference (F - M): -0.1397 T-statistic: -2.3764, P-value: 0.0176
------------------

Country: ALB
Country: ALB Female Treatment Difference (F - M): 0.3561 T-statistic: 14.1439, P-value: 0.0000
Country: ALB Male Treatment Difference (F - M): 0.3348

In [107]:
pd.DataFrame(country_level_df).to_parquet(os.path.join('Results', 'GenderTreatmentCountryDifference_v1.parquet'))