In [15]:
# import Pkg; Pkg.add("")
import Pkg
Pkg.activate(".")
using CSV
using DataFrames
using Statistics
using HypothesisTests

[32m[1m Activating[22m[39m new environment at `~/Renewvia/survey_impact_analysis/Project.toml`
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling HypothesisTests [09f84164-cd44-5f33-b23f-e6b0d136a0d5]


In [41]:
# Load the cleaned, encoded survey tables from the local `datasets_clean`
# directory into DataFrames (nothing is downloaded here).
# hs_pre / hs_post: household surveys, pre and post.
hs_pre = CSV.read("datasets_clean/household_pre_survey_encoded.csv", DataFrame)
hs_post = CSV.read("datasets_clean/household_post_survey_encoded.csv", DataFrame)

# ci_post: commercial post-survey.
# NOTE(review): unlike the two household files, this path has no `.csv`
# extension -- confirm the file on disk is really named this way.
ci_post = CSV.read("datasets_clean/commercial_post_survey_encoded", DataFrame);

In [43]:
# Preview the first 5 rows of the household pre-survey table.
first(hs_pre, 5)

Unnamed: 0_level_0,Column1,renewvia_id,avg_monthly_household_income,household_headcount,female_schooling
Unnamed: 0_level_1,Int64,String15,Int64?,Int64?,Int64?
1,0,501121.0,missing,missing,missing
2,1,570063.0,27000,3,missing
3,2,570028.0,30000,9,2
4,3,,30000,2,missing
5,4,570097.0,38000,7,2


In [44]:
# Preview the first 5 rows of the household post-survey table.
first(hs_post, 5)

Unnamed: 0_level_0,Column1,renewvia_id,interviewed_before_0,interviewed_before_1,occupation_change_0
Unnamed: 0_level_1,Int64,String15?,Float64?,Float64?,Float64?
1,0,missing,missing,missing,missing
2,1,521168.0,0.0,1.0,0.0
3,2,521039.0,1.0,0.0,1.0
4,3,521055.0,0.0,1.0,0.0
5,4,521090.0,0.0,1.0,0.0


In [45]:
# Preview the first 5 rows of the commercial post-survey table.
first(ci_post, 5)

Unnamed: 0_level_0,Column1,renewvia_id,business_type,operation_status,kerosene_usage_change,diesel_usage_change
Unnamed: 0_level_1,Int64,String7,String31,Float64?,Float64?,Float64?
1,0,131206,shop,missing,-1.0,-1.0
2,1,131231,other_business,missing,-1.0,-1.0
3,2,131542,shop,missing,-1.0,-1.0
4,3,131220,shop,missing,1.0,1.0
5,4,131485,shop,missing,-1.0,-1.0


# Gender Equality
1. School Attendance per gender
2. Employment (Jobs/Business) per Gender

## School Attendance per gender

**Data Pre-processing**: We captured the direction of change in the variables *'female_schooling_change'* and *'male_schooling_change'* with the following encoding:
'no_its_the_same' = 0, 'yes_its_increased' = 1, 'yes_its_decreased' = -1

**Statistical Test**: One-sample t-Test

**Hypothesis**
* $H_{0}$: There were no changes in school attendance for female children
* $H_{1}$: There was a change in school attendance for female children

In [46]:
begin
    # Allow the female_schooling_change column to hold missing values
    allowmissing!(hs_post, :female_schooling_change)

    # Convert "NA" placeholders to missing so they can be skipped below
    replace!(hs_post.female_schooling_change, "NA" => missing)
end

# One-sample t-test of the female schooling-change responses against a mean of 0.
onesamplettest = let
    # Non-missing responses, read from the same table that was cleaned above
    # (previously this referenced `data`, which this notebook never defines).
    observed = collect(skipmissing(hs_post.female_schooling_change))

    # Draw 1000 values from the observed responses (with replacement).
    # NOTE(review): the draw is unseeded, so results change between runs, and
    # the test's n is fixed at 1000 regardless of how many responses exist --
    # confirm this resampling is intended rather than testing `observed` directly.
    x1 = rand(observed, 1000)

    # H0: the mean of the encoded change (-1 / 0 / 1) is 0
    OneSampleTTest(x1, 0)
end

One sample t-test
-----------------
Population details:
    parameter of interest:   Mean
    value under h_0:         0
    point estimate:          0.162
    95% confidence interval: (0.136, 0.188)

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           <1e-31

Details:
    number of observations:   1000
    t-statistic:              12.213571485548346
    degrees of freedom:       999
    empirical standard error: 0.01326393350148937


**Hypothesis**
* $H_{0}$: There were no changes in school attendance for male children
* $H_{1}$: There was a change in school attendance for male children

In [47]:
begin
    # Allow the male_schooling_change column to hold missing values
    allowmissing!(hs_post, :male_schooling_change)

    # Convert "NA" placeholders to missing so they can be skipped below
    replace!(hs_post.male_schooling_change, "NA" => missing)
end

# One-sample t-test of the male schooling-change responses against a mean of 0.
onesamplettest = let
    # Non-missing responses, read from the same table that was cleaned above
    # (previously this referenced `data`, which this notebook never defines).
    observed = collect(skipmissing(hs_post.male_schooling_change))

    # Draw 1000 values from the observed responses (with replacement).
    # NOTE(review): the draw is unseeded, so results change between runs, and
    # the test's n is fixed at 1000 regardless of how many responses exist --
    # confirm this resampling is intended rather than testing `observed` directly.
    x1 = rand(observed, 1000)

    # H0: the mean of the encoded change (-1 / 0 / 1) is 0
    OneSampleTTest(x1, 0)
end

One sample t-test
-----------------
Population details:
    parameter of interest:   Mean
    value under h_0:         0
    point estimate:          0.149
    95% confidence interval: (0.1238, 0.1742)

Test summary:
    outcome with 95% confidence: reject h_0
    two-sided p-value:           <1e-28

Details:
    number of observations:   1000
    t-statistic:              11.600901156855814
    degrees of freedom:       999
    empirical standard error: 0.012843829801268938


**Result Interpretation**: In both cases the p-value is small enough to reject the null hypothesis; we therefore conclude that there was a statistically significant change in the schooling of both male and female children, and the positive point estimates indicate that attendance tended to increase.

## Female Economic Empowerment

**Data Pre-processing**: Here, the variable was denoted as *'female_schooling_change'* and *'female_schooling_change'* with the following encoding :
'no_its_the_same' = 0, 'yes_its_increased' = 1, 'yes_its_decreased' = -1

**Statistical Test**: Two sample Paired t-Test