In [None]:
# Packages to install

import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import os
from pathlib import Path

from linearmodels.iv.absorbing import AbsorbingLS #have to install linearmodels first


In [None]:
# Read the Stata extract into the working DataFrame used by every later cell.
# (Uncomment the last line to preview the first few rows.)
df = pd.read_stata(filepath_or_buffer="dhs.dta")
# df.head()

In [None]:

# Derived columns: age, country_age, post, intensity_m_post

# Age (v012) rendered as text so it can be concatenated with the v000 code.
age_as_text = df["v012"].astype(str)
df["age"] = age_as_text
df["country_age"] = df["v000"] + age_as_text

# post is 1 when age_reform is below 18 and 0 otherwise
# (a missing age_reform compares False and therefore yields 0).
df["post"] = (df["age_reform"] < 18).astype("int64")

# Interaction of the intensity measure with the post indicator.
df["intensity_m_post"] = df["intensity_main"] * df["post"]







In [None]:
# The columns later passed to `absorb=` are stored as pandas categoricals.
for fe_col in ("country_age", "region"):
    df[fe_col] = df[fe_col].astype("category")

In [None]:

# Baseline regression: m_b18 on intensity_m_post, absorbing country_age and
# region effects, with standard errors clustered by region.
mean_m_b18 = df["m_b18"].mean()

mod = AbsorbingLS(
    df["m_b18"], df[["intensity_m_post"]], absorb=df[["country_age", "region"]]
)
# Keep the fitted results: a bare `mod.fit(...)` that is not the last
# expression of a cell is neither stored nor displayed, so the estimates
# were previously lost.
res = mod.fit(cov_type="clustered", clusters=df["region"])
print(res.summary)

# Mean of the dependent variable, reported alongside the estimates.
print(mean_m_b18)


In [None]:
 #Drop missing values for v511 variable  
df_v511 = df.dropna(subset=["v511"])

# Regression of v511 on intensity_m_post, absorbing country_age and region
# effects, with standard errors clustered by region.
mod = AbsorbingLS(
    df_v511["v511"], df_v511[["intensity_m_post"]], absorb=df_v511[["country_age", "region"]]
)
# Store and print the fit; previously the result was discarded because the
# call was not the last expression in the cell.
res = mod.fit(cov_type="clustered", clusters=df_v511["region"])
print(res.summary)

# Mean of the dependent variable on the estimation sample.
mean_v511 = df_v511["v511"].mean()
print(mean_v511)

In [None]:
# Drop rows with missing v212, then regress v212 on intensity_m_post,
# absorbing country_age and region effects (SEs clustered by region).
df_v212 = df.dropna(subset=["v212"])

mod = AbsorbingLS(
    df_v212["v212"], df_v212[["intensity_m_post"]], absorb=df_v212[["country_age", "region"]]
)
# Store and print the fit; previously the result was discarded because the
# call was not the last expression in the cell.
res = mod.fit(cov_type="clustered", clusters=df_v212["region"])
print(res.summary)

# Mean of the dependent variable on the estimation sample.
mean_v212 = df_v212["v212"].mean()
print(mean_v212)


In [None]:

# Drop rows with missing edu_year, then regress edu_year on intensity_m_post,
# absorbing country_age and region effects (SEs clustered by region).
df_edu_year = df.dropna(subset=["edu_year"])

mod = AbsorbingLS(
    df_edu_year["edu_year"], df_edu_year[["intensity_m_post"]], absorb=df_edu_year[["country_age", "region"]]
)
# Store and print the fit; previously the result was discarded because the
# call was not the last expression in the cell.
res = mod.fit(cov_type="clustered", clusters=df_edu_year["region"])
print(res.summary)

# Mean of the dependent variable on the estimation sample.
mean_edu_year = df_edu_year["edu_year"].mean()
print(mean_edu_year)


In [None]:

# Drop rows with missing emp, then regress emp on intensity_m_post,
# absorbing country_age and region effects (SEs clustered by region).
df_emp = df.dropna(subset=["emp"])

mod = AbsorbingLS(
    df_emp["emp"], df_emp[["intensity_m_post"]], absorb=df_emp[["country_age", "region"]]
)
# Store and print the fit; previously the result was discarded because the
# call was not the last expression in the cell.
res = mod.fit(cov_type="clustered", clusters=df_emp["region"])
print(res.summary)

# Mean of the dependent variable on the estimation sample.
mean_emp = df_emp["emp"].mean()
print(mean_emp)




In [None]:
# m_b18 regressions estimated separately for urban and rural females.
# `urban` is compared against string codes: "1" selects urban and "0" rural.
# The rural half used to filter on "1" as well (copy-paste slip), so it
# silently re-estimated the urban sample.
for label, urban_code in (("Urban", "1"), ("Rural", "0")):
    df_u = df[df["urban"] == urban_code]

    mean_m_b18 = df_u["m_b18"].mean()

    mod = AbsorbingLS(
        df_u["m_b18"], df_u[["intensity_m_post"]], absorb=df_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_u["region"])

    print(f"{label} females")
    print(res.summary)
    print(mean_m_b18)

In [None]:
# v511 regressions estimated separately for urban ("1") and rural ("0")
# females, after dropping rows with missing v511.
df_v511 = df.dropna(subset=["v511"])

for label, urban_code in (("Urban", "1"), ("Rural", "0")):
    df_v511_u = df_v511[df_v511["urban"] == urban_code]
    mean_df_v511_u = df_v511_u["v511"].mean()

    mod = AbsorbingLS(
        df_v511_u["v511"], df_v511_u[["intensity_m_post"]], absorb=df_v511_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_v511_u["region"])

    print(f"{label} females")
    print(res.summary)
    print(mean_df_v511_u)

In [None]:
# v212 regressions estimated separately for urban ("1") and rural ("0")
# females, after dropping rows with missing v212.
df_v212 = df.dropna(subset=["v212"])

for label, urban_code in (("Urban", "1"), ("Rural", "0")):
    df_v212_u = df_v212[df_v212["urban"] == urban_code]

    mod = AbsorbingLS(
        df_v212_u["v212"], df_v212_u[["intensity_m_post"]], absorb=df_v212_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_v212_u["region"])

    mean_v212_u = df_v212_u["v212"].mean()

    print(f"{label} females")
    print(res.summary)
    print(mean_v212_u)

In [None]:

# edu_year regressions estimated separately for urban ("1") and rural ("0")
# females, after dropping rows with missing edu_year.
df_edu_year = df.dropna(subset=["edu_year"])

for label, urban_code in (("Urban", "1"), ("Rural", "0")):
    df_edu_year_u = df_edu_year[df_edu_year["urban"] == urban_code]

    mod = AbsorbingLS(
        df_edu_year_u["edu_year"], df_edu_year_u[["intensity_m_post"]], absorb=df_edu_year_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_edu_year_u["region"])

    mean_edu_year_u = df_edu_year_u["edu_year"].mean()

    print(f"{label} females")
    print(res.summary)
    print(mean_edu_year_u)

In [None]:
 
 # emp regressions for urban ("1") and rural ("0") females, rows with missing emp dropped
df_emp = df.dropna(subset=["emp"])

for label, urban_code in (("Urban", "1"), ("Rural", "0")):
    df_emp_u = df_emp[df_emp["urban"] == urban_code]

    mod = AbsorbingLS(
        df_emp_u["emp"], df_emp_u[["intensity_m_post"]], absorb=df_emp_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_emp_u["region"])

    mean_emp_u = df_emp_u["emp"].mean()

    print(f"{label} females")
    print(res.summary)
    print(mean_emp_u)

In [None]:
# m_b18 regressions estimated separately for each value of minage_fem_pc_95.
# Each group is labelled with the exact category string it filters on; the
# earlier hand-written headings did not match the filters below them.
for minage_group in ("Until 16 or 17", "Until 14 or 15", "No minimum age"):
    df_u = df[df["minage_fem_pc_95"] == minage_group]

    mean_m_b18 = df_u["m_b18"].mean()

    mod = AbsorbingLS(
        df_u["m_b18"], df_u[["intensity_m_post"]], absorb=df_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_u["region"])

    print(f"minage_fem_pc_95: {minage_group}")
    print(res.summary)
    print(mean_m_b18)

In [None]:
# v511 regressions estimated separately for each value of minage_fem_pc_95,
# after dropping rows with missing v511.
df_v511 = df.dropna(subset=["v511"])

for minage_group in ("Until 16 or 17", "Until 14 or 15", "No minimum age"):
    df_v511_u = df_v511[df_v511["minage_fem_pc_95"] == minage_group]
    mean_df_v511_u = df_v511_u["v511"].mean()

    mod = AbsorbingLS(
        df_v511_u["v511"], df_v511_u[["intensity_m_post"]], absorb=df_v511_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_v511_u["region"])

    print(f"minage_fem_pc_95: {minage_group}")
    print(res.summary)
    print(mean_df_v511_u)

In [None]:
# v212 regressions estimated separately for each value of minage_fem_pc_95,
# after dropping rows with missing v212.
# The mean/print lines previously carried a stray leading space, which made
# the whole cell fail with "IndentationError: unexpected indent".
df_v212 = df.dropna(subset=["v212"])

for minage_group in ("Until 16 or 17", "Until 14 or 15", "No minimum age"):
    df_v212_u = df_v212[df_v212["minage_fem_pc_95"] == minage_group]

    mod = AbsorbingLS(
        df_v212_u["v212"], df_v212_u[["intensity_m_post"]], absorb=df_v212_u[["country_age", "region"]]
    )
    # Store and print each fit; a bare `mod.fit(...)` mid-cell is discarded.
    res = mod.fit(cov_type="clustered", clusters=df_v212_u["region"])

    mean_v212_u = df_v212_u["v212"].mean()

    print(f"minage_fem_pc_95: {minage_group}")
    print(res.summary)
    print(mean_v212_u)

In [None]:
# edu_year regressions estimated separately for each value of
# minage_fem_pc_95, after dropping rows with missing edu_year.
df_edu_year = df.dropna(subset=["edu_year"])

for minage_group in ("Until 16 or 17", "Until 14 or 15", "No minimum age"):
    df_edu_year_u = df_edu_year[df_edu_year["minage_fem_pc_95"] == minage_group]

    mod = AbsorbingLS(
        df_edu_year_u["edu_year"], df_edu_year_u[["intensity_m_post"]], absorb=df_edu_year_u[["country_age", "region"]]
    )
    # Store and print every fit. Before, only the last `mod.fit(...)` of the
    # cell was displayed and the first two results were thrown away.
    res = mod.fit(cov_type="clustered", clusters=df_edu_year_u["region"])

    # Subgroup mean of the dependent variable, as in the urban/rural
    # edu_year cell.
    mean_edu_year_u = df_edu_year_u["edu_year"].mean()

    print(f"minage_fem_pc_95: {minage_group}")
    print(res.summary)
    print(mean_edu_year_u)

In [None]:
   
# emp regressions estimated separately for each value of minage_fem_pc_95,
# after dropping rows with missing emp.
df_emp = df.dropna(subset=["emp"])

for minage_group in ("Until 16 or 17", "Until 14 or 15", "No minimum age"):
    df_emp_u = df_emp[df_emp["minage_fem_pc_95"] == minage_group]

    mod = AbsorbingLS(
        df_emp_u["emp"], df_emp_u[["intensity_m_post"]], absorb=df_emp_u[["country_age", "region"]]
    )
    # Store and print every fit. Before, only the last `mod.fit(...)` of the
    # cell was displayed and the first two results were thrown away.
    res = mod.fit(cov_type="clustered", clusters=df_emp_u["region"])

    # Subgroup mean of the dependent variable (this was commented out;
    # restored now that the summary is printed explicitly).
    mean_emp_u = df_emp_u["emp"].mean()

    print(f"minage_fem_pc_95: {minage_group}")
    print(res.summary)
    print(mean_emp_u)

