In [1]:
library(tidyverse)
library(haven)
library(ivreg)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# Read the Stata (.dta) file into `census_data`. The path is relative, so it
# is resolved against the current working directory.
census_data <- haven::read_dta(file = "cen_ind_2021_pumf_v2 2.dta")

In [3]:
# Build the analysis sample: keep the eleven variables used downstream, then
# drop every row in which a screened variable holds one of the codes listed
# below (presumably the "not available" / "not applicable" codes -- confirm
# against the codebook). Rows where a screened variable is NA are dropped as
# well, because filter() only keeps rows whose condition evaluates to TRUE.
# `Gender` and `immstat` are selected but not screened.
# NOTE(review): if yrim == 9999 marks non-immigrants, this step removes all of
# them from the sample -- confirm that is intended.
cleaned_data <- census_data |>
  select(
    agegrp, yrim, lfact, marsth, hdgree, jobperm,
    CFInc_AT, PR1, NOC21, Gender, immstat
  ) |>
  filter(
    agegrp != 88,
    yrim != 8888, yrim != 9999,
    lfact != 88, lfact != 99,
    marsth != 8,
    hdgree != 88, hdgree != 99,
    jobperm != 8, jobperm != 9,
    CFInc_AT != 88,
    PR1 != 88, PR1 != 99,
    NOC21 != 88, NOC21 != 99
  )


In [7]:

# Descriptive statistics for every column of `cleaned_data`: mean, standard
# deviation, minimum and maximum, each computed with NAs removed. The result
# is a one-row table whose columns are named `<column>_<statistic>`.
statistic <- summarise(
  cleaned_data,
  across(
    everything(),
    list(
      mean = \(col) mean(col, na.rm = TRUE),
      sd   = \(col) sd(col, na.rm = TRUE),
      min  = \(col) min(col, na.rm = TRUE),
      max  = \(col) max(col, na.rm = TRUE)
    )
  )
)

# Transpose so that each statistic sits on its own row (this yields a matrix).
statistic_tidy <- t(statistic)

statistic_tidy

0,1
agegrp_mean,12.5840709
agegrp_sd,2.7017528
agegrp_min,6.0
agegrp_max,21.0
yrim_mean,1413.1256184
yrim_sd,914.9249402
yrim_min,1.0
yrim_max,2020.0
lfact_mean,2.2227089
lfact_sd,3.0558378


In [None]:
# Restrict the sample to age groups 16 and 17 and add two 0/1 indicators:
#   retired - 1 when lfact is 11, 12 or 13, otherwise 0
#   female  - 1 when Gender equals 1, otherwise 0
# NOTE(review): the meaning of these codes is not visible here -- confirm
# against the codebook.
census_subset <- cleaned_data |>
  filter(agegrp == 16 | agegrp == 17) |>
  mutate(
    retired = ifelse(lfact %in% c(11, 12, 13), 1, 0),
    female = ifelse(Gender == 1, 1, 0)
  )
head(census_subset)

In [None]:
# Add the 0/1 indicator `eligible`. It is 1 only for rows in age group 17 that
# additionally satisfy one of:
#   * immstat == 1, or
#   * immstat is 2 or 3 and 2021 - yrim is at least 10.
# Every other row gets 0.
# NOTE(review): presumably age group 17 is the older of the two bands and the
# second condition is a ten-year residency rule -- confirm the code meanings.
census_subset <- census_subset |>
  mutate(
    eligible = ifelse(
      agegrp == 17 &
        (immstat == 1 | (immstat %in% c(2, 3) & (2021 - yrim) >= 10)),
      1,
      0
    )
  )

head(census_subset)

In [None]:
# Model 1: OLS of the 0/1 outcome `retired` on `eligible` alone (a linear
# probability model with no controls).
model1 <- lm(
  formula = retired ~ eligible,
  data = census_subset
)
model1 |> summary()

In [None]:
# Model 2: adds the `female` indicator and `marsth` (entered as a factor, i.e.
# one dummy per level) to Model 1.
model2 <- lm(
  formula = retired ~ eligible + female + factor(marsth),
  data = census_subset
)
model2 |> summary()

In [None]:
# Model 3: adds `hdgree` and `jobperm`, both entered as factors, to Model 2.
model3 <- lm(
  formula = retired ~ eligible + female + factor(marsth) + factor(hdgree) +
    factor(jobperm),
  data = census_subset
)
model3 |> summary()

In [None]:
# Model 4: the full specification. Adds `CFInc_AT` (entered as a plain numeric
# regressor, not a factor) plus `PR1` and `NOC21` as factors to Model 3.
model4 <- lm(
  formula = retired ~ eligible + female + factor(marsth) + factor(hdgree) +
    factor(jobperm) + CFInc_AT + factor(PR1) + factor(NOC21),
  data = census_subset
)
model4 |> summary()

In [None]:

# Two-stage least squares for `retired`, treating `eligible` as endogenous.
# Left of `|`: the second-stage regressors (same specification as Model 4).
# Right of `|`: the instrument set, i.e. the excluded instrument followed by
# every exogenous control repeated.
#
# Excluded instrument: `factor(agegrp)`. `census_subset` holds only age groups
# 16 and 17, so this is a single dummy. `yob` cannot be used here because it
# is not a column of `census_subset` (it is never selected or created), which
# makes the formula impossible to evaluate.
# NOTE(review): agegrp is the available stand-in for birth cohort -- confirm
# it is the intended instrument and that the exclusion restriction holds.
iv_model <- ivreg(
  retired ~ eligible +
    female +
    factor(marsth) +
    factor(hdgree) +
    factor(jobperm) +
    CFInc_AT +
    factor(PR1) +
    factor(NOC21) |
    factor(agegrp) +
    female +
    factor(marsth) +
    factor(hdgree) +
    factor(jobperm) +
    CFInc_AT +
    factor(PR1) +
    factor(NOC21),
  data = census_subset
)

In [None]:
# Report the IV fit the same way as the OLS models: a coefficient table with
# standard errors, rather than the bare coefficient printout.
summary(iv_model)