-
Notifications
You must be signed in to change notification settings - Fork 0
/
ipums_merge_example.R
50 lines (36 loc) · 1.34 KB
/
ipums_merge_example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
## Example file to merge censoc dataset with publicly available IPUMS full count 1940 census data
## Note: to obtain IPUMS file see documentation
library(data.table)
library(tidyverse)
# Locations of the CenSoc file and the IPUMS census file (edit before running)
censoc_path <- "path/to/your/censoc"
census_path <- "path/to/your/census"

# Load both files as data.tables
censoc <- fread(censoc_path)
census <- fread(census_path)

# Key both tables on HISTID, the identifier used to link the two files
setkeyv(censoc, "HISTID")
setkeyv(census, "HISTID")

# Keyed inner join: nomatch = 0 drops census rows with no matching censoc row
merged_df <- censoc[census, nomatch = 0]

# The inputs are no longer needed once merged; remove them from the workspace
rm(censoc, census)
# Example: calculate average age of death by race -------------------------
# Keep only the columns this example needs
merged_df <- merged_df[
  ,
  .SD,
  .SDcols = c(
    "unique_id", "STATEFIP", "AGE",
    "byear", "dyear", "bmonth", "dmonth",
    "RACE"
  )
]
# Give the IPUMS RACE codes readable names, see:
# https://usa.ipums.org/usa-action/variables/RACE#codes_section
# The column is added by reference with `:=` rather than dplyr::mutate():
# mutate() returns a copy of the data.table, and a later `:=` on that copy can
# raise data.table's "Invalid .internal.selfref" warning and force an extra
# shallow copy of the table.
# Any RACE code other than 1-6 is labelled with the literal string "NA"
# (a character value, not a missing value).
merged_df[, race_name := case_when(
  RACE == 1 ~ "White",
  RACE == 2 ~ "Black",
  RACE == 3 ~ "American Indian/Alaskan native",
  RACE == 4 ~ "Chinese",
  RACE == 5 ~ "Japanese",
  RACE == 6 ~ "Other Asian or Pacific Islander",
  TRUE ~ "NA"
)]

# Age at death as the difference in calendar years (death year - birth year);
# bmonth and dmonth are not used in this calculation.
merged_df[, age_at_death := dyear - byear]
# Mean age at death within each race category.
# na.rm = TRUE: without it, a single missing age_at_death (missing birth or
# death year) would turn that whole group's mean into NA.
merged_df %>%
  group_by(race_name) %>%
  summarise(mean_age_at_death = mean(age_at_death, na.rm = TRUE))