# 3.1.1. Column headers are values, not variable names - pew

In [1]:
library(foreign)
library(stringr)
library(plyr)
library(reshape2)

# Data from http://pewforum.org/Datasets/Dataset-Download.aspx

# Load data -----------------------------------------------------------------

# Read the SPSS file (returned by read.spss as a list of variables) and
# coerce it to a data frame in a single step.
pew <- as.data.frame(
  read.spss("../../data/pew.sav")
)


re-encoding from CP1252
"Undeclared level(s) 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96 added in variable: age"

In [2]:
# Quick look at the loaded survey: its dimensions (rows, columns) ...
dim(pew)
# ... and its first few rows.
head(pew)

weight,psraid,int_date,lang,type,cregion,state,usr,usr1,form,...,q63,educ,income,regist,regicert,party,partyln,ideo,pvote04a,pvote04b
4.512821,10000001,50807,English,RDD,Northeast,Connecticut,Suburban,Suburban,Form A,...,"Yes, father born outside U.S.","Technical, trade, or vocational school AFTER high school","75 to under $100,000","Yes, registered",Absolutely certain,Republican,,Moderate,Voted,Bush
2.102564,10000002,50807,English,RDD,Northeast,Maine,Rural,Rural,Form B,...,"No, both parents born in U.S.",High school graduate (Grade 12 or GED certificate),"20 to under $30,000","No, not registered",,Republican,,Conservative,Did not vote (includes too young to vote),
1.282051,10000003,50807,English,RDD,Northeast,Maine,Rural,Rural,Form A,...,"No, both parents born in U.S.","College graduate (B.S., B.A., or other 4-year degree)","30 to under $40,000","No, not registered",,Independent,Democrat,Conservative,Did not vote (includes too young to vote),
1.355323,10000004,50807,English,RDD,Northeast,Maine,Rural,Rural,Form B,...,"No, both parents born in U.S.","Some college, no 4-year degree (including associate degree)","Less than $10,000","No, not registered",,Independent,Democrat,Moderate,Did not vote (includes too young to vote),
1.589744,10000005,50807,English,RDD,Northeast,New York,Urban,Urban,Form A,...,"Yes, father born outside U.S.",Post-graduate training or professional schooling after colle,"50 to under $75,000","Yes, registered",Absolutely certain,Independent,Democrat,Moderate,Voted,Other candidate
1.410256,10000007,61507,English,RDD,Northeast,New York,Urban,Urban,Form A,...,,Post-graduate training or professional schooling after colle,"20 to under $30,000","Yes, registered",Absolutely certain,Democrat,,Very liberal,Voted,Kerry


In [3]:
# Keep only the three columns used below.
religion <- pew[c("q16", "reltrad", "income")]

# Tidy the religious-tradition labels. The work is done on a scratch copy
# inside local() so no helper variables are left behind in the workspace.
religion$reltrad <- local({
  tradition <- as.character(religion$reltrad)

  # Shorten the labels (first match per string only).
  tradition <- str_replace(tradition, " Churches", "")
  tradition <- str_replace(tradition, " Protestant", " Prot")

  # Respondents whose q16 answer is atheist/agnostic get their own category,
  # overriding whatever reltrad said. The q16 labels are matched exactly as
  # written here, including the surrounding spaces.
  tradition[religion$q16 %in% " Atheist (do not believe in God) "] <- "Atheist"
  tradition[religion$q16 %in% " Agnostic (not sure if there is a God) "] <- "Agnostic"

  # Trim surrounding whitespace, then drop any " (...)" parenthetical.
  tradition <- str_trim(tradition)
  str_replace_all(tradition, " \\(.*?\\)", "")
})


In [4]:
# Check the subset after cleaning the reltrad labels: dimensions, then the
# first few rows.
dim(religion)
head(religion)

q16,reltrad,income
Protestant,Evangelical Prot,"75 to under $100,000"
Protestant,Mainline Prot,"20 to under $30,000"
Protestant,Mainline Prot,"30 to under $40,000"
Nothing in particular,Unaffiliated,"Less than $10,000"
Jewish (Judaism),Jewish,"50 to under $75,000"
Jewish (Judaism),Jewish,"20 to under $30,000"


In [5]:
# Recode income into short display labels and store it as a factor whose
# levels follow the order of the lookup table below.
religion$income <- local({
  # Long survey label -> short label.
  short_labels <- c(
    "Less than $10,000" = "<$10k",
    "10 to under $20,000" = "$10-20k",
    "20 to under $30,000" = "$20-30k",
    "30 to under $40,000" = "$30-40k",
    "40 to under $50,000" = "$40-50k",
    "50 to under $75,000" = "$50-75k",
    "75 to under $100,000" = "$75-100k",
    "100 to under $150,000" = "$100-150k",
    "$150,000 or more" = ">150k",
    "Don't know/Refused (VOL)" = "Don't know/refused"
  )

  # NOTE(review): if religion$income is a factor (as read.spss produces for
  # labelled variables), this subscript uses the factor's integer codes, not
  # its labels, so it relies on the level order matching the order above.
  # Confirm before reordering or editing the lookup table.
  recoded <- short_labels[religion$income]

  factor(recoded, levels = unname(short_labels))
})


In [6]:
# Check the result of the income recode: dimensions, then the first few rows.
dim(religion)
head(religion)

q16,reltrad,income
Protestant,Evangelical Prot,$75-100k
Protestant,Mainline Prot,$20-30k
Protestant,Mainline Prot,$30-40k
Nothing in particular,Unaffiliated,<$10k
Jewish (Judaism),Jewish,$50-75k
Jewish (Judaism),Jewish,$20-30k


In [7]:
# Tally the rows for every (reltrad, income) combination; plyr::count puts
# the tally in a `freq` column after the grouping columns.
counts <- plyr::count(religion, vars = c("reltrad", "income"))

# Call the first column "religion" and leave the other names untouched.
names(counts) <- c("religion", names(counts)[-1])


In [8]:
# Check the long-format tallies: dimensions, then the first few rows.
dim(counts)
head(counts)

religion,income,freq
Agnostic,<$10k,27
Agnostic,$10-20k,34
Agnostic,$20-30k,60
Agnostic,$30-40k,81
Agnostic,$40-50k,76
Agnostic,$50-75k,137


In [9]:
# Convert into the form in which I originally saw it -------------------------
# One row per religion, one column per income level, cells filled from `freq`.

raw <- reshape2::dcast(
  data = counts,
  formula = religion ~ income,
  value.var = "freq"
)


In [10]:
# Display the wide table: income levels appear as column headers.
raw

religion,<$10k,$10-20k,$20-30k,$30-40k,$40-50k,$50-75k,$75-100k,$100-150k,>150k,Don't know/refused
Agnostic,27,34,60,81,76,137,122,109,84,96
Atheist,12,27,37,52,35,70,73,59,74,76
Buddhist,27,21,30,34,33,58,62,39,53,54
Catholic,418,617,732,670,638,1116,949,792,633,1489
Don’t know/refused,15,14,15,11,10,35,21,17,18,116
Evangelical Prot,575,869,1064,982,881,1486,949,723,414,1529
Hindu,1,9,7,9,11,34,47,48,54,37
Historically Black Prot,228,244,236,238,197,223,131,81,78,339
Jehovah's Witness,20,27,24,24,21,30,15,11,6,37
Jewish,19,19,25,25,30,95,69,87,151,162
