# Analysis of White Ancestry and 2016 Candidate Preference

Between September 22 and October 2, 2016 (notably, before the bombshell video of Trump making lewd comments about women), as part of its regular online political poll, the survey firm Morning Consult asked more than 5,000 registered voters to check which of the most common ancestry categories recorded by the Census Bureau applied to them, and also to pick the one ancestry they identified with most. Below, you can find BuzzFeed News' analysis of the results.

In [1]:
import pandas as pd
from collections import OrderedDict

In [2]:
# Load the raw survey export (read as Latin-1) and report how many rows it has.
data = pd.read_csv(
    "../data/BUZZFEED_DATA.csv",
    encoding="latin-1",
)
print("{0} total responses".format(data.shape[0]))

5365 total responses


## Label variables

The raw data uses numeric codes; here, we translate them into names.

In [3]:
# Translate numeric race codes into readable labels; any code that is not
# in the table (including missing values) ends up as "[n/a]".
data["demRace_label"] = data["demRace"].apply({
    1: "American Indian",
    2: "Asian American",
    3: "Black",
    4: "White",
    5: "Other",
}.get).fillna("[n/a]")

In [4]:
# BF2 holds a single ancestry code per respondent (used below for the
# "[most]" groups); codes that are not in the table become "[n/a]".
data["BF2_label"] = data["BF2"].apply({
    1: "German",
    2: "Irish",
    3: "English",
    4: "American",
    5: "Italian",
    6: "Polish",
    7: "French",
    8: "Scottish",
    9: "Norwegian",
    10: "Dutch",
    11: "Swedish",
    12: "Other",
}.get).fillna("[n/a]")

In [5]:
# Presidential preference codes -> labels; unmapped codes become "[n/a]".
data["v16g5_label"] = data["v16g5"].apply({
    1: "Clinton",
    2: "Trump",
    3: "Don't Know / No Opinion",
}.get).fillna("[n/a]")

In [6]:
# Congressional preference codes -> labels; unmapped codes become "[n/a]".
data["v16g18_label"] = data["v16g18"].apply({
    1: "Democratic candidate",
    2: "Republican candidate",
    3: "Don't Know / No Opinion",
}.get).fillna("[n/a]")

In [7]:
# Approval-rating codes -> labels; unmapped codes become "[n/a]".
# The last label uses a typographic apostrophe (’), unlike the straight
# apostrophe in the candidate labels; OBAMA_OPTIONS must spell it the same way.
data["nr2_label"] = data["nr2"].apply({
    1: "Strongly Approve",
    2: "Somewhat Approve",
    3: "Somewhat Disapprove",
    4: "Strongly Disapprove",
    5: "Don’t Know / No Opinion",
}.get).fillna("[n/a]")

In [8]:
# Party-identification codes -> labels; unmapped codes become "[n/a]".
data["demPidNoLn_label"] = data["demPidNoLn"].apply({
    1: "Republican",
    2: "Democrat",
    3: "Independent",
    4: "Something else",
}.get).fillna("[n/a]")

In [9]:
# Religion codes -> labels; unmapped codes become "[n/a]".
data["demRelig_label"] = data["demRelig"].apply({
    1: "Protestant",
    2: "Roman Catholic",
    3: "Mormon",
    4: "Orthodox",
    5: "Jewish",
    6: "Muslim",
    7: "Buddhist",
    8: "Hindu",
    9: "Atheist",
    10: "Agnostic",
    11: "Something else",
    12: "Nothing in particular",
}.get).fillna("[n/a]")

In [10]:
# Boolean flag used later for the weighted share of Protestants.
data["is_protestant"] = data["demRelig_label"].eq("Protestant")

In [11]:
# `demAgeFull` is a coded value rather than a raw age:
# code `49` corresponds to age 65, so this flags respondents aged 65+.
data["is_senior"] = data["demAgeFull"].ge(49)

## Set up demographic groups

In [12]:
# One boolean "ident_<ancestry>" column per ancestry; these feed the "[any]" groups.
# NOTE(review): assumes a value of 1 in a BF1_*NET column means the respondent
# selected that ancestry — confirm against the codebook.
for ancestry, net_column in [
    ("german", "BF1_4NET"),
    ("irish", "BF1_5NET"),
    ("english", "BF1_6NET"),
    ("american", "BF1_7NET"),
    ("italian", "BF1_8NET"),
    ("scottish", "BF1_11NET"),
]:
    data["ident_" + ancestry] = data[net_column] == 1

In [13]:
# Subset of respondents whose race label is "White"; basis for the ancestry groups.
data_white = data.loc[data["demRace_label"].eq("White")]

In [14]:
# Named respondent subsets, kept in display order for the tables below.
demographic_groups = OrderedDict()
demographic_groups["Men"] = data[data["demGender"] == 1]
demographic_groups["Women"] = data[data["demGender"] == 2]
demographic_groups["Black"] = data[data["demRace_label"] == "Black"]
demographic_groups["Hispanic"] = data[data["demHisp"] == 1]
demographic_groups["White"] = data_white
demographic_groups["White non-Hispanic"] = data_white[data_white["demHisp"] != 1]

# For each ancestry add two white subgroups:
#   [any]  - the ancestry was one of those the respondent checked
#   [most] - the ancestry is the one recorded in BF2
for ancestry in ["American", "German", "English", "Scottish", "Irish", "Italian"]:
    demographic_groups["White {0} [any]".format(ancestry)] = \
        data_white[data_white["ident_" + ancestry.lower()]]
    demographic_groups["White {0} [most]".format(ancestry)] = \
        data_white[data_white["BF2_label"] == ancestry]

## Set up calculations

In [15]:
def get_counts(grouped):
    """Return unweighted group sizes as a table with one column per candidate.

    `grouped` is a pandas groupby; its sizes are unstacked so the innermost
    grouping key becomes the columns, missing combinations are counted as 0,
    and only the `CANDIDATES` columns are kept (in that order).
    """
    sizes = grouped.size()
    wide = sizes.unstack()
    wide = wide.fillna(0)
    return wide[CANDIDATES]

In [16]:
def splits_table_maker(labels, splitter):
    """Build a function that turns a groupby into a table of weighted shares.

    Parameters
    ----------
    labels : list of str
        Response labels to keep, in display order.
    splitter : callable
        Called with one row of rounded shares (indexable by label); returns
        the text stored under "split".

    Returns
    -------
    callable
        ``fn(grouped)`` where ``grouped`` is a pandas groupby over a frame
        with a ``wts`` column. It sums the weights per group, converts them
        to proportions of the total weight (rounded to 2 decimals), keeps the
        requested ``labels`` and appends a "split" entry. A single-key groupby
        yields a Series; a multi-key groupby yields a DataFrame with one row
        per outer key.
    """
    def fn(grouped):
        weight_totals = grouped["wts"].sum()
        if hasattr(weight_totals.index, "levels"):
            # Multi-level index: pivot the innermost key into columns and
            # normalise each row by its total weight (all responses, not
            # just the requested labels).
            wide = weight_totals.unstack().fillna(0)
            shares = wide.div(wide.sum(axis=1), axis=0).round(2)
            # reindex instead of [labels]: a label nobody in the data chose
            # becomes a 0 share rather than raising KeyError.
            table = shares.reindex(columns=labels, fill_value=0)
            table["split"] = table.apply(splitter, axis=1)
        else:
            shares = (weight_totals / weight_totals.sum()).round(2)
            # Same reasoning as above: small groups may lack a response option.
            table = shares.reindex(labels, fill_value=0)
            table["split"] = splitter(table)
        return table
    return fn

In [17]:
# Presidential-preference labels, in the column order used by the tables.
CANDIDATES = [
    "Clinton",
    "Trump",
    "Don't Know / No Opinion",
]

In [18]:
def get_pres_split(row):
    """Describe the Clinton-vs-Trump margin of a row of shares.

    `row` must be indexable by "Clinton" and "Trump" (shares as fractions).
    Returns "Clinton +N%" or "Trump +N%" for the leader, "--" for an exact
    tie, and None when the margin is not comparable (e.g. NaN).
    """
    margin = row["Clinton"] - row["Trump"]
    if margin > 0:
        leader = "Clinton"
    elif margin < 0:
        leader = "Trump"
    elif margin == 0:
        return "--"
    else:
        return None
    return "{0} +{1:.0f}%".format(leader, abs(margin) * 100)

In [19]:
# Weighted presidential-preference table builder.
get_pres_weighted_splits = splits_table_maker(
    labels=CANDIDATES,
    splitter=get_pres_split,
)

In [20]:
# Congressional-preference labels, in the column order used by the tables.
CONG_OPTIONS = [
    "Democratic candidate",
    "Republican candidate",
    "Don't Know / No Opinion",
]

In [21]:
def get_cong_split(row):
    """Describe the Democratic-vs-Republican margin of a row of shares.

    `row` must be indexable by "Democratic candidate" and
    "Republican candidate" (shares as fractions). Returns "Dem. +N%" or
    "Rep. +N%" for the leader, "--" for an exact tie, and None when the
    margin is not comparable (e.g. NaN).
    """
    margin = row["Democratic candidate"] - row["Republican candidate"]
    if margin > 0:
        leader = "Dem."
    elif margin < 0:
        leader = "Rep."
    elif margin == 0:
        return "--"
    else:
        return None
    return "{0} +{1:.0f}%".format(leader, abs(margin) * 100)

In [22]:
# Weighted congressional-preference table builder.
get_cong_weighted_splits = splits_table_maker(
    labels=CONG_OPTIONS,
    splitter=get_cong_split,
)

In [23]:
# Approval labels in display order. The last entry uses a typographic
# apostrophe (’) and must match the nr2 label text exactly.
OBAMA_OPTIONS = [
    "Strongly Approve",
    "Somewhat Approve",
    "Somewhat Disapprove",
    "Strongly Disapprove",
    "Don’t Know / No Opinion",
]

In [24]:
def get_obama_split(row):
    """Describe net approval (approve minus disapprove) in percentage points.

    `row` must be indexable by the four approve/disapprove labels (shares as
    fractions). Returns "+N%" when approval leads, "-N%" when disapproval
    leads, "--" when the net is zero points, and None when the net is not
    comparable (e.g. NaN).
    """
    approve = row["Strongly Approve"] + row["Somewhat Approve"]
    disapprove = row["Somewhat Disapprove"] + row["Strongly Disapprove"]
    # Round to whole points *before* the sign test. Each side is a sum of
    # floats, so equal totals can differ by ~1e-17 (0.1 + 0.2 != 0.3) and an
    # exact `== 0` check would report a tie as "+0%" or "-0%".
    net_points = round((approve - disapprove) * 100, 0)
    if net_points == 0:
        return "--"
    if net_points > 0:
        return "+{0:.0f}%".format(net_points)
    if net_points < 0:
        return "-{0:.0f}%".format(-net_points)
    # NaN compares false against everything: no split to report.
    return None

In [25]:
# Weighted Obama-approval table builder.
get_obama_weighted_splits = splits_table_maker(
    labels=OBAMA_OPTIONS,
    splitter=get_obama_split,
)

## Raw respondent counts

In [26]:
# Unweighted respondent counts per demographic group and candidate.
# The .loc step puts the rows back into the order of `demographic_groups`.
(
    pd.DataFrame({
        group_name: group_df.groupby("v16g5_label").size()
        for group_name, group_df in demographic_groups.items()
    })
    .T
    .loc[list(demographic_groups)]
    [CANDIDATES]
)

v16g5_label,Clinton,Trump,Don't Know / No Opinion
Men,1020,1029,419
Women,1268,1048,581
Black,348,34,59
Hispanic,275,112,82
White,1688,1906,846
White non-Hispanic,1512,1828,791
White American [any],419,522,219
White American [most],317,431,163
White German [any],371,492,216
White German [most],146,210,73


## Weighted presidential splits

In [27]:
# Weighted presidential preference per demographic group, rows in the
# order of `demographic_groups`.
(
    pd.DataFrame({
        group_name: get_pres_weighted_splits(group_df.groupby("v16g5_label"))
        for group_name, group_df in demographic_groups.items()
    })
    .T
    .loc[list(demographic_groups)]
)

v16g5_label,Clinton,Trump,Don't Know / No Opinion,split
Men,0.41,0.42,0.17,Trump +1%
Women,0.45,0.35,0.2,Clinton +10%
Black,0.78,0.07,0.15,Clinton +71%
Hispanic,0.59,0.24,0.17,Clinton +35%
White,0.37,0.44,0.19,Trump +7%
White non-Hispanic,0.35,0.45,0.19,Trump +10%
White American [any],0.35,0.47,0.18,Trump +12%
White American [most],0.33,0.5,0.17,Trump +17%
White German [any],0.34,0.47,0.19,Trump +13%
White German [most],0.33,0.51,0.16,Trump +18%


## Weighted congressional splits

In [28]:
# Weighted congressional preference per demographic group, rows in the
# order of `demographic_groups`.
(
    pd.DataFrame({
        group_name: get_cong_weighted_splits(group_df.groupby("v16g18_label"))
        for group_name, group_df in demographic_groups.items()
    })
    .T
    .loc[list(demographic_groups)]
)

v16g18_label,Democratic candidate,Republican candidate,Don't Know / No Opinion,split
Men,0.43,0.4,0.17,Dem. +3%
Women,0.43,0.34,0.23,Dem. +9%
Black,0.79,0.09,0.12,Dem. +70%
Hispanic,0.58,0.26,0.16,Dem. +32%
White,0.37,0.42,0.21,Rep. +5%
White non-Hispanic,0.35,0.43,0.22,Rep. +8%
White American [any],0.35,0.42,0.22,Rep. +7%
White American [most],0.34,0.43,0.23,Rep. +9%
White German [any],0.33,0.47,0.2,Rep. +14%
White German [most],0.31,0.51,0.18,Rep. +20%


## Weighted Obama approval splits

In [29]:
# Weighted Obama approval per demographic group, rows in the order of
# `demographic_groups`.
(
    pd.DataFrame({
        group_name: get_obama_weighted_splits(group_df.groupby("nr2_label"))
        for group_name, group_df in demographic_groups.items()
    })
    .T
    .loc[list(demographic_groups)]
)

nr2_label,Strongly Approve,Somewhat Approve,Somewhat Disapprove,Strongly Disapprove,Don’t Know / No Opinion,split
Men,0.23,0.27,0.12,0.36,0.02,+2%
Women,0.23,0.26,0.15,0.32,0.04,+2%
Black,0.55,0.31,0.05,0.06,0.03,+75%
Hispanic,0.32,0.35,0.11,0.17,0.05,+39%
White,0.17,0.25,0.15,0.39,0.03,-12%
White non-Hispanic,0.16,0.25,0.15,0.41,0.03,-15%
White American [any],0.16,0.24,0.16,0.41,0.04,-17%
White American [most],0.15,0.23,0.15,0.43,0.04,-20%
White German [any],0.14,0.25,0.17,0.42,0.02,-20%
White German [most],0.13,0.22,0.16,0.47,0.02,-28%


## White German-ancestry vs. all White non-Hispanic demographics

In [30]:
def summarize_demog(df):
    """Return weighted shares of seniors, Protestants and `demUsr == 3` rows.

    `df` needs the columns `wts`, `is_senior`, `is_protestant` and `demUsr`.
    Each value is the summed weight of the matching rows divided by the
    summed weight of all rows in `df`.
    """
    total_weight = df["wts"].sum()

    def weighted_share(mask):
        # Weight carried by the rows selected by `mask`, as a fraction of the total.
        return df.loc[mask, "wts"].sum() / total_weight

    return {
        "prop_seniors": weighted_share(df["is_senior"]),
        "prop_protestant": weighted_share(df["is_protestant"]),
        # NOTE(review): the key name implies demUsr code 3 means "rural" — confirm.
        "prop_rural": weighted_share(df["demUsr"] == 3),
    }

In [31]:
# Compare the German-ancestry groups against all white non-Hispanic respondents.
(
    pd.DataFrame({
        group_name: summarize_demog(group_df)
        for group_name, group_df in demographic_groups.items()
    })
    .round(2)
    [["White non-Hispanic", "White German [any]", "White German [most]"]]
)

Unnamed: 0,White non-Hispanic,White German [any],White German [most]
prop_protestant,0.3,0.34,0.39
prop_rural,0.31,0.31,0.33
prop_seniors,0.25,0.23,0.29


---

---

---