# Analyzing Demographics of Close Races

In [1]:
import pandas as pd

In [2]:
# Let long text columns (e.g. district names) show up to 100 characters before truncation.
pd.set_option("display.max_colwidth", 100)

## Look at demographics in close House races

In [3]:
# FiveThirtyEight House forecast, one row per candidate (see the preview below).
house_candidates = pd.read_csv(
    filepath_or_buffer="../data/fivethirtyeight/house_candidate_odds.csv"
)
house_candidates.head(n=5)

Unnamed: 0,candidate,classic_prob,district,party,state
0,Alyse S. Galvin,29.244,1,D,AK
1,Don Young,70.756,1,R,AK
2,Others,0.0,1,,AK
3,Terri A. Sewell,100.0,7,D,AL
4,Danner Kline,0.01,6,D,AL


In [4]:
# One group per House race, keyed by (district, state).
house_races = house_candidates.groupby(by=["district", "state"])

In [5]:
# Per-race summary: number of candidates and the favorite's "classic" win probability.
# The built-in groupby max replaces `apply(lambda x: max(x.values))`: it is vectorised
# and skips NaN, whereas Python's max() over an array gives an order-dependent
# result when a group contains NaN.
race_details = pd.DataFrame({
    "candidates": house_races.size(),
    "max_classic_prob": house_races["classic_prob"].max(),
}).reset_index()

In [6]:
# USPS state code -> state FIPS crosswalk, indexed by the two-letter code.
# FIPS is read as a string because it is concatenated with district numbers later.
usps_fips_cross = (
    pd.read_csv("../data/usps-fips-crosswalk.csv", dtype={"FIPS": str})
    .set_index(keys="Code")
)

In [7]:
# Quick look at the crosswalk's first rows.
usps_fips_cross.head(n=5)

Unnamed: 0_level_0,State,FIPS
Code,Unnamed: 1_level_1,Unnamed: 2_level_1
AL,Alabama,1
AK,Alaska,2
AZ,Arizona,4
AR,Arkansas,5
CA,California,6


In [8]:
def district_fips(row):
    """Build the congressional-district FIPS string for one race row.

    The result is the state's FIPS code (looked up in the module-level
    ``usps_fips_cross`` by ``row["state"]``) followed by a two-character
    district part.

    Args:
        row: mapping/Series with a ``"state"`` USPS code and a numeric ``"district"``.

    Returns:
        str: state FIPS + district part (``"00"`` for single-district at-large states).
    """
    at_large_states = ["AK", "DE", "MT", "ND", "SD", "VT", "WY"]
    state_fips = usps_fips_cross.loc[row["state"], "FIPS"]
    district = row["district"]

    # Districts numbered 10 and up already fill both digits.
    if not district < 10:
        return state_fips + str(int(district))

    # For states with only 1 at-large district, the FIPS is 0, not 1
    if row["state"] in at_large_states:
        return state_fips + "00"

    # Single-digit districts are left-padded with one zero.
    return state_fips + "0" + str(int(district))

In [9]:
# Row-wise: each race gets its state+district FIPS code.
race_details["fips"] = race_details.apply(district_fips, axis="columns")

In [10]:
# Check the new fips column on the first few races.
race_details.head(n=5)

Unnamed: 0,district,state,candidates,max_classic_prob,fips
0,1,AK,3,70.756,200
1,1,AL,2,99.936,101
2,1,AR,3,99.942,501
3,1,AZ,2,83.738,401
4,1,CA,2,78.128,601


*Note: `close_races` keeps races where the leading candidate's "classic" win probability is below 75%, which covers FiveThirtyEight's "Toss-Up" and "Lean" races.*

In [11]:
# Split House races at a 75% favorite probability: below is "close", the rest is "other".
# close_races is copied so later edits to it cannot touch race_details.
close_races = race_details.loc[race_details["max_classic_prob"].lt(75)].copy()
other_races = race_details.loc[race_details["max_classic_prob"].ge(75)]

print(
    "There are {} close Houses races, and {} other races.".format(
        len(close_races), len(other_races)
    )
)

There are 48 close Houses races, and 387 other races.


## Load and join congressional district demographics

In [12]:
cd_demographics = pd.read_csv("../data/census/CD.csv")
# GEOID looks like "50000US0612"; the text after "US" is the state+district FIPS
# that race_details["fips"] is joined on. The .str accessor is vectorised and
# leaves missing GEOIDs as NaN instead of raising.
cd_demographics["fips"] = cd_demographics["GEOID"].str.split("US").str[-1]
# Fixed seed so the preview shows the same rows on every Restart & Run All.
cd_demographics.sample(2, random_state=0)

Unnamed: 0,GEONAME,LNTITLE,GEOID,LNNUMBER,TOT_EST,TOT_MOE,ADU_EST,ADU_MOE,CIT_EST,CIT_MOE,CVAP_EST,CVAP_MOE,fips
4446,"Congressional District 18 (115th Congress), Pennsylvania",Total,50000US4218,1,706815,736.0,567265,1125.0,694095,1273,556465,1539,4218
417,"Congressional District 12 (115th Congress), California",Not Hispanic or Latino,50000US0612,2,630635,1878.0,556050,1638.0,560865,2893,489850,2721,612


In [31]:
def find_pct_total(row, totals=None):
    """Return a row's CVAP_EST as a percentage of its geography's "Total" CVAP_EST.

    Args:
        row: mapping/Series with ``"GEOID"`` and ``"CVAP_EST"``.
        totals: optional Series of "Total" CVAP_EST values indexed by GEOID.
            Pass a precomputed Series when applying this over a whole frame
            (``df.apply(find_pct_total, axis=1, totals=totals)``) so the lookup
            table is built once, not once per row. When omitted it is derived
            from the module-level ``cd_demographics``, as before.

    Returns:
        float: percentage rounded to one decimal place.

    Raises:
        KeyError: if ``row["GEOID"]`` has no "Total" entry.
    """
    if totals is None:
        # Fallback keeps the original call signature working. No .copy() is needed
        # because the lookup table is only read.
        totals = cd_demographics.loc[
            cd_demographics["LNTITLE"] == "Total"
        ].set_index("GEOID")["CVAP_EST"]
    geography_total = totals.loc[row["GEOID"]]
    # float() keeps Python's round() semantics regardless of the numpy scalar type.
    return round(float(row["CVAP_EST"] / geography_total * 100), 1)

In [32]:
# Vectorised version of the row-wise find_pct_total apply: look up each row's
# "Total" CVAP_EST by GEOID once, rather than rebuilding the totals table per row.
# Rows whose GEOID has no "Total" line get NaN. Series.round may differ from
# Python's round() on exact .x5 ties.
cvap_totals_by_geoid = (
    cd_demographics.loc[cd_demographics["LNTITLE"] == "Total"]
    .set_index("GEOID")["CVAP_EST"]
)
cd_demographics["pct_total"] = (
    cd_demographics["CVAP_EST"]
    / cd_demographics["GEOID"].map(cvap_totals_by_geoid)
    * 100
).round(1)

## Find demographics for close and not close House races

In [22]:
def find_race_demographics(races_df, demographics_df):
    """Sum CVAP_EST per demographic line (LNTITLE) over a set of races.

    Args:
        races_df: races to include; must have a ``"fips"`` column.
        demographics_df: demographic rows with ``"fips"``, ``"LNTITLE"`` and
            ``"CVAP_EST"`` columns.

    Returns:
        DataFrame indexed by LNTITLE with ``CVAP_EST`` (summed over the races)
        and ``pct_total`` (share of the "Total" line, one decimal place), sorted
        by ``pct_total`` in descending order.

    Raises:
        KeyError: if no "Total" line is present after the join.
    """
    # Left join keeps every race; races with no demographic rows contribute nothing
    # because groupby drops their NaN LNTITLE.
    race_demos = pd.merge(races_df, demographics_df, how="left", on="fips")
    race_demo_totals = race_demos.groupby("LNTITLE")["CVAP_EST"].sum().to_frame()
    race_demo_totals["pct_total"] = (
        race_demo_totals["CVAP_EST"] / race_demo_totals.loc["Total", "CVAP_EST"] * 100
    ).round(1)
    # sort_values returns a new frame; the original discarded it and returned the
    # unsorted frame. mergesort is stable, so ties keep their alphabetical order.
    return race_demo_totals.sort_values("pct_total", ascending=False, kind="mergesort")

In [23]:
# Same aggregation for both groups of House races.
close_race_demos, other_race_demos = (
    find_race_demographics(races, cd_demographics)
    for races in (close_races, other_races)
)

In [24]:
# Side-by-side table: columns from close races get "_close", the rest "_other".
comparison = close_race_demos.join(
    other_race_demos, lsuffix="_close", rsuffix="_other"
)

In [25]:
# Percentage-point gap between close races and all other races.
comparison["close_vs_others"] = comparison["pct_total_close"].sub(
    comparison["pct_total_other"]
)

In [26]:
# Groups that are at least 1% of both populations, largest over-representation first.
# "Not Hispanic or Latino" is left out of the view.
(
    comparison
    .loc[
        comparison["pct_total_close"].ge(1)
        & comparison["pct_total_other"].ge(1)
        & (comparison.index != "Not Hispanic or Latino"),
        ["pct_total_close", "pct_total_other", "close_vs_others"],
    ]
    .sort_values("close_vs_others", ascending=False)
)

Unnamed: 0_level_0,pct_total_close,pct_total_other,close_vs_others
LNTITLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
White Alone,75.2,69.2,6.0
Total,100.0,100.0,0.0
Asian Alone,3.9,4.0,-0.1
Hispanic or Latino,10.1,11.5,-1.4
Black or African American Alone,8.1,12.9,-4.8


In [33]:
# Close-race districts where the "Black or African American Alone" line is at most
# 2% of the district total, smallest share first.
(
    cd_demographics
    .loc[
        cd_demographics["LNTITLE"].eq("Black or African American Alone")
        & cd_demographics["fips"].isin(close_races["fips"])
        & cd_demographics["pct_total"].le(2),
        ["GEONAME", "pct_total"],
    ]
    .sort_values("pct_total", ascending=True)
)

Unnamed: 0,GEONAME,pct_total
2357,"Congressional District 2 (115th Congress), Maine",0.4
3033,"Congressional District (at Large) (115th Congress), Montana",0.4
992,"Congressional District 3 (115th Congress), Colorado",0.8
5217,"Congressional District 4 (115th Congress), Utah",1.2
888,"Congressional District 48 (115th Congress), California",1.3
5412,"Congressional District 3 (115th Congress), Washington",1.4
3332,"Congressional District 2 (115th Congress), New Mexico",1.8
849,"Congressional District 45 (115th Congress), California",1.9
2773,"Congressional District 1 (115th Congress), Minnesota",1.9


In [34]:
# Narrow the close races to those where the favorite is under 60%.
tossup_races = close_races.loc[close_races["max_classic_prob"].lt(60)].copy()

In [36]:
# Toss-up districts where the "White Alone" line is at least 85% of the district total,
# largest share first.
(
    cd_demographics
    .loc[
        cd_demographics["LNTITLE"].eq("White Alone")
        & cd_demographics["pct_total"].ge(85)
        & cd_demographics["fips"].isin(tossup_races["fips"]),
        ["GEONAME", "LNTITLE", "GEOID", "CVAP_EST", "pct_total"],
    ]
    .sort_values("pct_total", ascending=False)
)

Unnamed: 0,GEONAME,LNTITLE,GEOID,CVAP_EST,pct_total
2775,"Congressional District 1 (115th Congress), Minnesota",White Alone,50000US2701,456850,92.8
3633,"Congressional District 22 (115th Congress), New York",White Alone,50000US3622,499165,91.3
2671,"Congressional District 7 (115th Congress), Michigan",White Alone,50000US2607,488615,90.7
3594,"Congressional District 19 (115th Congress), New York",White Alone,50000US3619,491215,88.8
2255,"Congressional District 6 (115th Congress), Kentucky",White Alone,50000US2106,490230,87.6
2684,"Congressional District 8 (115th Congress), Michigan",White Alone,50000US2608,467530,87.2
2151,"Congressional District 2 (115th Congress), Kansas",White Alone,50000US2002,465370,86.9


## Look at demographics in close Senate races

In [37]:
# FiveThirtyEight Senate forecast, one row per candidate (see the preview below).
senate_candidates = pd.read_csv(
    filepath_or_buffer="../data/fivethirtyeight/senate_candidate_odds.csv"
)
senate_candidates.head(n=5)

Unnamed: 0,candidate,class,classic_prob,party,state
0,Kyrsten Sinema,1,61.63,D,AZ
1,Angela Green,1,0.002,G,AZ
2,Martha McSally,1,38.368,R,AZ
3,Dianne Feinstein,1,98.37,D,CA
4,Kevin de Leon,1,1.63,D,CA


In [38]:
# One group per Senate race, keyed by (state, class).
senate_races = senate_candidates.groupby(by=["state", "class"])

In [39]:
# Per-race summary: candidate count, every candidate's probability, and the favorite's.
# The built-in groupby max replaces `apply(lambda x: max(x.values))`: it is vectorised
# and skips NaN, whereas Python's max() over an array gives an order-dependent
# result when a group contains NaN.
senate_race_details = pd.DataFrame({
    "candidates": senate_races.size(),
    "classic_probs": senate_races["classic_prob"].apply(lambda x: list(x.values)),
    "max_classic_prob": senate_races["classic_prob"].max(),
}).reset_index()

In [40]:
# State-level FIPS for each Senate race, looked up by USPS code.
senate_race_details["fips"] = senate_race_details["state"].map(
    lambda usps_code: usps_fips_cross.loc[usps_code, "FIPS"]
)

In [41]:
# USPS code -> upper-case state name for the states with a Senate race in this data.
usps_state = dict(
    AZ="ARIZONA",
    CA="CALIFORNIA",
    CT="CONNECTICUT",
    DE="DELAWARE",
    FL="FLORIDA",
    HI="HAWAII",
    IN="INDIANA",
    MA="MASSACHUSETTS",
    MD="MARYLAND",
    ME="MAINE",
    MI="MICHIGAN",
    MN="MINNESOTA",
    MO="MISSOURI",
    MS="MISSISSIPPI",
    MT="MONTANA",
    ND="NORTH DAKOTA",
    NE="NEBRASKA",
    NJ="NEW JERSEY",
    NM="NEW MEXICO",
    NV="NEVADA",
    NY="NEW YORK",
    OH="OHIO",
    PA="PENNSYLVANIA",
    RI="RHODE ISLAND",
    TN="TENNESSEE",
    TX="TEXAS",
    UT="UTAH",
    VA="VIRGINIA",
    VT="VERMONT",
    WA="WASHINGTON",
    WI="WISCONSIN",
    WV="WEST VIRGINIA",
    WY="WYOMING",
)

In [42]:
# Attach the upper-case state name; an unmapped code raises KeyError.
senate_race_details["state_name"] = [
    usps_state[usps_code] for usps_code in senate_race_details["state"]
]

In [43]:
# Check the fips and state_name columns on the first few races.
senate_race_details.head(n=5)

Unnamed: 0,state,class,candidates,classic_probs,max_classic_prob,fips,state_name
0,AZ,1,3,"[61.63, 0.002, 38.368]",61.63,4,ARIZONA
1,CA,1,2,"[98.37, 1.63]",98.37,6,CALIFORNIA
2,CT,1,3,"[99.412, 0.588, 0.0]",99.412,9,CONNECTICUT
3,DE,1,3,"[99.844, 0.156, 0.0]",99.844,10,DELAWARE
4,FL,1,2,"[62.874, 37.126]",62.874,12,FLORIDA


In [44]:
# Same 75% split as the House analysis: below is "close", the rest is "other".
close_senate_races = senate_race_details.loc[
    senate_race_details["max_classic_prob"].lt(75)
].copy()
other_senate_races = senate_race_details.loc[
    senate_race_details["max_classic_prob"].ge(75)
]

print(
    "There are {} close Senate races, and {} other races.".format(
        len(close_senate_races), len(other_senate_races)
    )
)

There are 6 close Senate races, and 29 other races.


## Load and join state demographics

In [45]:
state_demographics = pd.read_csv("../data/census/State.csv")
# GEOID looks like "04000US27"; the text after "US" is the state FIPS that
# senate_race_details["fips"] is joined on. The .str accessor is vectorised and
# leaves missing GEOIDs as NaN instead of raising.
state_demographics["fips"] = state_demographics["GEOID"].str.split("US").str[-1]
# Fixed seed so the preview shows the same rows on every Restart & Run All.
state_demographics.sample(2, random_state=0)

Unnamed: 0,GEONAME,LNTITLE,GEOID,LNNUMBER,TOT_EST,TOT_MOE,ADU_EST,ADU_MOE,CIT_EST,CIT_MOE,CVAP_EST,CVAP_MOE,fips
301,Minnesota,American Indian or Alaska Native Alone,04000US27,3,52405,1062.0,36275,708.0,52125,1069,36025,720,27
193,Indiana,Remainder of Two or More Race Responses,04000US18,12,9270,1080.0,5725,668.0,8950,1081,5430,663,18


In [49]:
# Baseline across every state in the file: CVAP_EST per demographic line and its
# share of the "Total" line.
overall_demos = state_demographics.groupby("LNTITLE")["CVAP_EST"].sum().to_frame()
overall_demos["pct_total"] = (
    overall_demos["CVAP_EST"] / overall_demos.loc["Total"]["CVAP_EST"] * 100
).round(1)

In [50]:
# Five largest demographic lines overall.
overall_demos.sort_values(by="pct_total", ascending=False).head(n=5)

Unnamed: 0_level_0,CVAP_EST,pct_total
LNTITLE,Unnamed: 1_level_1,Unnamed: 2_level_1
Total,227315640,100.0
Not Hispanic or Latino,199128785,87.6
White Alone,156808060,69.0
Hispanic or Latino,28186855,12.4
Black or African American Alone,27891260,12.3


In [51]:
# Same aggregation for both groups of Senate races, using state-level demographics.
close_senate_demos, other_senate_demos = (
    find_race_demographics(races, state_demographics)
    for races in (close_senate_races, other_senate_races)
)

In [52]:
# Close vs. other Senate races side by side, plus the all-state baseline
# (whose columns keep their unsuffixed names).
close_vs_other_senate = close_senate_demos.join(
    other_senate_demos, lsuffix="_close", rsuffix="_other"
)
senate_comparison = close_vs_other_senate.join(overall_demos)

In [54]:
# Close vs. other races: percentage-point gap and ratio (one decimal place).
senate_comparison["close_vs_other"] = senate_comparison["pct_total_close"].sub(
    senate_comparison["pct_total_other"]
)
senate_comparison["close_vs_other_ratio"] = (
    senate_comparison["pct_total_close"] / senate_comparison["pct_total_other"]
).round(1)

In [55]:
# Close races vs. the all-state baseline: percentage-point gap and ratio (one decimal place).
senate_comparison["close_vs_total"] = senate_comparison["pct_total_close"].sub(
    senate_comparison["pct_total"]
)
senate_comparison["close_vs_total_ratio"] = (
    senate_comparison["pct_total_close"] / senate_comparison["pct_total"]
).round(1)

In [57]:
# Groups above the display thresholds, ordered by the close-vs-other gap.
# The sort runs before the column selection because close_vs_other is not displayed.
(
    senate_comparison
    .loc[
        senate_comparison["pct_total_close"].ge(0.5)
        & senate_comparison["pct_total_other"].ge(0.4)
        & (senate_comparison.index != "Not Hispanic or Latino")
    ]
    .sort_values("close_vs_other", ascending=False)
    .loc[:, ["pct_total_close", "pct_total_other", "pct_total", "close_vs_total"]]
)

Unnamed: 0_level_0,pct_total_close,pct_total_other,pct_total,close_vs_total
LNTITLE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
White Alone,70.1,68.7,69.0,1.1
Black or African American Alone,12.0,10.9,12.3,-0.3
Hispanic or Latino,13.4,12.9,12.4,1.0
American Indian or Alaska Native Alone,0.9,0.5,0.7,0.2
American Indian or Alaska Native and White,0.5,0.4,0.5,0.0
Total,100.0,100.0,100.0,0.0
Asian Alone,2.1,5.2,4.0,-1.9


## Running the Senate Analysis with 2016 Registered Voters

In [59]:
# Registration/voting table by state and demographic. The first 4 rows and last 5
# rows of the sheet are skipped, and "-" cells are read as missing.
demo_by_state = pd.read_excel(
    "../data/census/table04b.xls",
    skiprows=4,
    skipfooter=5,
    na_values=["-"]
)

demo_by_state.columns = [
    "state", "demographic", "total_pop", "total_citizen_pop", "total_registered", 
   "pct_registered", "moe_registered", "pct_registered_citizen", 
   "moe_registered_citizen", "total_voted", "pct_voted", "moe_voted", 
   "percent_voted_citizen", "moe_voted_citizen"
]

# Carry each state label down over the blank cells below it (presumably the sheet
# names the state only on the first row of each group; confirm against the file).
# Series.ffill() replaces the deprecated fillna(method="ffill").
demo_by_state["state"] = demo_by_state["state"].ffill()
# Strip thousands separators so the counts parse as floats; missing cells stay NaN.
demo_by_state["total_registered"] = demo_by_state["total_registered"].apply(lambda x: float(str(x).replace(",", "")))

In [60]:
# Check the renamed columns and the forward-filled state labels.
demo_by_state.head(n=5)

Unnamed: 0,state,demographic,total_pop,total_citizen_pop,total_registered,pct_registered,moe_registered,pct_registered_citizen,moe_registered_citizen,total_voted,pct_voted,moe_voted,percent_voted_citizen,moe_voted_citizen
0,US,Total,245502,224059,157596.0,64.2,0.3,70.3,0.3,137537.0,56.0,0.3,61.4,0.3
1,US,Male,118488,107554,73761.0,62.3,0.4,68.6,0.4,63801.0,53.8,0.4,59.3,0.5
2,US,Female,127013,116505,83835.0,66.0,0.4,72.0,0.4,73735.0,58.1,0.4,63.3,0.4
3,US,White alone,192129,177865,127463.0,66.3,0.3,71.7,0.3,111891.0,58.2,0.3,62.9,0.4
4,US,White non-Hispanic alone,157395,154450,114151.0,72.5,0.3,73.9,0.3,100849.0,64.1,0.4,65.3,0.4


In [61]:
# Reminder of the close Senate races and the state_name join key.
close_senate_races.head(n=5)

Unnamed: 0,state,class,candidates,classic_probs,max_classic_prob,fips,state_name
0,AZ,1,3,"[61.63, 0.002, 38.368]",61.63,4,ARIZONA
4,FL,1,2,"[62.874, 37.126]",62.874,12,FLORIDA
13,MO,1,3,"[58.282, 41.718, 0.0]",58.282,29,MISSOURI
17,ND,1,2,"[34.338, 65.66199999999999]",65.662,38,NORTH DAKOTA
21,NV,1,3,"[44.306000000000004, 55.693999999999996, 0.0]",55.694,32,NEVADA


In [62]:
def find_registered_pcts(races, demos):
    """Sum registered voters per demographic over the states in ``races``.

    Args:
        races: races to include; must have a ``"state_name"`` column.
        demos: rows with ``"state"``, ``"demographic"`` and ``"total_registered"``.

    Returns:
        DataFrame indexed by demographic with ``total_registered`` (summed over
        the matched states) and ``pct_total`` (share of the "Total" row, two
        decimal places).
    """
    # Left join on state name keeps every race, matched or not.
    joined = races.merge(demos, how="left", left_on="state_name", right_on="state")
    registered = joined.groupby("demographic")["total_registered"].sum()
    demo_totals = registered.to_frame()
    overall = demo_totals.loc["Total"]["total_registered"]
    demo_totals["pct_total"] = (demo_totals["total_registered"] / overall * 100).round(2)
    return demo_totals

In [63]:
# Registered-voter shares for close vs. other Senate races, side by side.
registered_close, registered_other = (
    find_registered_pcts(races, demo_by_state)
    for races in (close_senate_races, other_senate_races)
)
registered_close.join(registered_other, lsuffix="_close", rsuffix="_other")

Unnamed: 0_level_0,total_registered_close,pct_total_close,total_registered_other,pct_total_other
demographic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Asian alone,417.0,1.97,4699.0,4.68
Asian alone or in combination,465.0,2.2,5169.0,5.14
Black alone,2344.0,11.09,11527.0,11.47
Black alone or in combination,2453.0,11.61,12172.0,12.11
Female,11347.0,53.71,53340.0,53.09
Hispanic (of any race),2771.0,13.12,10617.0,10.57
Male,9780.0,46.29,47138.0,46.91
Total,21128.0,100.0,100477.0,100.0
White alone,17919.0,84.81,81234.0,80.85
White alone or in combination,18119.0,85.76,82760.0,82.37


---

---

---