# Analyzing Demographics of Close House and Senate Races

In [1]:
import pandas as pd

In [2]:
# Let displayed text columns show up to 100 characters before truncation.
pd.set_option("display.max_colwidth", 100)

# House Analysis

In [3]:
# Load the per-candidate House odds file and preview its first rows.
house_candidates = pd.read_csv(
    "../data/fivethirtyeight/house_candidate_odds.csv"
)
house_candidates.head(5)

Unnamed: 0,candidate,classic_prob,district,party,state
0,Alyse S. Galvin,28.732,1,D,AK
1,Don Young,71.268,1,R,AK
2,Others,0.0,1,,AK
3,Terri A. Sewell,100.0,7,D,AL
4,Danner Kline,0.016,6,D,AL


Calculate FiveThirtyEight's win-probability for each House race's leading candidate:

In [4]:
# One row per House race (district + state): the number of candidate rows
# and the highest `classic_prob` among them.
house_race_details = (
    house_candidates
    .groupby(["district", "state"])
    ["classic_prob"]
    .agg(["size", "max"])
    .rename(columns={"size": "candidates", "max": "max_classic_prob"})
    .reset_index()
)

house_race_details.head(5)

Unnamed: 0,district,state,candidates,max_classic_prob
0,1,AK,3,71.268
1,1,AL,2,99.958
2,1,AR,3,99.938
3,1,AZ,2,83.6
4,1,CA,2,77.966


In [5]:
# Postal-code to FIPS crosswalk, indexed by the `Code` column.
# FIPS is read as text rather than parsed as a number.
fips_cross = pd.read_csv(
    "../data/postal-to-fips-crosswalk.csv",
    dtype={"FIPS": str},
).set_index("Code")

In [6]:
# Preview the crosswalk.
fips_cross.head(5)

Unnamed: 0_level_0,State,FIPS
Code,Unnamed: 1_level_1,Unnamed: 2_level_1
AL,Alabama,1
AK,Alaska,2
AZ,Arizona,4
AR,Arkansas,5
CA,California,6


In [7]:
# Plain dict mapping each `Code` index label to its FIPS string.
fips_cross_dict = fips_cross.loc[:, "FIPS"].to_dict()

In [8]:
def make_district_fips(row):
    """Build the congressional-district FIPS string for one race row.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row["state"]`` (a key of ``fips_cross_dict``) and
        ``row["district"]`` (an integer district number).

    Returns
    -------
    str
        The state's FIPS string followed by a two-digit district suffix.

    Raises
    ------
    KeyError
        If ``row["state"]`` is not present in ``fips_cross_dict``.
    """
    at_large_states = ("AK", "DE", "MT", "ND", "SD", "VT", "WY")

    prefix = fips_cross_dict[row["state"]]

    # States with a single at-large district use suffix "00", not "01";
    # every other district number is zero-padded to two digits.
    suffix = (
        "00"
        if row["state"] in at_large_states
        else format(row["district"], "02d")
    )

    return prefix + suffix

In [9]:
# Attach a district FIPS string to every race, computed one row at a time.
house_race_details["fips"] = house_race_details.apply(
    make_district_fips,
    axis="columns",
)

In [10]:
# Preview the races with their new `fips` column.
house_race_details.head(5)

Unnamed: 0,district,state,candidates,max_classic_prob,fips
0,1,AK,3,71.268,200
1,1,AL,2,99.958,101
2,1,AR,3,99.938,501
3,1,AZ,2,83.6,401
4,1,CA,2,77.966,601


*Note: `close_races` includes FiveThirtyEight's "Toss-Up" and "Lean" races*

In [11]:
# "Close" races: the leading candidate's win probability is below 75.
close_races = house_race_details[
    (house_race_details["max_classic_prob"] < 75)
].copy()

# Every remaining race (probability of 75 or more).
other_races = house_race_details[
    (house_race_details["max_classic_prob"] >= 75)
]

# A missing probability would satisfy neither filter and silently drop a race.
assert len(close_races) + len(other_races) == len(house_race_details), (
    "Some House races have no max_classic_prob and fall in neither group"
)

print(
    f"There are {len(close_races)} close House races, "
    f"and {len(other_races)} other races."
)

There are 48 close Houses races, and 387 other races.


## Load and join congressional district demographics

In [12]:
# Congressional-district table from the census folder. `fips` is whatever
# follows the last "US" in GEOID (e.g. "50000US0101" -> "0101").
cd_demographics = (
    pd.read_csv("../data/census/CD.csv")
    .assign(
        fips=lambda frame: frame["GEOID"].map(
            lambda geoid: geoid.rpartition("US")[2]
        )
    )
)

cd_demographics.head(5)

Unnamed: 0,GEONAME,LNTITLE,GEOID,LNNUMBER,TOT_EST,TOT_MOE,ADU_EST,ADU_MOE,CIT_EST,CIT_MOE,CVAP_EST,CVAP_MOE,fips
0,"Congressional District 1 (115th Congress), Alabama",Total,50000US0101,1,699595,628.0,536070,435.0,688770,1226,526450,1047,101
1,"Congressional District 1 (115th Congress), Alabama",Not Hispanic or Latino,50000US0101,2,678770,665.0,522325,447.0,672665,826,516855,680,101
2,"Congressional District 1 (115th Congress), Alabama",American Indian or Alaska Native Alone,50000US0101,3,6630,570.0,5210,455.0,6600,552,5180,438,101
3,"Congressional District 1 (115th Congress), Alabama",Asian Alone,50000US0101,4,10145,372.0,7405,266.0,6985,553,4710,426,101
4,"Congressional District 1 (115th Congress), Alabama",Black or African American Alone,50000US0101,5,193805,1196.0,140300,551.0,193165,1213,139660,583,101


In [13]:
# `CVAP_EST` from each district's "Total" row, renamed to `district_total`.
cd_totals = (
    cd_demographics
    .loc[cd_demographics["LNTITLE"] == "Total", ["fips", "CVAP_EST"]]
    .rename(columns={"CVAP_EST": "district_total"})
)

# Every "Total" row must carry its own distinct, non-null fips.
assert len(cd_totals) == cd_totals["fips"].nunique()

cd_totals.head(5)

Unnamed: 0,fips,district_total
0,101,526450
13,102,514995
26,103,532720
39,104,510100
52,105,530085


In [14]:
# Each non-"Total" demographic row, joined to its district total and
# expressed as a percentage of that total (2 decimals).
cd_demographic_pcts = (
    cd_demographics
    .loc[
        cd_demographics["LNTITLE"] != "Total",
        ["GEONAME", "fips", "LNTITLE", "CVAP_EST"],
    ]
    .rename(columns={"LNTITLE": "demographic"})
    # m:1 -> many demographic rows per district, exactly one total per fips.
    .merge(cd_totals, on="fips", how="left", validate="m:1")
    .assign(
        pct=lambda frame: (
            frame["CVAP_EST"].mul(100).div(frame["district_total"]).round(2)
        )
    )
)

cd_demographic_pcts.head(5)

Unnamed: 0,GEONAME,fips,demographic,CVAP_EST,district_total,pct
0,"Congressional District 1 (115th Congress), Alabama",101,Not Hispanic or Latino,516855,526450,98.18
1,"Congressional District 1 (115th Congress), Alabama",101,American Indian or Alaska Native Alone,5180,526450,0.98
2,"Congressional District 1 (115th Congress), Alabama",101,Asian Alone,4710,526450,0.89
3,"Congressional District 1 (115th Congress), Alabama",101,Black or African American Alone,139660,526450,26.53
4,"Congressional District 1 (115th Congress), Alabama",101,Native Hawaiian or Other Pacific Islander Alone,115,526450,0.02


## Compare close and not close House races

In [15]:
# Stack the races three times (all / close / other), attach each district's
# demographic rows, then pool the estimates per demographic and category.
house_comparison = (
    pd.concat([
        house_race_details.assign(category="all"),
        close_races.assign(category="close"),
        other_races.assign(category="other"),
    ])
    .merge(cd_demographic_pcts, on="fips", how="left")
    .groupby(["demographic", "category"])
    [["CVAP_EST", "district_total"]]
    .sum()
    # Percentage of the pooled totals, not an average of per-district pcts.
    .assign(
        pct=lambda frame: (
            frame["CVAP_EST"].mul(100).div(frame["district_total"]).round(3)
        )
    )
    # Pivot `category` into columns: (measure, category) pairs.
    .unstack("category")
    [["CVAP_EST", "pct"]]
    .sort_values(by=("pct", "close"), ascending=False)
    .assign(
        close_vs_all=lambda frame: (
            frame[("pct", "close")].sub(frame[("pct", "all")])
        )
    )
    .reset_index()
)

house_comparison

Unnamed: 0_level_0,demographic,CVAP_EST,CVAP_EST,CVAP_EST,pct,pct,pct,close_vs_all
category,Unnamed: 1_level_1,all,close,other,all,close,other,Unnamed: 8_level_1
0,Not Hispanic or Latino,198637240,22634830,176002410,88.634,89.863,88.478,1.229
1,White Alone,156585940,18930760,137655180,69.87,75.158,69.201,5.288
2,Hispanic or Latino,25472225,2553245,22918980,11.366,10.137,11.522,-1.229
3,Black or African American Alone,27651255,2040750,25610505,12.338,8.102,12.875,-4.236
4,Asian Alone,8993485,989000,8004485,4.013,3.926,4.024,-0.087
5,American Indian or Alaska Native Alone,1521085,259140,1261945,0.679,1.029,0.634,0.35
6,American Indian or Alaska Native and White,1134245,134645,999600,0.506,0.535,0.503,0.029
7,Asian and White,806730,89380,717350,0.36,0.355,0.361,-0.005
8,Black or African American and White,741700,79090,662610,0.331,0.314,0.333,-0.017
9,Remainder of Two or More Race Responses,665230,61050,604180,0.297,0.242,0.304,-0.055


In [16]:
# Keep only the "... Alone" groups plus the exact "Hispanic or Latino" row.
house_comparison.loc[
    house_comparison["demographic"].str.contains(r"Alone|^Hispanic or Latino$")
]

Unnamed: 0_level_0,demographic,CVAP_EST,CVAP_EST,CVAP_EST,pct,pct,pct,close_vs_all
category,Unnamed: 1_level_1,all,close,other,all,close,other,Unnamed: 8_level_1
1,White Alone,156585940,18930760,137655180,69.87,75.158,69.201,5.288
2,Hispanic or Latino,25472225,2553245,22918980,11.366,10.137,11.522,-1.229
3,Black or African American Alone,27651255,2040750,25610505,12.338,8.102,12.875,-4.236
4,Asian Alone,8993485,989000,8004485,4.013,3.926,4.024,-0.087
5,American Indian or Alaska Native Alone,1521085,259140,1261945,0.679,1.029,0.634,0.35
10,Native Hawaiian or Other Pacific Islander Alone,321849,30340,291509,0.144,0.12,0.147,-0.024


In [17]:
# Close-race districts where "Black or African American Alone" is at most 2%,
# smallest share first.
(
    cd_demographic_pcts
    .loc[
        cd_demographic_pcts["demographic"].eq("Black or African American Alone")
        & cd_demographic_pcts["fips"].isin(close_races["fips"])
        & cd_demographic_pcts["pct"].le(2),
        ["GEONAME", "demographic", "pct"],
    ]
    .sort_values("pct")
)

Unnamed: 0,GEONAME,demographic,pct
2799,"Congressional District (at Large) (115th Congress), Montana",Black or African American Alone,0.36
2175,"Congressional District 2 (115th Congress), Maine",Black or African American Alone,0.44
915,"Congressional District 3 (115th Congress), Colorado",Black or African American Alone,0.82
4815,"Congressional District 4 (115th Congress), Utah",Black or African American Alone,1.22
819,"Congressional District 48 (115th Congress), California",Black or African American Alone,1.27
4995,"Congressional District 3 (115th Congress), Washington",Black or African American Alone,1.37
3075,"Congressional District 2 (115th Congress), New Mexico",Black or African American Alone,1.82
783,"Congressional District 45 (115th Congress), California",Black or African American Alone,1.86
2559,"Congressional District 1 (115th Congress), Minnesota",Black or African American Alone,1.91


In [18]:
# Subset of the close races whose leading probability is below 60.
tossup_races = close_races.loc[close_races["max_classic_prob"] < 60].copy()

In [19]:
# Toss-up districts where "White Alone" is at least 85%, largest share first.
(
    cd_demographic_pcts
    .loc[
        cd_demographic_pcts["demographic"].eq("White Alone")
        & cd_demographic_pcts["fips"].isin(tossup_races["fips"])
        & cd_demographic_pcts["pct"].ge(85),
        ["GEONAME", "demographic", "pct"],
    ]
    .sort_values("pct", ascending=False)
)

Unnamed: 0,GEONAME,demographic,pct
2561,"Congressional District 1 (115th Congress), Minnesota",White Alone,92.76
3353,"Congressional District 22 (115th Congress), New York",White Alone,91.27
2465,"Congressional District 7 (115th Congress), Michigan",White Alone,90.72
3317,"Congressional District 19 (115th Congress), New York",White Alone,88.76
2081,"Congressional District 6 (115th Congress), Kentucky",White Alone,87.61
2477,"Congressional District 8 (115th Congress), Michigan",White Alone,87.21
1985,"Congressional District 2 (115th Congress), Kansas",White Alone,86.94


# Senate Analysis

In [20]:
# Load the per-candidate Senate odds file and preview its first rows.
senate_candidates = pd.read_csv(
    "../data/fivethirtyeight/senate_candidate_odds.csv"
)
senate_candidates.head(5)

Unnamed: 0,candidate,class,classic_prob,party,state
0,Kyrsten Sinema,1,61.106,D,AZ
1,Angela Green,1,0.0,G,AZ
2,Martha McSally,1,38.894,R,AZ
3,Dianne Feinstein,1,98.378,D,CA
4,Kevin de Leon,1,1.622,D,CA


In [21]:
# One row per Senate race (state + class): the number of candidate rows
# and the highest `classic_prob` among them.
senate_race_details = (
    senate_candidates
    .groupby([
        "state",
        "class",
    ])
    .pipe(lambda grp: pd.DataFrame({
        "candidates": grp.size(),
        "max_classic_prob": grp["classic_prob"].max()
    }))
    .reset_index()
)

# Translate each state code to its FIPS string via the crosswalk dict.
senate_race_details["fips"] = senate_race_details["state"].map(fips_cross_dict)

# A state missing from the crosswalk would otherwise get a null fips and
# silently drop out of the later merge on `fips`; fail loudly instead.
assert senate_race_details["fips"].notna().all(), (
    "Some Senate race states are missing from the FIPS crosswalk"
)

senate_race_details.head()

Unnamed: 0,state,class,candidates,max_classic_prob,fips
0,AZ,1,3,61.106,4
1,CA,1,2,98.378,6
2,CT,1,3,99.478,9
3,DE,1,3,99.892,10
4,FL,1,2,63.442,12


In [22]:
# Same 75 cut-off as the House split: below it is "close", the rest "other".
close_senate_races = senate_race_details.loc[
    senate_race_details["max_classic_prob"] < 75
].copy()

other_senate_races = senate_race_details.loc[
    senate_race_details["max_classic_prob"] >= 75
]

print(
    f"There are {len(close_senate_races)} close Senate races, "
    f"and {len(other_senate_races)} other races."
)

There are 6 close Senate races, and 29 other races.


## Load and join state demographics

In [23]:
# State-level table from the census folder. `fips` is whatever follows the
# last "US" in GEOID (e.g. "04000US01" -> "01").
state_demographics = (
    pd.read_csv("../data/census/State.csv")
    .assign(
        fips=lambda frame: frame["GEOID"].map(
            lambda geoid: geoid.rpartition("US")[2]
        )
    )
)
state_demographics.head(5)

Unnamed: 0,GEONAME,LNTITLE,GEOID,LNNUMBER,TOT_EST,TOT_MOE,ADU_EST,ADU_MOE,CIT_EST,CIT_MOE,CVAP_EST,CVAP_MOE,fips
0,Alabama,Total,04000US01,1,4841165,,3735975,447.0,4734500,2609,3639495,2232,1
1,Alabama,Not Hispanic or Latino,04000US01,2,4647660,399.0,3617085,475.0,4602635,1939,3577115,1708,1
2,Alabama,American Indian or Alaska Native Alone,04000US01,3,21960,1106.0,17545,846.0,21910,1096,17500,834,1
3,Alabama,Asian Alone,04000US01,4,61675,1263.0,47015,857.0,38605,1262,26850,983,1
4,Alabama,Black or African American Alone,04000US01,5,1278750,2233.0,951850,1133.0,1271600,2379,945515,1311,1


In [24]:
# `CVAP_EST` from each state's "Total" row, renamed to `state_total`.
state_totals = (
    state_demographics[
        state_demographics["LNTITLE"] == "Total"
    ]
    [[ "fips", "CVAP_EST"]]
    .rename(columns = { "CVAP_EST": "state_total" })
)

# As many "Total" rows as there are distinct fips values in the full table...
assert state_demographics["fips"].nunique() == len(state_totals)
# ...and no fips repeated among them. Together these mean every fips has
# exactly one "Total" row (a duplicate offset by a missing one cannot pass).
assert state_totals["fips"].nunique() == len(state_totals)

state_totals.head()

Unnamed: 0,fips,state_total
0,1,3639495
13,2,527810
26,4,4613575
39,5,2175340
52,6,24582605


In [25]:
# Each non-"Total" demographic row, joined to its state total and expressed
# as a percentage of that total (3 decimals).
state_demographic_pcts = (
    state_demographics
    .loc[
        state_demographics["LNTITLE"] != "Total",
        ["GEONAME", "fips", "LNTITLE", "CVAP_EST"],
    ]
    .rename(columns={"LNTITLE": "demographic"})
    # m:1 -> many demographic rows per state, exactly one total per fips.
    .merge(state_totals, on="fips", how="left", validate="m:1")
    .assign(
        pct=lambda frame: (
            frame["CVAP_EST"].mul(100).div(frame["state_total"]).round(3)
        )
    )
)

state_demographic_pcts.head(5)

Unnamed: 0,GEONAME,fips,demographic,CVAP_EST,state_total,pct
0,Alabama,1,Not Hispanic or Latino,3577115,3639495,98.286
1,Alabama,1,American Indian or Alaska Native Alone,17500,3639495,0.481
2,Alabama,1,Asian Alone,26850,3639495,0.738
3,Alabama,1,Black or African American Alone,945515,3639495,25.979
4,Alabama,1,Native Hawaiian or Other Pacific Islander Alone,955,3639495,0.026


## Compare close and not close Senate races

In [26]:
# Build three fips lists (all / close / other), attach each state's
# demographic rows, then pool the estimates per demographic and category.
# NOTE(review): "all" is every fips in `state_totals`, not just the fips of
# the Senate races, so close + other need not add up to all. Confirm intended.
# Each race row contributes its state's figures once, so a fips appearing in
# several race rows of one category is counted once per row.
senate_comparison = (
    pd.concat([
        pd.DataFrame({"fips": state_totals["fips"].values}).assign(category="all"),
        close_senate_races[["fips"]].assign(category="close"),
        other_senate_races[["fips"]].assign(category="other"),
    ])
    .merge(state_demographic_pcts, on="fips", how="left")
    .groupby(["demographic", "category"])
    [["CVAP_EST", "state_total"]]
    .sum()
    # Percentage of the pooled totals, not an average of per-state pcts.
    .assign(
        pct=lambda frame: (
            frame["CVAP_EST"].mul(100).div(frame["state_total"]).round(3)
        )
    )
    # Pivot `category` into columns: (measure, category) pairs.
    .unstack("category")
    [["CVAP_EST", "pct"]]
    .sort_values(by=("pct", "close"), ascending=False)
    .assign(
        close_vs_all=lambda frame: (
            frame[("pct", "close")].sub(frame[("pct", "all")])
        )
    )
    .reset_index()
)

senate_comparison

Unnamed: 0_level_0,demographic,CVAP_EST,CVAP_EST,CVAP_EST,pct,pct,pct,close_vs_all
category,Unnamed: 1_level_1,all,close,other,all,close,other,Unnamed: 8_level_1
0,Not Hispanic or Latino,199128785,26572845,124510265,87.6,86.579,87.1,-1.021
1,White Alone,156808060,21503275,98163800,68.983,70.062,68.669,1.079
2,Hispanic or Latino,28186855,4119090,18441305,12.4,13.421,12.9,1.021
3,Black or African American Alone,27891260,3697665,15643685,12.27,12.048,10.943,-0.222
4,Asian Alone,9009930,634230,7402400,3.964,2.066,5.178,-1.898
5,American Indian or Alaska Native Alone,1522315,290455,710665,0.67,0.946,0.497,0.276
6,American Indian or Alaska Native and White,1135030,142390,613405,0.499,0.464,0.429,-0.035
7,Black or African American and White,746105,89695,499430,0.328,0.292,0.349,-0.036
8,Asian and White,810000,88535,589780,0.356,0.288,0.413,-0.068
9,Remainder of Two or More Race Responses,667240,73570,493895,0.294,0.24,0.345,-0.054


In [27]:
# Keep only the "... Alone" groups plus the exact "Hispanic or Latino" row.
senate_comparison.loc[
    senate_comparison["demographic"].str.contains(r"Alone|^Hispanic or Latino$")
]

Unnamed: 0_level_0,demographic,CVAP_EST,CVAP_EST,CVAP_EST,pct,pct,pct,close_vs_all
category,Unnamed: 1_level_1,all,close,other,all,close,other,Unnamed: 8_level_1
1,White Alone,156808060,21503275,98163800,68.983,70.062,68.669,1.079
2,Hispanic or Latino,28186855,4119090,18441305,12.4,13.421,12.9,1.021
3,Black or African American Alone,27891260,3697665,15643685,12.27,12.048,10.943,-0.222
4,Asian Alone,9009930,634230,7402400,3.964,2.066,5.178,-1.898
5,American Indian or Alaska Native Alone,1522315,290455,710665,0.67,0.946,0.497,0.276
10,Native Hawaiian or Other Pacific Islander Alone,322160,33120,253355,0.142,0.108,0.177,-0.034


---

---

---