In [1]:
import ibis
from pathlib import Path
import ibis_widget
import election_data as ed
from ibis.expr import types as ir

ibis_widget.install()
ibis.options.interactive = True

from ibis import _

In [2]:
# see https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/
# There are other races available, such as primaries.
urls = {
    2017: "https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2017/2017%20November%20General.csv",
    2019: "https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2019/2019%20November%20General.csv",
    2021: "https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2021/2021%20November%20General%20.csv",
    # 2023 isn't available yet :( https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2023/
}
paths = {year: Path(f"../data/va/{year}.csv") for year in urls}
for year, url in urls.items():
    path = paths[year]
    path.parent.mkdir(parents=True, exist_ok=True)
    if not path.exists():
        print(f"Downloading {year} results to {path}")
        !curl -o {path} {url}

In [3]:
raws = [ibis.read_csv(paths[year]).mutate(year=year) for year in urls]
raw = ibis.union(*raws)
raw.widget()

┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1mCandidateUid[0m[1m                          [0m[1m [0m┃[1m [0m[1mFirstName[0m[1m [0m┃[1m [0m[1mMiddleName[0m[1m [0m┃[1m [0m[1mLastName[0m[1m      [0m[1m [0m┃[1m [0m[1mSuffix[0m[1m [0m┃[1m [0m[1mTOTAL_VOTES[0m[1m [0m┃[1m [0m[1mParty[0m[1m      [0m[1m [0m┃[1m [0m[1mWriteInVote[0m[1m [0m┃[1m [0m[1mLocalityUid[0m[1m                           [0m[1m [0m┃[1m [0m[1mLocalityCode

In [4]:
t = raw
assert (t.ElectionType == "General").all().execute()
t = t.drop("ElectionType")
t = t.drop("ElectionName")

t = t.drop(
    "CandidateUid",
    "LocalityUid",
    "LocalityCode",
    "PrecinctUid",
    "DistrictUid",
    "OfficeUid",
    "ElectionUid",
)
t = t.mutate(votes=_.TOTAL_VOTES).drop("TOTAL_VOTES")
t = t.mutate(election_date=_.ElectionDate.date()).drop("ElectionDate")

assert (t.election_date.nunique()).execute() == len(urls)
t = t.cache()
t.widget()

┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1mFirstName[0m[1m [0m┃[1m [0m[1mMiddleName[0m[1m [0m┃[1m [0m[1mLastName[0m[1m      [0m[1m [0m┃[1m [0m[1mSuffix[0m[1m [0m┃[1m [0m[1mParty[0m[1m      [0m[1m [0m┃[1m [0m[1mWriteInVote[0m[1m [0m┃[1m [0m[1mLocalityName[0m[1m   [0m[1m [0m┃[1m [0m[1mPrecinctName[0m[1m  [0m[1m [0m┃[1m [0m[1mDistrictType[0m[1m      [0m[1m [0m┃[1m [0m[1mDistrictName[0m[1m [0m┃[1m [0m[1mOfficeTitle[0m[1m                    [0m[1m [0m┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mvotes[0m[1m [0m┃[1m [0m[1melection_date[0m[1m [0m┃
┡━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇

In [5]:
def make_county_fips_lookup() -> ibis.Table:
    t = ibis.read_parquet("../data/cleaned.parquet")
    return t.select(
        "state_po",
        "county_name",
        "county_fips",
    ).distinct()


def add_county_fips(t: ibis.Table) -> ibis.Table:
    lookup = make_county_fips_lookup()
    return t.left_join(lookup, ["state_po", "county_name"]).drop(
        "state_po_right", "county_name_right"
    )


t = t.mutate(
    state_po=ibis.literal("VA"),
    county_name=_.LocalityName.upper().replace(" & ", " AND ").replace(" COUNTY", ""),
).drop("LocalityName")
t = add_county_fips(t)

In [6]:
t = t.mutate(
    candidate=(
        _.FirstName.fill_null("")
        + " "
        + _.MiddleName.fill_null("")
        + " "
        + _.LastName.fill_null("")
        + " "
        + _.Suffix.fill_null("")
    )
    .re_replace(r"\s+", " ")
    .strip()
    .upper(),
).drop("FirstName", "MiddleName", "LastName", "Suffix")

assert (t.filter(_.WriteInVote != 0).candidate == "WRITE IN VOTES").all().execute()
assert (t.filter(_.WriteInVote == 0).candidate != "WRITE IN VOTES").all().execute()
assert (t.filter(_.candidate == "WRITE IN VOTES").WriteInVote == 1).all().execute()
assert (t.filter(_.candidate != "WRITE IN VOTES").WriteInVote == 0).all().execute()

t = t.mutate(
    writein=_.WriteInVote == 1,
    candidate=ibis.case().when(_.WriteInVote == 1, "WRITEIN").else_(_.candidate).end(),
).drop("WriteInVote")
t.widget()

┏━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1mParty[0m[1m      [0m[1m [0m┃[1m [0m[1mPrecinctName[0m[1m           [0m[1m [0m┃[1m [0m[1mDistrictType[0m[1m      [0m[1m [0m┃[1m [0m[1mDistrictName[0m[1m [0m┃[1m [0m[1mOfficeTitle[0m[1m                                                    [0m[1m [0m┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mvotes[0m[1m [0m┃[1m [0m[1melection_date[0m[1m [0m┃[1m [0m[1mstate_po[0m[1m [0m┃[1m [0m[1mcounty_name[0m[1m      [0m[1m [0m┃[1m [0m[1mcounty_fips[0m[1m [0m┃[1m [0m[1mcandidate[0m[1m                  [0m[1m [0m┃[1m [0m[1mwritein[0m[1m [0m┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━

In [7]:
t.Party.topk(10).execute()

Unnamed: 0,Party,CountStar()
0,,57780
1,Democratic,37692
2,Republican,36698
3,Independent,35117
4,Libertarian,3337
5,Liberation,3079
6,Write-In,476
7,Green,172


In [8]:
# when party is write-in, that isn't a writein *candidate*:
# Their name actually appeared on the ballot, but they chose to
# use the write-in party.
assert (~t.filter(_.Party == "Write-In").writein).all().execute()

In [9]:
party_mapping = {
    "Democratic": ("DEMOCRAT", "DEMOCRAT"),
    "Republican": ("REPUBLICAN", "REPUBLICAN"),
    "Libertarian": ("LIBERTARIAN", "LIBERTARIAN"),
    "Liberation": ("OTHER", "LIBERATION"),
    "Green": ("GREEN", "GREEN"),
    "Independent": ("INDEPENDENT", "INDEPENDENT"),
    "Write-In": ("OTHER", None),
}
assert t.Party.isin(list(party_mapping)).all().execute()
simplified = ed.util.cases(*[(t.Party == k, v[0]) for k, v in party_mapping.items()])
detailed = ed.util.cases(*[(t.Party == k, v[1]) for k, v in party_mapping.items()])
t = t.mutate(party_simplified=simplified, party_detailed=detailed).drop("Party")
ed.tests.test_party(t)

In [14]:
gold = ibis.read_parquet("../data/cleaned.parquet")
(
    gold.filter(
        True,
        # _.state_po == "VA",
        # _.candidate.length() > 10,
        # _.writein,
        # _.party_simplified != "OTHER",
        # _.party_simplified != _.party_detailed,
        # _.party_detailed == "LIBERATION",
        # _.party_simplified == "INDEPENDENT",
        # _.party_simplified == "INDEPENDENT",
    )
    .select(
        # "state_po",
        "party_simplified",
        "party_detailed",
    )
    .value_counts()
).widget()

┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1mparty_simplified[0m[1m [0m┃[1m [0m[1mparty_detailed[0m[1m        [0m[1m [0m┃[1m [0m[1mparty_simplified_party_detailed_count[0m[1m [0m┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ [2mint64[0m    │ [2mstring[0m           │ [2mstring[0m                 │ [2mint64[0m                                 │
├──────────┼──────────────────┼────────────────────────┼───────────────────────────────────────┤
│        [1;36m0[0m │ [32mINDEPENDENT     [0m │ [32mINDEPENDENT           [0m │                                [1;36m738796[0m │
│        [1;36m1[0m │ [32mOTHER           [0m │ [32mUNITY                 [0m │                                 [1;36m24156[0m │
│        [1;36m2[0m │ [32mOTHER           [0m │ [32mFREEDOM AND PROSPERITY[0m │                    

In [16]:
gold.mode.topk(30).mode.execute().to_list()

['TOTAL',
 'ELECTION DAY',
 'ABSENTEE',
 'PROVISIONAL',
 'EARLY',
 'NOT ABSENTEE',
 'ABSENTEE BY MAIL',
 'UNSPECIFIED',
 'ONE STOP',
 'IN-PERSON',
 'MAIL',
 'WRITE-IN',
 'MAIL-IN CANVASS 1',
 'MAIL-IN CANVASS 2',
 'EARLY VOTING - IVO',
 'ELECTION DAY - IVO',
 'ELECTION DAY - OSS',
 'EARLY VOTING - OSS',
 '2ND ABSENTEE',
 'ADVANCE IN PERSON',
 'FAILSAFE',
 'FAILSAFE PROVISIONAL',
 'IN PERSON ABSENTEE',
 'MACHINE',
 'ADVANCE IN PERSON 2',
 'MAIL BALLOTS',
 'ADVANCE IN PERSON 1',
 'ABSENTEE / MILITARY',
 'AFFIDAVIT',
 'ADVANCE IN PERSON 3']

In [13]:
# ported from https://github.com/MEDSL/replication-scripts/blob/634832bc7c2df2c0a2238bc16781d07d581d3a3a/va2018.py


# Fill NA values with an empty string
t = t.fill_null("")

# Replace specific county name
t = t.mutate(
    county_name=ibis.case()
    .when(t.county_name == "KING & QUEEN", "KING AND QUEEN")
    .else_(t.county_name)
    .end()
)

# Load and merge FIPS codes
fips = con.table("../../../help-files/county-fips-codes.csv")
fips = fips.mutate(state=fips.state.upper())
t = t.join(fips, ["state", "county_name"], how="left")
t = t.mutate(county_fips=t.county_fips.cast("string").zfill(5))

# Load jurisdiction FIPS and merge based on conditions
juris_fips = con.table("../../../help-files/jurisdiction-fips-codes.csv")
juris_fips = juris_fips.mutate(state=juris_fips.state.upper())

states_w_juris = (
    juris_fips.filter(juris_fips.jurisdiction_fips.length() > 5)
    .state.distinct()
    .to_list()
)
if t.state.distinct().execute()[0] not in states_w_juris:
    t = t.mutate(jurisdiction_fips=t.county_fips, jurisdiction_name=t.county_name)
else:
    juris_fips = juris_fips.mutate(
        county_fips=juris_fips.jurisdiction_fips.str.zfill(10).substr(0, 5)
    )
    t = t.join(juris_fips, ["state", "county_fips", "jurisdiction_name"], how="left")
    t = t.mutate(
        jurisdiction_fips=ibis.ifelse(
            t.jurisdiction_fips.isnull(),
            "Missing Jurisdiction FIPS",
            t.jurisdiction_fips,
        )
    )

# Pad and clean district codes
t = t.mutate(
    district=t.district.zfill(3).replace(
        {"00A": "A", "00B": "B", "00C": "C", "000": ""}
    )
)

# Set date and readme_check values
t = t.mutate(date="2018-11-06", readme_check="FALSE")

# Merge magnitudes data
mags = con.table("magnitudes.csv").fillna("")
t = t.join(mags, ["office", "jurisdiction_name", "district"], how="left")
t = t.mutate(magnitude=t.magnitude.fillna(1).cast("int64"))

# Write-in candidate replacements
t = t.mutate(
    candidate=t.candidate.replace("[WRITE-IN]", "WRITEIN", regex=False).replace(
        r"\.", "", regex=True
    )
)

# Apply specific conditions for certain office values
t = t.mutate(
    district=ibis.case()
    .when(t.office == "MEMBER BOARD OF SUPERVISORS AT LARGE", "AT-LARGE")
    .else_(t.district)
    .end(),
    office=ibis.case()
    .when(
        t.office == "MEMBER BOARD OF SUPERVISORS AT LARGE",
        "MEMBER BOARD OF SUPERVISORS",
    )
    .else_(t.office)
    .end(),
)

# Load amendments data and concatenate
ammendments = con.table("va-18-ammendments.csv", schema=official_dtypes)
t = t.union(ammendments)

# Replace boolean values with string equivalents and trim extra spaces
t = t.replace({True: "TRUE", False: "FALSE"})
t = t.applymap(lambda x: x.strip() if isinstance(x, str) else x)

# Export to CSV
t.to_csv(
    "2018-va-precinct-general-updated.csv", quoting=csv.QUOTE_NONNUMERIC, index=False
)


IbisTypeError: Cannot fill_null on column 'year' of type int16 with a value of type string - pass in an explicit mapping of fill values to `fill_null` instead.