In [2]:
import ibis
from pathlib import Path
import ibis_widget
import election_data as ed
from ibis.expr import types as ir

# presumably registers the `.widget()` method that later cells call on tables — TODO confirm
ibis_widget.install()
# Interactive mode: expressions are executed when they are displayed in the notebook.
ibis.options.interactive = True

from ibis import _

In [3]:
# see https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/
# There are other races available, such as primaries.
urls = {
    2017: "https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2017/2017%20November%20General.csv",
    2019: "https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2019/2019%20November%20General.csv",
    2021: "https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2021/2021%20November%20General%20.csv",
    # 2023 isn't available yet :( https://apps.elections.virginia.gov/SBE_CSV/ELECTIONS/ELECTIONRESULTS/2023/
}
paths = {year: Path(f"../data/va/{year}.csv") for year in urls}
for year, url in urls.items():
    path = paths[year]
    path.parent.mkdir(parents=True, exist_ok=True)
    if not path.exists():
        print(f"Downloading {year} results to {path}")
        !curl -o {path} {url}

In [31]:
# Read each cached CSV, tag its rows with the election year, and stack the
# per-year tables into a single table.
raws = [
    ibis.read_csv(csv_path).mutate(year=election_year)
    for election_year, csv_path in paths.items()
]
raw = ibis.union(*raws)
raw.widget()

┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1mCandidateUid[0m[1m                          [0m[1m [0m┃[1m [0m[1mFirstName[0m[1m [0m┃[1m [0m[1mMiddleName[0m[1m [0m┃[1m [0m[1mLastName[0m[1m      [0m[1m [0m┃[1m [0m[1mSuffix[0m[1m [0m┃[1m [0m[1mTOTAL_VOTES[0m[1m [0m┃[1m [0m[1mParty[0m[1m      [0m[1m [0m┃[1m [0m[1mWriteInVote[0m[1m [0m┃[1m [0m[1mLocalityUid[0m[1m                           [0m[1m [0m┃[1m [0m[1mLo

In [33]:
def make_county_fips_lookup() -> ibis.Table:
    """Return the distinct (state_po, county_name, county_fips) rows.

    The rows are read from the ``../data/cleaned.parquet`` file.
    """
    lookup_columns = ("state_po", "county_name", "county_fips")
    cleaned = ibis.read_parquet("../data/cleaned.parquet")
    return cleaned.select(*lookup_columns).distinct()


def add_county_fips(t: ibis.Table) -> ibis.Table:
    """Left-join the county FIPS lookup onto ``t``.

    ``t`` must have ``state_po`` and ``county_name`` columns; they are the
    join keys. The right-hand copies of the join keys are dropped afterwards.
    Rows of ``t`` without a match in the lookup are kept (left join).
    """
    join_keys = ["state_po", "county_name"]
    joined = t.left_join(make_county_fips_lookup(), join_keys)
    return joined.drop("state_po_right", "county_name_right")


t = raw

# Every loaded file must contain general-election rows only; once that is
# verified the election-level descriptor columns are dropped.
assert (t.ElectionType == "General").all().execute()
t = t.drop("ElectionType", "ElectionName")

# Drop the source system's identifier columns.
t = t.drop(
    "CandidateUid",
    "LocalityUid",
    "LocalityCode",
    "PrecinctUid",
    "DistrictUid",
    "OfficeUid",
    "ElectionUid",
)
t = t.mutate(
    election_date=_.ElectionDate.date(),
    state_po=ibis.literal("VA"),
    county_name=_.LocalityName.replace(" & ", " and "),
    # Each name part is coalesced to "" before concatenation: a NULL in any
    # part (for example a missing middle name or suffix) would otherwise make
    # the whole concatenated candidate name NULL. The extra spaces this
    # leaves behind are collapsed by the regex and strip below.
    candidate=(
        _.FirstName.coalesce("")
        + " "
        + _.MiddleName.coalesce("")
        + " "
        + _.LastName.coalesce("")
        + " "
        + _.Suffix.coalesce("")
    )
    .re_replace(r"\s+", " ")
    .strip()
    .upper(),
).drop("ElectionDate", "LocalityName", "FirstName", "MiddleName", "LastName", "Suffix")

# One distinct election date is expected per downloaded year.
assert (t.election_date.nunique() == len(urls)).execute()

t = add_county_fips(t)
t.widget()

┏━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1mTOTAL_VOTES[0m[1m [0m┃[1m [0m[1mParty[0m[1m      [0m[1m [0m┃[1m [0m[1mWriteInVote[0m[1m [0m┃[1m [0m[1mPrecinctName[0m[1m                     [0m[1m [0m┃[1m [0m[1mDistrictType[0m[1m      [0m[1m [0m┃[1m [0m[1mDistrictName[0m[1m [0m┃[1m [0m[1mOfficeTitle[0m[1m                                                    [0m[1m [0m┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1melection_date[0m[1m [0m┃[1m [0m[1mstate_po[0m[1m [0m┃[1m [0m[1mcounty_name[0m[1m      [0m[1m [0m┃[1m [0m[1mcandidate[0m[1m [0m┃[1m [0m[1mcounty_fips[0m[1m [0m┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━

In [16]:
# Check that the WriteInVote flag and the "WRITE IN VOTES" placeholder last
# name always agree, in both directions. The checks run against `raw` because
# `LastName` has already been dropped from `t` by the cleaning cell, so reading
# it from `t` fails when the notebook is run top to bottom.
assert (raw.filter(_.WriteInVote != 0).LastName == "WRITE IN VOTES").all().execute()
assert (raw.filter(_.WriteInVote == 0).LastName != "WRITE IN VOTES").all().execute()
assert (raw.filter(_.LastName == "WRITE IN VOTES").WriteInVote == 1).all().execute()
assert (raw.filter(_.LastName != "WRITE IN VOTES").WriteInVote == 0).all().execute()

# Replace the flag with a boolean `writein` column and give write-in rows the
# candidate name "WRITEIN"; every other row keeps its existing candidate.
# (The previous `ibis.case().when(...).else_()` had no fallback value and was
# never finished with `.end()`.)
t = t.mutate(
    writein=_.WriteInVote == 1,
    candidate=(_.WriteInVote == 1).ifelse("WRITEIN", _.candidate),
).drop("WriteInVote")

In [23]:
# Display the current state of `t` for inspection.
t.widget()

┏━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1mFirstName[0m[1m [0m┃[1m [0m[1mMiddleName[0m[1m [0m┃[1m [0m[1mLastName[0m[1m [0m┃[1m [0m[1mSuffix[0m[1m [0m┃[1m [0m[1mTOTAL_VOTES[0m[1m [0m┃[1m [0m[1mParty[0m[1m      [0m[1m [0m┃[1m [0m[1mWriteInVote[0m[1m [0m┃[1m [0m[1mPrecinctName[0m[1m                     [0m[1m [0m┃[1m [0m[1mDistrictType[0m[1m      [0m[1m [0m┃[1m [0m[1mDistrictName[0m[1m [0m┃[1m [0m[1mOfficeTitle[0m[1m              [0m[1m [0m┃[1m [0m[1mElectionType[0m[1m [0m┃[1m [0m[1mElectionName[0m[1m         [0m[1m [0m┃[1m [0m[1melection_date[0m[1m [0m┃[1m [0m

In [7]:
# Show the rows that have no county_fips, e.g. rows whose (state_po, county_name)
# found no match in the lookup during the left join.
t.filter(_.county_fips.isnull())

In [None]:
# Build the cleaned results table with the project's `election_data` helper.
# NOTE(review): this cell has no execution count, i.e. it was not run in the saved notebook.
# NOTE(review): `LocalityName` is dropped from `t` in the cleaning cell above, and `t`
# holds the 2017, 2019 and 2021 results while `year`/`date` here are fixed to 2017 —
# confirm the intended input table and arguments.
cleaned = ed.make_results(
    t,
    year=2017,
    date="2017-11-07",
    state_po="VA",
    county_name=_.LocalityName.replace(" & ", " and "),
)

In [22]:
# Browse the Virginia rows of the reference table whose candidate name is
# longer than 10 characters.
gold = ibis.read_parquet("../data/cleaned.parquet")
va_long_candidates = gold.filter(
    True,  # always-true placeholder predicate
    _.state_po == "VA",
    _.candidate.length() > 10,
)
va_long_candidates.widget()

┏━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━┓
┃[1m [0m[1m__row_id[0m[1m [0m┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mdate[0m[1m      [0m[1m [0m┃[1m [0m[1mstate_po[0m[1m [0m┃[1m [0m[1mcounty_name[0m[1m [0m┃[1m [0m[1mcounty_fips[0m[1m [0m┃[1m [0m[1mjurisdiction_name[0m[1m [0m┃[1m [0m[1mjurisdiction_fips[0m[1m [0m┃[1m [0m[1mdistrict[0m[1m [0m┃[1m [0m[1moffice[0m[1m     [0m[1m [0m┃[1m [0m[1mmagnitude[0m[1m [0m┃[1m [0m[1mspecial[0m[1m [0m┃[1m [0m[1mstage[0m[1m [0m[1m [0m┃[1m [0m[1mprecinct[0m[1m                     [0m[1m [0m┃[1m [0m[1mwritein[0m[1m [0m┃[1m [0m[1mcandidate[0m[1m        [0m[1m [0m┃[1m [0m[1mparty_detailed[0m

In [4]:
# ported from https://github.com/MEDSL/replication-scripts/blob/634832bc7c2df2c0a2238bc16781d07d581d3a3a/va2018.py
#
# NOTE(review): when this cell was last run it stopped on the first statement with
#   AttributeError: 'Table' object has no attribute 'fill_null'
# so none of the code below has been exercised in this notebook.
# NOTE(review): `con`, `csv` and `official_dtypes` are not defined or imported in the
# cells shown here — confirm where they are supposed to come from.
# NOTE(review): several calls below (`.str.zfill`, `.fillna`, `.replace({...})`,
# `.applymap`, `.to_csv(quoting=..., index=False)`) look like pandas API carried over
# from the original script — confirm the ibis equivalents before relying on them.


# Fill NA values with an empty string
t = t.fill_null("")

# Replace specific county name
t = t.mutate(
    county_name=ibis.case()
    .when(t.county_name == "KING & QUEEN", "KING AND QUEEN")
    .else_(t.county_name)
    .end()
)

# Load and merge FIPS codes
# NOTE(review): the table built in the earlier cells has a `state_po` column, not
# `state` — confirm the join key and the `t.state` references further down.
fips = con.table("../../../help-files/county-fips-codes.csv")
fips = fips.mutate(state=fips.state.upper())
t = t.join(fips, ["state", "county_name"], how="left")
t = t.mutate(county_fips=t.county_fips.cast("string").zfill(5))

# Load jurisdiction FIPS and merge based on conditions
juris_fips = con.table("../../../help-files/jurisdiction-fips-codes.csv")
juris_fips = juris_fips.mutate(state=juris_fips.state.upper())

# States that have at least one jurisdiction FIPS code longer than 5 characters.
states_w_juris = (
    juris_fips.filter(juris_fips.jurisdiction_fips.length() > 5)
    .state.distinct()
    .to_list()
)
# Only the first distinct state value of `t` is inspected here.
if t.state.distinct().execute()[0] not in states_w_juris:
    # No separate jurisdictions: reuse the county code and name.
    t = t.mutate(jurisdiction_fips=t.county_fips, jurisdiction_name=t.county_name)
else:
    juris_fips = juris_fips.mutate(
        county_fips=juris_fips.jurisdiction_fips.str.zfill(10).substr(0, 5)
    )
    t = t.join(juris_fips, ["state", "county_fips", "jurisdiction_name"], how="left")
    # Make unmatched jurisdictions explicit instead of leaving them null.
    t = t.mutate(
        jurisdiction_fips=ibis.ifelse(
            t.jurisdiction_fips.isnull(),
            "Missing Jurisdiction FIPS",
            t.jurisdiction_fips,
        )
    )

# Pad and clean district codes
# NOTE(review): `t` as built in the earlier cells has no `district` or `office`
# column (it has DistrictName / OfficeTitle) — confirm the expected input schema.
t = t.mutate(
    district=t.district.zfill(3).replace(
        {"00A": "A", "00B": "B", "00C": "C", "000": ""}
    )
)

# Set date and readme_check values
# NOTE(review): the date is fixed to the 2018 general election, while the data loaded
# earlier in this notebook covers 2017, 2019 and 2021 — confirm.
t = t.mutate(date="2018-11-06", readme_check="FALSE")

# Merge magnitudes data
mags = con.table("magnitudes.csv").fillna("")
t = t.join(mags, ["office", "jurisdiction_name", "district"], how="left")
# Rows without a magnitude entry get a magnitude of 1.
t = t.mutate(magnitude=t.magnitude.fillna(1).cast("int64"))

# Write-in candidate replacements
t = t.mutate(
    candidate=t.candidate.replace("[WRITE-IN]", "WRITEIN", regex=False).replace(
        r"\.", "", regex=True
    )
)

# Apply specific conditions for certain office values
# At-large supervisor races: move the "at large" marker from the office title into
# the district column.
t = t.mutate(
    district=ibis.case()
    .when(t.office == "MEMBER BOARD OF SUPERVISORS AT LARGE", "AT-LARGE")
    .else_(t.district)
    .end(),
    office=ibis.case()
    .when(
        t.office == "MEMBER BOARD OF SUPERVISORS AT LARGE",
        "MEMBER BOARD OF SUPERVISORS",
    )
    .else_(t.office)
    .end(),
)

# Load amendments data and concatenate
ammendments = con.table("va-18-ammendments.csv", schema=official_dtypes)
t = t.union(ammendments)

# Replace boolean values with string equivalents and trim extra spaces
t = t.replace({True: "TRUE", False: "FALSE"})
t = t.applymap(lambda x: x.strip() if isinstance(x, str) else x)

# Export to CSV
t.to_csv(
    "2018-va-precinct-general-updated.csv", quoting=csv.QUOTE_NONNUMERIC, index=False
)


AttributeError: 'Table' object has no attribute 'fill_null'