In [33]:
import csv
import glob
from collections import OrderedDict
import os
import time
from decimal import *
import pprint
pp = pprint.PrettyPrinter(indent=4)

In [34]:
# Run-wide settings.
primary = True
datadir = "snapshots/"   # parent directory that is globbed for snapshot folders

# Every Decimal operation in this notebook is rounded to 5 significant digits.
decimal_context = getcontext()
decimal_context.prec = 5

In [35]:
# Output column names, in the order they are written to the CSV header row.
# Each parsed vote row starts with every one of these keys set to "".
headers = """
    id raceid racetype racetypeid ballotorder candidateid
    description delegatecount electiondate electtotal electwon
    fipscode first incumbent initialization_data is_ballot_measure
    last lastupdated level national officeid officename party
    polid polnum precinctsreporting precinctsreportingpct precinctstotal
    reportingunitid reportingunitname runoff seatname seatnum statename
    statepostal test uncontested votecount votepct winner
""".split()

In [36]:
# Find the latest time-stamped snapshot folder: a reverse lexical sort puts the
# newest name first. Only directories are considered so a stray file under
# `datadir` can never be picked as the snapshot.
folders = sorted(
    (path for path in glob.glob(datadir + "*") if os.path.isdir(path)),
    reverse=True,
)
if not folders:
    raise FileNotFoundError("No snapshot folders found under " + datadir)
folder = folders[0] + "/"   # later cells build paths as folder + filename

# The snapshot is only parsed once a file named "done" exists inside it.
# NOTE(review): presumably the process that writes the snapshot creates "done"
# as its last step -- confirm against the scraper.
done_marker = folder + "done"
if not os.path.exists(done_marker):
    time.sleep(10)   # Try to beat a race condition
    if not os.path.exists(done_marker):
        # The original `print(quit)` only printed the quit object and carried on,
        # so later cells would parse an incomplete snapshot. Stop the run instead.
        raise RuntimeError("Snapshot is not complete (no 'done' marker): " + folder)

In [37]:
# Show which snapshot folder was selected; the cells below read info.txt and
# votes.txt from it and write their output files into it.
print("Parsing " + folder)

Parsing snapshots\20180830-094716/


In [38]:
# Lookup tables filled from info.txt:
#   masterraces       raceid -> {"electiontype", "racename", "Candidates", "Counties"}
#   mastercandidates  candidateid -> raceid
#   masterunits       unitid -> unitname
masterraces = OrderedDict()
mastercandidates = OrderedDict()
masterunits = OrderedDict()

with open(folder + "info.txt", encoding="utf-8") as f:
    rows = f.readlines()

for row in rows:
    row = row.strip()[1:-1]   # trim whitespace, then drop the [] line wrappers
    if "[" in row:
        # A "[" survived the trim, so something preceded the opening bracket.
        # Keep only what follows the first "[".
        print("Faulty row with extra character: " + row)
        row = row.split("[", 1)[1]
        print("Fixed row: " + row)
    if len(row) <= 4:   # blank row, nothing to parse
        continue

    rowtype = row[0]   # first character identifies the record type
    fields = [item.strip() for item in row.split("|")]   # lose any extra whitespace

    if rowtype == "r":     # race record
        racename, electiontype, raceid = fields[3:]
        masterraces[raceid] = {
            "electiontype": electiontype,
            "racename": racename,
            "Candidates": OrderedDict(),
            "Counties": OrderedDict(),
        }
    elif rowtype == "c":   # candidate record, attached to its race
        raceid, candidatelastname, candidatefirstname, candidateid = fields[3:]
        masterraces[raceid]["Candidates"][candidateid] = {
            "firstname": candidatefirstname,
            "lastname": candidatelastname,
        }
        mastercandidates[candidateid] = raceid
    elif rowtype == "u":   # unit record: id -> name
        unitname, unitid = fields[3:]
        masterunits[unitid] = unitname
    elif rowtype == "p":   # precinct count for one race in one unit
        raceid, unitid, precincts = fields[3:]
        precincts = int(precincts)
        unit_entry = OrderedDict()
        unit_entry["Precincts"] = precincts
        masterraces[raceid]["Counties"][unitid] = unit_entry
    else:
        print("Found non-conforming row: " + row)

Faulty row with extra character: [r|FLDOS|1|United States Senator|Republican Primary|120001
Fixed row: r|FLDOS|1|United States Senator|Republican Primary|120001


In [39]:
# Parse votes.txt into one output row per line.
#   masterlist  list of OrderedDicts whose keys are exactly `headers`, in that order
#   votedict    row "id" (race + reporting unit) -> votes summed over every row
#               sharing that id; used afterwards to compute each row's votepct
masterlist = []
votedict = {}
# Explicit UTF-8 to match how info.txt is read, instead of the platform default.
with open(folder + "votes.txt", "r", encoding="utf-8") as f:
    rows = f.readlines()
for row in rows:
    row = row.strip()[1:-1]   # Lose [] line wrappers
    if len(row) <= 4:
        # Blank row: skip it (same threshold the info.txt parser uses) rather
        # than failing on the unpack below.
        continue
    fields = [item.strip() for item in row.split("|")]   # Lose any extra whitespace
    # A vote row has 9 fields; the first two are not used.
    seqno, status, raceid, reportingunitid, precinctsreporting, candidateid, votes = fields[2:]
    votes = int(votes)
    precinctsreporting = int(precinctsreporting)

    race = masterraces[raceid]
    candidate = race["Candidates"][candidateid]
    precinctstotal = race["Counties"][reportingunitid]["Precincts"]
    rowid = "floridaofficial-" + raceid + "-" + reportingunitid

    # Dividing by Decimal(0) raises, so a unit with no precincts reports 0.
    if precinctstotal:
        precinctsreportingpct = Decimal(precinctsreporting) / Decimal(precinctstotal)
    else:
        precinctsreportingpct = Decimal(0)

    # Start from a blank row so every header is present, then fill in what we know.
    line = OrderedDict((item, "") for item in headers)
    line.update({
        "id": rowid,
        "raceid": raceid,
        "racetype": race["electiontype"],
        "racetypeid": race["electiontype"],
        "ballotorder": int(seqno),
        "candidateid": candidateid,
        "first": candidate["firstname"],
        "last": candidate["lastname"],
        "national": "FALSE",
        "officename": race["racename"],
        "polid": "floridaofficial-" + raceid,
        "precinctsreporting": precinctsreporting,
        "precinctstotal": precinctstotal,
        "precinctsreportingpct": precinctsreportingpct,
        "reportingunitid": reportingunitid,
        "reportingunitname": masterunits[reportingunitid],
        "statename": "Florida",
        "statepostal": "FL",
        "votecount": votes,
    })

    votedict[rowid] = votedict.get(rowid, 0) + votes
    masterlist.append(line)

In [40]:
"""
        NEED TO BUILD FIELD DESCRIPTIONS. Asking on AP for copyright.          

        Skipping description, delegatecount, electiondate, electtotal, electwon,
        incumbent, initalization_data, is_ballot_measure, last_updated, level,
        officeid, party, runoff, seatname, seatnum, test, uncontested, winner
        Really should look at last_updated, electiondate, party
        
"""

'\n        NEED TO BUILD FIELD DESCRIPTIONS. Asking on AP for copyright.          \n\n        Skipping description, delegatecount, electiondate, electtotal, electwon,\n        incumbent, initalization_data, is_ballot_measure, last_updated, level,\n        officeid, party, runoff, seatname, seatnum, test, uncontested, winner\n        Really should look at last_updated, electiondate, party\n        \n'

In [41]:
# Fill in each row's share of the votes cast for its id (race + reporting unit).
# The loop variable stays named `row` because the next cell prints it.
for row in masterlist:
    total_votes = votedict[row["id"]]
    if total_votes:
        row["votepct"] = Decimal(row["votecount"]) / Decimal(total_votes)
    else:
        # No votes counted yet for this race/unit: Decimal(0) / Decimal(0)
        # raises InvalidOperation, so report a 0 share instead.
        row["votepct"] = Decimal(0)


In [42]:
# Spot-check: `row` is still bound to the last element visited by the loop over
# masterlist in the previous cell, so this prints the final parsed row.
print(row)

OrderedDict([('id', 'floridaofficial-552008-12071'), ('raceid', '552008'), ('racetype', 'Nonpartisan Primary'), ('racetypeid', 'Nonpartisan Primary'), ('ballotorder', 999894), ('candidateid', '70243'), ('description', ''), ('delegatecount', ''), ('electiondate', ''), ('electtotal', ''), ('electwon', ''), ('fipscode', ''), ('first', 'John'), ('incumbent', ''), ('initialization_data', ''), ('is_ballot_measure', ''), ('last', 'McGowan'), ('lastupdated', ''), ('level', ''), ('national', 'FALSE'), ('officeid', ''), ('officename', 'Circuit Judge, 20th Judicial Circuit, Group 8'), ('party', ''), ('polid', 'floridaofficial-552008'), ('polnum', ''), ('precinctsreporting', 127), ('precinctsreportingpct', Decimal('1')), ('precinctstotal', 127), ('reportingunitid', '12071'), ('reportingunitname', 'Lee'), ('runoff', ''), ('seatname', ''), ('seatnum', ''), ('statename', 'Florida'), ('statepostal', 'FL'), ('test', ''), ('uncontested', ''), ('votecount', 55314), ('votepct', Decimal('0.57512')), ('winn

In [43]:
# Save the parsed rows as fl-elex.csv inside the snapshot folder.
# - encoding is explicit so the file does not depend on the platform default
#   (info.txt, where the names come from, is read as UTF-8).
# - DictWriter places each value under its header by key instead of relying on
#   the rows' insertion order, and raises ValueError on a key not in `headers`.
with open(folder + "fl-elex.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=headers)
    writer.writeheader()
    writer.writerows(masterlist)

In [None]:
# Re-read votes.txt and write it out as a tab-delimited resultsv2.txt in the
# snapshot folder, resolving ids to names through masterraces / masterunits.
# Encodings are explicit (UTF-8, matching how info.txt is read) so the result
# does not depend on the platform default.
with open(folder + "votes.txt", encoding="utf-8") as f:
    rows = f.readlines()

fieldnames = ["ElectionDate", "PartyCode", "PartyName", "RaceCode", "RaceName", "CountyCode",
              "CountyName", "Juris1num", "Juris2num", "Precincts", "PrecinctsReporting",
              "CanNameLast", "CanNameFirst", "CanNameMiddle", "CanVotes"]

with open(folder + "resultsv2.txt", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f, delimiter="\t")
    writer.writerow(fieldnames)
    for row in rows:
        row = row.strip()[1:-1]   # Lose [] line wrappers
        if len(row) <= 4:
            # Blank row: skip it (same threshold the info.txt parser uses).
            continue
        # Strip each field before using it as a lookup key: the lookup tables
        # were built from stripped values, so padded ids would raise KeyError.
        fields = [item.strip() for item in row.split("|")]
        # Fields 4-8: race id, unit id, precincts reporting, candidate id, votes.
        raceid, unitid, precinctsr, candidateid, votes = fields[4:9]

        race = masterraces[raceid]
        candidate = race["Candidates"][candidateid]

        # NOTE(review): columns not filled in below (ElectionDate, Juris1num,
        # Juris2num, CanNameMiddle) are written with the "HEY" placeholder --
        # confirm whether the consumer of this file expects them blank.
        line = OrderedDict.fromkeys(fieldnames, "HEY")
        line["PartyCode"] = race["electiontype"]
        line["PartyName"] = race["electiontype"]
        line["RaceCode"] = raceid
        line["RaceName"] = race["racename"]
        line["CountyCode"] = unitid
        line["CountyName"] = masterunits[unitid]
        line["Precincts"] = race["Counties"][unitid]["Precincts"]
        line["PrecinctsReporting"] = precinctsr
        line["CanNameLast"] = candidate["lastname"]
        line["CanNameFirst"] = candidate["firstname"]
        line["CanVotes"] = votes
        writer.writerow(line.values())
