Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cdf Json Fixes #506

Merged
merged 37 commits into from
Sep 25, 2020
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
427c485
plumbing for xml cdf reading
moldover Jul 31, 2020
36f53cc
add contestId to json and xml CDF parse logic
moldover Aug 3, 2020
687b25b
dont parse Candidate data from CDF at runtime - this enforces the sam…
moldover Aug 3, 2020
4f39417
Handle ContestSelections in XML correctly
moldover Aug 5, 2020
a4cf53d
Merge branch 'develop' into xml_cdf
moldover Aug 23, 2020
6d74890
add NIST example 2 (seems to be correctly formed)
moldover Aug 24, 2020
b071b55
fill out CDF XML class definitions needed for tabulation
moldover Aug 24, 2020
865d104
add some helpers
moldover Aug 24, 2020
f2a7791
cleanup pre-processing code
moldover Aug 24, 2020
e9d9b80
add more class fields around contest selections
moldover Aug 31, 2020
05ff970
handle write-ins
moldover Aug 31, 2020
eae666a
get first 2 Unisyn regression tests working :)
moldover Aug 31, 2020
d504d23
fix first NIST CDF XML test
moldover Aug 31, 2020
2bbfcdb
cleanup test names
moldover Aug 31, 2020
3e1c591
parse GpUnit from CVRs
moldover Aug 31, 2020
598f52a
remove un-used assets
moldover Aug 31, 2020
c5384ce
Merge branch 'develop' into xml_cdf
moldover Aug 31, 2020
a5638ec
updates for PR #504
moldover Sep 2, 2020
c6e0a52
updated data from Unisyn with GpUnit (precinct) parsing validated
moldover Sep 3, 2020
c86c840
add more Unisyn regression tests
moldover Sep 5, 2020
b25d4e3
tabulate all elections - in practice we should not see more than one
moldover Sep 5, 2020
d7bbc3e
updates for PR #504
moldover Sep 5, 2020
0886035
better logging
moldover Sep 12, 2020
c7df230
first cut at fixing JSON CDF reader:
moldover Sep 12, 2020
ea1e69c
update ResultsWriter CVR generation code to create Candidate objects …
moldover Sep 12, 2020
6583599
updates for PR #504
moldover Sep 13, 2020
3d1e7e0
fix typo
moldover Sep 13, 2020
5ab9ac9
update test asset with contest name
moldover Sep 13, 2020
662ee9d
better handling for cdf reader parse errors
moldover Sep 13, 2020
28246f3
Merge branch 'json_cdf' into xml_cdf
moldover Sep 13, 2020
fca0371
update all CDF json assets
moldover Sep 13, 2020
57d5d89
Updates to CDF parsing logic:
moldover Sep 15, 2020
83ba2b1
Merge branch 'develop' into json_cdf
moldover Sep 20, 2020
41fb6e9
cleanup for PR #506
moldover Sep 21, 2020
845baf2
updates for PR #505
moldover Sep 23, 2020
ca15c7a
Merge branch 'develop' into json_cdf
moldover Sep 23, 2020
b656d11
Throw if undeclared write-in is found but label has not been defined.
moldover Sep 24, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
768 changes: 665 additions & 103 deletions src/main/java/network/brightspots/rcv/CommonDataFormatReader.java

Large diffs are not rendered by default.

43 changes: 20 additions & 23 deletions src/main/java/network/brightspots/rcv/ContestConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ class ContestConfig {
private static final int MAX_MULTI_SEAT_BOTTOMS_UP_PERCENTAGE_THRESHOLD = 100;
private static final long MIN_RANDOM_SEED = -140737488355328L;
private static final long MAX_RANDOM_SEED = 140737488355327L;
private static final String JSON_EXTENSION = ".json";
private static final String MAX_SKIPPED_RANKS_ALLOWED_UNLIMITED_OPTION = "unlimited";
private static final String MAX_RANKINGS_ALLOWED_NUM_CANDIDATES_OPTION = "max";
static final String SUGGESTED_MAX_RANKINGS_ALLOWED = MAX_RANKINGS_ALLOWED_NUM_CANDIDATES_OPTION;
Expand All @@ -100,9 +99,7 @@ private ContestConfig(RawContestConfig rawConfig, String sourceDirectory) {
}

static boolean isCdf(CvrSource source) {
return getProvider(source) == Provider.CDF
&& source.getFilePath() != null
&& source.getFilePath().toLowerCase().endsWith(JSON_EXTENSION);
return getProvider(source) == Provider.CDF;
}

static ContestConfig loadContestConfig(RawContestConfig rawConfig, String sourceDirectory) {
Expand Down Expand Up @@ -209,6 +206,15 @@ static boolean passesBasicCvrSourceValidation(CvrSource source) {
Logger.log(Level.SEVERE, "overvoteDelimiter is invalid.");
}
} else {
if (provider == Provider.CDF) {
if (!source.getFilePath().toLowerCase().endsWith(".xml") && !source.getFilePath()
.toLowerCase().endsWith(".json")) {
Logger
.severe("CDF source files must be .json or .xml! Unexpected file extension for: %s",
source.getFilePath());
sourceValid = false;
}
}
if (fieldIsDefinedButShouldNotBeForProvider(
source.getFirstVoteColumnIndex(),
"firstVoteColumnIndex",
Expand Down Expand Up @@ -250,17 +256,19 @@ static boolean passesBasicCvrSourceValidation(CvrSource source) {
}
}

if (isNullOrBlank(source.getContestId()) &&
(provider == Provider.DOMINION || provider == Provider.HART
|| provider == Provider.CLEAR_BALLOT)) {
boolean providerRequiresContestId = provider == Provider.DOMINION ||
provider == Provider.HART ||
provider == Provider.CLEAR_BALLOT ||
provider == Provider.CDF;

if (isNullOrBlank(source.getContestId()) && providerRequiresContestId) {
sourceValid = false;
Logger.log(
Level.SEVERE,
String.format("contestId must be defined for CVR source with provider \"%s\"!",
getProvider(source).toString()));
} else if (
!(provider == Provider.DOMINION || provider == Provider.HART
|| provider == Provider.CLEAR_BALLOT) &&
!(providerRequiresContestId) &&
fieldIsDefinedButShouldNotBeForProvider(
source.getContestId(),
"contestId",
Expand Down Expand Up @@ -1025,22 +1033,11 @@ void setCandidateExclusionStatus(String candidateCode, boolean excluded) {
}

// perform pre-processing on candidates:
// 1) if there are any CDF input sources extract candidates names from them
// 2) build map of candidate ID to candidate name
// 3) generate tie-break ordering if needed
// 1) build map of candidate ID to candidate name
// 2) generate tie-break ordering if needed
// 3) add uwi candidate if needed
private void processCandidateData() {
candidateCodeToNameMap = new HashMap<>();

for (RawContestConfig.CvrSource source : rawConfig.cvrFileSources) {
// for any CDF sources extract candidate names
if (isCdf(source)) {
String cvrPath = resolveConfigPath(source.getFilePath());
CommonDataFormatReader reader = new CommonDataFormatReader(cvrPath, this);
candidateCodeToNameMap = reader.getCandidates();
candidatePermutation.addAll(candidateCodeToNameMap.keySet());
}
}

if (rawConfig.candidates != null) {
for (RawContestConfig.Candidate candidate : rawConfig.candidates) {
String code = candidate.getCode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ public void buttonCvrFilePathClicked() {
case CDF -> {
FileChooser fc = new FileChooser();
fc.setInitialDirectory(new File(FileUtils.getUserDirectory()));
fc.getExtensionFilters().add(new ExtensionFilter("JSON files", "*.json"));
fc.getExtensionFilters().add(new ExtensionFilter("JSON and XML files", "*.json", "*.xml"));
fc.setTitle("Select " + provider + " Cast Vote Record File");
openFile = fc.showOpenDialog(GuiContext.getInstance().getMainWindow());
}
Expand Down Expand Up @@ -902,12 +902,7 @@ public LocalDate fromString(String string) {
textFieldCvrPrecinctCol.setDisable(false);
textFieldCvrOvervoteDelimiter.setDisable(false);
}
case CDF -> {
buttonAddCvrFile.setDisable(false);
textFieldCvrFilePath.setDisable(false);
buttonCvrFilePath.setDisable(false);
}
case CLEAR_BALLOT, DOMINION, HART -> {
case CLEAR_BALLOT, DOMINION, HART, CDF -> {
buttonAddCvrFile.setDisable(false);
textFieldCvrFilePath.setDisable(false);
buttonCvrFilePath.setDisable(false);
Expand Down
12 changes: 12 additions & 0 deletions src/main/java/network/brightspots/rcv/Logger.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,18 @@ static void log(Level level, String format, Object... obj) {
logger.log(level, String.format(format, obj));
}

static void info(String format, Object... obj) {
logger.log(Level.INFO, String.format(format, obj));
}

static void warning(String format, Object... obj) {
logger.log(Level.WARNING, String.format(format, obj));
}

static void severe(String format, Object... obj) {
logger.log(Level.SEVERE, String.format(format, obj));
}

// add logging to the provided text area for display to user in the GUI
static void addGuiLogging(TextArea textArea) {
java.util.logging.Handler guiHandler =
Expand Down
49 changes: 32 additions & 17 deletions src/main/java/network/brightspots/rcv/ResultsWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class ResultsWriter {
private static final String CDF_GPU_ID_FORMAT = "gpu-%d";
private static final String CDF_REPORTING_DEVICE_ID = "rd-001";

private static final Map<String, String> candidateCodeToCdfId = new HashMap<>();
private static final Map<String, String> candidateCodeToContestSelectionId = new HashMap<>();
private static final Map<String, String> candidateCodeToCandidateId = new HashMap<>();

// number of rounds needed to elect winner(s)
private int numRounds;
Expand Down Expand Up @@ -136,18 +137,27 @@ private static String generateCvrSnapshotId(String cvrId, Integer round) {
: String.format("ballot-%s", cvrId);
}

private static String getCdfIdForCandidateCode(String code) {
String id = candidateCodeToCdfId.get(code);
// generates an internal ContestSelectionId based on a candidate code
private static String getContestSelectionIdForCandidateCode(String code) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please keep "Cdf" in this method's name for clarity

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah sorry about that @tarheel! I did not give you much explanation.

Among the important things:

  1. All static election objects (Candidates, Contests, ContestIds) are parsed at beginning of cvr processing. This simplifies and clarifies the parsing somewhat.
  2. CVRContestSelections now contain links to Candidate objects. During cvr processing these links are used to look up the linked Candidate. From there, Candidate Name field is used to assign the rankings and create the CastVoteRecord for tabulation. Previously, Candidate names were embedded directly in the CVRContestSelection objects using a "Code" object with a 'vendor-label' field on it. This is non-standard, and while it's conceivable a vendor might do this (Code objects allow flexibility) it's not the primary way CDF encodes information about a Candidate. The primary way is by linking to a Candidate object.
  3. Candidate names are stored in config - not pulled from CDF file at tabulation time. This normalizes candidate logic across formats.
  4. Configs do not use CDF internal ids to as candidate codes. It's important to understand that CDF internal object links (e.g. CandidateIds contained in a CVRContestSelection in CDF should be used only within the context of the CDF. They are generated by the cvr report generating program, and should not appear in our config files.
  5. Filter contest tabulation by contest ID as specified in the config file

So to your second question: yes it would be possible for these methods to share an interface. I think the real move is for all reader objects to share an interface. I don't think what you propose would be difficult, nevertheless it's outside the scope of this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you create a follow-up issue (not for this deadline) to refactor this class?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we already have such a ticket: #251

String id = candidateCodeToContestSelectionId.get(code);
if (id == null) {
id =
code.startsWith("cs-")
? code
: String.format("cs-%s", sanitizeStringForOutput(code).toLowerCase());
candidateCodeToCdfId.put(code, id);
id = String.format("cs-%s", sanitizeStringForOutput(code).toLowerCase());
candidateCodeToContestSelectionId.put(code, id);
}
return id;
}

// generates an internal CandidateId based on a candidate code
private static String getCandidateIdForCandidateCode(String code) {
moldover marked this conversation as resolved.
Show resolved Hide resolved
String id = candidateCodeToCandidateId.get(code);
if (id == null) {
id = String.format("c-%s", sanitizeStringForOutput(code).toLowerCase());
candidateCodeToCandidateId.put(code, id);
}
return id;
}


// Instead of a map from rank to list of candidates, we need a sorted list of candidates
// with the ranks they were given. (Ordinarily a candidate will have only a single rank, but they
// could have multiple ranks if the ballot duplicates the candidate, i.e. assigns them multiple
Expand Down Expand Up @@ -751,7 +761,7 @@ private Map<String, Object> generateCvrSnapshotMap(

selectionMapList.add(
Map.ofEntries(
entry("ContestSelectionId", getCdfIdForCandidateCode(candidateCode)),
entry("ContestSelectionId", getContestSelectionIdForCandidateCode(candidateCode)),
entry("SelectionPosition", selectionPositionMapList),
entry("@type", "CVR.CVRContestSelection")));
}
Expand All @@ -772,31 +782,36 @@ private Map<String, Object> generateCvrSnapshotMap(
private Map<String, Object> generateCdfMapForElection() {
HashMap<String, Object> electionMap = new HashMap<>();

// containers for election-level data
List<Map<String, Object>> contestSelections = new LinkedList<>();
List<Map<String, Object>> candidates = new LinkedList<>();

// iterate all candidates and create Candidate and ContestSelection objects for them
List<String> candidateCodes = new LinkedList<>(config.getCandidateCodeList());
Collections.sort(candidateCodes);
for (String candidateCode : candidateCodes) {
Map<String, String> codeMap =
candidates.add(
Map.ofEntries(
entry("@type", "CVR.Code"),
entry("Type", "other"),
entry("OtherType", "vendor-label"),
entry("Value", config.getNameForCandidateCode(candidateCode)));
entry("@id", getCandidateIdForCandidateCode(candidateCode)),
entry("Name", candidateCode)));

contestSelections.add(
Map.ofEntries(
entry("@id", getCdfIdForCandidateCode(candidateCode)),
entry("@id", getContestSelectionIdForCandidateCode(candidateCode)),
entry("@type", "CVR.ContestSelection"),
entry("Code", new Map[]{codeMap})));
entry("CandidateIds", new String[]{getCandidateIdForCandidateCode(candidateCode)})));
}

Map<String, Object> contestJson =
Map.ofEntries(
entry("@id", CDF_CONTEST_ID),
entry("@type", "CVR.CandidateContest"),
entry("ContestSelection", contestSelections),
entry("@type", "CVR.CandidateContest"));
entry("Name", config.getContestName())
);

electionMap.put("@id", CDF_ELECTION_ID);
electionMap.put("Candidate", candidates);
electionMap.put("Contest", new Map[]{contestJson});
electionMap.put("ElectionScopeId", CDF_GPU_ID);
electionMap.put("@type", "CVR.Election");
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/network/brightspots/rcv/TabulatorSession.java
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ private List<CastVoteRecord> parseCastVoteRecords(ContestConfig config, Set<Stri
try {
if (ContestConfig.isCdf(source)) {
Logger.log(Level.INFO, "Reading CDF cast vote record file: %s...", cvrPath);
new CommonDataFormatReader(cvrPath, config).parseCvrFile(castVoteRecords);
new CommonDataFormatReader(cvrPath, config, source.getContestId())
.parseCvrFile(castVoteRecords);
continue;
} else if (ContestConfig.getProvider(source) == Provider.CLEAR_BALLOT) {
Logger
Expand Down Expand Up @@ -335,8 +336,11 @@ private List<CastVoteRecord> parseCastVoteRecords(ContestConfig config, Set<Stri
encounteredSourceProblem = true;
} catch (CvrParseException e) {
encounteredSourceProblem = true;
} catch (Exception exception) {
Logger.severe("Unexpected error parsing source file %s", cvrPath);
moldover marked this conversation as resolved.
Show resolved Hide resolved
encounteredSourceProblem = true;
}
}
}

if (encounteredSourceProblem) {
Logger.log(Level.SEVERE, "Parsing cast vote records failed!");
Expand Down
62 changes: 59 additions & 3 deletions src/test/java/network/brightspots/rcv/TabulatorTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.io.BufferedReader;
import java.io.File;
Expand Down Expand Up @@ -117,9 +118,10 @@ private static String getTestFilePath(String stem, String suffix) {
// helper function to support running various tabulation tests
private static void runTabulationTest(String stem) {
String configPath = getTestFilePath(stem, "_config.json");
Logger.info("Running tabulation test: %s\nTabulating config file: %s...", stem, configPath);
TabulatorSession session = new TabulatorSession(configPath);
session.tabulate();

Logger.info("Examining tabulation test results...");
String timestampString = session.getTimestampString();
ContestConfig config = ContestConfig.loadContestConfig(configPath);
assertNotNull(config);
Expand All @@ -140,7 +142,6 @@ private static void runTabulationTest(String stem) {
"_contest_" + config.rawConfig.cvrFileSources.get(0).getContestId() + "_expected.csv");
assertTrue(fileCompare(session.getConvertedFilesWritten().get(0), expectedPath));
}

// test passed so cleanup test output folder
File outputFolder = new File(session.getOutputPath());
if (outputFolder.listFiles() != null) {
Expand All @@ -156,6 +157,7 @@ private static void runTabulationTest(String stem) {
}
}
}
Logger.info("Test complete.");
}

private static void compareJsons(
Expand Down Expand Up @@ -183,14 +185,68 @@ private static void compareJson(
+ "_expected_"
+ jsonType
+ ".json");
assertTrue(fileCompare(expectedPath, actualOutputPath));
Logger.info("Comparing files:\nGenerated: %s\nReference: %s", actualOutputPath, expectedPath);
if (fileCompare(expectedPath, actualOutputPath)) {
Logger.info("Files are equal.");
} else {
Logger.info("Files are different.");
fail();
}
}

@BeforeAll
static void setup() {
Logger.setup();
}

@Test
@DisplayName("NIST XML CDF 2")
void nistXmlCdf2() {
runTabulationTest("nist_xml_cdf_2");
}

@Test
@DisplayName("unisyn_xml_cdf_city_tax_collector")
void unisynXmlCdfCityTaxCollector() {
runTabulationTest("unisyn_xml_cdf_city_tax_collector");
}

@Test
@DisplayName("unisyn_xml_cdf_city_mayor")
void unisynXmlCdfCityMayor() {
runTabulationTest("unisyn_xml_cdf_city_mayor");
}

@Test
@DisplayName("unisyn_xml_cdf_city_council_member")
void unisynXmlCdfCityCouncilMember() {
runTabulationTest("unisyn_xml_cdf_city_council_member");
}

@Test
@DisplayName("unisyn_xml_cdf_city_chief_of_police")
void unisynXmlCdfCityChiefOfPolice() {
runTabulationTest("unisyn_xml_cdf_city_chief_of_police");
}

@Test
@DisplayName("unisyn_xml_cdf_city_coroner")
void unisynXmlCdfCityCoroner() {
runTabulationTest("unisyn_xml_cdf_city_coroner");
}

@Test
@DisplayName("unisyn_xml_cdf_county_sheriff")
void unisynXmlCdfCountySheriff() {
runTabulationTest("unisyn_xml_cdf_county_sheriff");
}

@Test
@DisplayName("unisyn_xml_cdf_county_coroner")
void unisynXmlCdfCountyCoroner() {
runTabulationTest("unisyn_xml_cdf_county_coroner");
}

@Test
@DisplayName("Clear Ballot - Kansas Primary")
void testClearBallotKansasPrimary() {
Expand Down
Loading