diff --git a/.github/workflows/acceptance_test.yml b/.github/workflows/acceptance_test.yml index c0fe1d3556..4759f4172e 100644 --- a/.github/workflows/acceptance_test.yml +++ b/.github/workflows/acceptance_test.yml @@ -1,5 +1,4 @@ name: Rule acceptance tests - on: pull_request: branches: [ master ] @@ -16,16 +15,24 @@ on: - 'web/**' - '.github/workflows/web_**.yml' - '.github/workflows/stg_web_**.yml' + types: [ opened, synchronize, reopened, ready_for_review ] concurrency: group: ${{ github.head_ref }} cancel-in-progress: true jobs: + fail_if_pull_request_is_draft: # Fails in order to indicate that pull request needs to be marked as ready to review to pass. + if: github.event.pull_request.draft == true + runs-on: ubuntu-latest + steps: + - name: Fail if PR is a draft + run: exit 1 pre_ci: name: Prepare CI environment + if: github.event.pull_request.draft == false # Skip this job and its dependencies if the PR is draft runs-on: ubuntu-latest steps: - name: Checkout Project - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # We need to fetch with a depth of 2 for pull_request so we can do HEAD^2 fetch-depth: 2 @@ -48,35 +55,35 @@ jobs: needs: pre_ci runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: gradle/wrapper-validation-action@v1 + - uses: actions/checkout@v4 + - uses: gradle/actions/wrapper-validation@v3 pack-snapshot: needs: [ validate-gradle-wrapper ] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up JDK 11 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: '11' distribution: 'temurin' - name: Cache Gradle packages - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle') }} restore-keys: ${{ runner.os }}-gradle + - name: Set up Gradle + uses: gradle/actions/setup-gradle@v3 - name: Package cli app jar with Gradle - uses: gradle/gradle-build-action@v2 - with: - arguments: shadowJar + run: ./gradlew shadowJar - name: Persist gtfs-validator snapshot jar - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: gtfs-validator-snapshot path: cli/build/libs/gtfs-validator-*-cli.jar - name: Persist comparator snapshot jar - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: comparator-snapshot path: output-comparator/build/libs/output-comparator-*-cli.jar @@ -84,26 +91,26 @@ jobs: needs: [ validate-gradle-wrapper ] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: master - name: Set up JDK 11 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: '11' distribution: 'temurin' - name: Cache Gradle packages - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle') }} restore-keys: ${{ runner.os }}-gradle + - name: Set up Gradle + uses: gradle/actions/setup-gradle@v3 - name: Package cli app jar with Gradle - uses: gradle/gradle-build-action@v2 - with: - arguments: shadowJar + run: ./gradlew shadowJar - name: Persist gtfs-validator jar from master branch - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: gtfs-validator-master path: cli/build/libs/gtfs-validator-*-cli.jar @@ -113,7 +120,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install dependencies run: | pip install -r scripts/mobility-database-harvester/requirements.txt @@ -125,7 +132,7 @@ jobs: echo "matrix=$DATASETS" >> $GITHUB_OUTPUT - name: Persist metadata if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: datasets_metadata path: scripts/mobility-database-harvester/datasets_metadata @@ -139,17 +146,22 @@ jobs: strategy: matrix: ${{ fromJson(needs.fetch-urls.outputs.matrix) }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download .jar file from master branch - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: gtfs-validator-master path: gtfs-validator-master - name: Download latest changes .jar file from previous job - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: gtfs-validator-snapshot path: gtfs-validator-snapshot + - name: Extract and concatenate IDs + run: | + concatenated_ids=$(bash ./scripts/extract_ids.sh '${{ matrix.data }}') + echo "CONCATENATED_IDS=$concatenated_ids" >> $GITHUB_ENV + echo "CONCATENATED_IDS=$concatenated_ids" - name: Run validators on queued URLs run: | queue="${{ matrix.data }}" @@ -157,25 +169,35 @@ jobs: env: OUTPUT_BASE: ${{ github.sha }} - name: Persist reports - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: reports_all + name: reports_${{ env.CONCATENATED_IDS }} path: ${{ github.sha }}/output + merge-reports-artifacts: + runs-on: ubuntu-latest + needs: [ get-reports ] + steps: + - name: Merge Artifacts + uses: actions/upload-artifact/merge@v4 + with: + name: reports_all + pattern: reports_* + delete-merged: true compare-outputs: - needs: [ get-reports ] + needs: [ merge-reports-artifacts ] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download comparator .jar file from previous job - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: comparator-snapshot - name: Retrieve reports from previous job - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: reports_all - name: Retrieve gtfs latest versions from previous job - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: datasets_metadata - name: Generate acceptance report test @@ -193,7 +215,7 @@ jobs: --run_id ${{github.run_id}} - name: Persist acceptance test reports if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: acceptance_test_report path: acceptance-test-output @@ -201,7 +223,7 @@ jobs: id: generate-comment if: always() run: | - PR_COMMENT=$(< acceptance-test-output/acceptance_report_summary.txt) + PR_COMMENT=$(< acceptance-test-output/acceptance_report_summary.md) echo "PR_COMMENT<> $GITHUB_ENV echo "$PR_COMMENT" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java index 693918e9f4..3197be8f81 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/io/ValidationReportDeserializer.java @@ -42,6 +42,8 @@ public class ValidationReportDeserializer implements JsonDeserializer { private static final String NOTICES_MEMBER_NAME = "notices"; + private static final String SUMMARY_MEMBER_NAME = "summary"; + private static final String VALIDATION_TIME_MEMBER_NAME = "validationTimeSeconds"; @Override public ValidationReport deserialize( @@ -49,12 +51,19 @@ public ValidationReport deserialize( Set notices = new LinkedHashSet<>(); JsonObject rootObject = json.getAsJsonObject(); // Note that the json file contains the summary in addition to the notices, but it is ignored - // since currently the report comparison is only on the notices + // since currently the report comparison is only on the notices and the validation time. + Double validationTimeSeconds = null; + if (rootObject.has(SUMMARY_MEMBER_NAME)) { + JsonObject summaryObject = rootObject.getAsJsonObject(SUMMARY_MEMBER_NAME); + if (summaryObject.has(VALIDATION_TIME_MEMBER_NAME)) { + validationTimeSeconds = summaryObject.get(VALIDATION_TIME_MEMBER_NAME).getAsDouble(); + } + } JsonArray noticesArray = rootObject.getAsJsonArray(NOTICES_MEMBER_NAME); for (JsonElement childObject : noticesArray) { notices.add(Notice.GSON.fromJson(childObject, NoticeReport.class)); } - return new ValidationReport(notices); + return new ValidationReport(notices, validationTimeSeconds); } public static JsonObject serialize( diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java index 1b8568645d..5ea4c76a35 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/model/ValidationReport.java @@ -41,6 +41,7 @@ public class ValidationReport { .serializeSpecialFloatingPointValues() .create(); private final Set notices; + private final Double validationTimeSeconds; /** * Public constructor needed for deserialization by {@code ValidationReportDeserializer}. Only @@ -49,7 +50,19 @@ public class ValidationReport { * @param noticeReports set of {@code NoticeReport}s */ public ValidationReport(Set noticeReports) { + this(noticeReports, null); + } + + /** + * Public constructor needed for deserialization by {@code ValidationReportDeserializer}. Only + * stores information for error {@code NoticeReport}. + * + * @param noticeReports set of {@code NoticeReport}s + * @param validationTimeSeconds the time taken to validate the GTFS dataset + */ + public ValidationReport(Set noticeReports, Double validationTimeSeconds) { this.notices = Collections.unmodifiableSet(noticeReports); + this.validationTimeSeconds = validationTimeSeconds; } /** @@ -69,6 +82,10 @@ public Set getNotices() { return notices; } + public Double getValidationTimeSeconds() { + return validationTimeSeconds; + } + /** * Determines if two validation reports are equal regardless of the order of the fields in the set * of {@code NoticeReport}. diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java index 1b61eec79a..4732ad4bde 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/JsonReportSummary.java @@ -33,6 +33,7 @@ public class JsonReportSummary { private JsonReportFeedInfo feedInfo; private List agencies; private Set files; + private Double validationTimeSeconds; @SerializedName("counts") private JsonReportCounts jsonReportCounts; @@ -65,6 +66,7 @@ public JsonReportSummary( if (feedMetadata != null) { if (feedMetadata.feedInfo != null) { this.feedInfo = new JsonReportFeedInfo(feedMetadata.feedInfo); + this.validationTimeSeconds = feedMetadata.validationTimeSeconds; } else { logger.atSevere().log( "No feed info for feed " diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java index fc12302f25..b55dd11edb 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/report/model/FeedMetadata.java @@ -41,6 +41,8 @@ public class FeedMetadata { public ArrayList agencies = new ArrayList<>(); private ImmutableSortedSet filenames; + public double validationTimeSeconds; + // List of features that only require checking the presence of one record in the file. private final List> FILE_BASED_FEATURES = List.of( diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/runner/ValidationRunner.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/runner/ValidationRunner.java index bd2fdbbb20..1127a25533 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/runner/ValidationRunner.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/runner/ValidationRunner.java @@ -134,21 +134,22 @@ public Status run(ValidationRunnerConfig config) { } FeedMetadata feedMetadata = FeedMetadata.from(feedContainer, gtfsInput.getFilenames()); closeGtfsInput(gtfsInput, noticeContainer); + feedMetadata.validationTimeSeconds = (System.nanoTime() - startNanos) / 1e9; // Output exportReport(feedMetadata, noticeContainer, config, versionInfo); - printSummary(startNanos, feedContainer, feedLoader, anyTableLoader); + printSummary(feedMetadata, feedContainer, feedLoader, anyTableLoader); return Status.SUCCESS; } /** * Prints validation metadata. * - * @param startNanos start time as nanoseconds + * @param feedMetadata the {@code FeedMetadata} * @param feedContainer the {@code GtfsFeedContainer} */ public static void printSummary( - long startNanos, + FeedMetadata feedMetadata, GtfsFeedContainer feedContainer, GtfsFeedLoader loader, AnyTableLoader anyTableLoader) { @@ -198,7 +199,7 @@ public static void printSummary( logger.atSevere().log(b.toString()); } - logger.atInfo().log("Validation took %.3f seconds%n", (endNanos - startNanos) / 1e9); + logger.atInfo().log("Validation took %.3f seconds%n", feedMetadata.validationTimeSeconds); logger.atInfo().log(feedContainer.tableTotalsText()); } diff --git a/main/src/test/java/org/mobilitydata/gtfsvalidator/report/model/JsonReportSummaryTest.java b/main/src/test/java/org/mobilitydata/gtfsvalidator/report/model/JsonReportSummaryTest.java index 02aa8db483..7b554f6b3b 100644 --- a/main/src/test/java/org/mobilitydata/gtfsvalidator/report/model/JsonReportSummaryTest.java +++ b/main/src/test/java/org/mobilitydata/gtfsvalidator/report/model/JsonReportSummaryTest.java @@ -75,6 +75,7 @@ private static FeedMetadata generateFeedMetaData() { 3 // Should not be present in the resulting GSON ); feedMetadata.specFeatures = Map.of("Feature1", false, "Feature2", true); + feedMetadata.validationTimeSeconds = 100.0; return feedMetadata; } @@ -118,6 +119,7 @@ public void withFeedMetadataWithConfigTest() throws Exception { + "\"countryCode\":\"GB\"," + "\"dateForValidation\":\"2020-01-02\"," + "\"feedInfo\":{\"publisherName\":\"value1\",\"publisherUrl\":\"value2\"}," + + "\"validationTimeSeconds\":100.0," + "\"agencies\":[" + "{\"name\":\"agency1\",\"url\":\"some URL 1\",\"phone\":\"phone1\",\"email\":\"email1\"}," + "{\"name\":\"agency1\",\"url\":\"some URL 1\",\"phone\":\"phone1\",\"email\":\"email1\"}]," @@ -139,6 +141,7 @@ public void withFeedMetadataNoConfigTest() throws Exception { + "\"validatedAt\":\"now\"," + "\"threads\":0," + "\"feedInfo\":{\"publisherName\":\"value1\",\"publisherUrl\":\"value2\"}," + + "\"validationTimeSeconds\":100.0," + "\"agencies\":[" + "{\"name\":\"agency1\",\"url\":\"some URL 1\",\"phone\":\"phone1\",\"email\":\"email1\"}," + "{\"name\":\"agency1\",\"url\":\"some URL 1\",\"phone\":\"phone1\",\"email\":\"email1\"}]," diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/Main.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/Main.java index 5681e17e91..5ac6a4e409 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/Main.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/Main.java @@ -36,11 +36,15 @@ public class Main { private static final FluentLogger logger = FluentLogger.forEnclosingClass(); static final String ACCEPTANCE_REPORT_JSON = "acceptance_report.json"; - static final String ACCEPTANCE_REPORT_SUMMARY_TXT = "acceptance_report_summary.txt"; + static final String ACCEPTANCE_REPORT_SUMMARY_MD = "acceptance_report_summary.md"; private static final int IO_EXCEPTION_EXIT_CODE = 1; private static final int COMPARISON_FAILURE_EXIT_CODE = 2; private static final Gson GSON = - new GsonBuilder().serializeNulls().disableHtmlEscaping().create(); + new GsonBuilder() + .serializeNulls() + .disableHtmlEscaping() + .serializeSpecialFloatingPointValues() + .create(); public static void main(String[] argv) { int rc = run(argv); @@ -122,7 +126,7 @@ private static void exportAcceptanceReport(AcceptanceReport report, String outpu private static void exportReportSummary(String reportSummary, String outputBase) { try { Files.write( - Paths.get(outputBase, ACCEPTANCE_REPORT_SUMMARY_TXT), + Paths.get(outputBase, ACCEPTANCE_REPORT_SUMMARY_MD), reportSummary.getBytes(StandardCharsets.UTF_8)); } catch (IOException e) { logger.atSevere().withCause(e).log("Cannot store acceptance test report summary file"); diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparator.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparator.java index 8af040b991..f374c000cf 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparator.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparator.java @@ -10,6 +10,7 @@ import org.mobilitydata.gtfsvalidator.notice.SeverityLevel; import org.mobilitydata.gtfsvalidator.outputcomparator.io.ChangedNoticesCollector; import org.mobilitydata.gtfsvalidator.outputcomparator.io.CorruptedSourcesCollector; +import org.mobilitydata.gtfsvalidator.outputcomparator.io.ValidationPerformanceCollector; import org.mobilitydata.gtfsvalidator.outputcomparator.model.SourceUrlContainer; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.AcceptanceReport; @@ -57,6 +58,8 @@ public Result compareValidationRuns( args.getPercentInvalidDatasetsThreshold()); CorruptedSourcesCollector corruptedSources = new CorruptedSourcesCollector(args.getPercentCorruptedSourcesThreshold()); + ValidationPerformanceCollector validationPerformanceCollector = + new ValidationPerformanceCollector(); for (File file : reportDirs) { String sourceId = file.getName(); @@ -73,24 +76,38 @@ public Result compareValidationRuns( // in case a validation report does not exist for a sourceId we add the sourceId to // the list of corrupted sources if (!(referenceReportPath.toFile().exists() && latestReportPath.toFile().exists())) { - corruptedSources.addCorruptedSource(sourceId); + corruptedSources.addCorruptedSource( + sourceId, + referenceReportPath.toFile().exists(), + null, + latestReportPath.toFile().exists(), + null); continue; } ValidationReport referenceReport; ValidationReport latestReport; try { referenceReport = ValidationReport.fromPath(referenceReportPath); + } catch (IOException ioException) { + logger.atSevere().withCause(ioException).log("Error reading reference validation report"); + // in case a file is corrupted, add the sourceId to the list of corrupted sources + corruptedSources.addCorruptedSource(sourceId, true, false, true, null); + continue; + } + try { latestReport = ValidationReport.fromPath(latestReportPath); } catch (IOException ioException) { - logger.atSevere().withCause(ioException).log("Error reading validation reports"); + logger.atSevere().withCause(ioException).log("Error reading latest validation report"); // in case a file is corrupted, add the sourceId to the list of corrupted sources - corruptedSources.addCorruptedSource(sourceId); + corruptedSources.addCorruptedSource(sourceId, true, true, true, false); continue; } newErrors.compareValidationReports(sourceId, sourceUrl, referenceReport, latestReport); droppedErrors.compareValidationReports(sourceId, sourceUrl, latestReport, referenceReport); newWarnings.compareValidationReports(sourceId, sourceUrl, referenceReport, latestReport); droppedWarnings.compareValidationReports(sourceId, sourceUrl, latestReport, referenceReport); + validationPerformanceCollector.compareValidationReports( + sourceId, referenceReport, latestReport); } AcceptanceReport report = @@ -99,7 +116,8 @@ public Result compareValidationRuns( droppedErrors.getChangedNotices(), newWarnings.getChangedNotices(), droppedWarnings.getChangedNotices(), - corruptedSources.toReport()); + corruptedSources.toReport(), + validationPerformanceCollector.toReport()); boolean failure = newErrors.isAboveThreshold() @@ -116,6 +134,7 @@ public Result compareValidationRuns( newWarnings, droppedWarnings, corruptedSources, + validationPerformanceCollector, args); return Result.create(report, reportSummaryString, failure); @@ -149,17 +168,18 @@ private static String generateReportSummaryString( ChangedNoticesCollector newWarnings, ChangedNoticesCollector droppedWarnings, CorruptedSourcesCollector corruptedSources, + ValidationPerformanceCollector validationPerformanceCollector, Arguments args) { StringBuilder b = new StringBuilder(); - String status = failure ? "❌ Invalid acceptance test." : "✅ Rule acceptance tests passed."; - b.append(status).append('\n'); - b.append("New Errors: ").append(newErrors.generateLogString()).append('\n'); - b.append("Dropped Errors: ").append(droppedErrors.generateLogString()).append('\n'); - b.append("New Warnings: ").append(newWarnings.generateLogString()).append('\n'); - b.append("Dropped Warnings: ").append(droppedWarnings.generateLogString()).append('\n'); - b.append(corruptedSources.generateLogString()).append("\n"); + b.append("## \uD83D\uDCDD Acceptance Test Report\n"); + b.append("### \uD83D\uDCCB Summary\n"); + String status = + failure ? "❌ The rule acceptance test has failed" : "✅ The rule acceptance has passed"; + b.append(status); if (args.getCommitSha().isPresent()) { - b.append("Commit: ").append(args.getCommitSha().get()).append("\n"); + b.append(" for commit ").append(args.getCommitSha().get()).append('\n'); + } else { + b.append(".\n"); } if (args.getRunId().isPresent()) { b.append( @@ -168,7 +188,14 @@ private static String generateReportSummaryString( "https://github.com/MobilityData/gtfs-validator/actions/runs", args.getRunId().get())); } - b.append(status).append('\n'); + + b.append("### \uD83D\uDCCA Notices Comparison\n"); + b.append(newErrors.generateLogString("New Errors")).append('\n'); + b.append(droppedErrors.generateLogString("Dropped Errors")).append('\n'); + b.append(newWarnings.generateLogString("New Warnings")).append('\n'); + b.append(droppedWarnings.generateLogString("Dropped Warnings")).append("\n\n"); + b.append(corruptedSources.generateLogString()).append("\n").append("\n"); + b.append(validationPerformanceCollector.generateLogString()).append("\n"); return b.toString(); } } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollector.java index b2df53f713..c43228abfa 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollector.java @@ -109,20 +109,50 @@ public boolean isAboveThreshold() { return computeInvalidDatasetPercentage() >= this.percentInvalidDatasetsThreshold; } - /** Returns a human-readable string summarizing the number of invalid datasets. */ - public String generateLogString() { + /** + * Returns a human-readable string summarizing the number of invalid datasets. + * + * @param severityLevelName the name of the severity level + */ + public String generateLogString(String severityLevelName) { StringBuilder b = new StringBuilder(); + b.append("
\n").append(severityLevelName).append(" "); b.append( String.format( - "%d out of %d datasets (~%.0f%%) are invalid due to code change, which is ", + "(%d out of %d datasets, ~%.0f%%)", invalidDatasetCount, totalDatasetCount, computeInvalidDatasetPercentage())); if (isAboveThreshold()) { - b.append("above"); + b.append(" ❌\n"); } else { - b.append("less than"); + b.append(" ✅\n"); } - b.append( - String.format(" the provided threshold of %.0f%%.", this.percentInvalidDatasetsThreshold)); + + List changedNotices = getChangedNotices(); + if (!changedNotices.isEmpty()) { + if (isAboveThreshold()) { + b.append("

Details of new errors due to code change, which is above"); + } else { + b.append("

Details of new errors due to code change, which is less than"); + } + b.append( + String.format( + " the provided threshold of %.0f%%.

\n", this.percentInvalidDatasetsThreshold)); + b.append("\n| Dataset | Notice Code |\n"); + b.append("|---------|-------------|\n"); + for (ChangedNotice notice : changedNotices) { + for (AffectedSource source : notice.getAffectedSources()) { + b.append("| ") + .append(source.sourceId()) + .append(" | ") + .append(notice.noticeCode()) + .append(" |\n"); + } + } + } else { + b.append("

No changes were detected due to the code change.

\n"); + } + + b.append("
"); return b.toString(); } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollector.java index a8926a31c0..ab0b65671e 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollector.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollector.java @@ -1,7 +1,6 @@ package org.mobilitydata.gtfsvalidator.outputcomparator.io; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.CorruptedSources; @@ -14,7 +13,7 @@ public class CorruptedSourcesCollector { private final float percentCorruptedSourcesThreshold; private int sourceIdCount = 0; - private final List corruptedSourceIds = new ArrayList<>(); + private final List corruptedSourceDetails = new ArrayList<>(); public CorruptedSourcesCollector(float percentCorruptedSourcesThreshold) { this.percentCorruptedSourcesThreshold = percentCorruptedSourcesThreshold; @@ -22,7 +21,11 @@ public CorruptedSourcesCollector(float percentCorruptedSourcesThreshold) { /** Returns a {@link CorruptedSources} object summarizing the collected corrupted sources. */ public CorruptedSources toReport() { - Collections.sort(this.corruptedSourceIds); + this.corruptedSourceDetails.sort((a, b) -> a.sourceId.compareTo(b.sourceId)); + List corruptedSourceIds = new ArrayList<>(); + for (CorruptedSourceDetail detail : corruptedSourceDetails) { + corruptedSourceIds.add(detail.sourceId); + } return CorruptedSources.builder() .setSourceIdCount(sourceIdCount) .setCorruptedSourcesCount(corruptedSourceIds.size()) @@ -36,8 +39,14 @@ public void addSource() { this.sourceIdCount++; } - public void addCorruptedSource(String sourceId) { - this.corruptedSourceIds.add(sourceId); + public void addCorruptedSource( + String sourceId, + Boolean refExists, + Boolean refReadable, + Boolean latestExists, + Boolean latestReadable) { + this.corruptedSourceDetails.add( + new CorruptedSourceDetail(sourceId, refExists, refReadable, latestExists, latestReadable)); } public boolean isAboveThreshold() { @@ -46,24 +55,34 @@ public boolean isAboveThreshold() { public String generateLogString() { StringBuilder b = new StringBuilder(); + b.append("### 🛡️ Corruption Check\n"); + if (corruptedSourceDetails.isEmpty()) { + b.append( + String.format( + "%d out of %d sources (~%.0f %%) are corrupted.\n\n", + 0, sourceIdCount, computeCorruptedSourcesPercentage())); + return b.toString(); + } b.append( String.format( - "%d out of %d sources (~%.0f %%) are corrupted", - corruptedSourceIds.size(), sourceIdCount, computeCorruptedSourcesPercentage())); - if (isAboveThreshold()) { + "
\n%d out of %d sources (~%.0f %%) are corrupted.\n\n", + corruptedSourceDetails.size(), sourceIdCount, computeCorruptedSourcesPercentage())); + b.append( + "| Dataset | Ref Report Exists | Ref Report Readable | Latest Report Exists | Latest Report Readable |\n"); + b.append( + "|-----------|-------------------|---------------------|----------------------|------------------------|\n"); + + for (CorruptedSourceDetail detail : corruptedSourceDetails) { b.append( String.format( - ", which is greater than or equal to the provided threshold of %.0f%%", - percentCorruptedSourcesThreshold)); - } - b.append("."); - if (!corruptedSourceIds.isEmpty()) { - b.append("\n"); - b.append("Corrupted sources:"); - for (String sourceId : corruptedSourceIds) { - b.append("\n").append(sourceId); - } + "| %s | %s | %s | %s | %s |\n", + detail.sourceId, + detail.refExists ? "✅" : "❌", + detail.refReadable == null ? "N/A" : (detail.refReadable ? "✅" : "❌"), + detail.latestExists ? "✅" : "❌", + detail.latestReadable == null ? "N/A" : (detail.latestReadable ? "✅" : "❌"))); } + b.append("
"); return b.toString(); } @@ -72,6 +91,27 @@ float computeCorruptedSourcesPercentage() { if (sourceIdCount == 0) { return 0f; } - return 100f * corruptedSourceIds.size() / sourceIdCount; + return 100f * corruptedSourceDetails.size() / sourceIdCount; + } + + private static class CorruptedSourceDetail { + String sourceId; + Boolean refExists; + Boolean refReadable; + Boolean latestExists; + Boolean latestReadable; + + CorruptedSourceDetail( + String sourceId, + Boolean refExists, + Boolean refReadable, + Boolean latestExists, + Boolean latestReadable) { + this.sourceId = sourceId; + this.refExists = refExists; + this.refReadable = refReadable; + this.latestExists = latestExists; + this.latestReadable = latestReadable; + } } } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java new file mode 100644 index 0000000000..eadd1861ee --- /dev/null +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollector.java @@ -0,0 +1,207 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import java.util.*; +import org.mobilitydata.gtfsvalidator.model.ValidationReport; +import org.mobilitydata.gtfsvalidator.outputcomparator.model.report.ValidationPerformance; + +public class ValidationPerformanceCollector { + + private final Map referenceTimes; + private final Map latestTimes; + + public ValidationPerformanceCollector() { + this.referenceTimes = new HashMap<>(); + this.latestTimes = new HashMap<>(); + } + + public void addReferenceTime(String sourceId, Double time) { + referenceTimes.put(sourceId, time); + } + + public void addLatestTime(String sourceId, Double time) { + latestTimes.put(sourceId, time); + } + + private Double computeAverage(List times) { + return times.stream().mapToDouble(Double::doubleValue).average().orElse(Double.NaN); + } + + private Double computeMedian(List times) { + if (times.isEmpty()) { + return Double.NaN; + } + int size = times.size(); + List sortedTimes = new ArrayList<>(times); + Collections.sort(sortedTimes); + Double median; + if (size % 2 == 0) { + median = (sortedTimes.get(size / 2 - 1) + sortedTimes.get(size / 2)) / 2.0; + } else { + median = sortedTimes.get(size / 2); + } + return median; + } + + private Double computeStandardDeviation(List times) { + double mean = computeAverage(times); + return Math.sqrt( + times.stream().mapToDouble(time -> Math.pow(time - mean, 2)).average().orElse(Double.NaN)); + } + + private Double computeMax(List times) { + return times.stream().mapToDouble(Double::doubleValue).max().orElse(Double.NaN); + } + + private Double computeMin(List times) { + return times.stream().mapToDouble(Double::doubleValue).min().orElse(Double.NaN); + } + + private String formatMetrics(String metric, String datasetId, Double reference, Double latest) { + String diff; + if (reference.isNaN() || latest.isNaN()) { + diff = "N/A"; + } else { + double difference = latest - reference; + String arrow = difference > 0 ? "⬆️+" : "⬇️"; + diff = String.format("%s%.2f", arrow, difference); + } + return String.format( + "| %s | %s | %.2f | %.2f | %s |\n", metric, datasetId, reference, latest, diff); + } + + public String generateLogString() { + StringBuilder b = new StringBuilder(); + b.append("### ⏱️ Performance Assessment\n") + .append("\n") + .append("
\n") + .append("📈 Validation Time\n") + .append( + "

Assess the performance in terms of seconds taken for the validation process.

\n") + .append("\n") + .append( + "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n") + .append( + "|-----------------------------|-------------------|----------------|----------------|----------------|\n"); + + List warnings = new ArrayList<>(); + List allReferenceTimes = new ArrayList<>(); + List allLatestTimes = new ArrayList<>(); + Set allKeys = new HashSet<>(referenceTimes.keySet()); + allKeys.addAll(latestTimes.keySet()); + + for (String groupId : allKeys) { + Double referenceTimes = this.referenceTimes.getOrDefault(groupId, Double.NaN); + Double latestTimes = this.latestTimes.getOrDefault(groupId, Double.NaN); + + if (Double.isNaN(referenceTimes) || Double.isNaN(latestTimes)) { + warnings.add(groupId); + continue; + } + + allReferenceTimes.add(referenceTimes); + allLatestTimes.add(latestTimes); + } + + if (!allReferenceTimes.isEmpty() && !allLatestTimes.isEmpty()) { + Double avgReference = computeAverage(allReferenceTimes); + Double avgLatest = computeAverage(allLatestTimes); + Double medianReference = computeMedian(allReferenceTimes); + Double medianLatest = computeMedian(allLatestTimes); + Double stdDevReference = computeStandardDeviation(allReferenceTimes); + Double stdDevLatest = computeStandardDeviation(allLatestTimes); + + b.append(formatMetrics("Average", "--", avgReference, avgLatest)) + .append(formatMetrics("Median", "--", medianReference, medianLatest)) + .append(formatMetrics("Standard Deviation", "--", stdDevReference, stdDevLatest)); + } + + if (!allReferenceTimes.isEmpty()) { + Double minReference = computeMin(allReferenceTimes); + String minReferenceId = + referenceTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), minReference)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + + Double maxReference = computeMax(allReferenceTimes); + String maxReferenceId = + referenceTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), maxReference)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + + Double minLatest = latestTimes.getOrDefault(minReferenceId, Double.NaN); + Double maxLatest = latestTimes.getOrDefault(maxReferenceId, Double.NaN); + + b.append( + formatMetrics( + "Minimum in References Reports", minReferenceId, minReference, minLatest)) + .append( + formatMetrics( + "Maximum in Reference Reports", maxReferenceId, maxReference, maxLatest)); + } + + if (!allLatestTimes.isEmpty()) { + Double minLatest = computeMin(allLatestTimes); + String minLatestId = + latestTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), minLatest)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + + Double maxLatest = computeMax(allLatestTimes); + String maxLatestId = + latestTimes.entrySet().stream() + .filter(entry -> Objects.equals(entry.getValue(), maxLatest)) + .map(Map.Entry::getKey) + .findFirst() + .orElse("N/A"); + + Double minReference = referenceTimes.getOrDefault(minLatestId, Double.NaN); + Double maxReference = referenceTimes.getOrDefault(maxLatestId, Double.NaN); + + b.append(formatMetrics("Minimum in Latest Reports", minLatestId, minReference, minLatest)) + .append(formatMetrics("Maximum in Latest Reports", maxLatestId, maxReference, maxLatest)); + } + + // Add warning message for feeds that are missing validation times either in reference or latest + if (!warnings.isEmpty()) { + b.append("#### ⚠️ Warnings\n") + .append("\n") + .append( + "The following dataset IDs are missing validation times either in reference or latest:\n") + .append(String.join(", ", warnings)) + .append("\n\n"); + } + b.append("
\n\n"); + + return b.toString(); + } + + public void compareValidationReports( + String sourceId, ValidationReport referenceReport, ValidationReport latestReport) { + if (referenceReport.getValidationTimeSeconds() != null) { + addReferenceTime(sourceId, referenceReport.getValidationTimeSeconds()); + } + if (latestReport.getValidationTimeSeconds() != null) { + addLatestTime(sourceId, latestReport.getValidationTimeSeconds()); + } + } + + public List toReport() { + List affectedSources = new ArrayList<>(); + for (String sourceId : referenceTimes.keySet()) { + Double referenceTime = referenceTimes.getOrDefault(sourceId, Double.NaN); + Double latestTime = latestTimes.getOrDefault(sourceId, Double.NaN); + if (!(referenceTime.isNaN() && latestTime.isNaN())) { + affectedSources.add( + ValidationPerformance.create( + sourceId, referenceTime, latestTime, latestTime - referenceTime)); + } + } + return affectedSources; + } +} diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/AcceptanceReport.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/AcceptanceReport.java index dc5844f79f..db75522767 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/AcceptanceReport.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/AcceptanceReport.java @@ -17,13 +17,21 @@ public abstract class AcceptanceReport { public abstract CorruptedSources corruptedSources(); + public abstract List validationPerformance(); + public static AcceptanceReport create( List newErrors, List droppedErrors, List newWarnings, List droppedWarnings, - CorruptedSources corruptedSources) { + CorruptedSources corruptedSources, + List validationPerformance) { return new AutoValue_AcceptanceReport( - newErrors, droppedErrors, newWarnings, droppedWarnings, corruptedSources); + newErrors, + droppedErrors, + newWarnings, + droppedWarnings, + corruptedSources, + validationPerformance); } } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/ChangedNotice.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/ChangedNotice.java index 11afa8ff99..14f80d3ba0 100644 --- a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/ChangedNotice.java +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/ChangedNotice.java @@ -51,4 +51,8 @@ public boolean equals(Object o) { public String toString() { return noticeCode + " " + affectedSources; } + + public List getAffectedSources() { + return affectedSources; + } } diff --git a/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/ValidationPerformance.java b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/ValidationPerformance.java new file mode 100644 index 0000000000..1280ced52d --- /dev/null +++ b/output-comparator/src/main/java/org/mobilitydata/gtfsvalidator/outputcomparator/model/report/ValidationPerformance.java @@ -0,0 +1,24 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.model.report; + +import com.google.auto.value.AutoValue; + +/** Represents the performance of the validation process for a specific source. */ +@AutoValue +public abstract class ValidationPerformance { + public abstract String sourceId(); + + public abstract Double referenceValidationTimeSeconds(); + + public abstract Double latestValidationTimeSeconds(); + + public abstract Double differenceSeconds(); + + public static ValidationPerformance create( + String sourceId, + Double referenceValidationTimeSeconds, + Double latestValidationTimeSeconds, + Double differenceSeconds) { + return new AutoValue_ValidationPerformance( + sourceId, referenceValidationTimeSeconds, latestValidationTimeSeconds, differenceSeconds); + } +} diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java index 59b3d347c0..18e8d66e56 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/cli/ValidationReportComparatorTest.java @@ -107,13 +107,41 @@ public void addedErrorNotice_summaryString() throws Exception { assertThat(result.reportSummary()) .isEqualTo( - "❌ Invalid acceptance test.\n" - + "New Errors: 1 out of 2 datasets (~50%) are invalid due to code change, which is above the provided threshold of 25%.\n" - + "Dropped Errors: 0 out of 2 datasets (~0%) are invalid due to code change, which is less than the provided threshold of 25%.\n" - + "New Warnings: 0 out of 2 datasets (~0%) are invalid due to code change, which is less than the provided threshold of 25%.\n" - + "Dropped Warnings: 0 out of 2 datasets (~0%) are invalid due to code change, which is less than the provided threshold of 25%.\n" - + "0 out of 2 sources (~0 %) are corrupted.\n" - + "❌ Invalid acceptance test.\n"); + "## \uD83D\uDCDD Acceptance Test Report\n" + + "### \uD83D\uDCCB Summary\n" + + "❌ The rule acceptance test has failed.\n" + + "### \uD83D\uDCCA Notices Comparison\n" + + "
\n" + + "New Errors (1 out of 2 datasets, ~50%) ❌\n" + + "

Details of new errors due to code change, which is above the provided threshold of 25%.

\n" + + "\n" + + "| Dataset | Notice Code |\n" + + "|---------|-------------|\n" + + "| feed-id-b | missing_required_file |\n" + + "
\n" + + "
\n" + + "Dropped Errors (0 out of 2 datasets, ~0%) ✅\n" + + "

No changes were detected due to the code change.

\n" + + "
\n" + + "
\n" + + "New Warnings (0 out of 2 datasets, ~0%) ✅\n" + + "

No changes were detected due to the code change.

\n" + + "
\n" + + "
\n" + + "Dropped Warnings (0 out of 2 datasets, ~0%) ✅\n" + + "

No changes were detected due to the code change.

\n" + + "
\n" + + "\n" + + "### \uD83D\uDEE1\uFE0F Corruption Check\n" + + "0 out of 2 sources (~0 %) are corrupted.\n\n\n\n" + + "### ⏱️ Performance Assessment\n\n" + + "
\n" + + "📈 Validation Time\n" + + "

Assess the performance in terms of seconds taken for the validation process.

\n" + + "\n" + + "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "
\n\n\n"); } @Test diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollectorTest.java index e06cef781d..9edd3849b2 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ChangedNoticesCollectorTest.java @@ -33,9 +33,12 @@ public void testBasicFunctionality() { assertThat(collector.isAboveThreshold()).isFalse(); assertThat(collector.computeInvalidDatasetPercentage()).isWithin(1f).of(0f); assertThat(collector.getChangedNotices()).isEmpty(); - assertThat(collector.generateLogString()) + assertThat(collector.generateLogString("Test")) .isEqualTo( - "0 out of 1 datasets (~0%) are invalid due to code change, which is less than the provided threshold of 25%."); + "
\n" + + "Test (0 out of 1 datasets, ~0%) ✅\n" + + "

No changes were detected due to the code change.

\n" + + "
"); // Second source has an additional error. collector.compareValidationReports( @@ -50,9 +53,16 @@ public void testBasicFunctionality() { .containsExactly( new ChangedNotice("error_b") .addAffectedSource(AffectedSource.create("source-b", "source-b-url", 2))); - assertThat(collector.generateLogString()) + assertThat(collector.generateLogString("Test")) .isEqualTo( - "1 out of 2 datasets (~50%) are invalid due to code change, which is above the provided threshold of 25%."); + "
\n" + + "Test (1 out of 2 datasets, ~50%) ❌\n" + + "

Details of new errors due to code change, which is above the provided threshold of 25%.

\n" + + "\n" + + "| Dataset | Notice Code |\n" + + "|---------|-------------|\n" + + "| source-b | error_b |\n" + + "
"); } @Test @@ -74,9 +84,16 @@ public void testInvalidErrorThreshold() { .containsExactly( new ChangedNotice("error_b") .addAffectedSource(AffectedSource.create("source-a", "source-a-url", 5))); - assertThat(collector.generateLogString()) + assertThat(collector.generateLogString("Test")) .isEqualTo( - "0 out of 1 datasets (~0%) are invalid due to code change, which is less than the provided threshold of 25%."); + "
\n" + + "Test (0 out of 1 datasets, ~0%) ✅\n" + + "

Details of new errors due to code change, which is less than the provided threshold of 25%.

\n" + + "\n" + + "| Dataset | Notice Code |\n" + + "|---------|-------------|\n" + + "| source-a | error_b |\n" + + "
"); } private ValidationReport report(ImmutableMap error_codes_and_counts) { diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollectorTest.java index f255666b85..2714c00433 100644 --- a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollectorTest.java +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/CorruptedSourcesCollectorTest.java @@ -3,6 +3,7 @@ import static com.google.common.truth.Truth.assertThat; import java.util.Arrays; +import java.util.List; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -24,14 +25,17 @@ public void testBasicFunctionality() { CorruptedSources.builder() .setSourceIdCount(1) .setCorruptedSourcesCount(0) - .setCorruptedSources(Arrays.asList()) + .setCorruptedSources(List.of()) .setPercentCorruptedSourcesThreshold(25) .setAboveThreshold(false) .build()); - assertThat(collector.generateLogString()).isEqualTo("0 out of 1 sources (~0 %) are corrupted."); + assertThat(normalizeWhitespace(collector.generateLogString())) + .isEqualTo( + normalizeWhitespace( + "### 🛡️ Corruption Check\n0 out of 1 sources (~0 %) are corrupted.\n\n")); collector.addSource(); - collector.addCorruptedSource("source-a"); + collector.addCorruptedSource("source-a", true, true, false, false); assertThat(collector.isAboveThreshold()).isTrue(); assertThat(collector.computeCorruptedSourcesPercentage()).isWithin(1f).of(50f); @@ -44,10 +48,17 @@ public void testBasicFunctionality() { .setPercentCorruptedSourcesThreshold(25) .setAboveThreshold(true) .build()); - assertThat(collector.generateLogString()) + assertThat(normalizeWhitespace(collector.generateLogString())) .isEqualTo( - "1 out of 2 sources (~50 %) are corrupted, which is greater than or equal to the provided threshold of 25%.\n" - + "Corrupted sources:\n" - + "source-a"); + normalizeWhitespace( + "### 🛡️ Corruption Check\n
\n1 out of 2 sources (~50 %) are corrupted.\n\n" + + "| Dataset | Ref Report Exists | Ref Report Readable | Latest Report Exists | Latest Report Readable |\n" + + "|-----------|-------------------|---------------------|----------------------|------------------------|\n" + + "| source-a | ✅ | ✅ | ❌ | ❌ |\n" + + "
")); + } + + private String normalizeWhitespace(String input) { + return input.replaceAll("\\s+", " ").trim(); } } diff --git a/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java new file mode 100644 index 0000000000..962331fc32 --- /dev/null +++ b/output-comparator/src/test/java/org/mobilitydata/gtfsvalidator/outputcomparator/io/ValidationPerformanceCollectorTest.java @@ -0,0 +1,44 @@ +package org.mobilitydata.gtfsvalidator.outputcomparator.io; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; + +public class ValidationPerformanceCollectorTest { + + @Test + public void generateLogString_test() { + ValidationPerformanceCollector collector = new ValidationPerformanceCollector(); + + // Adding some sample data + collector.addReferenceTime("feed-id-a", 12.0); + collector.addReferenceTime("feed-id-a", 14.0); + collector.addLatestTime("feed-id-a", 16.0); + collector.addLatestTime("feed-id-a", 18.0); + + collector.addReferenceTime("feed-id-b", 20.0); + collector.addLatestTime("feed-id-b", 22.0); + + // Generating the log string + String logString = collector.generateLogString(); + String expectedLogString = + "### ⏱️ Performance Assessment\n" + + "\n" + + "
\n" + + "📈 Validation Time\n" + + "

Assess the performance in terms of seconds taken for the validation process.

\n" + + "\n" + + "| Time Metric | Dataset ID | Reference (s) | Latest (s) | Difference (s) |\n" + + "|-----------------------------|-------------------|----------------|----------------|----------------|\n" + + "| Average | -- | 17.00 | 20.00 | ⬆\uFE0F+3.00 |\n" + + "| Median | -- | 17.00 | 20.00 | ⬆\uFE0F+3.00 |\n" + + "| Standard Deviation | -- | 3.00 | 2.00 | ⬇\uFE0F-1.00 |\n" + + "| Minimum in References Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n" + + "| Maximum in Reference Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" + + "| Minimum in Latest Reports | feed-id-a | 14.00 | 18.00 | ⬆\uFE0F+4.00 |\n" + + "| Maximum in Latest Reports | feed-id-b | 20.00 | 22.00 | ⬆️+2.00 |\n" + + "
\n\n"; + // Assert that the generated log string matches the expected log string + assertThat(logString).isEqualTo(expectedLogString); + } +} diff --git a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-droppedNoticeTypes.json b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-droppedNoticeTypes.json index 524ee1457a..4c5fe4bd53 100644 --- a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-droppedNoticeTypes.json +++ b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-droppedNoticeTypes.json @@ -33,5 +33,6 @@ "corruptedSources": [], "percentCorruptedSourcesThreshold": 5.0, "aboveThreshold": false - } + }, + "validationPerformance": [] } \ No newline at end of file diff --git a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-newNoticeTypes.json b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-newNoticeTypes.json index 8cf93e2eab..ef6cd24670 100644 --- a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-newNoticeTypes.json +++ b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-newNoticeTypes.json @@ -81,5 +81,6 @@ "corruptedSources": [], "percentCorruptedSourcesThreshold": 5.0, "aboveThreshold": false - } + }, + "validationPerformance": [] } \ No newline at end of file diff --git a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-noNewNoticeTypes.json b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-noNewNoticeTypes.json index 5986bab16c..69eb374b27 100644 --- a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-noNewNoticeTypes.json +++ b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-noNewNoticeTypes.json @@ -9,5 +9,6 @@ "corruptedSources": [], "percentCorruptedSourcesThreshold": 5.0, "aboveThreshold": false - } + }, + "validationPerformance": [] } \ No newline at end of file diff --git a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-tooManyCorruptedSources.json b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-tooManyCorruptedSources.json index bcaf88f3e8..be2ce3a323 100644 --- a/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-tooManyCorruptedSources.json +++ b/output-comparator/src/test/resources/org/mobilitydata/gtfsvalidator/outputcomparator/cli/MainTest-tooManyCorruptedSources.json @@ -12,5 +12,6 @@ ], "percentCorruptedSourcesThreshold": 2.0, "aboveThreshold": true - } + }, + "validationPerformance": [] } \ No newline at end of file diff --git a/scripts/common.sh b/scripts/common.sh new file mode 100644 index 0000000000..eb7a746b2a --- /dev/null +++ b/scripts/common.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +function format_json() { + local raw_data="$1" + local closing_curly_bracket="}" + + # Replace parts to form valid JSON objects + raw_data=${raw_data//\{id/\{\"id\"} + raw_data=${raw_data//,/\",} + raw_data=${raw_data//\,url/\,\"url\"} + raw_data=${raw_data//\":/\":\"} + raw_data=${raw_data//$closing_curly_bracket/\"$closing_curly_bracket} + + # Correct the improperly escaped double quotes + raw_data=${raw_data//\"\"/\"} + + echo "$raw_data" +} + +function extract_last_number() { + local id="$1" + results=$(echo "$id" | grep -oE '[0-9]+$') +} diff --git a/scripts/extract_ids.sh b/scripts/extract_ids.sh new file mode 100644 index 0000000000..9f8964a7fe --- /dev/null +++ b/scripts/extract_ids.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +source "$(dirname "$0")/common.sh" + +raw_queue_string="$1" +IFS=' ' read -r -a queue <<< "$raw_queue_string" + +concatenated_ids="" +for item in "${queue[@]}" +do + item=$(format_json "$item") + + ID=$(jq -r '.id' <<< "$item") + number=$(echo "$ID" | grep -oE '[0-9]+$') + + if [ -z "$concatenated_ids" ]; then + concatenated_ids="$number" + else + concatenated_ids="${concatenated_ids}_$number" + fi +done + +echo "$concatenated_ids" diff --git a/scripts/queue_runner.sh b/scripts/queue_runner.sh index 0715311e5a..c8a4183af0 100644 --- a/scripts/queue_runner.sh +++ b/scripts/queue_runner.sh @@ -1,15 +1,13 @@ #!/bin/bash -closing_curly_bracket="}" + +source "$(dirname "$0")/common.sh" + master="$1" raw_queue_string="${@:2}" IFS=" " read -a queue <<< $raw_queue_string for item in "${queue[@]}" do - item=${item//\{id/\{\"id\"} - item=${item//,/\",} - item=${item//\,url/\,\"url\"} - item=${item//\":/\":\"} - item=${item//$closing_curly_bracket/\"$closing_curly_bracket} + item=$(format_json "$item") ID=$(jq '.id' <<< "$item") URL=$(jq '.url' <<< "$item")