Skip to content

Commit

Permalink
Merge ca8dba8 into 417a53a
Browse files Browse the repository at this point in the history
  • Loading branch information
jcpitre committed Jun 21, 2023
2 parents 417a53a + ca8dba8 commit 2930520
Show file tree
Hide file tree
Showing 21 changed files with 371 additions and 202 deletions.
259 changes: 131 additions & 128 deletions RULES.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.mobilitydata.gtfsvalidator.notice;

import static org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice.SectionRef.TERM_DEFINITIONS;
import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.WARNING;

import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice;
import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice.SectionRefs;

/** A recommended column is missing in the input file. */
@GtfsValidationNotice(severity = WARNING, sections = @SectionRefs(TERM_DEFINITIONS))
public class MissingRecommendedColumnNotice extends ValidationNotice {

/** The name of the faulty file. */
private final String filename;

/** The name of the missing column. */
private final String fieldName;

public MissingRecommendedColumnNotice(String filename, String fieldName) {
this.filename = filename;
this.fieldName = fieldName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public class NoticeContainer {
private final List<ResolvedNotice<SystemError>> systemErrors = new ArrayList<>();
private final Map<String, Integer> noticesCountPerTypeAndSeverity = new HashMap<>();
private boolean hasValidationErrors = false;
private boolean hasValidationWarnings = false;

/**
* Used to specify limits on amount of notices in this {@code NoticeContainer}.
Expand Down Expand Up @@ -99,6 +100,10 @@ public void addValidationNoticeWithSeverity(
if (resolved.isError()) {
hasValidationErrors = true;
}
if (resolved.isWarning()) {
hasValidationWarnings = true;
}

updateNoticeCount(resolved);
if (validationNotices.size() >= maxTotalValidationNotices
|| noticesCountPerTypeAndSeverity.get(resolved.getMappingKey())
Expand Down Expand Up @@ -147,6 +152,7 @@ public void addAll(NoticeContainer otherContainer) {
validationNotices.addAll(otherContainer.validationNotices);
systemErrors.addAll(otherContainer.systemErrors);
hasValidationErrors |= otherContainer.hasValidationErrors;
hasValidationWarnings |= otherContainer.hasValidationWarnings;
for (Entry<String, Integer> entry : otherContainer.noticesCountPerTypeAndSeverity.entrySet()) {
int count = noticesCountPerTypeAndSeverity.getOrDefault(entry.getKey(), 0);
noticesCountPerTypeAndSeverity.put(entry.getKey(), count + entry.getValue());
Expand All @@ -158,6 +164,11 @@ public boolean hasValidationErrors() {
return hasValidationErrors;
}

/** Tells if this container has any {@code ValidationNotice} that is a warning. */
public boolean hasValidationWarnings() {
return hasValidationWarnings;
}

public List<ResolvedNotice<ValidationNotice>> getResolvedValidationNotices() {
return validationNotices;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,16 @@ public int hashCode() {
public boolean isError() {
return getSeverityLevel().ordinal() >= SeverityLevel.ERROR.ordinal();
}

/**
* Tells if this notice is a {@code WARNING}.
*
* <p>This method is preferred to checking {@code severityLevel} directly since more levels may be
* added in the future.
*
* @return true if this notice is a warning, false otherwise
*/
public boolean isWarning() {
return getSeverityLevel() == SeverityLevel.WARNING;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ private static NoticeContainer validateHeaders(
.filter(GtfsColumnDescriptor::headerRequired)
.map(GtfsColumnDescriptor::columnName)
.collect(Collectors.toSet()),
columnDescriptors.stream()
.filter(GtfsColumnDescriptor::headerRecommended)
.map(GtfsColumnDescriptor::columnName)
.collect(Collectors.toSet()),
headerNotices);
return headerNotices;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ public abstract class GtfsColumnDescriptor {

public abstract boolean headerRequired();

public abstract boolean headerRecommended();

public abstract FieldLevelEnum fieldLevel();

public abstract Optional<RowParser.NumberBounds> numberBounds();
Expand All @@ -33,6 +35,8 @@ public abstract static class Builder {

public abstract Builder setHeaderRequired(boolean value);

public abstract Builder setHeaderRecommended(boolean value);

public abstract Builder setFieldLevel(FieldLevelEnum value);

public abstract Builder setNumberBounds(Optional<RowParser.NumberBounds> value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.TreeSet;
import org.mobilitydata.gtfsvalidator.notice.DuplicatedColumnNotice;
import org.mobilitydata.gtfsvalidator.notice.EmptyColumnNameNotice;
import org.mobilitydata.gtfsvalidator.notice.MissingRecommendedColumnNotice;
import org.mobilitydata.gtfsvalidator.notice.MissingRequiredColumnNotice;
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.notice.UnknownColumnNotice;
Expand All @@ -36,14 +37,20 @@ public void validate(
CsvHeader actualHeader,
Set<String> supportedColumns,
Set<String> requiredColumns,
Set<String> recommendedColumns,
NoticeContainer noticeContainer) {
if (actualHeader.getColumnCount() == 0) {
// This is an empty file.
return;
}
Map<String, Integer> columnIndices = new HashMap<>();
// Sorted tree set for stable order of notices.
// Sorted tree set of all the columns for stable order of notices.
// We remove the columns that are properly present and well formed from that set, and at the
// end only the missing required columns are left in the set.
TreeSet<String> missingColumns = new TreeSet<>(requiredColumns);
// We also want to find the recommended columns that are absent. We use the same scheme for
// these.
TreeSet<String> missingRecommendedColumns = new TreeSet<>(recommendedColumns);
for (int i = 0; i < actualHeader.getColumnCount(); ++i) {
String column = actualHeader.getColumnName(i);
// Column indices are zero-based. We add 1 to make them 1-based.
Expand All @@ -59,12 +66,23 @@ public void validate(
if (!supportedColumns.contains(column)) {
noticeContainer.addValidationNotice(new UnknownColumnNotice(filename, column, i + 1));
}

// If the column is present, it should not be in the missing required columns set
missingColumns.remove(column);

// If the column is present, it should not be in the missing recommended columns set
missingRecommendedColumns.remove(column);
}
if (!missingColumns.isEmpty()) {
for (String column : missingColumns) {
noticeContainer.addValidationNotice(new MissingRequiredColumnNotice(filename, column));
}
}

if (!missingRecommendedColumns.isEmpty()) {
for (String column : missingRecommendedColumns) {
noticeContainer.addValidationNotice(new MissingRecommendedColumnNotice(filename, column));
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ void validate(
CsvHeader actualHeader,
Set<String> supportedHeaders,
Set<String> requiredHeaders,
Set<String> recommendedHeaders,
NoticeContainer noticeContainer);
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ public boolean headerRequired() {
return false;
}

@Override
public boolean headerRecommended() {
return false;
}

@Override
public FieldLevelEnum fieldLevel() {
return FieldLevelEnum.REQUIRED;
Expand Down Expand Up @@ -141,6 +146,7 @@ public void asString_recommended_missing() {
GtfsColumnDescriptor.builder()
.setColumnName("column name")
.setHeaderRequired(false)
.setHeaderRecommended(false)
.setFieldLevel(FieldLevelEnum.RECOMMENDED)
.setIsMixedCase(false)
.setIsCached(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public void validate(
CsvHeader actualHeader,
Set<String> supportedHeaders,
Set<String> requiredHeaders,
Set<String> recommendedHeaders,
NoticeContainer noticeContainer) {
noticeContainer.addValidationNotice(headerValidationNotice);
}
Expand Down Expand Up @@ -144,13 +145,15 @@ public void missingRequiredField() {
GtfsColumnDescriptor.builder()
.setColumnName(GtfsTestEntity.ID_FIELD_NAME)
.setHeaderRequired(true)
.setHeaderRecommended(false)
.setFieldLevel(FieldLevelEnum.REQUIRED)
.setIsMixedCase(false)
.setIsCached(false)
.build(),
GtfsColumnDescriptor.builder()
.setColumnName(GtfsTestEntity.CODE_FIELD_NAME)
.setHeaderRequired(false)
.setHeaderRecommended(false)
.setFieldLevel(FieldLevelEnum.REQUIRED)
.setIsMixedCase(false)
.setIsCached(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public ImmutableList<GtfsColumnDescriptor> getColumns() {
GtfsColumnDescriptor.builder()
.setColumnName(GtfsTestEntity.ID_FIELD_NAME)
.setHeaderRequired(true)
.setHeaderRecommended(false)
.setFieldLevel(FieldLevelEnum.REQUIRED)
.setIsMixedCase(false)
.setIsCached(false)
Expand All @@ -49,6 +50,7 @@ public ImmutableList<GtfsColumnDescriptor> getColumns() {
GtfsColumnDescriptor.builder()
.setColumnName(GtfsTestEntity.CODE_FIELD_NAME)
.setHeaderRequired(false)
.setHeaderRecommended(false)
.setFieldLevel(FieldLevelEnum.OPTIONAL)
.setIsMixedCase(false)
.setIsCached(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
import static com.google.common.truth.Truth.assertThat;

import com.google.common.collect.ImmutableSet;
import java.util.Set;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mobilitydata.gtfsvalidator.notice.DuplicatedColumnNotice;
import org.mobilitydata.gtfsvalidator.notice.EmptyColumnNameNotice;
import org.mobilitydata.gtfsvalidator.notice.MissingRecommendedColumnNotice;
import org.mobilitydata.gtfsvalidator.notice.MissingRequiredColumnNotice;
import org.mobilitydata.gtfsvalidator.notice.NoticeContainer;
import org.mobilitydata.gtfsvalidator.notice.UnknownColumnNotice;
Expand All @@ -40,6 +42,7 @@ public void expectedColumns() {
new CsvHeader(new String[] {"stop_id", "stop_name"}),
ImmutableSet.of("stop_id", "stop_name", "stop_lat", "stop_lon"),
ImmutableSet.of("stop_id"),
Set.of(),
container);

assertThat(container.getValidationNotices()).isEmpty();
Expand All @@ -55,6 +58,7 @@ public void unknownColumnShouldGenerateNotice() {
new CsvHeader(new String[] {"stop_id", "stop_name", "stop_extra"}),
ImmutableSet.of("stop_id", "stop_name"),
ImmutableSet.of("stop_id"),
Set.of(),
container);

assertThat(container.getValidationNotices())
Expand All @@ -71,11 +75,27 @@ public void missingRequiredColumnShouldGenerateNotice() {
new CsvHeader(new String[] {"stop_name"}),
ImmutableSet.of("stop_id", "stop_name"),
ImmutableSet.of("stop_id"),
Set.of(),
container);

assertThat(container.getValidationNotices())
.containsExactly(new MissingRequiredColumnNotice("stops.txt", "stop_id"));
assertThat(container.hasValidationErrors()).isTrue();
}

@Test
public void missingRecommendedColumnShouldGenerateNotice() {
NoticeContainer container = new NoticeContainer();
new DefaultTableHeaderValidator()
.validate(
"stops.txt",
new CsvHeader(new String[] {"stop_name"}),
Set.of("stop_id", "stop_name"),
Set.of(),
Set.of("stop_id"),
container);

assertThat(container.getValidationNotices())
.containsExactly(new MissingRecommendedColumnNotice("stops.txt", "stop_id"));
}

@Test
Expand All @@ -87,6 +107,7 @@ public void duplicatedColumnShouldGenerateNotice() {
new CsvHeader(new String[] {"stop_id", "stop_name", "stop_id"}),
ImmutableSet.of("stop_id", "stop_name"),
ImmutableSet.of("stop_id"),
Set.of(),
container);

assertThat(container.getValidationNotices())
Expand All @@ -103,6 +124,7 @@ public void emptyFileShouldNotGenerateNotice() {
CsvHeader.EMPTY,
ImmutableSet.of("stop_id", "stop_name"),
ImmutableSet.of("stop_id"),
Set.of(),
container);

assertThat(container.getValidationNotices()).isEmpty();
Expand All @@ -118,6 +140,7 @@ public void emptyColumnNameShouldGenerateNotice() {
new CsvHeader(new String[] {"stop_id", null, "stop_name", ""}),
ImmutableSet.of("stop_id", "stop_name"),
ImmutableSet.of("stop_id"),
Set.of(),
container);

assertThat(container.getValidationNotices())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.mobilitydata.gtfsvalidator.annotation.Index;
import org.mobilitydata.gtfsvalidator.annotation.NonNegative;
import org.mobilitydata.gtfsvalidator.annotation.PrimaryKey;
import org.mobilitydata.gtfsvalidator.annotation.RecommendedColumn;
import org.mobilitydata.gtfsvalidator.annotation.Required;
import org.mobilitydata.gtfsvalidator.type.GtfsTime;

Expand Down Expand Up @@ -77,5 +78,6 @@ public interface GtfsStopTimeSchema extends GtfsEntity {
double shapeDistTraveled();

@DefaultValue("1")
@RecommendedColumn
GtfsStopTimeTimepoint timepoint();
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
* <li>{@link StopTimeTimepointWithoutTimesNotice} - a timepoint does not specifies arrival_time
* or departure_time
* <li>{@link MissingTimepointValueNotice} - value for {@code stop_times.timepoint} is missing
* <li>{@link MissingTimepointColumnNotice} - field {@code stop_times.timepoint} is missing
* </ul>
*/
@GtfsValidator
Expand All @@ -55,10 +54,9 @@ public class TimepointTimeValidator extends FileValidator {
public void validate(NoticeContainer noticeContainer) {
if (!stopTimes.hasColumn(GtfsStopTime.TIMEPOINT_FIELD_NAME)) {
// legacy datasets do not use timepoint column in stop_times.txt as a result:
// - this should be flagged;
// - this should be flagged in the header tests.
// - but also no notice regarding the absence of arrival_time or departure_time should be
// generated
noticeContainer.addValidationNotice(new MissingTimepointColumnNotice());
return;
}
for (GtfsStopTime stopTime : stopTimes.getEntities()) {
Expand Down Expand Up @@ -139,19 +137,4 @@ static class MissingTimepointValueNotice extends ValidationNotice {
this.stopSequence = stopTime.stopSequence();
}
}

/** `timepoint` column is missing for a dataset. */
@GtfsValidationNotice(
severity = WARNING,
files = @FileRefs(GtfsStopTimeSchema.class),
bestPractices = @FileRefs(GtfsStopTimeSchema.class))
static class MissingTimepointColumnNotice extends ValidationNotice {

/** The name of the affected file. */
private final String filename;

MissingTimepointColumnNotice() {
this.filename = GtfsStopTime.FILENAME;
}
}
}
Loading

0 comments on commit 2930520

Please sign in to comment.