From ce63e303bd002c3b0913895e3bdc836449263ae2 Mon Sep 17 00:00:00 2001 From: Renata Imai <53949163+r-akemii@users.noreply.github.com> Date: Wed, 12 Jun 2024 10:03:57 -0300 Subject: [PATCH] Modifies obligation of calendar file (#534) * Update gtfs_loader.py * modifies calendar import * Update service.py --- aequilibrae/transit/gtfs_loader.py | 145 +++++++++++------- .../transit/transit_elements/service.py | 27 ++-- .../aequilibrae/transit/test_gtfs_service.py | 6 +- 3 files changed, 106 insertions(+), 72 deletions(-) diff --git a/aequilibrae/transit/gtfs_loader.py b/aequilibrae/transit/gtfs_loader.py index eb3c17a52..8a9df49ac 100644 --- a/aequilibrae/transit/gtfs_loader.py +++ b/aequilibrae/transit/gtfs_loader.py @@ -499,76 +499,103 @@ def __load_routes_table(self): def __load_feed_calendar(self): self.logger.debug("Starting __load_feed_calendar") - - self.logger.debug(' Loading "calendar" table') self.services.clear() + has_cal, has_caldate = True, True + caltxt = "calendar.txt" - with self.zip_archive.open(caltxt, "r") as file: - calendar = parse_csv(file, column_order[caltxt]) - calendar["start_date"] = [datetime.fromisoformat(format_date(i)) for i in calendar["start_date"]] - calendar["end_date"] = [datetime.fromisoformat(format_date(i)) for i in calendar["end_date"]] - self.data_arrays[caltxt] = calendar - if np.unique(calendar["service_id"]).shape[0] < calendar.shape[0]: - self.__fail("There are repeated service IDs in calendar.txt") - - min_date = min(calendar["start_date"].tolist()) - max_date = max(calendar["end_date"].tolist()) - self.feed_dates = create_days_between(min_date, max_date) - - for line in calendar: - service = Service() - service._populate(line, calendar.dtype.names) - self.services[service.service_id] = service - - self.logger.debug(' Loading "calendar dates" table') - caldatetxt = "calendar_dates.txt" + if caltxt in self.zip_archive.namelist(): + self.logger.debug(' Loading "calendar" table') + with self.zip_archive.open(caltxt, "r") as file: + calendar = parse_csv(file, column_order[caltxt]) + + if calendar.shape[0] > 0: + calendar["start_date"] = [datetime.fromisoformat(format_date(i)) for i in calendar["start_date"]] + calendar["end_date"] = [datetime.fromisoformat(format_date(i)) for i in calendar["end_date"]] + self.data_arrays[caltxt] = calendar + if np.unique(calendar["service_id"]).shape[0] < calendar.shape[0]: + self.__fail("There are repeated service IDs in calendar.txt") + + min_date = min(calendar["start_date"].tolist()) + max_date = max(calendar["end_date"].tolist()) + self.feed_dates = create_days_between(min_date, max_date) + + for line in calendar: + service = Service() + service._populate(line, calendar.dtype.names, True) + self.services[service.service_id] = service + else: + self.logger.warning('"calendar.txt" file is empty') + has_cal = False + else: + self.logger.warning(f"{caltxt} not available in this feed") + has_cal = False + caldatetxt = "calendar_dates.txt" if caldatetxt not in self.zip_archive.namelist(): self.logger.warning(f"{caldatetxt} not available in this feed") - return + has_caldate = False - with self.zip_archive.open(caldatetxt, "r") as file: - caldates = parse_csv(file, column_order[caldatetxt]) + if not has_cal and not has_caldate: + raise FileNotFoundError('Missing "calendar" and "calendar_dates" in this feed') - if caldates.shape[0] == 0: - return + if has_caldate: + self.logger.debug(' Loading "calendar dates" table') - ordercal = list(column_order[caldatetxt].keys()) - exception_inconsistencies = 0 - for line in range(caldates.shape[0]): - service_id, sd, exception_type = list(caldates[line][ordercal]) + with self.zip_archive.open(caldatetxt, "r") as file: + caldates = parse_csv(file, column_order[caldatetxt]) - sd = format_date(sd) - - if service_id not in self.services: - s = Service() - s.service_id = service_id - self.services[service_id] = s - msg = " Service ({}) exists on calendar_dates.txt but not on calendar.txt" - self.logger.debug(msg.format(service.service_id)) - exception_inconsistencies += 1 - - service = self.services[service_id] - - if exception_type == 1: - if sd not in service.dates: - service.dates.append(sd) - else: - exception_inconsistencies += 1 - msg = "ignoring service ({}) addition on a day when the service is already active" - self.logger.debug(msg.format(service.service_id)) - elif exception_type == 2: - if sd in service.dates: - _ = service.dates.remove(sd) - else: - exception_inconsistencies += 1 - msg = "ignoring service ({}) removal on a day from which the service was absent" - self.logger.debug(msg.format(service.service_id)) + if caldates.shape[0] > 0 and not has_cal: + min_date = datetime.fromisoformat(format_date(min(caldates["date"].tolist()))) + max_date = datetime.fromisoformat(format_date(max(caldates["date"].tolist()))) + self.feed_dates = create_days_between(min_date, max_date) else: - self.__fail(f"illegal service exception type. {service.service_id}") - if exception_inconsistencies: - self.logger.info(" Minor inconsistencies found between calendar.txt and calendar_dates.txt") + self.logger.warning('"calendar_dates.txt" file is empty') + return + + exception_inconsistencies = 0 + for line in caldates: + sd = format_date(line["date"]) + + if has_cal: + if line["service_id"] not in self.services: + s = Service() + s.service_id = line["service_id"] + self.services[line["service_id"]] = s + msg = " Service ({}) exists on calendar_dates.txt but not on calendar.txt" + self.logger.debug(msg.format(line["service_id"].service_id)) + exception_inconsistencies += 1 + + service = self.services[line["service_id"]] + + if line["exception_type"] == 1: + if sd not in service.dates: + service.dates.append(sd) + else: + exception_inconsistencies += 1 + msg = "ignoring service ({}) addition on a day when the service is already active" + self.logger.debug(msg.format(service.service_id)) + elif line["exception_type"] == 2: + if sd in service.dates: + _ = service.dates.remove(sd) + else: + exception_inconsistencies += 1 + msg = "ignoring service ({}) removal on a day from which the service was absent" + self.logger.debug(msg.format(service.service_id)) + else: + self.__fail(f"illegal service exception type. {service.service_id}") + else: + # Insert only services available + if line["exception_type"] == 1: + if line["service_id"] not in self.services: + s = Service() + s._populate(line, caldates.dtype.names, False) + self.services[s.service_id] = s + else: + self.services[line["service_id"]].dates.append(sd) + + if exception_inconsistencies: + self.logger.info(" Minor inconsistencies found between calendar.txt and calendar_dates.txt") def __fail(self, msg: str) -> None: self.logger.error(msg) diff --git a/aequilibrae/transit/transit_elements/service.py b/aequilibrae/transit/transit_elements/service.py index 1eed2fecf..c5127f4af 100644 --- a/aequilibrae/transit/transit_elements/service.py +++ b/aequilibrae/transit/transit_elements/service.py @@ -1,5 +1,5 @@ from typing import List -from aequilibrae.transit.date_tools import create_days_between, day_of_week +from aequilibrae.transit.date_tools import create_days_between, day_of_week, format_date class Service: @@ -29,22 +29,29 @@ def __init__(self) -> None: self.sunday = 0 self.start_date = "" self.end_date = "" + self.date = "" + self.exception_type = 0 # Not part of GTFS self.dates = [] # type: List[str] - def _populate(self, record: tuple, headers: list) -> None: + def _populate(self, record: tuple, headers: list, from_cal: bool) -> None: + file = "calendar.txt" if from_cal else "calendar_dates.txt" + for key, value in zip(headers, record): if key not in self.__dict__.keys(): - raise KeyError(f"{key} field in calendar.txt is unknown field for that file on GTFS") + raise KeyError(f"{key} field in {file} is unknown field for that file on GTFS") self.__dict__[key] = value - if self.end_date < self.start_date: - raise ValueError(f"Service {self.service_id} has start date after end date") - days = [self.monday, self.tuesday, self.wednesday, self.thursday, self.friday, self.saturday, self.sunday] - dates = create_days_between(self.start_date, self.end_date) - for date in dates: - if days[day_of_week(date)]: - self.dates.append(date) + if from_cal: + if self.end_date < self.start_date: + raise ValueError(f"Service {self.service_id} has start date after end date") + dates = create_days_between(self.start_date, self.end_date) + for date in dates: + if days[day_of_week(date)]: + self.dates.append(date) + else: + dates = [format_date(self.date)] + self.dates.extend(dates) diff --git a/tests/aequilibrae/transit/test_gtfs_service.py b/tests/aequilibrae/transit/test_gtfs_service.py index 9b51566ff..aaf27f570 100644 --- a/tests/aequilibrae/transit/test_gtfs_service.py +++ b/tests/aequilibrae/transit/test_gtfs_service.py @@ -36,7 +36,7 @@ def data_dict(self, past, today): def test__populate(self, data_dict, today, past): s = Service() - s._populate(tuple(data_dict.values()), list(data_dict.keys())) + s._populate(tuple(data_dict.values()), list(data_dict.keys()), True) for key, val in s.__dict__.items(): if key in data_dict: assert val == data_dict[key], "Service population with record failed" @@ -51,10 +51,10 @@ def test__populate(self, data_dict, today, past): data_dict[key] = 0 s = Service() - s._populate(tuple(data_dict.values()), list(data_dict.keys())) + s._populate(tuple(data_dict.values()), list(data_dict.keys()), True) assert 0 == len(s.dates), "Returned too many dates for service" data_dict[randomword(randint(1, 15))] = randomword(randint(1, 20)) s = Service() with pytest.raises(KeyError): - s._populate(tuple(data_dict.values()), list(data_dict.keys())) + s._populate(tuple(data_dict.values()), list(data_dict.keys()), True)