Skip to content

Commit 7f90b36

Browse files
authored
Merge pull request #74 from TaskarCenterAtUW/stage
Stage to Prod
2 parents dc4b540 + 05ae53c commit 7f90b36

17 files changed

+494
-175
lines changed

.github/workflows/unit_tests.yaml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,25 @@ jobs:
1616
with:
1717
fetch-depth: 0
1818

19-
- name: Setup Virtual Environment
20-
run: |
21-
python3.10 -m venv .venv
22-
source .venv/bin/activate
19+
- name: Set up Python
20+
uses: actions/setup-python@v2
21+
with:
22+
python-version: '3.10'
2323

2424
- name: Install Dependencies
2525
run: |
2626
python -m pip install --upgrade pip
2727
pip install -r requirements.txt
2828
pip install numpy==1.26.4
2929
30-
- name: Run Unit Test Cases
31-
run: python test_report.py
30+
- name: Run tests with coverage
31+
run: |
32+
coverage run --source=src -m unittest discover -s tests/unit_tests/
33+
coverage xml
34+
35+
- name: Check coverage
36+
run: |
37+
coverage report --fail-under=85
3238
3339
#- name: Run Coverage Report
3440
# run: coverage run --source=src -m unittest discover -s tests/unit_tests

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ Follow the steps to install the node packages required for both building and run
4848
# Installing requirements
4949
pip install -r requirements.txt
5050
```
51+
52+
NOTE: if you have problems building on a Mac, e.g. with uamqb, see here: https://github.com/Azure/azure-uamqp-python/issues/386
53+
5154
### How to Run the Server/APIs
5255
5356
1. The http server by default starts with `8000` port

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
fastapi~=0.111.1
2-
pydantic==1.10.4
2+
pydantic==1.10.16
33
html_testRunner==1.2.1
44
uvicorn==0.20.0
55
python-ms-core==0.0.22
6-
tcat-gtfs-csv-validator~=0.0.38
6+
gtfs-canonical-validator==0.0.5

src/flex_config.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# in an effort to be more permissive of small errors, accept these which could conceivably be calculated/fixed/interpreted by common applications
2+
CHANGE_ERROR_TO_WARNING = [
3+
'block_trips_with_overlapping_stop_times',
4+
'trip_distance_exceeds_shape_distance',
5+
'decreasing_or_equal_stop_time_distance',
6+
'decreasing_shape_distance',
7+
'empty_file',
8+
'equal_shape_distance_diff_coordinates',
9+
'fare_transfer_rule_duration_limit_type_without_duration_limit',
10+
'fare_transfer_rule_duration_limit_without_type',
11+
'fare_transfer_rule_invalid_transfer_count',
12+
'fare_transfer_rule_missing_transfer_count',
13+
'fare_transfer_rule_with_forbidden_transfer_count',
14+
'forbidden_shape_dist_traveled',
15+
'invalid_currency',
16+
'invalid_currency_amount',
17+
'invalid_url',
18+
'location_with_unexpected_stop_time',
19+
'missing_trip_edge',
20+
'new_line_in_value',
21+
'point_near_origin',
22+
'point_near_pole',
23+
'route_both_short_and_long_name_missing',
24+
'route_networks_specified_in_more_than_one_file',
25+
'start_and_end_range_equal',
26+
'start_and_end_range_out_of_order',
27+
'station_with_parent_station',
28+
'stop_time_timepoint_without_times',
29+
'stop_time_with_arrival_before_previous_departure_time',
30+
'stop_time_with_only_arrival_or_departure_time',
31+
'stop_without_location',
32+
'timeframe_only_start_or_end_time_specified',
33+
'timeframe_overlap',
34+
'timeframe_start_or_end_time_greater_than_twenty_four_hours',
35+
'u_r_i_syntax_error'
36+
]
37+
38+
FLEX_FATAL_ERROR_CODES = [
39+
'missing_required_element',
40+
'unsupported_feature_type',
41+
'unsupported_geo_json_type',
42+
'unsupported_geometry_type',
43+
'invalid_geometry',
44+
'forbidden_prior_day_booking_field_value',
45+
'forbidden_prior_notice_start_day',
46+
'forbidden_prior_notice_start_time',
47+
'forbidden_real_time_booking_field_value',
48+
'forbidden_same_day_booking_field_value',
49+
'invalid_prior_notice_duration_min',
50+
'missing_prior_day_booking_field_value',
51+
'missing_prior_notice_duration_min',
52+
'missing_prior_notice_start_time',
53+
'prior_notice_last_day_after_start_day'
54+
]
55+
56+
FLEX_FIELDS = {
57+
'stop_times.txt': [
58+
'start_pickup_dropoff_window',
59+
'end_pickup_dropoff_window',
60+
'pickup_booking_rule_id',
61+
'drop_off_booking_rule_id',
62+
'mean_duration_factor',
63+
'mean_duration_offset',
64+
'safe_duration_factor',
65+
'safe_duration_offset'
66+
]
67+
}
68+
69+
FLEX_FILES = [
70+
'locations.geojson',
71+
'booking_rules.txt',
72+
'location_groups.txt',
73+
'location_group_stops.txt'
74+
75+
]

src/gtfs_flex_validation.py

Lines changed: 58 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
from pathlib import Path
66
from typing import Union, Any
77
from .config import Settings
8-
9-
from tcat_gtfs_csv_validator import gcv_test_release
10-
from tcat_gtfs_csv_validator import exceptions as gcvex
8+
from gtfs_canonical_validator import CanonicalValidator
9+
from .flex_config import CHANGE_ERROR_TO_WARNING, FLEX_FATAL_ERROR_CODES, FLEX_FIELDS, FLEX_FILES
1110

1211
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
1312
# Path used for download file generation.
@@ -17,18 +16,19 @@
1716
logger = logging.getLogger('FLEX_VALIDATION')
1817
logger.setLevel(logging.INFO)
1918

20-
DATA_TYPE = 'gtfs_flex'
21-
SCHEMA_VERSION = 'v2.0'
22-
2319

2420
class GTFSFlexValidation:
25-
def __init__(self, file_path=None, storage_client=None):
21+
def __init__(self, file_path=None, storage_client=None, prefix=None):
2622
self.settings = Settings()
2723
self.container_name = self.settings.storage_container_name
2824
self.storage_client = storage_client
2925
self.file_path = file_path
3026
self.file_relative_path = file_path.split('/')[-1]
3127
self.client = self.storage_client.get_container(container_name=self.container_name)
28+
if prefix:
29+
self.prefix = prefix
30+
else:
31+
self.prefix = self.settings.get_unique_id()
3232

3333
# Facade function to validate the file
3434
# Focuses on the file name with file name validation
@@ -46,31 +46,69 @@ def is_gtfs_flex_valid(self) -> tuple[Union[bool, Any], Union[str, Any]]:
4646
if ext and ext.lower() == '.zip':
4747
downloaded_file_path = self.download_single_file(self.file_path)
4848
logger.info(f' Downloaded file path: {downloaded_file_path}')
49-
try:
50-
gcv_test_release.test_release(DATA_TYPE, SCHEMA_VERSION, downloaded_file_path)
51-
is_valid = True
52-
except Exception as err:
53-
traceback.print_exc()
54-
validation_message = str(err)
55-
logger.error(f' Error While Validating File: {str(err)}')
49+
flex_validator = CanonicalValidator(zip_file=downloaded_file_path)
50+
result = flex_validator.validate()
51+
52+
is_valid = result.status
53+
if isinstance(result.error, list) and result.error is not None:
54+
for error in result.error[:]:
55+
# change some smaller errors to warnings instead to relax the strict validation MD gives us
56+
if error['code'] in CHANGE_ERROR_TO_WARNING:
57+
if result.info is None: result.info = []
58+
result.info.append(error)
59+
result.error.remove(error)
60+
continue
61+
62+
# these are error codes from MD that relate to pathways that are fatal
63+
if error['code'] in FLEX_FATAL_ERROR_CODES:
64+
is_valid = False
65+
continue
66+
67+
# some of the notices relate to pathways, but there's no way to tell except with this logic:
68+
for notice in error['sampleNotices']:
69+
# one of the fields in a given file is a pathway-spec field--if it's flagged, fail
70+
if "fieldName" in notice and "filename" in notice:
71+
if notice['filename'] in FLEX_FIELDS and \
72+
notice['fieldName'] in FLEX_FIELDS[notice['filename']]:
73+
is_valid = False
74+
continue
75+
76+
# one of the pathways spec'd files has an error--if so, fail
77+
if "filename" in notice:
78+
if notice['filename'] in FLEX_FILES:
79+
is_valid = False
80+
continue
81+
82+
# similar to the above, but the field for the filename is parent/child
83+
if "childFilename" in notice:
84+
if notice['childFilename'] in FLEX_FILES:
85+
is_valid = False
86+
continue
87+
88+
# if all errors have been downgraded to warnings, mark us as a success
89+
if len(result.error) == 0:
90+
is_valid = True
91+
92+
if result.error is not None:
93+
validation_message = str(result.error)
94+
logger.error(f' Error While Validating File: {str(result.error)}')
95+
5696
GTFSFlexValidation.clean_up(downloaded_file_path)
5797
else:
5898
logger.error(f' Failed to validate because unknown file format')
5999

60100
return is_valid, validation_message
61101

62102
# Downloads the file to local folder of the server
63-
# file_upload_path is the fullUrl of where the
103+
# file_upload_path is the fullUrl of where the
64104
# file is uploaded.
65105
def download_single_file(self, file_upload_path=None) -> str:
66106
is_exists = os.path.exists(DOWNLOAD_FILE_PATH)
67107
if not is_exists:
68108
os.makedirs(DOWNLOAD_FILE_PATH)
69-
70-
unique_folder = self.settings.get_unique_id()
109+
unique_folder = self.prefix
71110
dl_folder_path = os.path.join(DOWNLOAD_FILE_PATH, unique_folder)
72111

73-
# Ensure the unique folder path is created
74112
os.makedirs(dl_folder_path, exist_ok=True)
75113

76114
file = self.storage_client.get_file_from_url(self.container_name, file_upload_path)
@@ -95,6 +133,5 @@ def clean_up(path):
95133
logger.info(f' Removing File: {path}')
96134
os.remove(path)
97135
else:
98-
folder = os.path.join(DOWNLOAD_FILE_PATH, path)
99-
logger.info(f' Removing Folder: {folder}')
100-
shutil.rmtree(folder, ignore_errors=False)
136+
logger.info(f' Removing Folder: {path}')
137+
shutil.rmtree(path, ignore_errors=False)

src/gtfx_flex_validator.py

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
1+
import os
2+
import gc
13
import logging
2-
import uuid
4+
import threading
35
import urllib.parse
4-
from python_ms_core import Core
5-
from python_ms_core.core.queue.models.queue_message import QueueMessage
6+
from pathlib import Path
67
from .config import Settings
7-
from .gtfs_flex_validation import GTFSFlexValidation
8-
from .serializer.gtfx_flex_serializer import GTFSFlexUpload
8+
from python_ms_core import Core
99
from .models.file_upload_msg import FileUploadMsg
10-
import threading
11-
import time
10+
from .gtfs_flex_validation import GTFSFlexValidation
11+
from python_ms_core.core.queue.models.queue_message import QueueMessage
1212

1313
logging.basicConfig()
1414
logger = logging.getLogger('FLEX_VALIDATOR')
1515
logger.setLevel(logging.INFO)
1616

17+
DOWNLOAD_FILE_PATH = f'{Path.cwd()}/downloads'
1718

1819
class GTFSFlexValidator:
1920
_settings = Settings()
@@ -22,8 +23,8 @@ def __init__(self):
2223
self.core = Core()
2324
self.settings = Settings()
2425
self._subscription_name = self.settings.request_subscription
25-
self.request_topic = self.core.get_topic(topic_name=self.settings.request_topic_name,max_concurrent_messages=self.settings.max_concurrent_messages)
26-
# self.response_topic = self.core.get_topic(topic_name=self.settings.response_topic_name)
26+
self.request_topic = self.core.get_topic(topic_name=self.settings.request_topic_name,
27+
max_concurrent_messages=self.settings.max_concurrent_messages)
2728
self.logger = self.core.get_logger()
2829
self.storage_client = self.core.get_storage_client()
2930
self.listening_thread = threading.Thread(target=self.subscribe)
@@ -41,37 +42,45 @@ def process(message) -> None:
4142
logger.info(' No Message')
4243

4344
self.request_topic.subscribe(subscription=self._subscription_name, callback=process)
44-
45-
def process_message(self, upload_msg: FileUploadMsg) -> None:
4645

46+
def process_message(self, upload_msg: FileUploadMsg) -> None:
4747
try:
4848
file_upload_path = urllib.parse.unquote(upload_msg.data.file_upload_path)
49-
logger.info(f' Received message for Project Group: {upload_msg.data.tdei_project_group_id}')
49+
logger.info(f' Message ID: {upload_msg.messageId}')
5050
logger.info(file_upload_path)
5151
if file_upload_path:
5252
# Do the validation in the other class
53-
validator = GTFSFlexValidation(file_path=file_upload_path, storage_client=self.storage_client)
53+
validator = GTFSFlexValidation(file_path=file_upload_path, storage_client=self.storage_client, prefix=upload_msg.messageId)
5454
validation = validator.validate()
55-
self.send_status(valid=validation[0], upload_message=upload_msg,
56-
validation_message=validation[1])
55+
self.send_status(
56+
valid=validation[0],
57+
upload_message=upload_msg,
58+
validation_message=validation[1]
59+
)
5760
else:
5861
logger.info(' No file Path found in message!')
5962
except Exception as e:
6063
logger.error(f' Error processing message: {e}')
6164
self.send_status(valid=False, upload_message=upload_msg, validation_message=str(e))
65+
finally:
66+
folder_to_delete = os.path.join(DOWNLOAD_FILE_PATH, upload_msg.messageId)
67+
GTFSFlexValidation.clean_up(folder_to_delete)
68+
gc.collect()
69+
6270

6371
def send_status(self, valid: bool, upload_message: FileUploadMsg, validation_message: str = '') -> None:
6472
response_message = {
65-
"file_upload_path": upload_message.data.file_upload_path,
66-
"user_id": upload_message.data.user_id ,
67-
"tdei_project_group_id": upload_message.data.tdei_project_group_id,
68-
"success": valid,
69-
"message": validation_message
70-
}
71-
logger.info(f' Publishing new message with ID: {upload_message.messageId} with status: {valid} and Message: {validation_message}')
73+
'file_upload_path': upload_message.data.file_upload_path,
74+
'user_id': upload_message.data.user_id,
75+
'tdei_project_group_id': upload_message.data.tdei_project_group_id,
76+
'success': valid,
77+
'message': validation_message
78+
}
79+
logger.info(
80+
f' Publishing new message with ID: {upload_message.messageId} with status: {valid} and Message: {validation_message}')
7281
data = QueueMessage.data_from({
7382
'messageId': upload_message.messageId,
74-
'message': 'Validation complete',
83+
'message': 'Validation complete',
7584
'messageType': upload_message.messageType,
7685
'data': response_message
7786
})
@@ -85,4 +94,4 @@ def send_response(self, data: QueueMessage) -> None:
8594
logger.error(f'Error sending response: {e}')
8695

8796
def stop_listening(self):
88-
self.listening_thread.join(timeout=0)
97+
self.listening_thread.join(timeout=0)

src/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ async def startup_event(settings: Settings = Depends(get_settings)) -> None:
3838
@app.on_event('shutdown')
3939
async def shutdown_event():
4040
if app.flex_validator:
41-
app.flex_validator.shutdown()
41+
app.flex_validator.stop_listening()
4242

4343

4444
@app.get('/', status_code=status.HTTP_200_OK)

0 commit comments

Comments
 (0)