Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 67 additions & 26 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,68 @@
---
# Runs the unit test suite for pull requests that target the develop branch.
name: Tests

on:
  pull_request:
    branches:
      - develop

jobs:
  Tests:
    name: Unit tests
    # Runner image the job executes on
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is read as the string "3.10", not the float 3.1
          python-version: "3.10"

      - name: Install dependencies
        run: |
          pip install -r requirements.txt

      - name: Run unit tests
        run: |
          python -m unittest discover -v tests/unit_tests
# Runs the unit tests under coverage for pull requests that target develop or
# main, enforces the minimum coverage, and uploads the report to Azure Blob
# Storage in a folder chosen from the target branch.
name: Tests

on:
  pull_request:
    branches: [develop, main]

jobs:
  Tests:
    name: Unit tests
    # Set the agent to run on
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          pip install -r requirements.txt

      - name: Determine output folder
        id: set_output_folder
        run: |
          # For pull_request events the target branch is taken from
          # GITHUB_BASE_REF; for any other event GITHUB_REF_NAME is used.
          if [[ "$GITHUB_EVENT_NAME" == "pull_request" ]]; then
            branch_name="$GITHUB_BASE_REF"
          else
            branch_name="$GITHUB_REF_NAME"
          fi
          # NOTE(review): "stage" is mapped below but is not listed in the
          # pull_request branches above - confirm whether it should be.
          if [[ "$branch_name" == "main" ]]; then
            echo "output_folder=prod" >> "$GITHUB_ENV"
          elif [[ "$branch_name" == "stage" ]]; then
            echo "output_folder=stage" >> "$GITHUB_ENV"
          elif [[ "$branch_name" == "develop" ]]; then
            echo "output_folder=dev" >> "$GITHUB_ENV"
          else
            echo "Unknown branch: $branch_name"
            exit 1
          fi

      - name: Run tests with coverage
        run: |
          # Output is piped through tee below; pipefail keeps a failing test
          # run failing this step instead of being masked by tee's exit code.
          set -o pipefail
          timestamp=$(date '+%Y-%m-%d_%H-%M-%S')
          mkdir -p test_results
          log_file="test_results/${timestamp}_report.log"
          echo -e "\nTest Cases Report\n" >> "$log_file"
          # Run the tests; tee appends the output to the log file while
          # keeping it visible in the job log for debugging failures.
          python -m coverage run --source=src/python_osw_validation \
            -m unittest discover -v tests/unit_tests 2>&1 | tee -a "$log_file"
          echo -e "\nCoverage Report\n" >> "$log_file"
          coverage report | tee -a "$log_file"

      - name: Check coverage
        run: |
          coverage report --fail-under=85

      - name: Upload report to Azure
        # Upload the report even when the tests or the coverage check failed;
        # hashFiles() is empty when no report file exists, which skips the
        # upload if an earlier step failed before test_results was written.
        if: ${{ always() && hashFiles('test_results/**') != '' }}
        uses: LanceMcCarthy/Action-AzureBlobUpload@v2
        with:
          source_folder: 'test_results'
          destination_folder: '${{ env.output_folder }}'
          connection_string: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
          container_name: 'python-osw-validation-package'
          clean_destination_folder: false
          delete_if_exists: false

28 changes: 21 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,31 @@
# Change log

### 0.2.8

- Fixed geopandas version to `0.14.4`.
- Latest geopandas version `0.10.0` is not compatible and fails to parse the zones.
- Added unit test cases for valid and invalid zone files

### 0.2.7

- Switch to `jsonschema_rs` for performance enhancement, instead of `jsonschema` package
- Refactor code to improve memory utilization
- Added garbage collector


### 0.2.6
- Add garbage collection to free up memory after validation

- Add garbage collection to free up memory after validation

### 0.2.5
- Updated geopandas package

- Updated geopandas package

### 0.2.3
- Performance improvement if there are any errors

- Performance improvement if there are any errors

### 0.2.2

- Added functionality to get the specific number of errors
```
validator = OSWValidation(zipfile_path=<ZIP_FILE_PATH>)
Expand All @@ -27,10 +34,12 @@
```

### 0.2.1

- Updated zipfile_handler
- Fixed "No .geojson files found in the specified directory or its subdirectories." issue

### 0.2.0

- Updated schema file to OSW 0.2
- Added create_zip method to ZipFileHandler
- Made all OSW files optional
Expand All @@ -40,20 +49,25 @@
- Aggregate schema errors and data integrity errors separately before returning errors to user

### 0.0.5

- Support for multi-level geojson file
- Now handles the following two folder structures when unzipped abc.zip
1. abc\{nodes, edges, points}.geojson
2. {nodes, edges, points}.geojson
1. abc\{nodes, edges, points}.geojson
2. {nodes, edges, points}.geojson

### 0.0.4

- Points are not required for a valid OSW dataset

### 0.0.3

- Added schema file to package

### 0.0.2

- Updated package Unit test cases.
- Updated README file

### 0.0.1

- Initial version of python_osw_validation package.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
jsonschema_rs
zipfile36
coverage
geopandas
geopandas==0.14.4
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
long_description_content_type='text/markdown',
url='https://github.com/TaskarCenterAtUW/TDEI-python-lib-osw-validation',
install_requires=[
'jsonschema_rs',
'zipfile36',
'geopandas'
'jsonschema_rs==0.26.1',
'zipfile36==0.0.12',
'geopandas==0.14.4'
],
packages=find_packages(where='src'),
classifiers=[
Expand Down
2 changes: 2 additions & 0 deletions src/python_osw_validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Dict, Any, Optional, List
from .extracted_data_validator import ExtractedDataValidator, OSW_DATASET_FILES
from .version import __version__
import traceback

SCHEMA_PATH = os.path.join(os.path.dirname(__file__), 'schema')

Expand Down Expand Up @@ -149,6 +150,7 @@ def validate(self, max_errors=20) -> ValidationResult:
return ValidationResult(True)
except Exception as e:
self.errors.append(f'Unable to validate: {e}')
traceback.print_exc()
return ValidationResult(False, self.errors)
finally:
del OSW_DATASET
Expand Down
2 changes: 1 addition & 1 deletion src/python_osw_validation/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.2.7'
__version__ = '0.2.8'
25 changes: 12 additions & 13 deletions src/python_osw_validation/zipfile_handler.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os
import glob
import shutil
import tempfile
from typing import Optional
import zipfile36 as zipfile
import glob
from typing import Optional


class ZipFileHandler:
Expand All @@ -21,19 +21,19 @@ def create_zip(self, file_pattern) -> Optional[str]:
try:
# Build the full pattern with the directory
full_pattern = os.path.join(os.path.dirname(self.zip_file_path), file_pattern)

# Find all files in the directory matching the pattern
files_to_zip = glob.glob(full_pattern)

# Create a zip file and add matching files to it
with zipfile.ZipFile(self.zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
for file in files_to_zip:
archive_name = os.path.relpath(file, os.path.dirname(self.zip_file_path))
zipf.write(file, arcname=archive_name)

# Get the full path to the created zip file
full_zip_path = os.path.abspath(self.zip_file_path)

# Return the full path to the zip file
return full_zip_path
except Exception as e:
Expand All @@ -49,22 +49,21 @@ def extract_zip(self) -> Optional[str]:

if len(zip_ref.namelist()) == 0:
raise Exception('ZIP file is empty')

internal_folder_name = self.find_internal_folder(zip_ref)
return os.path.join(self.extracted_dir,internal_folder_name)
return os.path.join(self.extracted_dir, internal_folder_name)
except Exception as e:
self.error = f'Error extracting ZIP file: {e}'

# finds the first folder available in the extracted folder.
# returns empty if there are no folders inside
def find_internal_folder(self, zip_ref: zipfile.ZipFile)-> str:
def find_internal_folder(self, zip_ref: zipfile.ZipFile) -> str:
for filename in zip_ref.namelist():
path = os.path.join(self.extracted_dir,filename)
if(os.path.isdir(path)):
path = os.path.join(self.extracted_dir, filename)
if (os.path.isdir(path)):
return filename
return ''


def remove_extracted_files(self) -> None:
if self.extracted_dir and os.path.exists(self.extracted_dir):
shutil.rmtree(self.extracted_dir)
Expand Down
Binary file added tests/assets/UW.zones.invalid.zip
Binary file not shown.
Binary file added tests/assets/UW.zones.valid.zip
Binary file not shown.
Binary file added tests/assets/wa.bellevue.zip
Binary file not shown.
21 changes: 21 additions & 0 deletions tests/unit_tests/test_osw_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def setUp(self):
self.missing_identifier_zipfile = os.path.join(ASSETS_PATH, 'missing_identifier.zip')
self.no_entity_zipfile = os.path.join(ASSETS_PATH, 'no_entity.zip')
self.wrong_datatypes_zipfile = os.path.join(ASSETS_PATH, 'wrong_datatype.zip')
self.valid_zones_file = os.path.join(ASSETS_PATH, 'UW.zones.valid.zip')
self.invalid_zones_file = os.path.join(ASSETS_PATH, 'UW.zones.invalid.zip')
self.valid_osw_file = os.path.join(ASSETS_PATH, 'wa.bellevue.zip')
self.schema_file_path = SCHEMA_FILE_PATH
self.invalid_schema_file_path = INVALID_SCHEMA_FILE_PATH

Expand Down Expand Up @@ -205,6 +208,24 @@ def test_wrong_datatypes_zipfile(self):
self.assertFalse(result.is_valid)
self.assertIsNotNone(result.errors)

def test_valid_osw_file(self):
    # Validating the archive at self.valid_osw_file must succeed and
    # leave the result without any errors.
    outcome = OSWValidation(zipfile_path=self.valid_osw_file).validate()
    self.assertTrue(outcome.is_valid)
    self.assertIsNone(outcome.errors)

def test_valid_zones_file(self):
    # Validating the archive at self.valid_zones_file must succeed and
    # leave the result without any errors.
    outcome = OSWValidation(zipfile_path=self.valid_zones_file).validate()
    self.assertTrue(outcome.is_valid)
    self.assertIsNone(outcome.errors)

def test_invalid_zones_file(self):
    # Validating the archive at self.invalid_zones_file must fail and
    # report errors on the result.
    outcome = OSWValidation(zipfile_path=self.invalid_zones_file).validate()
    self.assertFalse(outcome.is_valid)
    self.assertIsNotNone(outcome.errors)


if __name__ == '__main__':
unittest.main()
38 changes: 38 additions & 0 deletions tests/unit_tests/test_zipfile_handler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import os
from unittest.mock import patch, MagicMock
from src.python_osw_validation.zipfile_handler import ZipFileHandler

PARENT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
Expand Down Expand Up @@ -39,6 +40,43 @@ def test_remove_extracted_files(self):
self.assertFalse(os.path.exists(extracted_dir))
self.assertIsNone(zip_handler.extracted_dir)

@patch('src.python_osw_validation.zipfile_handler.glob.glob')
@patch('src.python_osw_validation.zipfile_handler.zipfile.ZipFile')
def test_create_zip_success(self, mock_zipfile, mock_glob):
    # create_zip must write every file returned by glob into the archive
    # and return the absolute path of the zip file without setting `error`.
    zip_handler = ZipFileHandler(self.valid_zip_path)

    # Mock the glob function to return a list of files that match the pattern
    matched_files = ['file1.txt', 'file2.txt']
    mock_glob.return_value = matched_files

    # Mock the ZipFile object handed out by the `with` statement
    mock_zip = MagicMock()
    mock_zipfile.return_value.__enter__.return_value = mock_zip

    # Call the create_zip function with a file pattern
    zip_path = zip_handler.create_zip(file_pattern='*.txt')

    # Check if the zip file path is returned correctly
    self.assertEqual(zip_path, os.path.abspath(self.valid_zip_path))
    self.assertIsNone(zip_handler.error)

    # Each matched file is written exactly once. The expected archive name
    # is computed relative to the zip file's directory instead of being
    # hard-coded, so the test does not depend on the working directory the
    # suite is started from or on the platform's path separator.
    self.assertEqual(mock_zip.write.call_count, len(matched_files))
    zip_dir = os.path.dirname(self.valid_zip_path)
    for matched in matched_files:
        mock_zip.write.assert_any_call(matched, arcname=os.path.relpath(matched, zip_dir))

@patch('src.python_osw_validation.zipfile_handler.zipfile.ZipFile')
def test_create_zip_failure(self, mock_zipfile):
    # When opening the archive raises, create_zip must return None and
    # record the failure message on the handler's `error` attribute.
    handler = ZipFileHandler(self.valid_zip_path)

    # Make every ZipFile(...) call raise
    mock_zipfile.side_effect = Exception('Mocked error during zip creation')

    created_path = handler.create_zip(file_pattern='*.txt')

    self.assertIsNone(created_path)
    self.assertIn('Error creating ZIP file: Mocked error during zip creation', handler.error)


if __name__ == '__main__':
unittest.main()
Loading