TaskarCenterAtUW · susrisha · Nov 7, 2024 · Oct 14, 2024 · Oct 14, 2024 · Oct 14, 2024
diff --git a/.github/workflows/unit_tests.yaml b/.github/workflows/unit_tests.yaml
@@ -1,51 +1,42 @@
----
 name: Unit Tests
-
-#############################
-# Start the job on all push #
-#############################
 on:
+  workflow_dispatch:
   push:
     branches-ignore:
       - '**'
-    # Remove the line above to run when pushing to master
   pull_request:
-    branches: [master, dev, stage]
+    branches: [main, dev, stage]
 
-###############
-# Set the Job #
-###############
 jobs:
   UnitTest:
-    name: Unit Test Cases
-    # Set the agent to run on
     runs-on: ubuntu-latest
+
+    env:
+      DATABASE_NAME: test_database
+
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.10" # Use the appropriate  Python version
-
-      - name: Install dependencies
-        run: |
-          pip install -r requirements.txt
-
-      - name: Run unit tests
-        run: |
-          python test_report.py
-          coverage run --source=src -m unittest discover -s tests/
-          coverage report -m
-          exit_status=$?
-
-          # Set the exit status as an output for later use
-          echo "::set-output name=exit_status::$exit_status"
-
-      - name: Archive Coverage Report
-        if: ${{ always() }}  # Upload the coverage report even if tests fail
-        uses: actions/upload-artifact@v2
-        with:
-          name: htmlcov
-          path: htmlcov
+    # Current majors of the official actions; v2 targets a deprecated Node.js runtime.
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'
+
+    # Upgrade pip before installing the pinned requirements.
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+
+    # Discover tests under tests/unit_tests, measure coverage of src/, write the XML report.
+    - name: Run tests with coverage
+      run: |
+        coverage run --source=src -m unittest discover -s tests/unit_tests
+        coverage xml
+
+    # Fail the job when total coverage is below 85%.
+    - name: Check coverage
+      run: |
+        coverage report --fail-under=85
diff --git a/requirements.txt b/requirements.txt
@@ -3,4 +3,5 @@ pydantic==1.10.4
 python-ms-core==0.0.22
 uvicorn==0.20.0
 html_testRunner==1.2.1
-python-osw-validation==0.2.4
+geopandas==0.14.4
+python-osw-validation==0.2.7
diff --git a/src/osw_validator.py b/src/osw_validator.py
@@ -1,6 +1,5 @@
-import uuid
+import gc
 import logging
-import datetime
 import urllib.parse
 from typing import List
 from python_ms_core import Core
@@ -58,7 +57,7 @@ def validate(self, received_message: Upload):
                 if self.has_permission(roles=['tdei-admin', 'poc', 'osw_data_generator'],
                                        queue_message=received_message) is None:
                     error_msg = 'Unauthorized request !'
-                    logger.error(tdei_record_id, error_msg, received_message)
+                    logger.error(f'{tdei_record_id}, {error_msg}, {received_message}')
                     raise Exception(error_msg)
 
             file_upload_path = urllib.parse.unquote(received_message.data.file_upload_path)
@@ -89,6 +88,8 @@ def send_status(self, result: ValidationResult, upload_message: Upload):
             logger.info(f'Publishing message for : {upload_message.message_id}')
         except Exception as e:
             logger.error(f'Error occurred while publishing message for : {upload_message.message_id} with error: {e}')
+        finally:
+            gc.collect()
 
 
     def has_permission(self, roles: List[str], queue_message: Upload) -> bool:

diff --git a/src/validation.py b/src/validation.py
@@ -1,4 +1,6 @@
+import gc
 import os
+import time
 import shutil
 import logging
 import traceback
@@ -10,7 +12,7 @@
 
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 # Path used for download file generation.
-DOWNLOAD_FILE_PATH = f'{Path.cwd()}/downloads'
+DOWNLOAD_DIR = f'{Path.cwd()}/downloads'
 
 logging.basicConfig()
 logger = logging.getLogger('OSW_VALIDATION')
@@ -25,46 +27,55 @@ def __init__(self, file_path=None, storage_client=None):
         self.file_path = file_path
         self.file_relative_path = file_path.split('/')[-1]
         self.client = self.storage_client.get_container(container_name=self.container_name)
+        is_exists = os.path.exists(DOWNLOAD_DIR)
+        unique_id = self.get_unique_id()
+        if not is_exists:
+            os.makedirs(DOWNLOAD_DIR)
+        self.unique_dir_path = os.path.join(DOWNLOAD_DIR, unique_id)
+        if not os.path.exists(self.unique_dir_path):
+            os.makedirs(self.unique_dir_path)
 
     def validate(self, max_errors=20) -> ValidationResult:
-        return self.is_osw_valid(max_errors)
+        try:
+            return self.is_osw_valid(max_errors)
+        finally:
+            Validation.clean_up(self.unique_dir_path)
 
     def is_osw_valid(self, max_errors) -> ValidationResult:
+        start_time = time.time()  # start of the duration reported in the log below
         result = ValidationResult()
         result.is_valid = False
         result.validation_message = ''
         root, ext = os.path.splitext(self.file_relative_path)
         if ext and ext.lower() == '.zip':
             downloaded_file_path = self.download_single_file(self.file_path)
-            logger.info(f' Downloaded file path: {downloaded_file_path}')
-            validator = OSWValidation(zipfile_path=downloaded_file_path)
-            validation_result = validator.validate(max_errors)
-            result.is_valid = validation_result.is_valid
-            if not result.is_valid:
-                result.validation_message = validation_result.errors
-                logger.error(f' Error While Validating File: {str(validation_result.errors)}')
-            Validation.clean_up(downloaded_file_path)
+            if downloaded_file_path:
+                logger.info(f' Downloaded file path: {downloaded_file_path}')
+                validator = OSWValidation(zipfile_path=downloaded_file_path)
+                validation_result = validator.validate(max_errors)
+                result.is_valid = validation_result.is_valid
+                if not result.is_valid:
+                    result.validation_message = validation_result.errors
+                    logger.error(f' Error While Validating File: {str(validation_result.errors)}')
+                Validation.clean_up(downloaded_file_path)
+            else:  # extension was .zip but download_single_file returned no path
+                result.validation_message = 'Failed to validate because the file could not be downloaded'
+                logger.error(f' Failed to download file: {self.file_path}')
         else:
             result.validation_message = 'Failed to validate because unknown file format'
             logger.error(f' Failed to validate because unknown file format')
-
+        time_taken = time.time() - start_time
+        logger.info(f'Validation completed in {time_taken} seconds')
+        gc.collect()
         return result
 
     # Downloads the single file into a unique directory
     def download_single_file(self, file_upload_path=None) -> str:
-        is_exists = os.path.exists(DOWNLOAD_FILE_PATH)
-        unique_id = self.get_unique_id()
-        if not is_exists:
-            os.makedirs(DOWNLOAD_FILE_PATH)
-        unique_directory = os.path.join(DOWNLOAD_FILE_PATH,unique_id)
-        if not os.path.exists(unique_directory):
-            os.makedirs(unique_directory)
-
         file = self.storage_client.get_file_from_url(self.container_name, file_upload_path)
         try:
             if file.file_path:
                 file_path = os.path.basename(file.file_path)
-                local_download_path = os.path.join(unique_directory,file_path)
+                local_download_path = os.path.join(self.unique_dir_path, file_path)
                 with open(local_download_path, 'wb') as blob:
                     blob.write(file.get_stream())
                 logger.info(f' File downloaded to location: {local_download_path}')
@@ -74,14 +85,14 @@ def download_single_file(self, file_upload_path=None) -> str:
         except Exception as e:
             traceback.print_exc()
             logger.error(e)
+        finally:
+            gc.collect()
 
     # Generates a unique string for directory
     def get_unique_id(self) -> str:
         unique_id = uuid.uuid1().hex[0:24]
         return unique_id
 
-
-
     @staticmethod
     def clean_up(path):
         if os.path.isfile(path):
@@ -91,3 +102,4 @@ def clean_up(path):
             # folder = os.path.join(DOWNLOAD_FILE_PATH, path)
             logger.info(f' Removing Folder: {path}')
             shutil.rmtree(path, ignore_errors=False)
+        gc.collect()
diff --git a/tests/unit_tests/test_queue_message_content.py b/tests/unit_tests/test_queue_message_content.py
@@ -19,6 +19,10 @@ def setUp(self):
         data = TEST_DATA
         self.upload = Upload(data)
 
+    def test_message(self):
+        self.upload.message = 'New message'
+        self.assertEqual(self.upload.message, 'New message')
+
     def test_message_type(self):
         self.assertEqual(self.upload.message_type, 'workflow_identifier')
         self.upload.message_type = 'New messageType'