Skip to content

Commit

Permalink
Revert "Revert "OpenConceptLab/ocl_issues#1501 | Accepting zip format…
Browse files Browse the repository at this point in the history
… in importers""

This reverts commit 31872cf.
  • Loading branch information
snyaggarwal committed Jul 24, 2023
1 parent 31872cf commit efc7433
Show file tree
Hide file tree
Showing 8 changed files with 396 additions and 48 deletions.
4 changes: 2 additions & 2 deletions core/common/swagger_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@
default='true'
)
# Form parameter carrying the uploaded import file; the description lists the accepted formats.
file_upload_param = openapi.Parameter(
    'file', openapi.IN_FORM, description="JSON Content File (json, csv or zip)", type=openapi.TYPE_FILE
)
# Form parameter carrying the import file location as a string; same accepted formats.
file_url_param = openapi.Parameter(
    'file_url', openapi.IN_FORM, description="Import FILE URL (json, csv or zip)", type=openapi.TYPE_STRING
)
apps_param = openapi.Parameter(
'apps', openapi.IN_FORM, description="App Names (comma separated)", type=openapi.TYPE_STRING
Expand Down
27 changes: 26 additions & 1 deletion core/common/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
drop_version, is_versioned_uri, separate_version, to_parent_uri, jsonify_safe, es_get,
get_resource_class_from_resource_name, flatten_dict, is_csv_file, is_url_encoded_string, to_parent_uri_from_kwargs,
set_current_user, get_current_user, set_request_url, get_request_url, nested_dict_values, chunks, api_get,
split_list_by_condition)
split_list_by_condition, is_zip_file)
from core.concepts.models import Concept
from core.orgs.models import Organization
from core.sources.models import Source
Expand Down Expand Up @@ -523,6 +523,7 @@ def test_validate_identifier_with_wrong_type(self):
'text': 'Accession ID'},
'value': '/orgs/OCL/Code/1/'}])


class UtilsTest(OCLTestCase):
def test_set_and_get_current_user(self):
set_current_user(lambda self: 'foo')
Expand Down Expand Up @@ -874,6 +875,30 @@ def test_is_csv_file(self):
file_mock.name = 'unknown_file.csv'
self.assertTrue(is_csv_file(file=file_mock))

def test_is_zip_file(self):
    """is_zip_file accepts only names whose final extension is zip, given directly or via a file object."""
    # Plain names passed through the ``name`` keyword.
    for rejected_name in ('foo/bar', 'foo/bar.csv'):
        self.assertFalse(is_zip_file(name=rejected_name))
    for accepted_name in ('foo.zip', 'foo.csv.zip', 'foo.json.zip'):
        self.assertTrue(is_zip_file(name=accepted_name))

    # File-like objects: a single mock is reused and only its ``name`` changes.
    file_mock = Mock(spec=File)

    for rejected_name in ('unknown_file', 'unknown_file.json', 'unknown_file.csv'):
        file_mock.name = rejected_name
        self.assertFalse(is_zip_file(file=file_mock))

    for accepted_name in ('unknown_file.csv.zip', 'unknown_file.json.zip'):
        file_mock.name = accepted_name
        self.assertTrue(is_zip_file(file=file_mock))

def test_is_url_encoded_string(self):
self.assertTrue(is_url_encoded_string('foo'))
self.assertFalse(is_url_encoded_string('foo/bar'))
Expand Down
14 changes: 11 additions & 3 deletions core/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,12 +663,20 @@ def guess_extension(file=None, name=None):


def is_csv_file(file=None, name=None):
    """Check the given file object or name for the 'csv' extension via is_file_extension."""
    return is_file_extension('csv', file=file, name=name)


def is_zip_file(file=None, name=None):
    """Check the given file object or name for the 'zip' extension via is_file_extension."""
    return is_file_extension('zip', file=file, name=name)


def is_file_extension(extension, file=None, name=None):
    """
    Tell whether the extension guessed for ``file``/``name`` ends with ``extension``.

    :param extension: expected extension without the dot, e.g. 'csv' or 'zip'
    :param file: optional file object, forwarded to guess_extension
    :param name: optional file name or URL, forwarded to guess_extension
    :return: True when the guessed extension ends with ``extension``, otherwise False
             (including when neither ``file`` nor ``name`` is given)
    """
    if not file and not name:
        return False

    # Keep the guessed value in its own variable so the ``extension`` argument is not overwritten.
    file_extension = guess_extension(file=file, name=name)

    # bool(...) keeps the result a strict boolean even when nothing could be guessed.
    return bool(file_extension) and file_extension.endswith(extension)


def is_url_encoded_string(string, lower=True):
Expand Down
104 changes: 104 additions & 0 deletions core/importers/input_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import csv
import io
from zipfile import BadZipFile, ZipFile

import requests
from ocldev.oclcsvtojsonconverter import OclStandardCsvToJsonConverter
from pydash import get, compact

from core.common.utils import is_zip_file, is_csv_file


def csv_file_data_to_input_list(file_content):
    """Parse CSV text into a list with one mapping per data row, keyed by the header row."""
    reader = csv.DictReader(io.StringIO(file_content))
    return list(reader)


class ImportContentParser:
    """
    Turns one of the supported import inputs into ``self.content``.

    1. Processes json data from 'content' arg
    2. Processes json/csv/zip file url from 'file_url' arg
    3. Processes json/csv/zip file from 'file' arg

    Exactly one of the three arguments is expected. Problems are collected as
    human-readable strings in ``self.errors`` rather than raised; callers should
    check ``errors`` after calling ``parse()``.
    """
    def __init__(self, content=None, file_url=None, file=None):
        self.content = content
        self.file_url = file_url
        self.file = file
        self.file_name = get(self, 'file.name') if self.file else None
        self.errors = []
        self.extracted_file = None
        self.is_zip_file = False
        self.is_csv_file = False
        self.is_json_file = False

    def parse(self):
        """Validate the arguments, then load and convert the input into ``self.content``."""
        self.validate_args()
        if self.errors:
            # Arguments are already rejected; do not download or read anything.
            return
        self.set_content_type()
        self.set_content_from_file()
        if not self.errors and not self.content:
            self.errors.append('Invalid input.')

    def validate_args(self):
        """Record an error unless exactly one of content/file_url/file is truthy."""
        if len(compact([self.content, self.file_url, self.file])) != 1:
            self.errors.append('Invalid input.')

    def set_content_type(self):
        """Set the is_json/is_zip/is_csv flags from the inline content and the file name or URL."""
        self.is_json_file = bool(self.content)
        self.is_zip_file = is_zip_file(name=self.file_name or self.file_url)
        self.is_csv_file = is_csv_file(name=self.file_name or self.file_url)

    def set_content_from_file(self):
        """Resolve ``self.file`` (from the upload or by downloading ``file_url``) and read it."""
        if self.file:
            self.file_name = get(self, 'file.name')
        elif self.file_url:
            self.set_file_from_response(self.fetch_file_from_url())
        self.set_content()

    def fetch_file_from_url(self):
        """Download ``file_url``; return the response, or None (with an error recorded) on failure."""
        try:
            headers = {
                'User-Agent': 'OCL'  # user-agent required by mod_security on some servers
            }
            return requests.get(self.file_url, headers=headers, stream=True, timeout=30)
        except Exception as e:
            self.errors.append(f'Failed to download file from {self.file_url}, Exception: {e}.')
            return None

    def set_file_from_response(self, response):
        """Store the downloaded payload in ``self.file``, or record the failing HTTP status."""
        if get(response, 'ok'):
            if self.is_zip_file:
                self.file = io.BytesIO(response.content)
            else:
                self.file = response.text
        elif response is not None:
            # A requests.Response is falsy when its status is an error, so a plain
            # truthiness check would skip exactly the responses reported here.
            self.errors.append(f'Failed to download file from {self.file_url}, Status: {response.status_code}.')

    def set_content(self):
        """Read ``self.file`` into ``self.content``, unzipping and/or converting CSV as flagged."""
        if not self.file:
            return

        if self.is_zip_file:
            self.set_zipped_content()
            return

        # Downloaded non-zip payloads are stored as plain text (see set_file_from_response);
        # uploaded files are file-like objects that must be read.
        raw = self.file if isinstance(self.file, str) else self.file.read()
        if isinstance(raw, bytes):
            try:
                raw = raw.decode('utf-8')
            except UnicodeDecodeError as e:
                self.errors.append(f'Failed to decode file as UTF-8: {e}.')
                return

        self.content = raw
        if self.is_csv_file:
            self.set_csv_content()

    def set_csv_content(self):
        """Replace CSV text in ``self.content`` with the converter's output; record failures."""
        try:
            self.content = OclStandardCsvToJsonConverter(
                input_list=csv_file_data_to_input_list(self.content),
                allow_special_characters=True
            ).process()
        except Exception as e:
            self.errors.append(f'Failed to process CSV file: {e}.')

    def set_zipped_content(self):
        """Extract the single member of the zip in ``self.file`` into ``self.content``."""
        try:
            with ZipFile(self.file, 'r') as zip_file:
                filename_list = zip_file.namelist()
                if len(filename_list) != 1:
                    self.errors.append('Zip file must contain exactly one file.')
                    return

                member_name = filename_list[0]
                with zip_file.open(member_name) as file:
                    # NOTE: this handle is closed once the ``with`` block exits.
                    self.extracted_file = file
                    self.content = file.read().decode('utf-8')
        except (BadZipFile, UnicodeDecodeError) as e:
            self.errors.append(f'Failed to read zip file: {e}.')
            return

        if is_csv_file(name=member_name):
            self.set_csv_content()
11 changes: 7 additions & 4 deletions core/importers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,10 +1052,13 @@ def json_result(self):
}
for task in self.tasks:
if task.result:
result = task.result.get('json')
for key in total_result:
if result:
total_result[key] += result.get(key)
try:
result = task.result.get('json')
for key in total_result:
if result:
total_result[key] += result.get(key)
except:
pass

total_result['start_time'] = self.start_time_formatted
total_result['elapsed_seconds'] = self.elapsed_seconds
Expand Down

0 comments on commit efc7433

Please sign in to comment.