Skip to content

Commit

Permalink
Use proper configurations for csv reading/writing (#427)
Browse files Browse the repository at this point in the history
  • Loading branch information
jessebrennan committed Nov 15, 2019
1 parent 3bc5ebe commit 5b80cbb
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 42 deletions.
30 changes: 14 additions & 16 deletions hca/dss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import functools
import json
from collections import defaultdict, namedtuple
import csv
import concurrent.futures
from datetime import datetime
from fnmatch import fnmatchcase
Expand All @@ -24,6 +23,7 @@

from hca.dss.util import iter_paths, object_name_builder, hardlink
from glob import escape as glob_escape
from hca.util import tsv
from ..util import SwaggerClient, DEFAULT_THREAD_COUNT
from ..util.exceptions import SwaggerAPIException
from .. import logger
Expand Down Expand Up @@ -396,18 +396,16 @@ def download_collection(self, uuid, replica, version=None, download_dir=''):
collection = self._serialize_col_to_manifest(uuid, replica, version)
# Explicitly declare mode `w` (default `w+b`) for Python 3 string compat
with tempfile.NamedTemporaryFile(mode='w') as manifest:
tsv = csv.DictWriter(manifest,
fieldnames=('bundle_uuid',
'bundle_version',
'file_name',
'file_sha256',
'file_uuid',
'file_version',
'file_size'),
delimiter='\t',
quoting=csv.QUOTE_NONE)
tsv.writeheader()
tsv.writerows(collection)
writer = tsv.DictWriter(manifest,
fieldnames=('bundle_uuid',
'bundle_version',
'file_name',
'file_sha256',
'file_uuid',
'file_version',
'file_size'))
writer.writeheader()
writer.writerows(collection)
# Flushing the I/O buffer here is preferable to closing the file
# handle and deleting the temporary file later because within the
# context manager there is a guarantee that the temporary file
Expand Down Expand Up @@ -737,7 +735,7 @@ def _download_manifest_tasks(self, no_metadata, no_data):
with open(self.manifest) as f:
bundles = defaultdict(set)
# unicode_literals is on so all strings are unicode. CSV wants a str so we need to jump through a hoop.
reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
reader = tsv.DictReader(f)
for row in reader:
bundles[(row['bundle_uuid'], row['bundle_version'])].add(row['file_name'])
for (bundle_uuid, bundle_version), data_files in bundles.items():
Expand All @@ -763,7 +761,7 @@ def _write_output_manifest(self):
if 'file_path' not in fieldnames:
fieldnames.append('file_path')
with atomic_write(output, overwrite=True, newline='') as f:
writer = csv.DictWriter(f, fieldnames, delimiter='\t', quoting=csv.QUOTE_NONE)
writer = tsv.DictWriter(f, fieldnames)
writer.writeheader()
for row in source_manifest:
row['file_path'] = self._file_path(row['file_sha256'], self.download_dir)
Expand All @@ -775,5 +773,5 @@ def _write_output_manifest(self):
@classmethod
def _parse_manifest(cls, manifest):
with open(manifest) as f:
reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
reader = tsv.DictReader(f)
return reader.fieldnames, list(reader)
11 changes: 11 additions & 0 deletions hca/util/tsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import csv


# Wrap the csv library with our required options

def DictReader(f):
    """Return a ``csv.DictReader`` over *f* using the ``excel-tab`` dialect.

    :param f: an iterable of text lines (typically an open text-mode file)
        holding tab-separated values, with the header in the first row.
    :return: a ``csv.DictReader`` yielding one mapping per data row.
    """
    # The 'excel-tab' dialect already specifies a tab delimiter, so no
    # separate ``delimiter`` argument is needed.
    tsv_options = {'dialect': 'excel-tab'}
    return csv.DictReader(f, **tsv_options)


def DictWriter(f, fieldnames):
    """Return a ``csv.DictWriter`` on *f* using the ``excel-tab`` dialect.

    :param f: any object with a ``write()`` method (typically a text-mode
        file opened with ``newline=''``).
    :param fieldnames: sequence of column names; fixes the column order of
        the header and of every row written.
    :return: a ``csv.DictWriter`` that emits tab-separated rows.
    """
    # The 'excel-tab' dialect already specifies a tab delimiter, so no
    # separate ``delimiter`` argument is needed.
    tsv_options = {'dialect': 'excel-tab'}
    return csv.DictWriter(f, fieldnames, **tsv_options)
38 changes: 19 additions & 19 deletions test/integration/dss/test_dss_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# coding: utf-8
import errno
from concurrent.futures import ThreadPoolExecutor
import csv
import datetime
import filecmp
from hca.util import tsv
import itertools
import os
import sys
Expand Down Expand Up @@ -163,25 +163,25 @@ def test_python_manifest_download(self):
os.chdir(work_dir)
try:
with open('manifest.tsv', 'w', newline='') as manifest:
tsv = csv.DictWriter(manifest,
fieldnames=('bundle_uuid',
'bundle_version',
'file_name',
'file_sha256'),
delimiter='\t',
quoting=csv.QUOTE_NONE)
tsv.writeheader()
tsv.writerow(dict(bundle_uuid=bundle_uuid,
bundle_version=bundle_version,
file_name=data_files[0],
file_sha256=
'9b4c0dde8683f924975d0867903dc7a967f46bee5c0a025c451b9ba73e43f120'))
writer = tsv.DictWriter(manifest,
fieldnames=('bundle_uuid',
'bundle_version',
'file_name',
'file_sha256'))
writer.writeheader()
writer.writerow(dict(bundle_uuid=bundle_uuid,
bundle_version=bundle_version,
file_name=data_files[0],
file_sha256=
'9b4c0dde8683f924975d0867903dc7a9'
'67f46bee5c0a025c451b9ba73e43f120'))
if bad_bundle:
tsv.writerow(dict(bundle_uuid=str(uuid.uuid4()),
bundle_version=bundle_version,
file_name=data_files[0],
file_sha256=
'9b4c0dde8683f924975d0867903dc7a967f46bee5c0a025c451b9ba73e43f120'))
writer.writerow(dict(bundle_uuid=str(uuid.uuid4()),
bundle_version=bundle_version,
file_name=data_files[0],
file_sha256=
'9b4c0dde8683f924975d0867903dc7a9'
'67f46bee5c0a025c451b9ba73e43f120'))

dest_dir = os.path.join(work_dir, bundle_fqid)
try:
Expand Down
12 changes: 5 additions & 7 deletions test/tutorial/scripts/api/download_manifest_api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from hca.dss import DSSClient
import os
import csv
from hca.util import tsv
import json
import pprint
from get_bundle_api import fetch_bundle, save_bundle, BUNDLE_JSON
Expand All @@ -12,7 +12,7 @@
save_bundle(bundle)

with open("manifest.tsv", "w", newline='') as manifest:
tsv = csv.DictWriter(
writer = tsv.DictWriter(
manifest,
fieldnames=(
"bundle_uuid",
Expand All @@ -22,11 +22,9 @@
"file_version",
"file_sha256",
"file_size",
),
delimiter="\t",
quoting=csv.QUOTE_NONE,
)
)
tsv.writeheader()
writer.writeheader()

with open(BUNDLE_JSON, "w") as jsonfile:
try:
Expand All @@ -38,7 +36,7 @@
pprint.pprint(data)
for content in data["bundle"]["files"]:
if content["name"].endswith(".json"):
tsv.writerow(
writer.writerow(
dict(
bundle_uuid=bundle_uuid,
bundle_version=bundle_version,
Expand Down

0 comments on commit 5b80cbb

Please sign in to comment.