Skip to content

Commit

Permalink
Implemented interface to csv.DictReader for #92
Browse files Browse the repository at this point in the history
  • Loading branch information
EntilZha committed Feb 2, 2017
1 parent ff09913 commit 452bfa6
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
14 changes: 14 additions & 0 deletions functional/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,20 @@ def csv(self, csv_file, dialect='excel', **fmt_params):
csv_input = csvapi.reader(input_file, dialect=dialect, **fmt_params)
return self(csv_input).cache(delete_lineage=True)

def csv_dict_reader(self, csv_file, fieldnames=None, restkey=None, restval=None,
                    dialect='excel', **kwds):
    """
    Reads csv_file through csvapi.DictReader and wraps the resulting rows in
    a cached stream.

    :param csv_file: either a str file path, which is opened with the
        function returned by get_read_function(csv_file,
        self.disable_compression), or an object exposing ``next`` or
        ``__next__``, which is handed to the reader as-is
    :param fieldnames: forwarded unchanged to csvapi.DictReader
    :param restkey: forwarded unchanged to csvapi.DictReader
    :param restval: forwarded unchanged to csvapi.DictReader
    :param dialect: forwarded unchanged to csvapi.DictReader
    :param kwds: any extra keyword arguments, forwarded to csvapi.DictReader
    :return: result of self(reader).cache(delete_lineage=True)
    :raises ValueError: if csv_file is neither a str nor an iterator
    """
    if isinstance(csv_file, str):
        # A path was given: pick the opener for it and open the file.
        opener = get_read_function(csv_file, self.disable_compression)
        source = opener(csv_file)
    else:
        # Accept both the Python 2 (next) and Python 3 (__next__) spellings
        # of the iterator protocol.
        looks_like_iterator = hasattr(csv_file, 'next') or hasattr(csv_file, '__next__')
        if not looks_like_iterator:
            raise ValueError('csv_file must be a file path or implement the iterator interface')
        source = csv_file

    dict_rows = csvapi.DictReader(
        source,
        fieldnames=fieldnames,
        restkey=restkey,
        restval=restval,
        dialect=dialect,
        **kwds
    )
    wrapped = self(dict_rows)
    return wrapped.cache(delete_lineage=True)

def jsonl(self, jsonl_file):
"""
Reads and parses the input of a jsonl file stream or file.
Expand Down
3 changes: 3 additions & 0 deletions functional/test/data/test_header.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a,b,c
1,2,3
4,5,6
21 changes: 21 additions & 0 deletions functional/test/test_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ def test_csv(self):
with self.assertRaises(ValueError):
self.seq.csv(1)

def test_csv_dict_reader(self):
    """
    Checks csv_dict_reader against the header fixture when given a file
    path and when given an open file object, and checks that a non-path,
    non-iterator argument raises ValueError.
    """
    fixture_path = 'functional/test/data/test_header.csv'
    columns = ('a', 'b', 'c')
    expected_rows = (('1', '2', '3'), ('4', '5', '6'))

    def check_rows(rows):
        # Same assertions, in the same order, for each way of loading.
        for row_index, expected_values in enumerate(expected_rows):
            for column, expected in zip(columns, expected_values):
                self.assertEqual(rows[row_index][column], expected)

    # Loading by file path.
    check_rows(self.seq.csv_dict_reader(fixture_path).to_list())

    # Loading from an already opened file object.
    with open(fixture_path, 'r') as handle:
        from_handle = self.seq.csv_dict_reader(handle).to_list()
        check_rows(from_handle)

    # Anything that is neither a path nor an iterator is rejected.
    with self.assertRaises(ValueError):
        self.seq.csv_dict_reader(1)

def test_gzip_csv(self):
result = self.seq.csv('functional/test/data/test.csv.gz').to_list()
expect = [['1', '2', '3', '4'], ['a', 'b', 'c', 'd']]
Expand Down

0 comments on commit 452bfa6

Please sign in to comment.