Skip to content

Commit

Permalink
Make the exporter more flexible.
Browse files Browse the repository at this point in the history
This PR allows specifying a key from task.info, task_run.info, or
result.info to be used as the root key when exporting the data as CSV.

For example, you could have in the task_runs the following:

```json
info: {answer: [{foo: 1}]}
```
And another one like this:

```json
info: {answer:[{foo: 1}, {foo: 2}]}
```

In such cases, you may want to tell PYBOSSA to use the `answer` key within
`info` as the root when flattening the data.
  • Loading branch information
teleyinex committed Nov 8, 2017
1 parent 516d5ca commit d5c8222
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
17 changes: 14 additions & 3 deletions pybossa/exporter/__init__.py
Expand Up @@ -43,20 +43,31 @@ def _get_data(self, table, project_id, flat=False, info_only=False):
repo, query = self.repositories[table]
data = getattr(repo, query)(project_id=project_id)
ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS') or []
if table == 'task':
csv_export_key = current_app.config.get('TASK_CSV_EXPORT_INFO_KEY')
if table == 'task_run':
csv_export_key = current_app.config.get('TASK_RUN_CSV_EXPORT_INFO_KEY')
if table == 'result':
csv_export_key = current_app.config.get('RESULT_CSV_EXPORT_INFO_KEY')
if info_only:
if flat:
tmp = []
for row in data:
inf = row.dictize()['info']
inf = copy.deepcopy(row.dictize()['info'])
if inf and csv_export_key and inf.get(csv_export_key):
inf = inf[csv_export_key]
new_key = '%s_id' % table
if inf and type(inf) == dict:
inf[new_key] = row.id
tmp.append(flatten(inf,
root_keys_to_ignore=ignore_keys))
elif inf and type(inf) == list:
for datum in inf:
datum[new_key] = row.id
tmp.append(flatten(datum,
root_keys_to_ignore=ignore_keys))
else:
tmp.append({'info': inf})
# else:
# tmp.append({'info': inf})
else:
tmp = []
for row in data:
Expand Down
3 changes: 3 additions & 0 deletions test/test_web.py
Expand Up @@ -4440,10 +4440,13 @@ def test_export_result_csv(self):
.filter_by(project_id=project.id).all()
for t in results:
err_msg = "All the result column names should be included"
print t
d = t.dictize()
task_run_ids = d['task_run_ids']
fl = flatten(t.dictize(), root_keys_to_ignore='task_run_ids')
fl['task_run_ids'] = task_run_ids
# keys.append('result_id')
print fl
for tk in fl.keys():
expected_key = "%s" % tk
assert expected_key in keys, (err_msg, expected_key, keys)
Expand Down

0 comments on commit d5c8222

Please sign in to comment.