Make the exporter more flexible.

This PR allows you to specify a key from task.info, task_run.info, or result.info to be used as the root key when exporting the data as CSV. For example, you could have the following in one task run:

```json
{"info": {"answer": [{"foo": 1}]}}
```

And another one like this:

```json
{"info": {"answer": [{"foo": 1}, {"foo": 2}]}}
```

In such cases, you may want to tell PYBOSSA to use the `answer` key within `info` as the root when flattening the data.
Scifabric · Nov 8, 2017 · d5c8222 · d5c8222
1 parent 516d5ca
commit d5c8222
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 3 deletions.
diff --git a/pybossa/exporter/__init__.py b/pybossa/exporter/__init__.py
@@ -43,20 +43,31 @@ def _get_data(self, table, project_id, flat=False, info_only=False):
         repo, query = self.repositories[table]
         data = getattr(repo, query)(project_id=project_id)
         ignore_keys = current_app.config.get('IGNORE_FLAT_KEYS') or []
+        if table == 'task':
+            csv_export_key = current_app.config.get('TASK_CSV_EXPORT_INFO_KEY')
+        if table == 'task_run':
+            csv_export_key = current_app.config.get('TASK_RUN_CSV_EXPORT_INFO_KEY')
+        if table == 'result':
+            csv_export_key = current_app.config.get('RESULT_CSV_EXPORT_INFO_KEY')
         if info_only:
             if flat:
                 tmp = []
                 for row in data:
-                    inf = row.dictize()['info']
+                    inf = copy.deepcopy(row.dictize()['info'])
+                    if inf and csv_export_key and inf.get(csv_export_key):
+                        inf = inf[csv_export_key]
+                    new_key = '%s_id' % table
                     if inf and type(inf) == dict:
+                        inf[new_key] = row.id
                         tmp.append(flatten(inf,
                                            root_keys_to_ignore=ignore_keys))
                     elif inf and type(inf) == list:
                         for datum in inf:
+                            datum[new_key] = row.id
                             tmp.append(flatten(datum,
                                                root_keys_to_ignore=ignore_keys))
-                    else:
-                        tmp.append({'info': inf})
+                    # else:
+                    #     tmp.append({'info': inf})
             else:
                 tmp = []
                 for row in data:

diff --git a/test/test_web.py b/test/test_web.py
@@ -4440,10 +4440,13 @@ def test_export_result_csv(self):
                     .filter_by(project_id=project.id).all()
         for t in results:
             err_msg = "All the result column names should be included"
+            print t
             d = t.dictize()
             task_run_ids = d['task_run_ids']
             fl = flatten(t.dictize(), root_keys_to_ignore='task_run_ids')
             fl['task_run_ids'] = task_run_ids
+            # keys.append('result_id')
+            print fl
             for tk in fl.keys():
                 expected_key = "%s" % tk
                 assert expected_key in keys, (err_msg, expected_key, keys)