From 23d82d26140477242a519b80f97cb9740810b20b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= <koray_kirli@hms.harvard.edu>
Date: Mon, 31 Oct 2016 18:20:03 -0400
Subject: [PATCH 1/4] FF-320 #comment simplify the code

---
 wranglertools/fdnDCIC.py        | 235 ++++++++++++++++----------------
 wranglertools/get_field_info.py |   4 +-
 2 files changed, 117 insertions(+), 122 deletions(-)

diff --git a/wranglertools/fdnDCIC.py b/wranglertools/fdnDCIC.py
index e0d38901..db5068ed 100644
--- a/wranglertools/fdnDCIC.py
+++ b/wranglertools/fdnDCIC.py
@@ -130,109 +130,110 @@ def md5(path):
     return md5sum.hexdigest()
 
 
+############################################################
+############################################################
+# use the following order to process the sheets
+# if name is not here, will not be processed during ordering
+############################################################
+############################################################
 sheet_order = [
-    "User",
-    "Award",
-    "Lab",
-    "Document",
-    "Protocol",
-    "Publication",
-    "Organism",
-    "IndividualMouse",
-    "IndividualHuman",
-    "Vendor",
-    "Biosource",
-    "Construct",
-    "TreatmentRnai",
-    "TreatmentChemical",
-    "GenomicRegion",
-    "Target",
-    "Modification",
-    "Image",
-    "BiosampleCellCulture",
-    "Biosample",
-    "Enzyme",
-    "FileSet",
-    "FileFastq",
-    "FileFasta",
-    "ExperimentSet",
-    "ExperimentHiC",
-    "ExperimentCaptureC"
-]
+    "User", "Award", "Lab", "Document", "Protocol", "Publication", "Organism", "IndividualMouse", "IndividualHuman",
+    "Vendor", "Enzyme", "Biosource", "Construct", "TreatmentRnai", "TreatmentChemical",
+    "GenomicRegion", "Target", "Modification", "Image", "BiosampleCellCulture", "Biosample",
+    "FileSet", "FileFastq", "FileFasta", "ExperimentSet", "ExperimentHiC", "ExperimentCaptureC"]
 
+do_not_use = [
+    "submitted_by", "date_created", "organism", "schema_version", "accession", "uuid", "status",
+    "quality_metric_flags", "notes", "restricted", "file_size", "filename", "alternate_accessions",
+    "content_md5sum", "md5sum", "quality_metric", "files_in_set", "experiments", "experiments_in_set"]
 
-def order_FDN(input_xls):
-    """Order and filter created xls file."""
 
-    do_not_use = [
-        "submitted_by",
-        "date_created",
-        "organism",
-        "schema_version",
-        "accession",
-        "uuid",
-        "status",
-        "quality_metric_flags",
-        "notes",
-        "restricted",
-        "file_size",
-        "filename",
-        "alternate_accessions",
-        "content_md5sum",
-        "md5sum",
-        "quality_metric",
-        "files_in_set",
-        "experiments",
-        "experiments_in_set"
-    ]
+def filter_and_sort(list_names):
+    """Filter and sort fields"""
+    useful = []
+    for field in list_names:
+        if field in do_not_use:
+            pass
+        else:
+            useful.append(field)
+    # sort alphabetically
+    useful = sorted(useful)
+    return useful
 
-    move_frond = [
-        'award',
-        '*award',
-        'lab',
-        '*lab',
-        'description',
-        'title',
-        '*title',
-        'name',
-        '*name',
-        'aliases',
-        '#Field Name:'
-    ]
+move_frond = ['award', '*award', 'lab', '*lab', 'description',
+              'title', '*title', 'name', '*name', 'aliases', '#Field Name:']
 
-    move_end = [
-        'documents',
-        'references',
-        'url',
-        'dbxrefs'
-    ]
 
-    # reorder individual items in sheets, [SHEET, MOVE_ITEM, MOVE_BEFORE]
-    reorder = [
-        ['Biosource', 'cell_line', 'SOP_cell_line'],
-        ['Biosource', 'cell_line_tier', 'SOP_cell_line'],
-        ['GenomicRegion', 'start_coordinate', 'end_coordinate'],
-        ['GenomicRegion', 'start_location', 'end_location'],
-        ['GenomicRegion', 'location_description', 'start_location'],
-        ['BiosampleCellCulture', 'protocol_documents', 'protocol_SOP_deviations'],
-        ['Biosample', 'biosample_relation.relationship_type', 'biosample_relation.biosample'],
-        ['Enzyme', 'catalog_number', 'attachment'],
-        ['Enzyme', 'recognition_sequence', 'attachment'],
-        ['Enzyme', 'site_length', 'attachment'],
-        ['Enzyme', 'cut_position', 'attachment'],
-        ['File', 'related_files.relationship_type', 'related_files.file'],
-        ['Experiment', 'average_fragment_size', 'fragment_size_range'],
-        ['Experiment', 'files', 'documents'],
-        ['Experiment', 'filesets', 'documents'],
-        ['Experiment', 'experiment_relation.relationship_type', 'documents'],
-        ['Experiment', 'experiment_relation.experiment', 'documents'],
-        ['Experiment', 'experiment_sets|0', 'documents'],
-        ['Experiment', 'experiment_sets|1', 'documents'],
-        ['Experiment', 'experiment_sets|2', 'documents'],
-        ['Experiment', 'experiment_sets|3', 'documents'],
+def move_to_frond(list_names):
+    """Move names frond"""
+    for frond in move_frond:
+        try:
+            list_names.remove(frond)
+            list_names.insert(0, frond)
+        except:
+            pass
+        return list_names
+
+move_end = ['documents', 'references', 'url', 'dbxrefs']
+
+
+def move_to_end(list_names):
+    """Move names to end"""
+    for end in move_end:
+        try:
+            list_names.pop(list_names.index(end))
+            list_names.append(end)
+        except:
+            pass
+    return list_names
+
+# reorder individual items in sheets, [SHEET, MOVE_ITEM, MOVE_BEFORE]
+reorder = [
+    ['Biosource', 'cell_line', 'SOP_cell_line'],
+    ['Biosource', 'cell_line_tier', 'SOP_cell_line'],
+    ['GenomicRegion', 'start_coordinate', 'end_coordinate'],
+    ['GenomicRegion', 'start_location', 'end_location'],
+    ['GenomicRegion', 'location_description', 'start_location'],
+    ['BiosampleCellCulture', 'protocol_documents', 'protocol_SOP_deviations'],
+    ['Biosample', 'biosample_relation.relationship_type', 'biosample_relation.biosample'],
+    ['Enzyme', 'catalog_number', 'attachment'],
+    ['Enzyme', 'recognition_sequence', 'attachment'],
+    ['Enzyme', 'site_length', 'attachment'],
+    ['Enzyme', 'cut_position', 'attachment'],
+    ['File', 'related_files.relationship_type', 'related_files.file'],
+    ['Experiment', 'average_fragment_size', 'fragment_size_range'],
+    ['Experiment', 'files', 'documents'],
+    ['Experiment', 'filesets', 'documents'],
+    ['Experiment', 'experiment_relation.relationship_type', 'documents'],
+    ['Experiment', 'experiment_relation.experiment', 'documents'],
+    ['Experiment', 'experiment_sets|0', 'documents'],
+    ['Experiment', 'experiment_sets|1', 'documents'],
+    ['Experiment', 'experiment_sets|2', 'documents'],
+    ['Experiment', 'experiment_sets|3', 'documents'],
+]
+
 
+def switch_fields(list_names, sheet):
+    for sort_case in reorder:
+        # to look for all experiments with "Experiment" name, it will also get ExperimentSet
+        # there are no conflicting field names
+        if sort_case[0] in sheet:
+            try:
+                # this is working more consistently than the pop item method
+                list_names.remove(sort_case[1])
+                list_names.insert(list_names.index(sort_case[2]), sort_case[1])
+            except:
+                pass
+    return list_names
+
+# if object name is in the following list, fetch all current/released items and add to xls
+fetch_items = [
+    "Protocol", "Enzymes", "Biosource", "Publication", "Vendor"
     ]
 
+
+def order_FDN(input_xls):
+    """Order and filter created xls file."""
     ReadFile = input_xls
     OutputFile = input_xls[:-4]+'_ordered.xls'
     bookread = xlrd.open_workbook(ReadFile)
@@ -250,39 +251,28 @@ def order_FDN(input_xls):
     if Sheets_read:
         print(Sheets_read, "not in sheet_order list, please update")
         Sheets.extend(Sheets_read)
-
     for sheet in Sheets:
         useful = []
         active_sheet = bookread.sheet_by_name(sheet)
         first_row_values = active_sheet.row_values(rowx=0)
-        for field in first_row_values:
-            if field in do_not_use:
-                pass
-            else:
-                useful.append(field)
-        useful = sorted(useful)
+        print('1')
+        print(first_row_values)
+        # remove items from fields in xls
+        useful = filter_and_sort(first_row_values)
+        print('2')
+        print(useful)
         # move selected to front
-        for frond in move_frond:
-            try:
-                useful.remove(frond)
-                useful.insert(0, frond)
-            except:
-                pass
+        useful = move_to_frond(useful)
+        print('3')
+        print(useful)
         # move selected to end
-        for end in move_end:
-            try:
-                useful.pop(useful.index(end))
-                useful.append(end)
-            except:
-                pass
+        useful = move_to_end(useful)
+        print('4')
+        print(useful)
         # reorder some items based on reorder list
-        for sort_case in reorder:
-            if sort_case[0] in sheet:
-                try:
-                    useful.remove(sort_case[1])
-                    useful.insert(useful.index(sort_case[2]), sort_case[1])
-                except:
-                    pass
+        useful = switch_fields(useful, sheet)
+        print('5')
+        print(useful)
         # create a new sheet and write the data
         new_sheet = book_w.add_sheet(sheet)
         for write_row_index, write_item in enumerate(useful):
@@ -294,5 +284,10 @@ def order_FDN(input_xls):
         for i in range(100):
             for ix in range(len(useful)):
                 new_sheet.write(write_column_index+1+i, ix, '', style)
-
     book_w.save(OutputFile)
+    ############################################################
+    ############################################################
+    # use the following order to process the sheets
+    # if name is not here, will not be processed during ordering
+    ############################################################
+    ############################################################
diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py
index 8002572c..faa1cc1b 100755
--- a/wranglertools/get_field_info.py
+++ b/wranglertools/get_field_info.py
@@ -174,14 +174,14 @@ def get_uploadable_fields(connection, types, include_description=False,
     return fields
 
 
-def create_xls(fields, filename):
+def create_xls(all_fields, filename):
     '''
     fields being a dictionary of sheet -> FieldInfo(objects)
     create one sheet per dictionary item, with three columns of fields
     for fieldname, description and enum
     '''
     wb = xlwt.Workbook()
-    for obj_name, fields in fields.items():
+    for obj_name, fields in all_fields.items():
         ws = wb.add_sheet(obj_name)
         ws.write(0, 0, "#Field Name:")
         ws.write(1, 0, "#Field Type:")

From b78f0daca806e452e70101f97196b1af95aa6dd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= <koray_kirli@hms.harvard.edu>
Date: Mon, 31 Oct 2016 18:57:01 -0400
Subject: [PATCH 2/4] Add fetch_all_items, pass connection to order_FDN, remove debug prints

---
 wranglertools/fdnDCIC.py        | 28 ++++++++++++++--------------
 wranglertools/get_field_info.py |  2 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/wranglertools/fdnDCIC.py b/wranglertools/fdnDCIC.py
index db5068ed..16b99e44 100644
--- a/wranglertools/fdnDCIC.py
+++ b/wranglertools/fdnDCIC.py
@@ -227,12 +227,18 @@ def switch_fields(list_names, sheet):
     return list_names
 
 # if object name is in the following list, fetch all current/released items and add to xls
-fetch_items = [
-    "Protocol", "Enzymes", "Biosource", "Publication", "Vendor"
-    ]
+fetch_items = {
+    "Protocol": "protocol", "Enzymes": "enzymes", "Biosource": "biosources",
+    "Publication": "publications", "Vendor": "vendors"}
 
 
-def order_FDN(input_xls):
+def fetch_all_items(sheet, field_list, connection):
+    if sheet in fetch_items.keys():
+        json_list = get_FDN(fetch_items[sheet], connection)
+        return(json_list)
+
+
+def order_FDN(input_xls, connection):
     """Order and filter created xls file."""
     ReadFile = input_xls
     OutputFile = input_xls[:-4]+'_ordered.xls'
@@ -255,24 +261,18 @@ def order_FDN(input_xls):
         useful = []
         active_sheet = bookread.sheet_by_name(sheet)
         first_row_values = active_sheet.row_values(rowx=0)
-        print('1')
-        print(first_row_values)
         # remove items from fields in xls
         useful = filter_and_sort(first_row_values)
-        print('2')
-        print(useful)
         # move selected to front
         useful = move_to_frond(useful)
-        print('3')
-        print(useful)
         # move selected to end
         useful = move_to_end(useful)
-        print('4')
-        print(useful)
         # reorder some items based on reorder list
         useful = switch_fields(useful, sheet)
-        print('5')
-        print(useful)
+        # fetch all items for common objects
+        all_items = fetch_all_items(sheet, useful, connection)
+        print(all_items)
+
         # create a new sheet and write the data
         new_sheet = book_w.add_sheet(sheet)
         for write_row_index, write_item in enumerate(useful):
diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py
index faa1cc1b..a395aaf5 100755
--- a/wranglertools/get_field_info.py
+++ b/wranglertools/get_field_info.py
@@ -220,7 +220,7 @@ def main():
         file_name = args.outfile
         create_xls(fields, file_name)
         if args.order:
-            fdnDCIC.order_FDN(file_name)
+            fdnDCIC.order_FDN(file_name, connection)
 
 if __name__ == '__main__':
     main()

From d38eb0db04ef2c7db32bc01c1e3c3a2586d4c281 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= <koray_kirli@hms.harvard.edu>
Date: Tue, 1 Nov 2016 14:26:08 -0400
Subject: [PATCH 3/4] FF-320 #comment added the code in the ordering function

---
 Data_Files/Rao_et_al_2014/fieldsRao.xls | Bin 309248 -> 309248 bytes
 wranglertools/fdnDCIC.py                |  45 ++++++++++++++++++------
 wranglertools/get_field_info.py         |   2 +-
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/Data_Files/Rao_et_al_2014/fieldsRao.xls b/Data_Files/Rao_et_al_2014/fieldsRao.xls
index 6179c8c415609774de7e3e2bcab3ba6e6e2694ea..ea5ec5b47d9bde8f5f8e4b5368c514a62fa41e0f 100644
GIT binary patch
delta 37
qcmZqpAk^?dXhRMQ+m|1c`cqAsi&)x=SQvqr35c1u7qPJPZ3X}={|-3-

delta 37
qcmZqpAk^?dXhRMQ+p(O$sMW^JMJ(+_EQ~<R1jNkSi&$9tHUj_@&<#!i

diff --git a/wranglertools/fdnDCIC.py b/wranglertools/fdnDCIC.py
index 16b99e44..6abcaaaa 100644
--- a/wranglertools/fdnDCIC.py
+++ b/wranglertools/fdnDCIC.py
@@ -172,7 +172,7 @@ def move_to_frond(list_names):
             list_names.insert(0, frond)
         except:
             pass
-        return list_names
+    return list_names
 
 move_end = ['documents', 'references', 'url', 'dbxrefs']
 
@@ -228,17 +228,33 @@ def switch_fields(list_names, sheet):
 
 # if object name is in the following list, fetch all current/released items and add to xls
 fetch_items = {
-    "Protocol": "protocol", "Enzymes": "enzymes", "Biosource": "biosources",
-    "Publication": "publications", "Vendor": "vendors"}
+    "Protocol": "protocol", "Enzymes": "enzyme", "Biosource": "biosource",
+    "Publication": "publication", "Vendor": "vendor"}
 
 
-def fetch_all_items(sheet, field_list, connection):
+def fetch_all_items(sheet, field_list):
+    """For a given sheet, get all released items"""
+    all_items = []
     if sheet in fetch_items.keys():
-        json_list = get_FDN(fetch_items[sheet], connection)
-        return(json_list)
+        obj = fetch_items[sheet]
+        HEADERS = {'accept': 'application/json'}
+        URL = "http://data.4dnucleome.org/search/?type={}&frame=object&limit=all&format=json".format(obj)
+        response = requests.get(URL, headers=HEADERS)
+        items_list = response.json()['@graph']
+        for item in items_list:
+            item_info = []
+            for field in field_list:
+                if field == "#Field Name:":
+                    item_info.append("#")
+                else:
+                    item_info.append(item.get(field, ''))
+            all_items.append(item_info)
+        return all_items
+    else:
+        return
 
 
-def order_FDN(input_xls, connection):
+def order_FDN(input_xls):
     """Order and filter created xls file."""
     ReadFile = input_xls
     OutputFile = input_xls[:-4]+'_ordered.xls'
@@ -270,9 +286,7 @@ def order_FDN(input_xls, connection):
         # reorder some items based on reorder list
         useful = switch_fields(useful, sheet)
         # fetch all items for common objects
-        all_items = fetch_all_items(sheet, useful, connection)
-        print(all_items)
-
+        all_items = fetch_all_items(sheet, useful)
         # create a new sheet and write the data
         new_sheet = book_w.add_sheet(sheet)
         for write_row_index, write_item in enumerate(useful):
@@ -280,10 +294,19 @@ def order_FDN(input_xls, connection):
             column_val = active_sheet.col_values(read_col_ind)
             for write_column_index, cell_value in enumerate(column_val):
                 new_sheet.write(write_column_index, write_row_index, cell_value, style)
+        # write common objects
+        if all_items:
+            for i, item in enumerate(all_items):
+                for ix in range(len(useful)):
+                    write_column_index_II = write_column_index+1+i
+                    new_sheet.write(write_column_index_II, ix, item[ix], style)
+        else:
+            write_column_index_II = write_column_index
         # write 50 empty lines with text formatting
         for i in range(100):
             for ix in range(len(useful)):
-                new_sheet.write(write_column_index+1+i, ix, '', style)
+                write_column_index_III = write_column_index_II+1+i
+                new_sheet.write(write_column_index_III, ix, '', style)
     book_w.save(OutputFile)
     ############################################################
     ############################################################
diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py
index a395aaf5..faa1cc1b 100755
--- a/wranglertools/get_field_info.py
+++ b/wranglertools/get_field_info.py
@@ -220,7 +220,7 @@ def main():
         file_name = args.outfile
         create_xls(fields, file_name)
         if args.order:
-            fdnDCIC.order_FDN(file_name, connection)
+            fdnDCIC.order_FDN(file_name)
 
 if __name__ == '__main__':
     main()

From d5989d6d3b2be56779b4a6e05508de007ccfc2e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= <koray_kirli@hms.harvard.edu>
Date: Tue, 1 Nov 2016 14:52:03 -0400
Subject: [PATCH 4/4] FF-320 #comment Added connection for user specific items
 to be fetched

---
 wranglertools/fdnDCIC.py        | 14 ++++++--------
 wranglertools/get_field_info.py |  2 +-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/wranglertools/fdnDCIC.py b/wranglertools/fdnDCIC.py
index 6abcaaaa..e6fea400 100644
--- a/wranglertools/fdnDCIC.py
+++ b/wranglertools/fdnDCIC.py
@@ -232,15 +232,13 @@ def switch_fields(list_names, sheet):
     "Publication": "publication", "Vendor": "vendor"}
 
 
-def fetch_all_items(sheet, field_list):
+def fetch_all_items(sheet, field_list, connection):
     """For a given sheet, get all released items"""
     all_items = []
     if sheet in fetch_items.keys():
-        obj = fetch_items[sheet]
-        HEADERS = {'accept': 'application/json'}
-        URL = "http://data.4dnucleome.org/search/?type={}&frame=object&limit=all&format=json".format(obj)
-        response = requests.get(URL, headers=HEADERS)
-        items_list = response.json()['@graph']
+        obj_id = "search/?type=" + fetch_items[sheet]
+        get_FDN(obj_id, connection)
+        items_list = get_FDN(obj_id, connection)['@graph']
         for item in items_list:
             item_info = []
             for field in field_list:
@@ -254,7 +252,7 @@ def fetch_all_items(sheet, field_list):
         return
 
 
-def order_FDN(input_xls):
+def order_FDN(input_xls, connection):
     """Order and filter created xls file."""
     ReadFile = input_xls
     OutputFile = input_xls[:-4]+'_ordered.xls'
@@ -286,7 +284,7 @@ def order_FDN(input_xls):
         # reorder some items based on reorder list
         useful = switch_fields(useful, sheet)
         # fetch all items for common objects
-        all_items = fetch_all_items(sheet, useful)
+        all_items = fetch_all_items(sheet, useful, connection)
         # create a new sheet and write the data
         new_sheet = book_w.add_sheet(sheet)
         for write_row_index, write_item in enumerate(useful):
diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py
index faa1cc1b..a395aaf5 100755
--- a/wranglertools/get_field_info.py
+++ b/wranglertools/get_field_info.py
@@ -220,7 +220,7 @@ def main():
         file_name = args.outfile
         create_xls(fields, file_name)
         if args.order:
-            fdnDCIC.order_FDN(file_name)
+            fdnDCIC.order_FDN(file_name, connection)
 
 if __name__ == '__main__':
     main()