From ef57b124133b73905c7eb2ee397f235362c9573b Mon Sep 17 00:00:00 2001 From: aschroed Date: Wed, 8 May 2019 15:35:35 -0400 Subject: [PATCH 1/7] Added Gene and BioFeature to sheet_order and removed references to Target from docs --- README.md | 8 ++-- doc/metadata_submission.md | 4 +- doc/schema_info.md | 3 +- .../4DNWranglerTools.egg-info/PKG-INFO | 46 +++++++++---------- wranglertools/get_field_info.py | 2 +- wranglertools/import_data.py | 2 +- 6 files changed, 33 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 33e23c3d..ee350f4e 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ pip3 install submit4dn --upgrade ### Troubleshooting -If you encounter an error containing something like: +If you encounter an error containing something like: ``` Symbol not found: _PyInt_AsLong @@ -85,17 +85,17 @@ get_field_info --type Biosample --comments --outfile biosample.xls Example list of sheets: ~~~~ -get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentAgent --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type IndividualMouse --type ExperimentHiC --type ExperimentSetReplicate --type ExperimentCaptureC --type Target --type GenomicRegion --type ExperimentSet --type Image --comments --outfile MetadataSheets.xls +get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentAgent --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type IndividualMouse --type ExperimentHiC --type ExperimentSetReplicate --type ExperimentCaptureC --type BioFeature --type GenomicRegion --type ExperimentSet --type Image --comments --outfile MetadataSheets.xls ~~~~ Example list of sheets: (using python scripts) ~~~~ -python3 -m wranglertools.get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentAgent --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type IndividualHuman --type ExperimentHiC --type ExperimentCaptureC --type Target --type GenomicRegion --type ExperimentSet --type ExperimentSetReplicate --type Image --comments --outfile MetadataSheets.xls +python3 -m wranglertools.get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentAgent --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type IndividualHuman --type ExperimentHiC --type ExperimentCaptureC --type BioFeature --type GenomicRegion --type ExperimentSet --type ExperimentSetReplicate --type Image --comments --outfile MetadataSheets.xls ~~~~ Example list of sheets: (Experiment seq) ~~~~ -python3 -m wranglertools.get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentAgent --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type ExperimentSeq --type Target --type GenomicRegion --type ExperimentSet --type ExperimentSetReplicate --type Image --comments --outfile exp_seq_all.xls +python3 -m wranglertools.get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme 
--type Construct --type TreatmentAgent --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type ExperimentSeq --type BioFeature --type GenomicRegion --type ExperimentSet --type ExperimentSetReplicate --type Image --comments --outfile exp_seq_all.xls ~~~~ Example list of sheets: (Experiment seq simple) diff --git a/doc/metadata_submission.md b/doc/metadata_submission.md index 11cb1fab..e7040270 100644 --- a/doc/metadata_submission.md +++ b/doc/metadata_submission.md @@ -69,7 +69,7 @@ In some cases a field value must be formatted in a certain way or the Item will In other cases a field value must match a certain pattern. For example, if a field requires a DNA sequence then the submitted value must contain only the characters A, T, G, C or N. -_Database Cross Reference (DBxref) fields_, which contain identifiers that refer to external databases, are another case requiring special formatting. In many cases the values of these fields need to be in database\_name:ID format. For example, an SRA experiment identifier would need to be submitted in the form ‘SRA:SRX1234567’ (see also [Basic fields example](#basic-field) above). Note that in a few cases where the field takes only identifiers for one or two specific databases the ID alone can be entered - for example, when entering gene symbols in the *'targeted\_genes’* field of the Target Item you can enter only the gene symbols i.e. PARK2, DLG1. +_Database Cross Reference (DBxref) fields_, which contain identifiers that refer to external databases, are another case requiring special formatting. In many cases the values of these fields need to be in database\_name:ID format. For example, an SRA experiment identifier would need to be submitted in the form ‘SRA:SRX1234567’ (see also [Basic fields example](#basic-field) above). ####When a field specifies a linked item Some fields in a Sheet for an Item may contain references to another Item. These may be of the same type or different types. Examples of this type of field include the *‘biosource’* field in Biosample or the *‘files’* field in the ExperimentHiC. Note that the latter is also an example of a list field that can take multiple values. @@ -264,7 +264,7 @@ The scripts accepts the following parameters:. 
**To get the complete list of relevant sheets in one workbook:** - get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentChemical --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type FileSet --type IndividualHuman --type IndividualMouse --type ExperimentHiC --type ExperimentCaptureC --type ExperimentRepliseq --type Target --type GenomicRegion --type ExperimentSet --type ExperimentSetReplicate --type Image --comments --outfile AllItems.xls + get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentChemical --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type FileSet --type IndividualHuman --type IndividualMouse --type ExperimentHiC --type ExperimentCaptureC --type ExperimentRepliseq --type BioFeature --type GenomicRegion --type Gene --type ExperimentSet --type ExperimentSetReplicate --type Image --comments --outfile AllItems.xls ##Submission of metadata using the 4DN REST API The 4DN-DCIC metadata database can be accessed using a Hypertext-Transfer-Protocol-(HTTP)-based, Representational-state-transfer (RESTful) application programming interface (API) - aka the REST API. In fact, this API is used by the ```import_data``` script used to submit metadata entered into excel spreadsheets as described [in this document](https://docs.google.com/document/d/1Xh4GxapJxWXCbCaSqKwUd9a2wTiXmfQByzP0P8q5rnE). This API was developed by the [ENCODE][encode] project so if you have experience retrieving data from or submitting data to ENCODE use of the 4DN-DCIC API should be familiar to you. The REST API can be used both for data submission and data retrieval, typically using scripts written in your language of choice. Data objects exchanged with the server conform to the standard JavaScript Object Notation (JSON) format. Libraries written for use with your chosen language are typically used for the network connection, data transfer, and parsing of data (for example, requests and json, respectively for Python). For a good introduction to scripting data retrieval (using GET requests) you can refer to [this page](https://www.encodeproject.org/help/rest-api/) on the [ENCODE][encode] web site that also has a good introduction to viewing and understanding JSON formatted data. 
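For a concrete starting point, here is a minimal, hypothetical Python sketch of such a GET request. It assumes a keypairs.json laid out as in the example shown elsewhere in these docs (key, secret and server under the "default" identifier); the item identifier in the URL is a placeholder, not a real accession.

```python
# Minimal sketch of a GET request to the 4DN REST API using the requests
# and json libraries mentioned above. The keypairs.json path and layout
# follow the documented example; the accession below is a placeholder.
import json
import requests

with open('/Users/user/keypairs.json') as f:
    key = json.load(f)['default']

# the key and secret are sent via HTTP basic authentication
url = key['server'] + '/biosamples/4DNBSXXXXXXX/'  # placeholder identifier
response = requests.get(url, auth=(key['key'], key['secret']),
                        headers={'Accept': 'application/json'})
response.raise_for_status()
item = response.json()
print(item.get('uuid'), item.get('status'))
```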
diff --git a/doc/schema_info.md b/doc/schema_info.md index e7c1e7ec..0cb2fd60 100644 --- a/doc/schema_info.md +++ b/doc/schema_info.md @@ -6,6 +6,7 @@ award.json | Award | award(s) biosample.json | Biosample | biosample(s) biosample\_cell\_culture.json | BiosampleCellCulture | biosample-cell-cultures, biosample\_cell\_culture biosource.json | Biosource | biosource(s) +bio_feature.json | BioFeature | bio-features, bio\_feature construct.json | Construct | construct(s) document.json | Document | document(s) enzyme.json | Enzyme | enzyme(s) @@ -19,6 +20,7 @@ file\_fastq.json | FileFastq | files-fastq, file\_fastq file\_processed.json | FileProcessed | files-processed, file\_processed file\_reference.json | FileReference | files-reference, file\_reference file\_set.json | FileSet | file-sets, file\_set +gene.json | Gene | gene(s) genomic\_region.json | GenomicRegion | genomic-regions, genomic\_region image.json | Image | image(s) individual\_human.json | IndividualHuman | individuals-human, individual\_human @@ -37,7 +39,6 @@ software.json | Software | software(s) sop\_map.json | SopMap | sop-maps, sop\_map summary\_statistic.json | SummaryStatistic | summary-statistics, summary\_statistic summary\_statistic\_hi\_c.json | SummaryStatisticHiC | summary-statistics-hi-c, summary\_statistic\_hi\_c -target.json | Target | target(s) treatment\_chemical.json | TreatmentChemical | treatments-chemical, treatment\_chemical treatment\_rnai.json | TreatmentRnai | treatments-rnai, treatment\_rnai user.json | User | user(s) diff --git a/wranglertools/4DNWranglerTools.egg-info/PKG-INFO b/wranglertools/4DNWranglerTools.egg-info/PKG-INFO index 07e4acc6..164a57ca 100644 --- a/wranglertools/4DNWranglerTools.egg-info/PKG-INFO +++ b/wranglertools/4DNWranglerTools.egg-info/PKG-INFO @@ -6,13 +6,13 @@ Home-page: http://data.4dnucleome.org Author: 4DN Team at Harvard Medical School Author-email: jeremy_johnson@hms.harvard.edu License: MIT -Description: +Description: ##Connection first thing you need is the keyfile to access the REST application it is a json formatted file that contains key,secret and server under one identifier. Here is the default structure. The default path is /Users/user/keypairs.json - + { "default": { "key": "TheConnectionKey", @@ -22,7 +22,7 @@ Description: } if file name is different and the key is not named default add it to the code: python3 code.py --keyfile nameoffile.json --key NotDefault - + ##Generate fields.xls To create an xls file with sheets to be filled use the example and modify to your needs. It will accept the following parameters. 
--type use for each sheet that you want to add to the excel workbook @@ -31,11 +31,11 @@ Description: --comments adds the comments together with enums (by default False) --writexls creates the xls file (by default True) --outfile change the default file name "fields.xls" to a specified one - + *Full list* ~~~~ - python3 get_field_info.py --type Publication --type Document --type Vendor --type Protocol --type ProtocolsCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentChemical --type TreatmentRnai --type Modification --type Biosample --type File --type FileSet --type IndividualHuman --type IndividualMouse --type ExperimentHiC --type ExperimentCaptureC --type Target --type GenomicRegion --type ExperimentSet --type Image --outfile AllItems.xls --order - + python3 get_field_info.py --type Publication --type Document --type Vendor --type Protocol --type ProtocolsCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentChemical --type TreatmentRnai --type Modification --type Biosample --type File --type FileSet --type IndividualHuman --type IndividualMouse --type ExperimentHiC --type ExperimentCaptureC --type BioFeature --type GenomicRegion --type ExperimentSet --type Image --outfile AllItems.xls --order + ~~~~ *To get a single sheet use* ``` @@ -44,50 +44,50 @@ Description: python3 get_field_info.py --type Biosample --comments --outfile biosample.xls python3 get_field_info.py --type Biosample --comments --outfile biosample.xls --order ``` - + #Specifications for fields.xls In fields.xls, each excel sheet is named after an object type, like ExperimentHiC, Biosample, Construct, Protocol... - + *Each sheet has 3 rows* 1) Field name 2) Field description 3) Choices for controlled vocabulary (some fields only accept a value from a list of selection, like experiment type) - + The first entry will start from row 4, and column 2. - + Each field can be a certain type; string, number/integer, list. If the type is integer, number or array, it will be indicated with the fields name; field:number, fields:int, field:array. If the field is a string, you will only see the field name. If the field is an array (field:list), you may enter a single item, or multiple items separated by comma. - + field:array item1,item2,item2,item4 - + Some objects containing fields that are grouped together, called embedded sub-objects. For example the "experiment_relations" has 2 fields called "relationship_type", and "experiment". In the field names you will see * experiment_relations.relationship_type * experiment_relations.experiment - + If the embedded sub-object is a list, you can increase the number of items by creating new columns and appending numbers to the fields names * experiment_relations.relationship_type1 * experiment_relations.experiment1 * experiment_relations.relationship_type2 * experiment_relations.experiment2 - - + + **Aliases** - + When you create new object types at the same time, it is not possible to reference one item in another with an accession or uuid since it is not assigned yet. For example, if you have a new experiment with a new biosample in the same excel workbook (different sheets), what are you going to put in biosample field in experiments sheet? To overcome this problem, a lab specific identifier called alias is used. "aliases" field accepts multiple entries in the form of "labname:refname,labname:refname2" (testlab:expHic001). If you add lab:bisample1 to aliases field in biosample, you can then use this value in biosample field in experiment. 
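To make the alias mechanism above concrete, here is a purely illustrative sketch (shown as Python dictionaries rather than spreadsheet rows) of a Biosample that defines an alias and an Experiment that refers to it; the alias values and the field subsets are hypothetical.

```python
# Hypothetical metadata entries illustrating alias cross-referencing:
# the Biosample defines "testlab:biosample1" in its aliases field, and the
# Experiment uses that same string in its biosample field so the server can
# resolve the link even though no accession or uuid has been assigned yet.
biosample_row = {
    "aliases": ["testlab:biosample1"],
}

experiment_row = {
    "aliases": ["testlab:expHic001"],
    "biosample": "testlab:biosample1",  # resolved to the item defined above
}
```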
- - + + #Specifications for import_data.py You can use import_data.py either to upload new metadata or patch fields of an existing metadata. When you import file data, the status has to be "uploading". if you have some other status, like "uploaded" and then patch the status to "uploading", you will not be able to upload file, because the dedicated url for aws upload is creating during post if the status is uploading. - + **Uploading vs Patching** - + If there is a uuid, alias, @id, or accession in the document that matches and existing entry in the database, it will ask if you want to PATCH that object one by one. If you use '--patchall' if you want to patch ALL objects in your document and ignore that message. - + If no object identifiers are found in the document, you need to use '--update' for POSTing to occur. - + To upload objects with attachments, use the column titled "attachment" containing the path the file you wish to attach - + Platform: UNKNOWN diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py index b5f466a4..aad00711 100755 --- a/wranglertools/get_field_info.py +++ b/wranglertools/get_field_info.py @@ -233,7 +233,7 @@ class FieldInfo(object): sheet_order = [ "User", "Award", "Lab", "Document", "ExperimentType", "Protocol", "Publication", "Organism", "IndividualMouse", "IndividualFly", "IndividualHuman", "FileFormat", "Vendor", "Enzyme", - "Construct", "TreatmentRnai", "TreatmentAgent", "GenomicRegion", "Target", + "Construct", "TreatmentRnai", "TreatmentAgent", "GenomicRegion", "Gene", "Target", "BioFeature", "Antibody", "Modification", "Image", "Biosource", "BiosampleCellCulture", "Biosample", "FileFastq", "FileProcessed", "FileReference", "FileCalibration", "FileSet", "FileSetCalibration", "MicroscopeSettingD1", "MicroscopeSettingD2", diff --git a/wranglertools/import_data.py b/wranglertools/import_data.py index 78e1bda6..b9ea687b 100755 --- a/wranglertools/import_data.py +++ b/wranglertools/import_data.py @@ -45,7 +45,7 @@ Defining Object type: Each "sheet" of the excel file is named after the object type you are uploading, with the format used on http://data.4dnucleome.org//profiles/ -Ex: ExperimentHiC, Biosample, Document, Target +Ex: ExperimentHiC, Biosample, Document, BioFeature If there is a single sheet that needs to be posted or patched, you can name the single sheet with the object name and use the '--type' argument From f4e354494ab9e42f72f8bef2cee5ee9bbbdfa29b Mon Sep 17 00:00:00 2001 From: aschroed Date: Wed, 8 May 2019 16:12:33 -0400 Subject: [PATCH 2/7] removed Target from sheet_order --- wranglertools/get_field_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py index aad00711..efed5feb 100755 --- a/wranglertools/get_field_info.py +++ b/wranglertools/get_field_info.py @@ -233,7 +233,7 @@ class FieldInfo(object): sheet_order = [ "User", "Award", "Lab", "Document", "ExperimentType", "Protocol", "Publication", "Organism", "IndividualMouse", "IndividualFly", "IndividualHuman", "FileFormat", "Vendor", "Enzyme", - "Construct", "TreatmentRnai", "TreatmentAgent", "GenomicRegion", "Gene", "Target", "BioFeature", + "Construct", "TreatmentRnai", "TreatmentAgent", "GenomicRegion", "Gene", "BioFeature", "Antibody", "Modification", "Image", "Biosource", "BiosampleCellCulture", "Biosample", "FileFastq", "FileProcessed", "FileReference", "FileCalibration", "FileSet", "FileSetCalibration", "MicroscopeSettingD1", "MicroscopeSettingD2", From 
ad0db844d215afc5cbb22e0a176dfdb5aa0145ed Mon Sep 17 00:00:00 2001 From: aschroed Date: Thu, 9 May 2019 14:37:04 -0400 Subject: [PATCH 3/7] made some updates to docs and preset values to replace references to target with biofeature and gene --- doc/metadata_submission.md | 19 ++++++++++++++++++- doc/schema_info.md | 2 +- .../4DNWranglerTools.egg-info/PKG-INFO | 3 +-- wranglertools/get_field_info.py | 12 ++++++------ 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/doc/metadata_submission.md b/doc/metadata_submission.md index e7040270..489e4503 100644 --- a/doc/metadata_submission.md +++ b/doc/metadata_submission.md @@ -264,7 +264,24 @@ The scripts accepts the following parameters:. **To get the complete list of relevant sheets in one workbook:** - get_field_info --type Publication --type Document --type Vendor --type Protocol --type BiosampleCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentChemical --type TreatmentRnai --type Modification --type Biosample --type FileFastq --type FileSet --type IndividualHuman --type IndividualMouse --type ExperimentHiC --type ExperimentCaptureC --type ExperimentRepliseq --type BioFeature --type GenomicRegion --type Gene --type ExperimentSet --type ExperimentSetReplicate --type Image --comments --outfile AllItems.xls + get_field_info --type all --comments --outfile AllItems.xls + + +**You can also generate the sheets needed for a particular type of experiment using pre-set options** + + get_field_info --type hic --comments --outfile HiCMetadata.xls + + Current presets include: + - hic for most types of Hi-C eg. in situ, dilution, single cell + - chipseq for ChIP-seq + - repliseq for 2-phase or multi-phase Repli-seq + - atacseq for ATAC-seq + - damid for DamID-seq + - chiapet for CHIA-Pet and PLAC-seq + - capturec for Capture Hi-C + - fish for RNA and DNA FISH + - spt for Single Particle Tracking Imaging experiments + ##Submission of metadata using the 4DN REST API The 4DN-DCIC metadata database can be accessed using a Hypertext-Transfer-Protocol-(HTTP)-based, Representational-state-transfer (RESTful) application programming interface (API) - aka the REST API. In fact, this API is used by the ```import_data``` script used to submit metadata entered into excel spreadsheets as described [in this document](https://docs.google.com/document/d/1Xh4GxapJxWXCbCaSqKwUd9a2wTiXmfQByzP0P8q5rnE). This API was developed by the [ENCODE][encode] project so if you have experience retrieving data from or submitting data to ENCODE use of the 4DN-DCIC API should be familiar to you. The REST API can be used both for data submission and data retrieval, typically using scripts written in your language of choice. Data objects exchanged with the server conform to the standard JavaScript Object Notation (JSON) format. Libraries written for use with your chosen language are typically used for the network connection, data transfer, and parsing of data (for example, requests and json, respectively for Python). For a good introduction to scripting data retrieval (using GET requests) you can refer to [this page](https://www.encodeproject.org/help/rest-api/) on the [ENCODE][encode] web site that also has a good introduction to viewing and understanding JSON formatted data. 
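As a rough illustration of how these presets behave, the sketch below mirrors the presets mapping added to get_field_info.py in this patch: a preset's extra item types are combined with a set of always-included types and then filtered through the canonical sheet order. The sheet_order list is trimmed and the helper name expand_preset is invented for brevity; this is not the actual implementation.

```python
# Simplified sketch of preset expansion (not the real code): a preset adds
# its item types to a common set, and the result is filtered through the
# canonical sheet order so the sheets come out in a sensible sequence.
sheet_order = ["Document", "Protocol", "Publication", "Gene", "BioFeature",
               "Antibody", "Biosource", "BiosampleCellCulture", "Biosample",
               "FileFastq", "ExperimentSeq", "ExperimentSetReplicate", "Image"]

common_types = ["protocol", "publication", "biosource", "biosample",
                "biosamplecellculture", "image", "experimentsetreplicate"]

presets = {
    "chipseq": ["gene", "biofeature", "antibody", "filefastq", "experimentseq"],
}

def expand_preset(name):
    wanted = set(common_types) | set(presets[name])
    return [sheet for sheet in sheet_order if sheet.lower() in wanted]

print(expand_preset("chipseq"))
# ['Protocol', 'Publication', 'Gene', 'BioFeature', 'Antibody', ..., 'Image']
```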
diff --git a/doc/schema_info.md b/doc/schema_info.md index 0cb2fd60..0b702a8a 100644 --- a/doc/schema_info.md +++ b/doc/schema_info.md @@ -39,7 +39,7 @@ software.json | Software | software(s) sop\_map.json | SopMap | sop-maps, sop\_map summary\_statistic.json | SummaryStatistic | summary-statistics, summary\_statistic summary\_statistic\_hi\_c.json | SummaryStatisticHiC | summary-statistics-hi-c, summary\_statistic\_hi\_c -treatment\_chemical.json | TreatmentChemical | treatments-chemical, treatment\_chemical +treatment\_agent.json | TreatmentAgent | treatments-agent, treatment\_agent treatment\_rnai.json | TreatmentRnai | treatments-rnai, treatment\_rnai user.json | User | user(s) vendor.json | Vendor | vendor(s) diff --git a/wranglertools/4DNWranglerTools.egg-info/PKG-INFO b/wranglertools/4DNWranglerTools.egg-info/PKG-INFO index 164a57ca..69457b9f 100644 --- a/wranglertools/4DNWranglerTools.egg-info/PKG-INFO +++ b/wranglertools/4DNWranglerTools.egg-info/PKG-INFO @@ -34,8 +34,7 @@ Description: *Full list* ~~~~ - python3 get_field_info.py --type Publication --type Document --type Vendor --type Protocol --type ProtocolsCellCulture --type Biosource --type Enzyme --type Construct --type TreatmentChemical --type TreatmentRnai --type Modification --type Biosample --type File --type FileSet --type IndividualHuman --type IndividualMouse --type ExperimentHiC --type ExperimentCaptureC --type BioFeature --type GenomicRegion --type ExperimentSet --type Image --outfile AllItems.xls --order - + python3 get_field_info.py --type all --outfile AllItems.xls ~~~~ *To get a single sheet use* ``` diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py index efed5feb..7114a9f4 100755 --- a/wranglertools/get_field_info.py +++ b/wranglertools/get_field_info.py @@ -408,18 +408,18 @@ def get_sheet_names(types_list): else: presets = { 'hic': ["image", "filefastq", "experimenthic"], - 'chipseq': ["target", "antibody", "filefastq", "experimentseq"], + 'chipseq': ["gene", "biofeature", "antibody", "filefastq", "experimentseq"], 'repliseq': ["filefastq", "experimentrepliseq", "experimentset"], 'atacseq': ["enzyme", "filefastq", "experimentatacseq"], - 'damid': ["target", "filefastq", "fileprocessed", "experimentdamid"], - 'chiapet': ["target", "filefastq", "experimentchiapet"], - 'capturec': ["genomicregion", "target", "filefastq", "filereference", "experimentcapturec"], + 'damid': ["gene", "biofeature", "filefastq", "fileprocessed", "experimentdamid"], + 'chiapet': ["gene", "biofeature", "filefastq", "experimentchiapet"], + 'capturec': ["genomicregion", "biofeature", "filefastq", "filereference", "experimentcapturec"], 'fish': [ - "genomicregion", "target", "antibody", "microscopesettinga1", "filemicroscopy", + "genomicregion", "biofeature", "antibody", "microscopesettinga1", "filemicroscopy", "filereference", "fileprocessed", "imagingpath", "experimentmic", ], 'spt': [ - "target", "modification", "microscopesettinga2", + "gene", "biofeature", "modification", "microscopesettinga2", "fileprocessed", "imagingpath", "experimentmic", ]} for key in presets.keys(): From 48699c0b3fe463c288f694f7d36a96e4d4978e41 Mon Sep 17 00:00:00 2001 From: aschroed Date: Thu, 9 May 2019 16:52:39 -0400 Subject: [PATCH 4/7] Adding option and functionality so import-item fields are excluded for non-admins --- wranglertools/get_field_info.py | 91 +++++++++++++++++---------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py index 
7114a9f4..b19a114d 100755 --- a/wranglertools/get_field_info.py +++ b/wranglertools/get_field_info.py @@ -86,6 +86,10 @@ def getArgs(): # pragma: no cover action='store_true', help="will skip attribution prompt \ needed for automated submissions") + parser.add_argument('--noadmin', + default=False, + action='store_true', + help="Will set an admin user to non-admin for generating sheets") args = parser.parse_args() return args @@ -127,6 +131,7 @@ def __init__(self, key4dn): self.user = me_page['@id'] self.email = me_page['email'] self.check = True + self.admin = True if 'admin' in me_page.get('groups', []) else False except: print('Can not establish connection, please check your keys') me_page = {} @@ -213,6 +218,7 @@ class FieldInfo(object): comm = attr.ib(default=u'') enum = attr.ib(default=u'') + # additional fields for experiment sheets to capture experiment_set related information exp_set_addition = [FieldInfo('*replicate_set', 'Item:ExperimentSetReplicate', 3, 'Grouping for replicate experiments'), FieldInfo('*bio_rep_no', 'integer', 4, 'Biological replicate number'), @@ -221,14 +227,6 @@ class FieldInfo(object): # 'Grouping for non-replicate experiments') ] -fetch_items = { - "Document": "document", - "Protocol": "protocol", - "Enzyme": "enzyme", - "Biosource": "biosource", - "Publication": "publication", - "Vendor": "vendor" - } sheet_order = [ "User", "Award", "Lab", "Document", "ExperimentType", "Protocol", "Publication", "Organism", @@ -242,7 +240,7 @@ class FieldInfo(object): "ExperimentCaptureC", "ExperimentRepliseq", "ExperimentAtacseq", "ExperimentChiapet", "ExperimentDamid", "ExperimentSeq", "ExperimentTsaseq", "ExperimentSet", "ExperimentSetReplicate", "WorkflowRunSbg", "WorkflowRunAwsem", "OntologyTerm" - ] +] file_types = [i for i in sheet_order if i.startswith('File') and not i.startswith('FileSet')] file_types.remove('FileFormat') @@ -281,41 +279,45 @@ def dotted_field_name(field_name, parent_name=None): def build_field_list(properties, required_fields=None, include_description=False, - include_comment=False, include_enums=False, parent='', is_submember=False): + include_comment=False, include_enums=False, parent='', is_submember=False, admin=False): fields = [] for name, props in properties.items(): is_member_of_array_of_objects = False - if not props.get('calculatedProperty', False): - if 'submit4dn' not in props.get('exclude_from', [""]): - if is_subobject(props): - if get_field_type(props).startswith('array'): - is_member_of_array_of_objects = True - fields.extend(build_field_list(props['items']['properties'], - required_fields, - include_description, - include_comment, - include_enums, - name, - is_member_of_array_of_objects) - ) - else: - field_name = dotted_field_name(name, parent) - if required_fields is not None: - if field_name in required_fields: - field_name = '*' + field_name - field_type = get_field_type(props) - if is_submember: - field_type = "array of embedded objects, " + field_type - desc = '' if not include_description else props.get('description', '') - comm = '' if not include_comment else props.get('comment', '') - enum = '' if not include_enums else props.get('enum', '') - lookup = props.get('lookup', 500) # field ordering info - # if array of string with enum - if field_type == "array of strings": - sub_props = props.get('items', '') - enum = '' if not include_enums else sub_props.get('enum', '') - # copy paste exp set for ease of keeping track of different types in experiment objects - fields.append(FieldInfo(field_name, field_type, lookup, 
desc, comm, enum)) + if props.get('calculatedProperty'): + continue + if 'submit4dn' in props.get('exclude_from', []): + continue + if ('import_items' in props.get('permission', []) and not admin): + continue + if is_subobject(props): + if get_field_type(props).startswith('array'): + is_member_of_array_of_objects = True + fields.extend(build_field_list(props['items']['properties'], + required_fields, + include_description, + include_comment, + include_enums, + name, + is_member_of_array_of_objects) + ) + else: + field_name = dotted_field_name(name, parent) + if required_fields is not None: + if field_name in required_fields: + field_name = '*' + field_name + field_type = get_field_type(props) + if is_submember: + field_type = "array of embedded objects, " + field_type + desc = '' if not include_description else props.get('description', '') + comm = '' if not include_comment else props.get('comment', '') + enum = '' if not include_enums else props.get('enum', '') + lookup = props.get('lookup', 500) # field ordering info + # if array of string with enum + if field_type == "array of strings": + sub_props = props.get('items', '') + enum = '' if not include_enums else sub_props.get('enum', '') + # copy paste exp set for ease of keeping track of different types in experiment objects + fields.append(FieldInfo(field_name, field_type, lookup, desc, comm, enum)) return fields @@ -348,7 +350,8 @@ def get_uploadable_fields(connection, types, include_description=False, required_fields, include_description, include_comments, - include_enums) + include_enums, + admin=connection.admin) if name.startswith('Experiment') and not name.startswith('ExperimentSet') and name != 'ExperimentType': fields[name].extend(exp_set_addition) if 'extra_files' in properties: @@ -429,7 +432,7 @@ def get_sheet_names(types_list): lowercase_types += [ 'protocol', 'publication', 'biosource', 'biosample', 'biosamplecellculture', 'image', 'experimentsetreplicate' - ] + ] sheets = [sheet for sheet in sheet_order if sheet.lower() in lowercase_types] for name in types_list: modified_name = name.lower().replace('-', '').replace('_', '') @@ -444,6 +447,8 @@ def main(): # pragma: no cover if key.error: sys.exit(1) connection = FDN_Connection(key) + if args.noadmin: + connection.admin = False sheets = get_sheet_names(args.type) fields = get_uploadable_fields(connection, sheets, args.descriptions, args.comments, args.enums) From bcd2ea4a4d50bf8bd896edc73f7556a9d9224720 Mon Sep 17 00:00:00 2001 From: aschroed Date: Fri, 10 May 2019 14:53:40 -0400 Subject: [PATCH 5/7] Added some tests for new functionality; while doing so found a couple of bugs in build_field_list function having to do with embedded objects that were not arrays and 'array of string' field_type typo --- tests/conftest.py | 60 +++++++++++++++++++++++++++++++-- tests/test_get_field_info.py | 21 ++++++++++-- wranglertools/get_field_info.py | 37 ++++++++++++++------ 3 files changed, 102 insertions(+), 16 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7f8044a5..fc9678c9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,7 @@ import pytest from wranglertools.get_field_info import FDN_Key, FDN_Connection + class MockedResponse(object): def __init__(self, json, status): self._json = json @@ -19,6 +20,7 @@ def __init__(self, key4dn): self.award = 'test_award' self.labs = ['test_lab'] self.email = 'test@test.test' + self.admin = False def set_award(self, lab, dontPrompt=False): self.award = 'test_award' @@ -100,6 +102,21 @@ def connection_fake(): 
@pytest.fixture(scope="module") def item_properties(): return {'@id': {'calculatedProperty': True, 'title': 'ID', 'type': 'string'}, + "accession": { + "serverDefault": "accession", + "title": "Accession", + "permission": "import_items", + "description": "A unique identifier to be used to reference the object.", + "format": "accession", + "internal_comment": "Only admins are allowed to set or update this value.", + "type": "string", + "accessionType": "SR"}, + "schema_version": { + + "pattern": "^\\d+(\\.\\d+)*$", + "title": "Schema Version", + "default": "2", + "type": "string"}, '@type': {'calculatedProperty': True, 'items': {'type': 'string'}, 'title': 'Type', @@ -150,7 +167,11 @@ def item_properties(): 'pattern': '^\\d+(\\.\\d+)*$', 'requestMethod': [], 'title': 'Schema Version', - 'type': 'string'}, + 'type': 'string', + "exclude_from": [ + "submit4dn", + "FFedit-create" + ]}, 'start_date': {'anyOf': [{'format': 'date-time'}, {'format': 'date'}], 'comment': 'Date can be submitted as YYYY-MM-DD or ' 'YYYY-MM-DDTHH:MM:SSTZD (TZD is the time zone ' @@ -166,6 +187,7 @@ def item_properties(): 'replaced', 'released', 'revoked'], + 'suggested_enum': ['awesome'], 'title': 'Status', 'type': 'string'}, 'title': {'description': 'The grant name from the NIH database, if ' @@ -179,7 +201,8 @@ def item_properties(): 'format': 'uri', 'rdfs:subPropertyOf': 'rdfs:seeAlso', 'title': 'URL', - 'type': 'string'}, + 'type': 'string', + 'suggested_enum': ['https://www.test.com', 'https://www.example.com']}, 'uuid': {'format': 'uuid', 'requestMethod': 'POST', 'serverDefault': 'uuid4', @@ -189,7 +212,38 @@ def item_properties(): 'the user has permission to view.', 'enum': ['4DN', 'Not 4DN'], 'title': 'View access group', - 'type': 'string'}} + 'type': 'string'}, + "file_format_specification": { + "type": "object", + "properties": { + "download": { + "title": "File Name", + "description": "File Name of the attachment.", + "type": "string" + }, + "href": { + "internal_comment": "Internal webapp URL for document file", + "title": "href", + "description": "Path to download the file attached to this Item.", + "type": "string" + } + }, + "title": "File format specification", + "description": "Text or pdf file that further explains the file format", + "formInput": "file", + "ff_flag": "clear clone", + "lookup": 1}, + "guide_rnas": { + "description": "The guide RNA sequences used in Crispr targetting.", + "type": "array", + "items": { + "title": "Guide RNA", + "description": "Sequence of the guide RNA - submit as DNA (i.e. 
T not U) can include the PAM motif that is not actually part of the transcribed target and should not include the tracrRNA so that the sequence submitted reflects the genomic sequence", + "type": "string", + "pattern": "^[ATGCN]+$" + }, + "lookup": 60, + "title": "Guide RNAs"}} @pytest.fixture diff --git a/tests/test_get_field_info.py b/tests/test_get_field_info.py index 36da50b4..d48f9c75 100644 --- a/tests/test_get_field_info.py +++ b/tests/test_get_field_info.py @@ -252,16 +252,33 @@ def test_dotted_field_name_no_parent(): def test_build_field_list(item_properties): field_list = gfi.build_field_list(item_properties, required_fields=["title", "pi"]) assert field_list - assert len(field_list) == 13 + assert len(field_list) == 15 names = [i.name for i in field_list] assert '*title' in names -def test_build_field_list_gets_enum(item_properties): +def test_build_field_list_excludes_from_and_skip_import_items(item_properties): + field_list = gfi.build_field_list(item_properties) + assert not [field for field in field_list if field.name == 'schema_version'] # exclude_from + assert not [field for field in field_list if field.name == 'accession'] # import_items + + +def test_build_field_list_does_not_skip_import_items_if_admin(item_properties): + field_list = gfi.build_field_list(item_properties, admin=True) + assert not [field for field in field_list if field.name == 'schema_version'] # exclude_from + assert [field for field in field_list if field.name == 'accession'] # import_items + + +def test_build_field_list_gets_enum_or_suggested_enum(item_properties): field_list = gfi.build_field_list(item_properties, include_enums=True) for field in field_list: if field.name == "project": assert ['4DN', 'External'] == field.enum + if field.name == "url": + assert ['https://www.test.com', 'https://www.example.com'] == field.enum + if field.name == "status": + assert 'awesome' not in field.enum + assert 'current' in field.enum field_list = gfi.build_field_list(item_properties) for field in field_list: diff --git a/wranglertools/get_field_info.py b/wranglertools/get_field_info.py index b19a114d..59bd3c0d 100755 --- a/wranglertools/get_field_info.py +++ b/wranglertools/get_field_info.py @@ -265,6 +265,8 @@ def get_field_type(field): def is_subobject(field): + if field.get('type') == 'object': + return True try: return field['items']['type'] == 'object' except: @@ -292,14 +294,23 @@ def build_field_list(properties, required_fields=None, include_description=False if is_subobject(props): if get_field_type(props).startswith('array'): is_member_of_array_of_objects = True - fields.extend(build_field_list(props['items']['properties'], - required_fields, - include_description, - include_comment, - include_enums, - name, - is_member_of_array_of_objects) - ) + fields.extend(build_field_list(props['items']['properties'], + required_fields, + include_description, + include_comment, + include_enums, + name, + is_member_of_array_of_objects) + ) + else: + fields.extend(build_field_list(props['properties'], + required_fields, + include_description, + include_comment, + include_enums, + name, + is_member_of_array_of_objects) + ) else: field_name = dotted_field_name(name, parent) if required_fields is not None: @@ -310,12 +321,16 @@ def build_field_list(properties, required_fields=None, include_description=False field_type = "array of embedded objects, " + field_type desc = '' if not include_description else props.get('description', '') comm = '' if not include_comment else props.get('comment', '') - enum = '' if not 
include_enums else props.get('enum', '') + enum = '' + if include_enums: + enum = props.get('enum') if 'enum' in props else props.get('suggested_enum', '') lookup = props.get('lookup', 500) # field ordering info # if array of string with enum - if field_type == "array of strings": + if field_type == "array of string": sub_props = props.get('items', '') - enum = '' if not include_enums else sub_props.get('enum', '') + enum = '' + if include_enums: + enum = sub_props.get('enum') if 'enum' in sub_props else sub_props.get('suggested_enum', '') # copy paste exp set for ease of keeping track of different types in experiment objects fields.append(FieldInfo(field_name, field_type, lookup, desc, comm, enum)) return fields From 2005ec58553a53119d32cb2f9dd7a87e6a3aac0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 13 May 2019 14:27:22 -0400 Subject: [PATCH 6/7] take out last target mentions --- tests/conftest.py | 2 +- wranglertools/import_data.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fc9678c9..d802d348 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -363,7 +363,7 @@ def returned_experiment_set_schema(): @pytest.fixture def returned_vendor_items(): - data = {'@id': '/search/?type=Vendor&limit=all&frame=object', 'sort': {'label': {'order': 'asc', 'missing': '_last', 'ignore_unmapped': True}, 'date_created': {'order': 'desc', 'ignore_unmapped': True}}, 'columns': {'@id': 'ID', 'aliases': 'Lab aliases', 'name': 'name', 'description': 'Description', 'title': 'Name'}, 'clear_filters': '/search/?type=Vendor', '@context': '/terms/', 'views': [{'href': '/report/?type=Vendor&limit=all&frame=object', 'title': 'View tabular report', 'icon': 'table'}], 'notification': 'Success', 'filters': [{'field': 'type', 'term': 'Vendor', 'remove': '/search/?limit=all&frame=object'}], '@type': ['Search'], '@graph': [{'url': 'https://www.thermofisher.com/us/en/home/brands/thermo-scientific.html#/legacy=www.fermentas.com', '@id': '/vendors/thermofisher-scientific/', 'aliases': [], 'status': 'in review by lab', 'description': 'previously also Fermentas', 'award': '/awards/1U01CA200059-01/', 'uuid': 'b31106bc-8535-4448-903e-854af460b21f', 'lab': '/labs/4dn-dcic-lab/', 'date_created': '2016-12-08T18:31:47.847660+00:00', '@type': ['Vendor', 'Item'], 'schema_version': '1', 'title': 'ThermoFisher Scientific', 'name': 'thermofisher-scientific', 'submitted_by': '/users/986b362f-4eb6-4a9c-8173-3ab267307e3a/'}, {'url': 'https://www.neb.com', '@id': '/vendors/new-england-biolabs/', 'aliases': [], 'status': 'in review by lab', 'description': '', 'award': '/awards/1U01CA200059-01/', 'uuid': 'b31106bc-8535-4448-903e-854af460b21e', 'lab': '/labs/4dn-dcic-lab/', 'date_created': '2016-12-08T18:31:47.824418+00:00', '@type': ['Vendor', 'Item'], 'schema_version': '1', 'title': 'New England Biolabs', 'name': 'new-england-biolabs', 'submitted_by': '/users/986b362f-4eb6-4a9c-8173-3ab267307e3a/'}, {'url': 'http://www.worthington-biochem.com', '@id': '/vendors/worthington-biochemical/', 'aliases': [], 'status': 'in review by lab', 'description': '', 'award': '/awards/1U01CA200059-01/', 'uuid': 'b31106bc-8535-4448-903e-854af460b21d', 'lab': '/labs/4dn-dcic-lab/', 'date_created': '2016-12-08T18:31:47.807726+00:00', '@type': ['Vendor', 'Item'], 'schema_version': '1', 'title': 'Worthington Biochemical', 'name': 'worthington-biochemical', 'submitted_by': '/users/986b362f-4eb6-4a9c-8173-3ab267307e3a/'}], 'title': 'Search', 'total': 3, 
'facets': [{'total': 3, 'title': 'Data Type', 'field': 'type', 'terms': [{'key': 'Vendor', 'doc_count': 3}, {'key': 'AccessKey', 'doc_count': 0}, {'key': 'AnalysisStep', 'doc_count': 0}, {'key': 'Award', 'doc_count': 0}, {'key': 'Biosample', 'doc_count': 0}, {'key': 'BiosampleCellCulture', 'doc_count': 0}, {'key': 'Biosource', 'doc_count': 0}, {'key': 'Construct', 'doc_count': 0}, {'key': 'Document', 'doc_count': 0}, {'key': 'Enzyme', 'doc_count': 0}, {'key': 'Experiment', 'doc_count': 0}, {'key': 'ExperimentCaptureC', 'doc_count': 0}, {'key': 'ExperimentHiC', 'doc_count': 0}, {'key': 'ExperimentRepliseq', 'doc_count': 0}, {'key': 'File', 'doc_count': 0}, {'key': 'FileFasta', 'doc_count': 0}, {'key': 'FileFastq', 'doc_count': 0}, {'key': 'FileProcessed', 'doc_count': 0}, {'key': 'FileReference', 'doc_count': 0}, {'key': 'FileSet', 'doc_count': 0}, {'key': 'Individual', 'doc_count': 0}, {'key': 'IndividualMouse', 'doc_count': 0}, {'key': 'Lab', 'doc_count': 0}, {'key': 'Modification', 'doc_count': 0}, {'key': 'Ontology', 'doc_count': 0}, {'key': 'OntologyTerm', 'doc_count': 0}, {'key': 'Organism', 'doc_count': 0}, {'key': 'Publication', 'doc_count': 0}, {'key': 'Software', 'doc_count': 0}, {'key': 'SopMap', 'doc_count': 0}, {'key': 'Target', 'doc_count': 0}, {'key': 'Treatment', 'doc_count': 0}, {'key': 'TreatmentChemical', 'doc_count': 0}, {'key': 'TreatmentRnai', 'doc_count': 0}, {'key': 'User', 'doc_count': 0}, {'key': 'Workflow', 'doc_count': 0}, {'key': 'WorkflowRun', 'doc_count': 0}]}, {'total': 3, 'title': 'Audit category: DCC ACTION', 'field': 'audit.INTERNAL_ACTION.category', 'terms': [{'key': 'mismatched status', 'doc_count': 0}, {'key': 'validation error', 'doc_count': 0}, {'key': 'validation error: run_status', 'doc_count': 0}]}]} + data = {'@id': '/search/?type=Vendor&limit=all&frame=object', 'sort': {'label': {'order': 'asc', 'missing': '_last', 'ignore_unmapped': True}, 'date_created': {'order': 'desc', 'ignore_unmapped': True}}, 'columns': {'@id': 'ID', 'aliases': 'Lab aliases', 'name': 'name', 'description': 'Description', 'title': 'Name'}, 'clear_filters': '/search/?type=Vendor', '@context': '/terms/', 'views': [{'href': '/report/?type=Vendor&limit=all&frame=object', 'title': 'View tabular report', 'icon': 'table'}], 'notification': 'Success', 'filters': [{'field': 'type', 'term': 'Vendor', 'remove': '/search/?limit=all&frame=object'}], '@type': ['Search'], '@graph': [{'url': 'https://www.thermofisher.com/us/en/home/brands/thermo-scientific.html#/legacy=www.fermentas.com', '@id': '/vendors/thermofisher-scientific/', 'aliases': [], 'status': 'in review by lab', 'description': 'previously also Fermentas', 'award': '/awards/1U01CA200059-01/', 'uuid': 'b31106bc-8535-4448-903e-854af460b21f', 'lab': '/labs/4dn-dcic-lab/', 'date_created': '2016-12-08T18:31:47.847660+00:00', '@type': ['Vendor', 'Item'], 'schema_version': '1', 'title': 'ThermoFisher Scientific', 'name': 'thermofisher-scientific', 'submitted_by': '/users/986b362f-4eb6-4a9c-8173-3ab267307e3a/'}, {'url': 'https://www.neb.com', '@id': '/vendors/new-england-biolabs/', 'aliases': [], 'status': 'in review by lab', 'description': '', 'award': '/awards/1U01CA200059-01/', 'uuid': 'b31106bc-8535-4448-903e-854af460b21e', 'lab': '/labs/4dn-dcic-lab/', 'date_created': '2016-12-08T18:31:47.824418+00:00', '@type': ['Vendor', 'Item'], 'schema_version': '1', 'title': 'New England Biolabs', 'name': 'new-england-biolabs', 'submitted_by': '/users/986b362f-4eb6-4a9c-8173-3ab267307e3a/'}, {'url': 'http://www.worthington-biochem.com', 
'@id': '/vendors/worthington-biochemical/', 'aliases': [], 'status': 'in review by lab', 'description': '', 'award': '/awards/1U01CA200059-01/', 'uuid': 'b31106bc-8535-4448-903e-854af460b21d', 'lab': '/labs/4dn-dcic-lab/', 'date_created': '2016-12-08T18:31:47.807726+00:00', '@type': ['Vendor', 'Item'], 'schema_version': '1', 'title': 'Worthington Biochemical', 'name': 'worthington-biochemical', 'submitted_by': '/users/986b362f-4eb6-4a9c-8173-3ab267307e3a/'}], 'title': 'Search', 'total': 3, 'facets': [{'total': 3, 'title': 'Data Type', 'field': 'type', 'terms': [{'key': 'Vendor', 'doc_count': 3}, {'key': 'AccessKey', 'doc_count': 0}, {'key': 'AnalysisStep', 'doc_count': 0}, {'key': 'Award', 'doc_count': 0}, {'key': 'Biosample', 'doc_count': 0}, {'key': 'BiosampleCellCulture', 'doc_count': 0}, {'key': 'Biosource', 'doc_count': 0}, {'key': 'Construct', 'doc_count': 0}, {'key': 'Document', 'doc_count': 0}, {'key': 'Enzyme', 'doc_count': 0}, {'key': 'Experiment', 'doc_count': 0}, {'key': 'ExperimentCaptureC', 'doc_count': 0}, {'key': 'ExperimentHiC', 'doc_count': 0}, {'key': 'ExperimentRepliseq', 'doc_count': 0}, {'key': 'File', 'doc_count': 0}, {'key': 'FileFasta', 'doc_count': 0}, {'key': 'FileFastq', 'doc_count': 0}, {'key': 'FileProcessed', 'doc_count': 0}, {'key': 'FileReference', 'doc_count': 0}, {'key': 'FileSet', 'doc_count': 0}, {'key': 'Individual', 'doc_count': 0}, {'key': 'IndividualMouse', 'doc_count': 0}, {'key': 'Lab', 'doc_count': 0}, {'key': 'Modification', 'doc_count': 0}, {'key': 'Ontology', 'doc_count': 0}, {'key': 'OntologyTerm', 'doc_count': 0}, {'key': 'Organism', 'doc_count': 0}, {'key': 'Publication', 'doc_count': 0}, {'key': 'Software', 'doc_count': 0}, {'key': 'SopMap', 'doc_count': 0}, {'key': 'BioFeature', 'doc_count': 0}, {'key': 'Treatment', 'doc_count': 0}, {'key': 'TreatmentChemical', 'doc_count': 0}, {'key': 'TreatmentRnai', 'doc_count': 0}, {'key': 'User', 'doc_count': 0}, {'key': 'Workflow', 'doc_count': 0}, {'key': 'WorkflowRun', 'doc_count': 0}]}, {'total': 3, 'title': 'Audit category: DCC ACTION', 'field': 'audit.INTERNAL_ACTION.category', 'terms': [{'key': 'mismatched status', 'doc_count': 0}, {'key': 'validation error', 'doc_count': 0}, {'key': 'validation error: run_status', 'doc_count': 0}]}]} return MockedResponse(data, 200) diff --git a/wranglertools/import_data.py b/wranglertools/import_data.py index b9ea687b..64638b39 100755 --- a/wranglertools/import_data.py +++ b/wranglertools/import_data.py @@ -51,8 +51,8 @@ with the object name and use the '--type' argument Ex: %(prog)s mydata.xsls --type ExperimentHiC -The header of each sheet should be the names of the fields. -Ex: award, lab, target, etc. +The name of each sheet should be the names of the fields. +Ex: Award, Lab, BioFeature, etc. To upload objects with attachments, use the column titled "attachment" containing the full path to the file you wish to attach From f792dcebacb82187a3071c17518286c145dba84c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 13 May 2019 14:30:07 -0400 Subject: [PATCH 7/7] doc correction --- wranglertools/import_data.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wranglertools/import_data.py b/wranglertools/import_data.py index 64638b39..83c75638 100755 --- a/wranglertools/import_data.py +++ b/wranglertools/import_data.py @@ -51,9 +51,12 @@ with the object name and use the '--type' argument Ex: %(prog)s mydata.xsls --type ExperimentHiC -The name of each sheet should be the names of the fields. 
+The name of each sheet should be the name of the object type. Ex: Award, Lab, BioFeature, etc. +The column names on the sheets should be the field names. +Ex: aliases, experiment_type, etc. + To upload objects with attachments, use the column titled "attachment" containing the full path to the file you wish to attach
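As a purely illustrative recap of this layout, the sketch below shows (as Python data, not an actual .xls file) how a minimal workbook for import_data is organized: each sheet is named after an object type and its header row lists field names (the description and controlled-vocabulary rows produced by get_field_info are omitted here). The aliases, values and file path are hypothetical.

```python
# Hypothetical workbook layout: keys stand in for sheets named after object
# types, the first row of each sheet lists field names, and later rows hold
# the values (including an "attachment" column with a full file path).
workbook = {
    "ExperimentHiC": [
        ["aliases", "experiment_type", "biosample", "files"],
        ["testlab:expHic001", "in situ Hi-C", "testlab:biosample1", "testlab:fastqfile1"],
    ],
    "Document": [
        ["aliases", "attachment"],
        ["testlab:protocoldoc1", "/full/path/to/protocol.pdf"],
    ],
}
```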