# In [31]:  (Jupyter notebook cell prompt)
# *******************************************************
#                   Name Dictionary: 
# *******************************************************

# Create contact/permissions/file information file
#       a) initial basic information for run 
#       b) dictionary to populate sciencebase items and header of download status file
#       c) erddap type/explanation dictionary to reference
#       d) sciencebase user read/write permissions      

import json
import os
import pprint

# Top-level dictionary that is filled in below and later written to
# "name_file_dict.json" in the working directory.
nfdict = {}


#  path to save this dictionary file (should go in a working directory)

tempdir = "/xxxxxxxx/xxxxxxxx/xxxxxxx/"
#tempdir = "/Users/twellman/Documents/BCB_data_projects/OBIS_usa_database/erddap_MBON_test2/"
#tempdir = "/Users/twellman/Documents/BCB_data_projects/OBIS_usa_database/PacIOOS/"
#tempdir = "/Users/twellman/Documents/BCB_data_projects/OBIS_usa_database/NOAA/"


# ****************************************************************************
# ****************************************************************************


# section to adjust specs as needed

# initialize the run-specific settings ('init') sub-dictionary
nfdict['init'] = {}

#  set username for ScienceBase login
nfdict['init']['login_name'] = 'twellman@usgs.gov'

#  Name of download status file (json), existing file or one to create, depends on ['url_flag'] below
nfdict['init']['sb_json'] = "file_sb.json"

#  Base ERDDAP data server url - to "/erddap" level
#  (uncomment exactly one of the alternatives)
#nfdict['init']['baseurl']  = 'http://gcoos4.tamu.edu:8080/erddap'
#nfdict['init']['baseurl']  = "http://oos.soest.hawaii.edu/erddap"
nfdict['init']['baseurl'] = "http://coastwatch.pfeg.noaa.gov/erddap"

#  Content retrieval requests:
#  includes datasets, metadata, and other information
nfdict['init']['dataproc'] = [".nc", ".ncHeader", ".csv"]
nfdict['init']['metafiles'] = [".iso19115"]
nfdict['init']['table_info'] = ["Dataset ID", "Title", "Institution", "Summary", "Background Info", "Info", 'RSS']

#  Sciencebase OpenDap weblinks to show in ScienceBase item
nfdict['init']['webnames'] = ['.html', '.csv', '.nc', '.ncHeader', '.json', '.fgdc', '.geoJson', '.iso19115']

# project provenance label
# NOTE: stored at the top level of nfdict, not under 'init'
# nfdict[u'prov_state'] = 'GCOOS MBON data retrieve'
# nfdict[u'prov_state'] = 'PacIOOS data retrieve'
nfdict[u'prov_state'] = 'NOAA data retrieve'

#  whether to use existing search (advanced search url) in status file
nfdict['init']['url_flag'] = True

# whether to repeat data retrievals (proc) or data uploads to ScienceBase (sb)
# purge_proc = True will repeat data retrievals, switch to False after one run for append mode.
# purge_sb = True will reload ScienceBase files (add or update), False will only add new files.
# Using 'skip' will skip either process entirely.
nfdict['init']['purge_proc'] = False
nfdict['init']['purge_sb'] = False



# ****************************************************************************
# ****************************************************************************

# hardwired specs - no need to modify - work in progress


#  File format flags - HARDWIRED, work in progress
#  Added to the existing 'init' settings in one call; the pair order is the
#  order in which the keys are stored.
nfdict['init'].update([
    ('infotype', '.json'),   # format to read dataset information
    ('linktype', '.html'),   # format to show data resources
    ('dataform', 'dframe'),  # format to display data (Pandas dataframe)
])

# ERDDAP file type / explanation dictionary.
#
# Each argument to zip() below is one (file-type suffix, description) pair;
# zip() transposes the pairs into the two parallel lists `fnames` and
# `describe`, which are then combined into the lookup dictionary.
#
# NOTE: '.graph' and '.subset' are listed twice. Both lists keep all 31
# entries, but in the dictionary the description from the LAST pair for a
# suffix replaces the earlier one.
fnames, describe = map(list, zip(
    ('.asc', ' OPeNDAP-style comma-separated ASCII text'),
    ('.csv', 'Comma-separated value file (ISO 8601)'),
    ('.csvp', 'Comma separated value file (name(units))'),
    ('.csv0', 'Comma separated value file (no names/units)'),
    ('.das', 'OPeNDAP Dataset Attribute Structure (DAS)'),
    ('.dds', 'OPeNDAP Dataset Descriptor Structure (DDS)'),
    ('.dods', 'OPeNDAP clients, download data DODS format'),
    ('.esriCsv', 'Comma separated value file ESRI ArcGIS 9.x'),
    ('.fgdc', 'FGDC .xml metadata'),
    ('.geoJson', 'GeoJSON .json file'),
    ('.graph', 'View a Make A Graph web page'),
    ('.help', 'Description of tabledap'),
    ('.html', 'OPeNDAP-style HTML Data Access Form'),
    ('.htmlTable', 'html data web page'),
    ('.iso19115', 'ISO 19115-2 .xml metadata'),
    ('.json', 'JSON file'),
    ('.mat', 'Download a MATLAB binary file.'),
    ('.nc', ' NetCDF-3 binary file (COARDS/CF/ACDD)'),
    ('.ncHeader', 'Netcdf header (metadata)'),
    ('.ncCF', 'NetCDF-3 CF DSG file'),
    ('.ncCFHeader', 'NetCDF header (metadata)'),
    ('.ncCFMA', 'NetCDF-3 CF DSG file'),
    ('.ncCFMAHeader', 'Header (the metadata) for the .ncCFMA file'),
    ('.odvTxt', 'ODV Generic Spreadsheet File (.txt)'),
    ('.subset', 'HTML form for subsetting data'),
    ('.tsv', 'Tab-separated ASCII text table ( ISO 8601 times)'),
    ('.tsvp', 'Tab-separated ASCII text table ( line 1: name (units))'),
    ('.tsv0', 'A .tsv file without column names or units'),
    ('.xhtml', 'XHTML (XML) file with the data in a table'),
    ('.graph', 'OPeNDAP raw data graphical processing'),
    ('.subset', 'OPeNDAP raw data subsetting'),
))

# suffix -> description lookup stored in the name dictionary
nfdict['file_dict'] = dict(zip(fnames, describe))

# ScienceBase read/write user permissions.
# Both lists hold the same four accounts; 'read_names' is an independent copy.
nfdict[u'write_names'] = [
    u'USER:twellman@usgs.gov',
    u"USER:albenson@usgs.gov",
    u"USER:sbristol@usgs.gov",
    u"USER:saulenbach@usgs.gov",
]
nfdict[u'read_names'] = list(nfdict[u'write_names'])

# Date key-label information for erddap and sciencebase.
# Each pair is (date key, date label); zip() splits the pairs into the two
# parallel lists stored under 'date_keys' and 'date_labels'.
nfdict['date_keys'], nfdict['date_labels'] = map(list, zip(
    ("time_coverage_start", u"Data record (start)"),
    ("time_coverage_end", u"Data record (end)"),
))

# Contact key-label information for erddap and sciencebase.
# Pairs are (erddap contact type, sciencebase contact type).
nfdict['erddap_contype'], nfdict['sb_contype'] = map(list, zip(
    ('pointOfContact', u'Point of Contact'),
    ('originator', u'Originator'),
    ('processor', u'Processor'),
    ('publisher', u'Publisher'),
))

# Pairs are (info key, label key), kept in matching positions.
nfdict['info_keys'], nfdict['label_keys'] = map(list, zip(
    ('type', u'contactType'),
    ('name', u'name'),
    ('email', u'email'),
    ('url', u'onlineResource'),
    ('institution', u'organization'),
    ('phone', u'phone'),
    ('role', u'role'),
))

# bcb obis node manager info
# The contact record is assembled from its nested parts (innermost first) so
# the structure is visible: two addresses -> primary location -> contact.
bcb_mail_address = {
    u'city': u'Denver',
    u'country': u'USA',
    u'line1': u'Box 25046, Denver Federal Center, Mail Stop 306',
    u'mailStopCode': u'306',
    u'state': u'CO',
    u'zip': u'80225-0046',
}

bcb_street_address = {
    u'city': u'Lakewood',
    u'country': u'US',
    u'line1': u'West 6th Ave. & Kipling St., DFC Bldg. 810',
    u'state': u'CO',
    u'zip': u'80225-0046',
}

bcb_primary_location = {
    u'building': u'DFC Bldg 810',
    u'buildingCode': u'KBT',
    u'faxPhone': u'3032024229',
    u'mailAddress': bcb_mail_address,
    u'name': u'Abigial L Benson/GIO/USGS/DOI - Primary Location',
    u'streetAddress': bcb_street_address,
}

nfdict[u'BCB_contact'] = {
    u'active': True,
    u'contactType': u'person',
    u'email': u'albenson@usgs.gov',
    u'firstName': u'Abigail',
    u'jobTitle': u'Biologist',
    u'lastName': u'Benson',
    u'middleName': u'L',
    u'name': u'Abigail L Benson',
    u'oldPartyId': 23179,
    u'organization': {u'displayText': u'Scientific Data Integration and Visualization'},
    u'primaryLocation': bcb_primary_location,
    u'type': u'Point of Contact',
}

# General structure + initial info for the status file (sb_json) header.
# Stored as a list of single-entry dictionaries, one per header field, in the
# order given below. 'none' and '' are the initial placeholder values.
nfdict[u'general information'] = list(
    {label: value}
    for label, value in (
        (u'purpose', u'Data Distillery development: MBON (GCOOS) data downloads'),
        (u'file created (date-time)', 'none'),
        (u'file modified (date-time)', 'none'),
        (u'data base url', 'none'),
        (u'data search url', ''),
        (u'data search json', ''),
        (u'contacts', u'Abigail Benson, Tristan P. Wellman, Steve Aulenbach, Sky Bristol'),
        (u'email', u'albenson@usgs.gov, twellman@usgs.gov, saulenbach@usgs.gov, sbristol@usgs.gov'),
        (u'organization', u'U.S. Geological Survey, Core Science Analytics, and Synthesis (B.C.B.)'),
        (u'address', u'Denver Federal Center, Building 810, Lakewood, Colorado'),
        (u'processing uuid', ''),
    )
)

# old method, no longer used, formerly overwrote dataset information
#name_file_dict[u'publisher'] = {u'contactType': u'organization',
#       u'name': u'Marine Biodiversity Oservation Network',
#       u'onlineResource': u'http://oceanservice.noaa.gov/news/apr16/mbon.html',
#       u'organization': {u'displayText': u'Marine Biodiversity Oservation Network'},
#       u'primaryLocation': {},
#       u'type': u'Publisher'}
#name_file_dict[u'processor'] = {u'contactType': u'organization',
#       u'name': u'GCOOS: Gulf of Mexico Coastal Ocean Observing System',
#       u'onlineResource': u'http://gcoos.tamu.edu',
#       u'organization': {u'displayText': u'GCOOS: Gulf of Mexico Coastal Ocean Observing System'},
#       u'primaryLocation': {},
#       u'type': u'Processor'}


# end of inputs 

# ****************************************************************************
# ****************************************************************************


# save file and reload to check


# Save the dictionary to the working directory (tempdir) as JSON, then read
# the file back and pretty-print it so the saved content can be checked.
#
# The path is built once with os.path.join so that tempdir works with or
# without a trailing path separator (plain string concatenation would write
# to e.g. "/some/dirname_file_dict.json" when the separator is missing).
nf_path = os.path.join(tempdir, "name_file_dict.json")

# save file to working directory (tempdir)
with open(nf_path, 'w') as fp:
    json.dump(nfdict, fp, indent=4)

# reload file to examine
with open(nf_path) as fp:
    nf_dict = json.load(fp)
pprint.pprint(nf_dict)

{u'BCB_contact': {u'active': True,
                  u'contactType': u'person',
                  u'email': u'albenson@usgs.gov',
                  u'firstName': u'Abigail',
                  u'jobTitle': u'Biologist',
                  u'lastName': u'Benson',
                  u'middleName': u'L',
                  u'name': u'Abigail L Benson',
                  u'oldPartyId': 23179,
                  u'organization': {u'displayText': u'Scientific Data Integration and Visualization'},
                  u'primaryLocation': {u'building': u'DFC Bldg 810',
                                       u'buildingCode': u'KBT',
                                       u'faxPhone': u'3032024229',
                                       u'mailAddress': {u'city': u'Denver',
                                                        u'country': u'USA',
                                                        u'line1': u'Box 25046, Denver Federal Center, Mail Stop 306',
                                       