In [None]:
# *******************************************************
#          <<<   Python ERDDAP Interface  >>>           #
#                                                       #
#      Functions:                                       #
#       (a) search by category, protocol, keyword       #
#       (b) request / process data from search / url    #
#       (c) examine/QC metadata and other information   #
#       (d) create, modify ScienceBase record           #
#                                                       #
#       Simple prototyping and tasks, for the moment    #
#                                                       #
#       T. Wellman, BCB, Core Sciences, USGS            #
#       W.I.P. << DRAFT >> components mostly etched out # 
#       Version 0.1 to 0.2   9/1/2016,  1/15/2017       #
# *******************************************************

# DRAFT NOTES (DIARY):

# This program attempts to strike a balance between using an
# ERDDAP server directly and working mainly within a Python
# environment for processing files and information to ScienceBase.
# The visual interface of ERDDAP is very intuitive and easy
# to use, so replacing it is not the primary goal. The 
# basic idea here is to interact with ERDDAP and automate
# some of the tedium involved in retrieving ERDDAP files, error 
# checking, and setting up or updating a ScienceBase item.

# A data search can be performed fresh or by repeating past searches
# using an "advanced search url" generated by ERDDAP. The input 
# information block (below) is presently used for processing options.
# Pan or search "Main Program" to find the notebook cell below for inputs. 
# The search url and general file paths are included in the "sb_json" file.
# A name dictionary holds custom information to populate Sciencebase records.
# The name dictionary is created beforehand (search "Name Dictionary")
# in this notebook. Both the name dictionary and sb_json file should be in the
# specified working directory (tempdir). See the input section with paths 
# and such to specify in the "Main Program". Inputs and such are a bit 
# hardwired for now.
 
# The "url_flag" shows whether (True, False) an existing url has been created. 
# The adv. url is in "General information" section of the "sb_json" file (below), 
# as the search url in json and html. It is easy to manually query an 
# external erddap to generate a url and piece together the "General information"  
# section manually to bypass the internal query process. Or just indicate False
# and use the internal functions to generate this information block automatically.
#  --> url_flag = True or False
    
# Purge flags indicate whether to overwrite content used in all processing (proc) or ScienceBase (sb)
# This is useful in updating files, restarting runs after making code changes or connection failures.
# Set overall purge (purge_proc = True) ONCE then flip to False if restart is needed. 
# purge_sb can also use a 'reset' to reset only the upload status to ScienceBase.
#  -->  purge_proc = True or False
#  -->  purge_sb = True, False, or 'reset'

# Note currently uploading large files to ScienceBase can be an issue. The PYSB upload/replace functions
# may need some tweaks. Offsite uploads at my home were problematic likely due to slow upload speeds.
# There are options to upload large files to ScienceBase indicated on the website that have not been
# explored here at present. 

# Adjust main program inputs first and save changes, then run next four notebook cells below, in order

# This is a work in progress........ 




In [None]:
import numpy as np
import time
import pprint
import pysb
import re
import requests
import pandas as pd
from dateutil import parser
import datetime
import sys
import json
from os import path, makedirs, remove
from shutil import copyfileobj
from urllib import urlencode, quote 
from collections import OrderedDict
from IPython.core.display import display
from timeit import default_timer as timer
from lxml import etree, objectify

print('modules loaded')

In [None]:
# ERDDAP file-type extensions, each paired with a short explanation.
# The pairs are kept in one table so an extension and its description
# cannot drift out of step; fnames, describe and file_dict are derived from it.
# NOTE(review): '.graph' and '.subset' each appear twice; in file_dict the
# later description replaces the earlier one - confirm which text is intended.

_file_types = [
    ('.asc', ' OPeNDAP-style comma-separated ASCII text'),
    ('.csv', 'Comma-separated value file (ISO 8601)'),
    ('.csvp', 'Comma separated value file (name(units))'),
    ('.csv0', 'Comma separated value file (no names/units)'),
    ('.das', 'OPeNDAP Dataset Attribute Structure (DAS)'),
    ('.dds', 'OPeNDAP Dataset Descriptor Structure (DDS)'),
    ('.dods', 'OPeNDAP clients, download data DODS format'),
    ('.esriCsv', 'Comma separated value file ESRI ArcGIS 9.x'),
    ('.fgdc', 'FGDC .xml metadata'),
    ('.geoJson', 'GeoJSON .json file'),
    ('.graph', 'View a Make A Graph web page'),
    ('.help', 'Description of tabledap'),
    ('.html', 'OPeNDAP-style HTML Data Access Form'),
    ('.htmlTable', 'html data web page'),
    ('.iso19115', 'ISO 19115-2 .xml metadata'),
    ('.json', 'JSON file'),
    ('.mat', 'Download a MATLAB binary file.'),
    ('.nc', ' NetCDF-3 binary file (COARDS/CF/ACDD)'),
    ('.ncHeader', 'Netcdf header (metadata)'),
    ('.ncCF', 'NetCDF-3 CF DSG file'),
    ('.ncCFHeader', 'NetCDF header (metadata)'),
    ('.ncCFMA', 'NetCDF-3 CF DSG file'),
    ('.ncCFMAHeader', 'Header (the metadata) for the .ncCFMA file'),
    ('.odvTxt', 'ODV Generic Spreadsheet File (.txt)'),
    ('.subset', 'HTML form for subsetting data'),
    ('.tsv', 'Tab-separated ASCII text table ( ISO 8601 times)'),
    ('.tsvp', 'Tab-separated ASCII text table ( line 1: name (units))'),
    ('.tsv0', 'A .tsv file without column names or units'),
    ('.xhtml', 'XHTML (XML) file with the data in a table'),
    ('.graph', 'OPeNDAP raw data graphical processing'),
    ('.subset', 'OPeNDAP raw data subsetting'),
]

# Split the table back into the two parallel lists and the lookup dictionary.
fnames, describe = map(list, zip(*_file_types))
file_dict = dict(_file_types)
print('ERDDAP file dict loaded')

In [None]:
# *******************************************************
#    Definitions:  status = draft, work in progress 
# *******************************************************

# Convert an XML string into nested Python dictionaries (recursive walk)
def xml_to_dict(xml_str):
    """Parse ``xml_str`` with lxml ``objectify`` and return it as a nested dict.

    The result has one top-level key, the root element's tag. Its value is
    built by recursively replacing every entry of an element's ``__dict__``
    with the converted form of that entry. An element whose ``__dict__`` is
    empty is returned unchanged, which ends the recursion.
    """
    def _unpack(node):
        members = node.__dict__
        if members:
            for tag in list(members):
                members[tag] = _unpack(members[tag])
            return members
        return node  # nothing nested beneath this node

    root = objectify.fromstring(xml_str)
    return {root.tag: _unpack(root)}

# Adapt to Python v2+ ascii conversion issues (apparently fixed in python 3)
def removeNonAscii(df_ascii):
    """Replace every character outside printable ASCII with a single space.

    Parameters
    ----------
    df_ascii : object with ``apply`` whose elements are strings
        Each element is rewritten character by character.
        NOTE(review): presumably a pandas Series of text - confirm with callers.

    Returns
    -------
    The result of ``df_ascii.apply`` with the cleaned strings.

    Printable ASCII is the inclusive range 32 (space) to 126 ('~'). The earlier
    exclusive test ``32 < ord(i) < 126`` blanked out '~' even though it is a
    valid ASCII character.
    """
    def _scrub(text):
        return ''.join([ch if 32 <= ord(ch) <= 126 else " " for ch in text])

    return df_ascii.apply(_scrub)

# Basic stats on requested ERDDAP data table (super simple for now)
def get_stats(df_o):
    """Print summary counts for a dataset table and a per-agency breakdown.

    Parameters
    ----------
    df_o : pandas.DataFrame
        Must provide the columns "Institution", "tabledap" and "griddap".

    Returns
    -------
    None. Everything is written with ``print``.

    Rows whose institution is "???", an empty string, or missing are grouped
    under "Unknown". The earlier filter ``!= ("???" or "")`` only compared
    against "???", because the ``or`` expression evaluates to its first truthy
    operand, so blank institutions were reported as an agency of their own.
    """
    institution = df_o["Institution"].dropna()
    known = institution[~institution.isin(["???", ""])]
    agencies, counts = np.unique(known.values, return_counts=True)

    names = list(agencies) + ["Unknown"]
    totals = [int(c) for c in counts]
    totals.append(int(df_o.shape[0]) - sum(totals))  # whatever is left is unknown

    stats = [df_o.shape[0],
             np.count_nonzero(df_o["tabledap"]!=""),
             np.count_nonzero(df_o["griddap"]!=""),
             len(names)-1 ]
    print( "\n***** Query Results *****\n\nTotal number of datasets: %d\nNumber of tables: %d\nNumber of grids: %d\
        \nNumber of agency groups providing data: %d\n" % tuple(stats))

    header =  ["** Agency Groups ** ", "** Dataset count **"]
    # Column widths come from the LONGEST entry in each column (the previous
    # code measured the alphabetically last agency and reused header[0] twice).
    a = max([len(header[0])] + [len(n) for n in names])
    c = max([len(header[1])] + [len(str(t)) for t in totals])
    f  = '\t{0:<%d}\t{1:<%d}' % (a, c)
    print(f.format(header[0], header[1]))
    for p in zip(names, totals):
        print(f.format(p[0],p[1]))
    return

# Show table
def viewtable(df_view, cw, mr):
    """Display at most the first ``mr`` rows of ``df_view`` in the notebook.

    Parameters
    ----------
    df_view : pandas.DataFrame
        Table to show. Its ``columns.name`` is set to "Data Index" in place.
    cw : int
        Value assigned to the pandas option ``display.max_colwidth``
        (a global pandas setting, it stays in effect after the call).
    mr : int
        Maximum number of rows to display.

    Rows are selected by position with ``iloc``. ``DataFrame.ix`` no longer
    exists in current pandas, and positional slicing also drops the old
    assumption that the index starts at 0. The previous ``describe()`` call was
    removed because its result was never used.
    """
    pd.set_option('display.max_colwidth', cw)
    sys.stdout.write("\nDisplay limited to a maximmum of %d datasets:\n" % mr)
    df_view.columns.name = "Data Index"
    display(df_view.iloc[0:mr])
    return

# bulk data download options: all data, all tables, all grids, or by category  - refine later **
def bulkoption(df):
    """Build the lookup of bulk-request options from an ERDDAP resource table.

    ``df`` must provide the columns 'Resource' and 'URL'. The returned dict maps
    an option letter to a ``(resource, task, url)`` tuple, where ``url`` is the
    concatenation of every 'URL' value whose 'Resource' equals that resource
    (an empty string when there is no such row).
    """
    choices = (
        ("D", "info", "all datasets"),
        ("T", "tabledap", "all tables"),
        ("G", "griddap", "all grids"),
        ("C", "categorize", "by category"),
    )
    bulkdict = {}
    for letter, resource, task in choices:
        matching = df.loc[df['Resource'] == resource]['URL']
        bulkdict[letter] = (resource, task, ''.join(matching))
    return bulkdict

# Generate url based on category and/or word/phrase search,
# Note: doesn't include value constraints - incomplete, a bit clunky, refine later **
def build_url(srch_word, protocol, cat_table, cindx, subcategory):
    """Build the ERDDAP "advanced search" URL in its .html and .json forms.

    Parameters
    ----------
    srch_word : str
        Text placed in the ``searchFor`` parameter (may be empty).
    protocol : str
        One of "A", "G", "W", "T"; any other value raises ``KeyError``.
    cat_table : pandas.DataFrame
        Table with a "Categorize" column; every value becomes a query
        parameter set to "(any)".
    cindx : index label
        Row of ``cat_table`` whose category receives ``subcategory``. Only
        read when ``subcategory`` is not an empty string.
    subcategory : str
        Category value to constrain on, or "" for no constraint.

    Returns
    -------
    (gen_url_html, gen_url_json) : tuple of str

    Reads the module-level ``baseurl``. Both URLs are assembled from the same
    encoded query. The earlier code derived the .json URL by replacing every
    ".html" in the finished string, which also rewrote a search word that
    happened to contain ".html".
    """
    prot_dict = {"A" : "(any)", "G" : "griddap",
                 "W" : "WMS","T" : "tabledap"}

    # One ordered mapping keeps the parameters in a predictable order.
    params = OrderedDict()
    params["searchFor"] = srch_word
    params["protocol"] = prot_dict[protocol]
    for c in cat_table.Categorize:
        params[c] = "(any)"
    if subcategory != "":
        params[cat_table.at[cindx, "Categorize"]] = subcategory
    # Value constraints are sent empty for now (not yet supported here).
    for key in ("maxLat", "minLon", "maxLon", "minLat", "minTime", "maxTime"):
        params[key] = ""

    # urlencode already percent-escapes the values; no further quoting needed.
    query = urlencode(params)
    gen_url_html = "{}{}{}".format(baseurl, "/search/advanced.html?", query)
    gen_url_json = "{}{}{}".format(baseurl, "/search/advanced.json?", query)
    return gen_url_html, gen_url_json

# Attempt url request, report error if encountered
def url_request(url, rtype, timeout=None):
    """Request ``url`` and report, rather than raise, any request failure.

    Parameters
    ----------
    url : str
        Address to request.
    rtype : str
        'json' returns the decoded JSON body; any other value returns the
        streamed ``requests`` response object.
    timeout : float or None, optional
        Passed to ``requests.get``. ``None`` (default) keeps the previous
        behaviour of waiting without a limit.

    Returns
    -------
    (result, err) : tuple
        ``err`` is 'passed' or 'failed'. On failure ``result`` is ``None``.

    Previously a failing request left the result variable unassigned, so the
    error handlers and the final ``return`` themselves raised
    ``UnboundLocalError``. For 'json' requests the HTTP status is now checked
    before decoding so an error page is reported as an HTTP error.
    """
    response = None
    result = None
    err = 'failed'

    def _report(flag, error):
        # status is only available if the server actually answered
        code = getattr(response, 'status_code', 'no response')
        sys.stdout.write(flag)
        sys.stdout.write("Error: {}\n{}".format(error, code))

    try:
        if rtype == 'json':
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            result = response.json()
        else:
            response = requests.get(url, stream=True, timeout=timeout)
            result = response
    except ValueError as e:
        result = None
        _report("\n{}{}\n\n{}".format('** Flag ** : ', e,"Search criteria could be too restrictive"), e)
    except requests.exceptions.Timeout as e:
        result = None
        _report("\n{}".format("\n** Flag ** : system timeout - retry later"), e)
    except requests.exceptions.HTTPError as e:
        result = None
        _report("\n** Flag ** : http request error", e)
    except requests.exceptions.RequestException as e:
        result = None
        _report("\n** Flag ** : systematic exception error\nSearch url may need revision", e)
    else:
        err = 'passed'
    return result, err

# evaluate whether file directory exists or intialize (read/create) download status file as json
def ensure_dir(directory, url_flag, nf, task):
    """Create a working directory, or load/initialize the download status file.

    Parameters
    ----------
    directory : str
        Directory to create, or (for 'initialize') the folder prefix that is
        joined directly with the module-level ``sb_json`` file name.
    url_flag : bool
        Only used for 'initialize'. ``False`` means any existing status file is
        disregarded and a new one is written.
    nf : list
        Only used when a new status file is written: becomes the
        'general information' section. It is modified in place (entries 1 and 3
        receive the creation time and the module-level ``baseurl``).
    task : str
        'initialize' selects the status-file behaviour; anything else only
        ensures that ``directory`` exists.

    Returns
    -------
    ``(status, url_flag)`` for 'initialize', otherwise ``None``.

    A status file that cannot be parsed, or that lacks the search-url entry,
    is now treated as "url not found" and replaced instead of raising.
    """
    if task != 'initialize':
        try:
            if not path.exists(directory):
                makedirs(directory)
        except OSError as e:
            # Still best effort, but no longer silent when creation truly failed.
            if not path.isdir(directory):
                sys.stdout.write("\n** Flag ** : could not create directory %s (%s)" % (directory, e))
        return

    file_sb = "{}{}".format(directory, sb_json)
    status = None
    if not path.isfile(file_sb):
        sys.stdout.write("\nNote: sb_json (download status) file was not found in specified work folder")
        url_flag = False
    elif url_flag is False:
        sys.stdout.write("\nNote: existing sb_json (download status) file is disregarded - creating a new file")
    else:
        try:
            with open(file_sb) as json_data:
                status = json.load(json_data)
            has_url = 'json' in status[0]['general information'][5]['data search json']
        except (ValueError, KeyError, IndexError, TypeError):
            has_url = False  # unreadable or incomplete file
        if has_url:
            url_flag = True
        else:
            sys.stdout.write("\nNote: adv. search url was not found in sb_json (download status) file")
            url_flag = False

    # if download status file is absent or disregarded then create, initialize, and overwrite new file
    if url_flag == False:
        status = [{'general information': "none"},{'local_directory': tempdir},{'datasets (id)': {}}]
        status[0]['general information'] = nf
        status[0]['general information'][3]['data base url'] = baseurl
        date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
        status[0]['general information'][1]['file created (date-time)'] = date
        with open(file_sb, 'w') as fp:
            json.dump(status, fp, indent=4)
    return  status, url_flag

# Download files - data chunking option, uses content disposition
def download_file(url, fpath, altname, chunk):
    """Download ``url`` into the folder ``fpath`` and return the local file path.

    Parameters
    ----------
    url : str
        Address to download.
    fpath : str
        Existing folder that receives the file.
    altname : str
        File name used when the response carries no usable
        content-disposition file name.
    chunk : str
        "OFF" copies the raw stream in one call; any other value writes the
        body in 1024-byte pieces.

    Returns
    -------
    str or None
        Local path on success, ``None`` when the server answers with a status
        other than 200, and the label 'Request error - url' when the request
        itself could not be made.

    The file name taken from the response header is reduced to its final path
    component with surrounding quotes removed, because the header is supplied
    by the remote server and must not steer the write outside ``fpath``.
    """
    try:
        response, err = url_request(url,'file')
    except Exception:
        response = None
    if response is None:
        sys.stdout.write("\n\tRequest error - url: %s" % url)
        return 'Request error - url'

    try:
        if response.status_code != 200:
            sys.stdout.write("\n\t%s returned request error status code: %s" % (altname, response.status_code))
            return

        disposition = response.headers.get('content-disposition')
        locf = ''
        if disposition:
            found = re.search(r'filename="?([^";]+)"?', disposition)
            if found:
                # keep only the bare file name, whatever separators were sent
                locf = path.basename(found.group(1).strip().replace("\\", "/"))
        else:
            sys.stdout.write("\n\tGeneric filename using disposition:\n\t %s" % url)
        if not locf:
            locf = altname
        local_path = "{}/{}".format(fpath, locf)

        with open(local_path, 'wb') as f:
            if chunk == "OFF":
                # Stream file object, no data chunking
                response.raw.decode_content = True
                copyfileobj(response.raw, f)
            else:
                # Chunk data - adjust iter_content size (bytes) ** -- not yet tested
                for piece in response.iter_content(1024):
                    if piece:
                        f.write(piece)
        sys.stdout.write("\n\tDownloaded %s " % locf)
        return local_path
    finally:
        # release the connection on every path, including write errors
        response.close()
    
     
# Private helpers for data_proc (status bookkeeping, downloads, RSS parsing)
def _blank_file_record():
    """Return a new file map in which every tracked file is marked "None" (not yet retrieved)."""
    return {k: "None" for k in (dataproc + metafiles + ['info_dict', 'info_request'])}


def _write_status(status):
    """Write the status record to the sb_json file inside its recorded local directory."""
    with open(status[1]['local_directory'] + sb_json, 'w') as fp:
        json.dump(status, fp, indent=4)


def _fetch_files(record, extensions, url_base, dataset_id, fpath):
    """Download each not-yet-retrieved file type in ``extensions`` and record its local path."""
    for ext in extensions:
        if record['files'].get(ext, "None") in ("None", None):
            url = "{}{}".format(url_base, ext)
            altname = "{}{}".format(dataset_id, ext)
            saved = download_file(url, fpath, altname, "OFF")
            # download_file returns None or an error label when the request
            # fails; only a path to a file that exists counts as retrieved.
            if saved and path.isfile(saved):
                record['files'][ext] = saved
            else:
                record['files'][ext] = "None"


def _parse_rss(raw):
    """Parse an RSS stream and return the item fields (plus pubDate) as a dict."""
    tree = etree.parse(raw)
    root = tree.getroot()
    label = root.tag.rsplit("}",1)[1]
    ns = {label: root.nsmap[None]}
    rss = {}
    for node in tree.xpath("//{}:item/*".format(label), namespaces=ns):
        rss[node.tag.rsplit("}",1)[1]] = node.text
    # use the prefix registered above rather than a hard-coded 'rss'
    for node in tree.xpath("//{}:pubDate".format(label), namespaces=ns):
        rss['pubDate'] = node.text
    return rss


# Attempt retrieval of data files, metadata, and misc information
# check information existence, purge (i.e. update) data files if commanded
def data_proc(status, df_request, url_flag, purge_proc, purge_sb):
    """Download data, metadata and table information for every requested dataset.

    Parameters
    ----------
    status : list
        Download/upload status record; ``status[1]['local_directory']`` and
        ``status[2]['datasets (id)']`` are read and updated in place, and the
        record is written back to the sb_json file as work progresses.
    df_request : pandas.DataFrame
        One row per dataset. Reads the columns "Dataset ID", "Title",
        "tabledap", "griddap", "wms" and every column named in the
        module-level ``table_info``.
    url_flag : bool
        Not used inside this function (kept for the existing call signature).
    purge_proc : bool
        ``True`` resets the download/upload state and file map of every
        requested dataset that already has a record.
    purge_sb : bool or str
        'reset' (when ``purge_proc`` is not True) resets only the upload state.

    Returns
    -------
    None

    Uses the module-level ``tempdir``, ``dataproc``, ``metafiles``,
    ``table_info`` and ``sb_json``.

    Corrections relative to the earlier version:
      * a failed download is left as "None" instead of being stored as if it
        were a file path, so the completeness check can actually fail;
      * the info request is saved only when ``url_request`` reports 'passed'
        (it never returns 'error', so the old test was always true);
      * purging no longer raises ``KeyError`` for datasets without a record;
      * responses that could not be obtained (``None``) are skipped.
    """
    datasets = status[2]['datasets (id)']

    # purge data files - reset download/upload process
    if purge_proc == True:
        sys.stdout.write("\n** Purge requested reset file/metadata information")
        for index, row in df_request.iterrows():
            record = datasets.get(row['Dataset ID'])
            if record is not None:  # unknown datasets get a fresh record below
                record['download'] = 'incomplete'
                record['upload'] = 'incomplete'
                record['files'] = _blank_file_record()
        _write_status(status)
    elif purge_sb == 'reset':
        for index, row in df_request.iterrows():
            record = datasets.get(row['Dataset ID'])
            if record is not None:
                record['upload'] = 'incomplete'
        _write_status(status)

    for index, row in df_request.iterrows():
        dataset_id = row['Dataset ID']

        # ensure existence of working directory (temporary?)
        fpath = "{}{}".format(tempdir, dataset_id)
        ensure_dir(fpath, '--', '--', '--')

        # if warranted create dataset download record (ERDDAP dataset ID)
        if dataset_id not in datasets:
            datasets[dataset_id] = OrderedDict([('sb_id',"None"), ('dataset_url', "None"),
                         ('download','incomplete'), ('upload','incomplete'),
                         ('files', _blank_file_record())])
        record = datasets[dataset_id]

        # find ERDDAP dataset type (table, grid, wms), assumes type entries are unique
        url_base = ''.join([row["tabledap"], row["griddap"], row["wms"]])
        record['dataset_url'] = url_base + ".html"

        # retrieve requested record information
        # NOTE(review): any state other than 'incomplete' (including
        # 'request/processing errors') bypasses the download - confirm intended.
        if record['download'] != 'incomplete':
            sys.stdout.write("\n** Dataset ** : %s\n\n  files downloaded - bypassing download:" % row['Dataset ID'])
        else:
            sys.stdout.write("\n** Processing Dataset ** : %s - %s\n\nProcessing datafiles:"
                % (row["Dataset ID"], row["Title"]))

            # retrieve requested ERDDAP datasets
            _fetch_files(record, dataproc, url_base, dataset_id, fpath)
            sys.stdout.write("\n\tStep completed...\n")

            sys.stdout.write("\nProcessing metafiles:")
            _fetch_files(record, metafiles, url_base, dataset_id, fpath)
            sys.stdout.write("\n\tStep completed...\n")

            _write_status(status)

            # process data table information
            sys.stdout.write("\nProcessing table information:")

            # info dictionary
            info_request = {}
            info_dict = {'summary_info': {},'data_info' : {} }

            # retrieve other ERDDAP information as dictionary (info_request, RSS, or basic xml, etc.)
            if record['files']['info_request'] == "None":
                for d in table_info:
                    entry = row[d]
                    if "http" in entry and d != 'Summary': # search additional link to info
                        ext = entry.rsplit(".",1)[-1]
                        sys.stdout.write("\n\tRetrieving additional url to request: %s " % d)
                        if d == "Info":
                            info_request, err = url_request(entry,'json')
                            if err == 'passed' and info_request is not None:
                                f = "{}{}".format(fpath,"/info_request.json")
                                with open(f, 'w') as fp:
                                    json.dump(info_request, fp, indent=4)
                                record['files']['info_request'] = f
                        elif d == 'RSS': # request, parse rss file
                            response, err = url_request(entry,'file')
                            if response is not None:
                                try:
                                    if response.status_code == 200:
                                        response.raw.decode_content = True
                                        info_dict['summary_info']['rss'] = _parse_rss(response.raw)
                                finally:
                                    response.close()
                        elif ext == "xml":  # request, convert xml request to dictionary
                            response, err = url_request(entry,'file')
                            if response is not None:
                                try:
                                    if response.status_code == 200:
                                        response.raw.decode_content = True
                                        tree = etree.parse(response.raw)
                                        xml_string = etree.tostring(tree)
                                        info_dict['data_info'][d] = xml_to_dict(xml_string)
                                finally:
                                    response.close()
                        else:
                            sys.stdout.write('file type: %s is not detailed.'
                                             '\nBulk printing response as string' % ext )
                            response, err = url_request(entry,'file')
                            if response is not None:
                                try:
                                    if response.status_code == 200:
                                        info_dict['data_info'][d] = response.text
                                finally:
                                    response.close()
                    else:
                        info_dict['summary_info'][d] = entry # copy text block from data table

                    record['download'] = 'attempted'

                # update file information
                f = "{}{}".format(fpath, "/info_dict.json")
                with open(f, 'w') as fp:
                    json.dump(info_dict, fp, indent=4)
                record['files']['info_dict'] = f

            # check if downloads complete: every tracked file needs a real entry
            qc_files = all(value not in ("None", None) for value in record['files'].values())
            if qc_files:
                record['download'] = "YES"
            else:
                record['download'] = 'request/processing errors'
            _write_status(status)

        sys.stdout.write("\n\tStep completed...\n")

        # work with only one dataset = for testing purposes - comment out break otherwise
        # break
    return

# Search ERDDAP datasets using a simple interface (otherwise get adv url and bypass)  - incomplete **
def search_url(df, bulk_opts_dict, retries=5, complaint='Invalid entries - stopped'):
    """Interactively choose a bulk or custom ERDDAP search and return its results.

    Parameters
    ----------
    df : object
        Not used inside this function (kept for the existing call signature).
    bulk_opts_dict : dict
        Maps option letters to ``(resource, task, url)`` tuples; the "C" entry
        supplies the category-listing url.
    retries, complaint :
        Accepted for compatibility; not used.

    Returns
    -------
    (dataframe, url_html, url_json) : tuple
        Table of matching datasets and the search url in both forms.

    The prompt repeats until a request succeeds. Corrections relative to the
    earlier version:
      * the table returned for a custom search is built from the advanced
        search response itself (it was built from an earlier, unrelated
        response because the search result was discarded);
      * a failed request returns to the prompt instead of returning ``None``;
      * bulk option "C" is rejected, since the category listing is not a
        dataset table that ``get_stats`` can summarize;
      * the Python 2 only ``print`` statement and ``raw_input`` are replaced by
        forms that work on Python 2 and 3.
    """
    try:
        read_line = raw_input    # Python 2
    except NameError:
        read_line = input        # Python 3

    def _request_table(url):
        # Return the response table as a DataFrame, or None if the request failed.
        payload, err = url_request(url, 'json')
        if err != 'passed' or payload is None:
            return None
        return pd.DataFrame(payload['table']['rows'], columns=payload['table']['columnNames'])

    def _ask_index(prompt, frame):
        # Read an integer row label; None when the entry is not a listed index.
        sys.stdout.write(prompt)
        try:
            choice = int(read_line())
        except ValueError:
            return None
        return choice if choice in list(frame.index.values) else None

    while True:

        print("Request ERDDAP datasets using a retrieval option <letter>: \n\t%s\n\t%s"
                  % ("(B) Bulk type constraint, or", "(S) Search (categorical, keyword(s), phrase)"))
        entry = (read_line()).upper()

        if entry == "B":  # bulk file read
            while True:
                sys.stdout.write("\nRequest all: (D) datasets, (T) Tables, or (G) Grids\n")
                bulk = (read_line()).upper()
                if bulk in ("D", "T", "G") and bulk in bulk_opts_dict:
                    json_url = bulk_opts_dict[bulk][2]
                    link = ''.join(json_url).replace(".json",".html")
                    print("\nBulk Request %s\nURL: %s" % ( bulk_opts_dict[bulk][1], link ))
                    df_query = _request_table(json_url)
                    if df_query is None:
                        sys.stdout.write("\nBulk request failed - retry\n\n")
                        break  # back to the main option prompt
                    get_stats(df_query)
                    return df_query, link, json_url
                else:
                    print ("\nInvalid command - enter an indicated letter option (*)")

        elif entry == "S":  # Search by category, search word(s), and/or protocol (data type)
            sys.stdout.write("\n*** Note: custom search methods are loosely fitted - in progress ***")
            df_cquery = _request_table(bulk_opts_dict["C"][2])
            if df_cquery is None:
                sys.stdout.write("\nCategory request failed - retry\n\n")
                continue
            df_cquery.columns.name = "Search Index"
            display(df_cquery)

            # Category search
            cindx = _ask_index("Select search index for category [left column] (optional: # is not index --> skip):\n",
                               df_cquery)
            squery_entry = ""
            if cindx is None:
                sys.stdout.write("\n\tEntered search index was not in list, skipped category refinement.\n\n")
            else:
                df_squery = _request_table(df_cquery.at[cindx, "URL"])
                if df_squery is None:
                    sys.stdout.write("\n\tSubcategory request failed, skipped category refinement.\n\n")
                else:
                    df_squery.columns.name = "Search Index"
                    display(df_squery)
                    sindx = _ask_index("Select search index for subcategory [left column] (optional: # is not index --> skip):\n",
                                       df_squery)
                    if sindx is None:
                        sys.stdout.write("\n\tEntered search index was not in list, skipped subcategory search.\n\n")
                    else:
                        squery_entry = df_squery.at[sindx, "Category"]

            # Word or phrase search
            sys.stdout.write("Optional keyword or phrase search\nEnter space-delimited search word(s) or qouted phrase (blank --> skip):\n")
            search = read_line()
            if search == '':
                sys.stdout.write("Search input was blank, skipped word search")

            # Protocol (data type) search constraint - currently does not examine subsets
            sys.stdout.write("\nDefine allowed protocol (data type): \n\t%s\n\t%s\n\t%s\n"
                  % ("(A) All data types,", "(G) Griddap, or", "(T) Tabledap, or (W) Wms (default --> All types)"))
            protocol = (read_line()).upper()
            if protocol not in ["A", "G", "T", "W"]: protocol = "A"

            # Gather pertinent info, build custom url for http services
            comb = ''.join([search, protocol, squery_entry])
            if comb != "A":
                gen_url_html, gen_url_json = build_url(search, protocol, df_cquery, cindx, squery_entry)
                sys.stdout.write("\n'Advanced' search url:\n%s\n" % gen_url_html)
                df_request = _request_table(gen_url_json)
                if df_request is None:
                    sys.stdout.write("Identified exception during url request")
                    sys.stdout.write("\nSearch request failed - retry\n\n")
                    continue
                get_stats(df_request)
                return df_request, gen_url_html, gen_url_json
            else:
                sys.stdout.write("\nNo search constraints were detected - retry\n\n")
        else:
            print ("\nInvalid command - enter a letter option (*)\n")

# set permissions
def set_permissions(item_id, acls):
    """Send ``acls`` as JSON to the permissions address of a ScienceBase item.

    ``item_id`` is inserted into the catalog item url. The request is a PUT
    issued through ``sb._session`` and the reply is decoded by
    ``sb._get_json``, whose result is returned.

    NOTE(review): ``sb`` is read from module scope; within the visible code it
    is only created as a local inside SciBase_item - confirm a module-level
    session exists before this is called.
    """
    endpoint = "".join(["https://www.sciencebase.gov/catalog/item/", item_id, "/permissions/"])
    reply = sb._session.put(endpoint, data=json.dumps(acls))
    return sb._get_json(reply)

# adjust read/write (task) privileges
def set_acls(acls, names, task):
    """Replace the access list of ``acls[task]`` with ``names`` and stop inheritance.

    ``acls`` is modified in place and also returned: any 'inheritsFromId' entry
    under ``task`` is removed, 'inherited' is set to False, and 'acl' becomes a
    new list holding the items of ``names`` in order.
    """
    section = acls[task]
    section.pop('inheritsFromId', None)
    section['inherited'] = False
    section['acl'] = list(names)
    return acls

# reformat date entries
def date_reform(date_input):
    """Return the calendar-date part of ``date_input``, flagging "today" results.

    ``date_input`` is parsed with ``dateutil.parser.parse`` (time zone ignored)
    and the text before the first space of the parsed value is kept. When that
    date equals today's date, the original input with '_recheck' appended is
    returned instead.

    The comparison is made between two strings. Comparing the date text with a
    ``datetime.date`` object, as before, is never equal, so the '_recheck' flag
    could not be produced.
    """
    date_rec = str(parser.parse(date_input, ignoretz=True)).split(" ",1)[0]
    if date_rec == str(datetime.date.today()):
        date_rec = "{}{}".format(date_input,'_recheck')
    return date_rec

# pull info from ERDDAP info table as pandas dataframe (adhoc corrections for now, work in progress)
def populate_sbase(attribute , rmv, info_frame):
    """Look up one attribute value in an ERDDAP info table, optionally tidied.

    Parameters
    ----------
    attribute : str
        Value to match in the 'Attribute Name' column; the first matching row
        is used. Raises ``IndexError`` when no row matches.
    rmv : str
        'yes' applies the text clean-up below to string values.
    info_frame : pandas.DataFrame
        Must provide the columns 'Attribute Name' and 'Value'.

    Returns
    -------
    The (possibly cleaned) entry from the 'Value' column.

    Clean-up for strings: runs of spaces collapse to one, a comma not followed
    by a space gets one, trailing whitespace is stripped, and "?s" becomes "'s".

    ``DataFrame.at`` replaces ``get_value`` (no longer available in current
    pandas) and the string test works on both Python 2 and 3.
    """
    try:
        text_types = basestring  # Python 2: str and unicode
    except NameError:
        text_types = str         # Python 3

    indx = info_frame[info_frame['Attribute Name'] == attribute].index.tolist()[0]
    entry = info_frame.at[indx, "Value"]
    if rmv == 'yes' and isinstance(entry, text_types):
        entry = re.sub(r' +', ' ', entry)          # collapse repeated spaces
        entry = re.sub(r'\,(?! )', ', ', entry)    # ensure space after commas
        entry = entry.rstrip()                     # drop trailing whitespace/newlines
        entry = entry.replace('?s',"'s")           # repair "?" left by bad unicode translation
    return entry

# Create and update SB records, modify as needed
def SciBase_item(status, nf_dict, purge_sb):
    """Create or update a ScienceBase item for every downloaded dataset.

    For each dataset in ``status[2]['datasets (id)']`` whose "download" flag is
    'YES', the function locates (or creates) the matching ScienceBase item,
    uploads the files named by the notebook-level ``metafiles + dataproc``,
    then rewrites the item's dates, provenance, citation, title, body,
    weblinks, contacts, permissions and tags and saves the item.

    Parameters
    ----------
    status : list
        Download-status record. Reads
        ``status[0]['general information'][2]['file modified (date-time)']``
        and the per-dataset records under ``status[2]['datasets (id)']``
        (keys used: "download", "upload", "sb_id", "dataset_url", "files").
        The "sb_id" and "upload" entries are updated in place and the whole
        record is written to ``tempdir + sb_json``.
    nf_dict : dict
        Name dictionary; keys used: 'prov_state', 'BCB_contact', 'processor',
        'publisher', 'read_names', 'write_names'.
    purge_sb : bool
        When True, files already attached to an item are replaced too, but
        only for datasets whose "upload" flag is not yet "YES". Any other
        value uploads only the files missing from the item.

    Notes
    -----
    Uses notebook-level names: ``login_name``, ``tempdir``, ``sb_json``,
    ``metafiles``, ``dataproc``, ``webnames`` and ``file_dict``.
    Returns nothing.
    """

    # login to Sciencebase
    sys.stdout.write("{}{}".format('\n** Processing ScienceBase items **','\n\n\tLogin to ScienceBase - '))
    sb = pysb.SbSession()
    sb.loginc(str(login_name))
    time.sleep(5)

    def save_status():
        # write the status record immediately so new item ids / upload flags are not lost
        with open(tempdir + sb_json, 'w') as fp:
            json.dump(status, fp, indent=4)

    def create_item(info_frame):
        # new item titled from the ERDDAP metadata, parented to sb.get_my_items_id()
        new_item_info = {'title': populate_sbase("title", 'yes', info_frame),
            'parentId': sb.get_my_items_id(),
            'provenance': {'annotation': nf_dict['prov_state']}}
        return sb.create_item(new_item_info)

    # iterate over list of datasets in search record
    for dataset in status[2]['datasets (id)']:

        # individual dataset download information (same object as stored in status)
        data = status[2]['datasets (id)'][dataset]

        if data["download"] != 'YES':
            sys.stdout.write("\nInfo for dataset %s - incomplete not processed\n" % dataset)
            continue

        # Open ERDDAP dataset dictionary files (info_request, info_dict: *.json format)
        with open(data['files']['info_request']) as fp:
            info_request = json.load(fp)
        info_frame = pd.DataFrame(info_request['table']['rows'], columns=info_request['table']['columnNames'])
        # info_dict is parsed (so a missing/invalid file still stops processing) but not used further below
        with open(data['files']['info_dict']) as fp:
            info_dict = json.load(fp)

        sys.stdout.write("\nProcessing dataset: %s into ScienceBase\n" % dataset)

        # check SB item existence (visibility)
        chk = True
        if data["sb_id"] != "None":
            try:
                SB_rec = sb.get_item(data["sb_id"])
            except Exception:
                # any retrieval failure is treated as "item not found"; Ctrl-C still propagates
                sys.stdout.write("\n{}{}{}".format('Warning: SB item ', data["sb_id"],' was NOT located - creating new item'))
                chk = False
            else:
                sys.stdout.write("\n\t{}{}{}".format('SB item: ', data["sb_id"],' was located in ScienceBase'))
        else:
            chk = False

        # create new sciencebase item, if warranted
        if chk == False:
            SB_rec = create_item(info_frame)
            data["sb_id"] = SB_rec['id']
            sys.stdout.write("\n\t{}{}".format('Creating new ScienceBase item:', data["sb_id"]))
            save_status()

        # check SB item permissions, if no permissions create new item
        if u'USER:' + login_name in SB_rec['permissions'][u'write'][u'acl']:
            sys.stdout.write("\n\t** Verified ** user has write privileges for item")
        else:
            sys.stdout.write("\n *****  WARNING  ***** : user without write privileges - creating new item")
            SB_rec = create_item(info_frame)
            # record the replacement id; previously "sb_id" was left as "None",
            # so the later sb.get_item(data["sb_id"]) calls could not find the new item
            data["sb_id"] = SB_rec['id']
            save_status()

        sys.stdout.write("\n\tCheck files, upload/update files and metadata")

        # gather list of files currently in item's 'files' and 'facets'
        uploaded_files = []
        if 'files' in SB_rec:
            uploaded_files = [f_ex['name'] for f_ex in SB_rec['files']]
        if 'facets' in SB_rec:
            for facet in SB_rec['facets']:
                if 'files' in facet:
                    for f in facet['files']:
                        uploaded_files.append(f['name'])

        # upload only new files (default), or also replace existing files when
        # purging a dataset that has not been re-uploaded yet; a purge run skips
        # datasets already flagged "YES"
        replace_existing = (purge_sb == True)
        if not replace_existing or data["upload"] != "YES":
            for f in (metafiles + dataproc):
                fn = data['files'][f].rsplit( "/", 1 )[ 1 ]
                if fn not in uploaded_files:
                    sys.stdout.write("\n\t{}".format("uploading new file to item"))
                    sb.uploadFileToItem(SB_rec, data['files'][f])
                elif replace_existing:
                    sys.stdout.write("\n\t{}".format("updating existing file in item"))
                    sb.replace_file(data['files'][f], SB_rec)
                    SB_rec = sb.get_item(data["sb_id"])

        sys.stdout.write("\n\tModifying ScienceBase record information")

        # retrieve latest SB item after file uploads/updates
        SB_rec = sb.get_item(data["sb_id"])

        # record SB item create date (computed but not written to the record at present)
        entry_create = date_reform(populate_sbase("date_created",'no', info_frame))

        # data record start date
        entry_start = date_reform(populate_sbase("time_coverage_start",'no', info_frame))

        # data record end date
        entry_end = date_reform(populate_sbase("time_coverage_end",'no', info_frame))

        # files retrieval (download) date using OS call for metadata file
        date_input = path.getmtime(data['files'][metafiles[0]])
        Int2date = datetime.datetime.fromtimestamp(date_input)
        retrieve = datetime.datetime.strftime(Int2date, '%Y-%m-%d')

        # load data record dates: keep the item's first date entry when one exists,
        # otherwise (or when the status file was never modified) start from today's submission date
        rep = status[0]['general information'][2]['file modified (date-time)']
        if rep == 'none' or not SB_rec.get('dates'):
            SB_rec['dates']  = [{u"type":u"Item submission",
                u"dateString":str(datetime.date.today()), u"label": u"Item submission"}]
        else:
            SB_rec[u'dates'] = [SB_rec[u'dates'][0]]

        SB_rec['dates'].extend([
            {u"type":u"Data record (start)",
            u"dateString":entry_start,
            u"label":u"Data record (start)"},
            {u"type":u"Data record (end)",
            u"dateString":entry_end,
            u"label":u"Data record (end)"},
            {u"type":u"Retrieved from source",
            u"dateString":str(retrieve),
            u"label":u"Retrieved from source"}])

        # update provenance (create the section if the item has none)
        SB_rec.setdefault('provenance', {})['annotation'] = nf_dict['prov_state']

        # update citation information, if available (best effort: skipped when
        # either attribute is missing from the info table)
        try:
            SB_rec["citation"] = "{}{}".format("{}{}".format("Data citation - ", populate_sbase("bibliographicCitation",'yes', info_frame)),
                "{}{}".format(". Study citation - ", populate_sbase("associatedReferences",'yes', info_frame)))
        except Exception:
            pass

        # update title
        SB_rec["title"] = populate_sbase("title",'yes', info_frame)

        # update additional information
        entry_license = populate_sbase("license",'yes', info_frame)

        # update summary + add license information
        SB_rec["body"] = "{}{}{}".format(populate_sbase("summary",'yes', info_frame),'&nbsp; &nbsp;\n<br> \n<br>',entry_license)

        # provide OPeNDAP weblinks
        webrec = []
        for w in webnames:
            d_url = data["dataset_url"].replace('.html', w)
            rec = {
                u'hidden': False,
                u'rel': u'related',
                u'title': file_dict[w],
                u'type': u'OPeNDAP weblinks - data provenance trace',
                u'uri': d_url}
            webrec.append(rec)
        SB_rec['webLinks'] = webrec

        # edit and remove redundancy in contact names: keep existing contacts with a
        # usable name, then add the three standard contacts; first occurrence of a name wins.
        # (The former "name not in store_name" tests compared a string with dicts and were
        # always true, so the standard contacts were always appended - same result here.)
        candidates = [c for c in (SB_rec.get('contacts') or [])
                      if c.get('name') and '?' not in c['name']]
        candidates.extend([nf_dict['BCB_contact'], nf_dict['processor'], nf_dict['publisher']])
        seen = set()
        new_store_name = []
        for s in candidates:
            if s['name'] not in seen:
                seen.add(s['name'])
                new_store_name.append(s)
        SB_rec['contacts'] = new_store_name

        # set permissions
        acls = SB_rec['permissions']
        set_acls(acls, nf_dict['read_names'],'read')
        set_acls(acls, nf_dict['write_names'],'write')
        # set_permissions(SB_rec['id'], acls) - currently not used (sub 2 lines below)
        sb_base_url = "https://www.sciencebase.gov/catalog/item/" + SB_rec['id'] + "/permissions/"
        sb._get_json(sb._session.put( sb_base_url, data=json.dumps(acls)))

        # remove redundancy in tag names (items without tags are left without tags)
        if u'tags' in SB_rec:
            seen = set()
            new_tags = []
            for d in SB_rec[u'tags']:
                if d['name']  not in seen:
                    seen.add(d['name'])
                    new_tags.append(d)
            SB_rec[u'tags'] = new_tags

        # save updates, file; report completion only after the update went through
        sb.updateSbItem(SB_rec)
        data["upload"] = "YES"
        save_status()
        sys.stdout.write("\n\tScienceBase processing completed\n")

        # work with only one dataset = for testing purposes - comment out (break below) otherwise
        # break
    
# *****************************************************

#   code below started, greater work in progress
                         
#   Generate input/log file of manual search, for re-runs and reproduction - NOT yet implemented **
#   Add Header username, computer info, datestamp add logging capabilities
#   class Logger(object):
#      def __init__(self, filename="Default.log"):
#          self.terminal = sys.stdout
#          self.log = open(filename, "a")
#
#      def write(self, message):
#          self.terminal.write(message)
#          self.log.write(message)
#   sys.stdout = Logger("yourlogfilename.txt")

# Request upper-level info on available datasets incomplete **
# def reqtable(df, name):
#    global df_result
#    url = ''.join(df.loc[df['Resource'] == name]['URL'])
#    response, err = url_request(url,'json')
#    df_result = pd.DataFrame(response['table']['rows'], columns=response['table']['columnNames']) 
#    df_result.drop(df_result[df_result['Dataset ID'] == "allDatasets"].index)
#    get_stats(df_result)      
#    return
    
# Evaluate table of requested datasets, flag or exclude datasets as warranted.  incomplete - not started **
# def evaltable(df_result):
#    print("... incomplete, may perform assessments in the future")
#    return

# Evaluate individual datasets, flag or exclude as warranted, incomplete - not started **
# def evaldataset(df_result):
#    print("Functionality may include QA/QC metrics, taxonomic checks, and other criteria")
#    return

# last statement of the definitions cell: printed once the cell has run to its end
print('definitions loaded')

In [None]:
# !/usr/bin/python

# *******************************************************
#                   Main Program: 
# *******************************************************

def main():
    """Run the workflow: ERDDAP search, dataset processing, ScienceBase update.

    Takes no arguments; options come from notebook-level names set before the
    call (``tempdir``, ``sb_json``, ``baseurl``, ``infotype``, ``linktype``,
    ``dataform``, ``url_flag``, ``purge_proc``, ``purge_sb``).

    Steps
    -----
    1. Load ``name_file_dict.json`` from ``tempdir`` and initialise the
       status record through ``ensure_dir``.
    2. Either build a new search interactively (``search_url``) or re-run the
       search url stored in the status record.
    3. Call ``data_proc`` and ``SciBase_item`` on the resulting request table.
    4. Stamp and save the status record, then print per-task runtimes.

    Calls ``sys.exit()`` when a url request reports 'failed', when the search
    returns no request table, or when ``dataform`` is not 'dframe'.
    """
    # task labels and the timestamps delimiting them (reported at the end)
    tasklist = ['Main program processing']
    tstamp = [timer()]

    sys.stdout.write('Search ERDDAP data repository, develop search criteria\n')

    # open name/permissions information (generated a priori)
    with open(tempdir + "name_file_dict.json") as fp:
        nf_dict = json.load(fp)

    # check existence of download status file, adv. search url
    SB_status, url_chk = ensure_dir(tempdir, url_flag, nf_dict[u'general information'], 'initialize')

    if url_chk is not True:

        # ERDDAP general data resources
        subdir = '/index'
        url_o = "".join((baseurl, subdir, infotype))
        sys.stdout.write("\nHome URL: %s \n\n" % (url_o.rsplit( ".", 1 )[ 0 ] + linktype ))
        response, err = url_request(url_o,'json')
        if err == 'failed':
            # same failure flag that is checked for the stored-search request below
            sys.stdout.write("Identified problem requesting ERDDAP index (*.json)\nExiting program")
            sys.exit()

        if dataform != 'dframe':
            # full json method not yet developed, incomplete ** : show what was received and stop
            sys.stdout.write("{}{}".format('Invalid entry, defaulting to .json format', '\n'))
            pprint.pprint(response.keys())
            sys.exit()

        # pandas dataframe visual specs
        pd.set_option('display.notebook_repr_html', True)
        pd.set_option('display.max_colwidth', -1)
        pd.set_option('display.max_rows', 500)

        # Main navigation links of the ERDDAP server
        df = pd.DataFrame(response['table']['rows'], columns=response['table']['columnNames'])
        df['URL'] = df['URL'].str.replace('.json', linktype)
        sys.stdout.write(df.to_string(index=False, justify='left') + '\n')

        # All datasets served by the ERDDAP server (with metadata)
        url = ''.join(df.loc[df['Resource'] == 'info']['URL'])
        sys.stdout.write("\nLink to all datasets with metadata: %s\n" % url)

        # All datasets served by the ERDDAP server (searchable table)
        url = "".join((baseurl, "/tabledap/allDatasets", linktype))
        sys.stdout.write("\nLink to all tabes, includes searchable information: %s\n\n" % url)

        # Adjust links, settings (switch the URLs back to .json for requests)
        pd.set_option('display.max_colwidth',250)
        df['URL'] = df['URL'].str.replace(linktype,'.json')

        # Set up info for search types (option, task, url)
        bulk_opts_dict = bulkoption(df)

        # develop request table
        tstamp.append(timer())
        tasklist.append('Search and request datasets from ERDDAP')
        search_result = search_url(df, bulk_opts_dict)
        if search_result is None:
            # search_url returns nothing after a failed url request; unpacking None would raise TypeError
            sys.stdout.write("\nSearch did not return a request table\nExiting program")
            sys.exit()
        df_request, gen_url_html, gen_url_json = search_result

        # store ERDDAP advanced search url and save download status file
        SB_status[0]['general information'][4]['data search url'] =  gen_url_html
        SB_status[0]['general information'][5]['data search json'] =  gen_url_json

    else:
        sys.stdout.write("\n{}{}{}\n\n".format('\n*** Automated search using information file: ', sb_json,'  ***'))
        response, err  = url_request(SB_status[0]['general information'][5]['data search json'],'json')
        if err == 'failed':
            sys.stdout.write("Identified problem using previous search url (*.json)\nExiting program")
            sys.exit()
        df_request = pd.DataFrame(response['table']['rows'], columns=response['table']['columnNames'])

    # process data - incomplete, work in progress
    tstamp.append(timer())
    tasklist.append('Process datasets and information')
    data_proc(SB_status, df_request, url_flag, purge_proc, purge_sb)

    # create, set up, modify Sciencebase records for data search, work in progress
    # (SciBase_item prints the "Processing ScienceBase items" banner itself)
    tstamp.append(timer())
    tasklist.append('Develop and/or modify ScienceBase records')
    SciBase_item(SB_status, nf_dict, purge_sb != False)

    # final save status file
    date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
    SB_status[0]['general information'][2]['file modified (date-time)'] = date
    with open(SB_status[1]['local_directory'] + sb_json, 'w') as fp:
        json.dump(SB_status, fp, indent=4)

    # report elapsed time per task
    tstamp.append(timer())
    for i in range(len(tstamp)-1):
        sys.stdout.write('\nTask '+ str(i+1) + ': ' + tasklist[i] + ' | Runtime [secs]: '+ str(round(tstamp[i+1]-tstamp[i],3)))

    sys.stdout.write("\n\nProgram has finished")

    
# ************************************************************************* 
    
if __name__ == '__main__':

    # ************************************************************************
    # General inputs (place in input file at some point).
    # Every name assigned here becomes a notebook-level global that main()
    # and the processing functions read directly.
    # ************************************************************************

    # username to login to ScienceBase
    # NOTE(review): hard-coded account name - consider reading it from the environment or prompting
    login_name = 'twellman@usgs.gov'

    # Local (working) directory + folders, eventually use full memory stream (note: '/' at end).
    # main() reads name_file_dict.json from this directory.
    # NOTE(review): the Name Dictionary cell saves name_file_dict.json under a different
    # directory (".../ERDDAP_MBON_update_test/") - confirm which one is intended
    tempdir = "/Users/twellman/Documents/BCB_data_projects/OBIS_usa_database/erddap_MBON_test/"
    
    # Name of download status file (json), existing in local directory or to set new
    sb_json = "file_sb.json"
                         
    # Base ERDDAP url - to "/erddap" level
    baseurl  = 'http://gcoos4.tamu.edu:8080/erddap'
    
    # Content retrieval requests, includes datasets, metadata, and other information.
    # dataproc + metafiles are the file-type keys SciBase_item uploads for each dataset.
    dataproc = [".nc",".ncHeader", ".csv"]    
    metafiles =  [".iso19115"]
    table_info =   ["Dataset ID", "Title", "Institution" ,"Summary", "Background Info","Info",'RSS']
    infoformat = ".json"
    
    # ScienceBase OPeNDAP weblinks to show in ScienceBase item
    # (each suffix replaces '.html' in the dataset url)
    webnames = ['.html','.csv','.nc','.ncHeader','.json','.fgdc','.geoJson','.iso19115']
                         
    # File format flags - hardwired, work in progress
    infotype = '.json'   # format to read dataset information 
    linktype = '.html'   # format to show data resources
    dataform = 'dframe'  # format to display data (Pandas dataframe); main() exits for any other value

    # Whether to use the existing search (advanced search url) in the "sb_json" file (above).
    # If false/absent, main() goes through the simplified query process and internally generates
    # a new advanced url; the general information section in "sb_json" can also be created manually.
    url_flag = True
    
    # Whether to overwrite content used in overall processing (proc) or ScienceBase (sb):
    #   - use only ONE run with purge_proc = True, then set it to False, else it will always reset
    #   - keep purge_sb = True for purging ScienceBase items until complete
    #   - purge_sb can also be set to 'reset' to reset upload status to ScienceBase separately
    # NOTE(review): main() passes True to SciBase_item for any purge_sb value other than False,
    # so 'reset' is only distinguished inside data_proc, if at all - confirm
    purge_proc = False
    purge_sb = False
    
    # ************************************************************************
    
    # call to main                                                 
    main()


In [None]:
## 

***************************************************************************************************************
******   " The section below has ScienceBase code snippets to create, modify, retrieve information "     ******
***************************************************************************************************************




In [None]:
# login to ScienceBase using pysb. retrieve item

# Open a ScienceBase session as a notebook-level `sb`, log in with the configured
# login_name, and fetch one item by id for inspection.
# Requires login_name to be defined already (it is set in the Main Program cell).
sb = pysb.SbSession()
sb.loginc(str(login_name))
time.sleep(5)  # fixed 5 s pause after login; NOTE(review): reason not documented - confirm it is needed
test = sb.get_item('587f8830e4b085de6c11f242')
# bare expression: the notebook displays the retrieved item as the cell output
test

In [None]:
# simple print out a list of completed file downloads
# from sb_json file. At present tweak adhoc for print out
# includes hyperlinks (can cut and paste into doc)

# Load the download status file and list every dataset that already has a
# ScienceBase id ("sb_id" other than "None"), with its item url and ERDDAP url.
# Requires tempdir and sb_json to be defined (Main Program cell).
with open(tempdir + sb_json) as fp:
    SB_status = json.load(fp)

dataset = 0  # running count of datasets that have a ScienceBase item
for d, record in SB_status[2]["datasets (id)"].items():
    if record["sb_id"] != "None":
        dataset = dataset + 1
        # single-argument print() calls give the same output on Python 2 and 3
        # (print("a", b) shows a tuple under Python 2)
        print("Dataset number: %s" % dataset)
        print("Dataset name: %s" % d)
        print("ScienceBase ID: %s" % record["sb_id"])
        print("ScienceBase url: %s" % ("https://www.sciencebase.gov/catalog/item/" + record["sb_id"]))
        print("dataset_url: %s" % record['dataset_url'])
        print("")

In [None]:
# view pandas dataframe of table of datasets requested

# Show an ERDDAP info table (rows + columnNames from an info_request dict) as a dataframe.
# NOTE(review): info_request is not assigned at notebook level in the cells shown here
# (SciBase_item loads it into a local variable) - presumably it has to be loaded by hand
# before running this cell; confirm.
pd.set_option('display.max_colwidth', -1)
info_frame = pd.DataFrame(info_request['table']['rows'], columns=info_request['table']['columnNames'])
    
# rich notebook rendering of the full table
display(info_frame) 


In [None]:


# *******************************************************
#                   Name Dictionary: 
# *******************************************************

# Create a label dictionary to populate sciencebase items and header of download status file
# modify as needed, specify path to save (tempdir) as working directory

import json
import pprint

# Label dictionary saved as name_file_dict.json. main() loads it as nf_dict and
# SciBase_item reads 'prov_state', 'write_names', 'read_names', 'publisher',
# 'processor' and 'BCB_contact' from it; 'general information' is passed to ensure_dir.
name_file_dict = {}

# provenance annotation written to each ScienceBase item
name_file_dict[u'prov_state'] = 'GCOOS MBON data retrieve'

# principals placed in the item's write / read access lists
name_file_dict[u'write_names'] = [u'USER:twellman@usgs.gov', u"USER:albenson@usgs.gov", u"USER:sbristol@usgs.gov", u"USER:saulenbach@usgs.gov"]
name_file_dict[u'read_names'] = [u'USER:twellman@usgs.gov', u"USER:albenson@usgs.gov", u"USER:sbristol@usgs.gov", u"USER:saulenbach@usgs.gov"]

# contact records appended to each item's contacts (de-duplicated by 'name')
# NOTE(review): 'Oservation' looks like a typo for 'Observation'; the name is used when
# de-duplicating contacts on existing items, so confirm before changing it
name_file_dict[u'publisher'] = {u'contactType': u'organization',
       u'name': u'Marine Biodiversity Oservation Network',
       u'onlineResource': u'http://oceanservice.noaa.gov/news/apr16/mbon.html',
       u'organization': {u'displayText': u'Marine Biodiversity Oservation Network'},
       u'primaryLocation': {},
       u'type': u'Publisher'}
name_file_dict[u'processor'] = {u'contactType': u'organization',
       u'name': u'GCOOS: Gulf of Mexico Coastal Ocean Observing System',
       u'onlineResource': u'http://gcoos.tamu.edu',
       u'organization': {u'displayText': u'GCOOS: Gulf of Mexico Coastal Ocean Observing System'},
       u'primaryLocation': {},
       u'type': u'Processor'}
# NOTE(review): the primaryLocation name below spells 'Abigial' while 'name' spells
# 'Abigail' - confirm against the source directory record
name_file_dict[u'BCB_contact'] = {u'active': True,
      u'contactType': u'person',
      u'email': u'albenson@usgs.gov',
      u'firstName': u'Abigail',
      u'jobTitle': u'Biologist',
      u'lastName': u'Benson',
      u'middleName': u'L',
      u'name': u'Abigail L Benson',
      u'oldPartyId': 23179,
      u'organization': {u'displayText': u'Scientific Data Integration and Visualization'},
      u'primaryLocation': {u'building': u'DFC Bldg 810',
      u'buildingCode': u'KBT',
      u'faxPhone': u'3032024229',
      u'mailAddress': {u'city': u'Denver',
      u'country': u'USA',
      u'line1': u'Box 25046, Denver Federal Center, Mail Stop 306',
      u'mailStopCode': u'306',
      u'state': u'CO',
      u'zip': u'80225-0046'},
      u'name': u'Abigial L Benson/GIO/USGS/DOI - Primary Location',
      u'streetAddress': {u'city': u'Lakewood',
      u'country': u'US',
      u'line1': u'West 6th Ave. & Kipling St., DFC Bldg. 810',
      u'state': u'CO',
      u'zip': u'80225-0046'}},
      u'type': u'Point of Contact'}

# header block of the download status file; the list order matters because the
# program addresses entries by position ([2] file modified, [4] data search url,
# [5] data search json)
name_file_dict[u'general information'] = [
                {u'purpose' : u'Data Distillery development: MBON (GCOOS) data downloads'},
                {u'file created (date-time)' : 'none'},
                {u'file modified (date-time)' : 'none'},
                {u'data base url': 'none'},
                {u'data search url': ''},
                {u'data search json': ''},
                {u'contacts': u'Abigail Benson, Tristan P. Wellman, Steve Aulenbach, Sky Bristol'},
                {u'email': u'albenson@usgs.gov, twellman@usgs.gov, saulenbach@usgs.gov, sbristol@usgs.gov'},
                {u'organization':u'U.S. Geological Survey, Core Science Analytics, and Synthesis (B.C.B.)'},
                {u'address': u'Denver Federal Center, Building 810, Lakewood, Colorado'}]  

# Save name file to local directory - incomplete
# NOTE(review): this assignment rebinds the notebook-level tempdir, and the directory
# differs from the tempdir set in the Main Program cell (".../erddap_MBON_test/") -
# confirm which working directory is intended

tempdir = "/Users/twellman/Documents/BCB_data_projects/OBIS_usa_database/ERDDAP_MBON_update_test/"

with open(tempdir + "name_file_dict.json", 'w') as fp:
    json.dump(name_file_dict, fp, indent=4)
# echo the dictionary for a visual check
pprint.pprint(name_file_dict)

#with open(tempdir + "name_file_dict.json") as fp:    
#    nf_dict = json.load(fp)
#pprint.pprint(nf_dict)

In [None]:
# fiddling with Asynchronous HTTP client

from tornado import ioloop, httpclient

# number of requests queued and not yet answered
i = 0

def handle_request(response):
    """Print the status code of one finished request; stop the IOLoop after the last one."""
    global i
    print(response.code)
    i -= 1
    if i == 0:
        ioloop.IOLoop.instance().stop()

http_client = httpclient.AsyncHTTPClient()

# queue one HEAD request per non-empty line of urls.txt; the file is closed on exit
with open('urls.txt') as url_file:
    for url in url_file:
        url = url.strip()
        if not url:
            continue  # skip blank lines instead of requesting an empty url
        i += 1
        http_client.fetch(url, handle_request, method='HEAD')

# with nothing queued no callback would ever stop the loop, so only start it when needed
if i > 0:
    ioloop.IOLoop.instance().start()

In [None]:
# Generate uuid and time generation
import uuid
import timeit
# Time uuid1 generation: 3 repeats of 300000 calls each.
# Bound to uuid_timer rather than `timer`, because main() calls a notebook-level
# timer() for its runtime stamps and rebinding that name to a Timer object breaks it.
uuid_timer = timeit.Timer('uuid.uuid1()', 'import uuid')
uuid_timer.repeat(3, 300000)

In [None]:
# Curl process 

import subprocess
# POST four form fields with curl.
# Requires name, tags, description, status and url to be defined beforehand.
# Each field goes through --data-urlencode so curl percent-encodes the value;
# plain string concatenation would corrupt values containing '&', '=' or spaces.
form_fields = [("name", name), ("tags", tags), ("description", description), ("status", status)]
curl_cmd = ['curl', '-sS', '-X', 'POST']  # -sS: no progress meter, but still report errors
for field, value in form_fields:
    curl_cmd.extend(['--data-urlencode', field + "=" + value])
curl_cmd.append(url)

# capture stderr as well, otherwise err is always None
proc = subprocess.Popen(curl_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(out, err) = proc.communicate()
print("%s %s" % (out, err))

In [None]:
%%javascript 
// Copy the notebook's path and name (data attributes on <body>) into the kernel
// as the Python variables notebookPath and notebookName.
var kernel = Jupyter.notebook.kernel;
var pageData = window.document.body.dataset;
['notebookPath', 'notebookName'].forEach(function (key) {
    // JSON.stringify quotes and escapes the value, so quotes or backslashes in a
    // file name cannot break (or inject code into) the generated assignment;
    // a missing attribute becomes an empty string, as before
    var command = key + ' = ' + JSON.stringify(pageData[key] || '');
    //alert(command)
    kernel.execute(command);
});

In [None]:
import os
# Full path of this notebook: current working directory + notebookName
# (notebookName is set in the kernel by the %%javascript cell, which must run first)
p = str(os.getcwd())
print(os.path.join(p, notebookName))