<html>
    <div style = "display: inline-block; width=150px; height=75px;">
        <h2>Validating Data Using nda-tools</h2>
        <ul><li>nda-tools allows users to work with web services directly in their pipelines</li>
           <li>This will ensure that the data generated complies with NIMH standards at an earlier stage</li></ul>
    </div>
    
</html>

In [None]:
from NDATools import Configuration, Validation
from ipywidgets import widgets
from IPython.display import display
import os

# create a configuration object using the settings.cfg file included in NDATools package
# (looked up as ~/.NDATools/settings.cfg in the current user's home directory)
config = Configuration.ClientConfiguration(os.path.join(os.path.expanduser('~'), '.NDATools/settings.cfg'))

# create a validation object with a list of files for validation and the config object
# split() with no argument splits on any run of whitespace, so doubled, leading or
# trailing spaces in the answer do not turn into empty-string "file names".
# NOTE: paths that themselves contain spaces cannot be entered through this prompt.
file_list = input('Enter a list of files for validation:')
file_list = file_list.split()
validation = Validation.Validation(file_list, config=config)#, hide_progress=False)

# if your data has manifest files, enter the location(s) of the manifest file(s)
manifest_path = input('Enter location of where manifest file(s) is located:')
manifest_path = manifest_path.split()
# only override the validation object's manifest_path when something was entered;
# a blank answer used to store [''] (a list holding a single empty path)
if manifest_path:
    validation.manifest_path = manifest_path

# show a button; the validation function is called only when it is clicked
button = widgets.Button(description='Validate!')
display(button)

def on_button_clicked(b):
    """Click handler for the 'Validate!' button.

    Prints a status line and then calls validation.validate().

    b: the argument passed by the button's on_click callback; not used here.
    """
    # print first so the message is shown before validate() is called,
    # not after it has already returned
    print("Validating data")
    validation.validate()

button.on_click(on_button_clicked)

In [None]:
from NDATools import BuildPackage
# results of the validation step that the submission package is built from
uuid = validation.uuid
associated_files = validation.associated_files

# keyword settings describing the package to create
package_settings = dict(
    collection=1860,  #2839
    title='nda-tools-test',
    description='testing NDATools',
    alternate_location=None,
    config=config,
)
package = BuildPackage.SubmissionPackage(uuid, associated_files, **package_settings)

# This step is critical: every associated file that needs to be uploaded has to be
# located by the script. The files can be saved locally, or in S3.
if associated_files:
    # no local directories are passed, and the bucket/prefix on the config are cleared
    directories = None
    config.source_bucket = None
    config.source_prefix = None
    package.file_search(
        directories,
        config.source_bucket,
        config.source_prefix,
        config.aws_access_key,
        config.aws_secret_key,
        retry_allowed=True,
    )

print(package.full_file_path)

    

In [None]:
# show a button; the package is built only when it is clicked
button = widgets.Button(description='Build Package!')
display(button)

def on_button_clicked(b):
    """Click handler for the 'Build Package!' button.

    Prints a status line, calls package.build_package(), then prints the
    validation results, package UUID, creation date and expiration date
    read from the package object.

    b: the argument passed by the button's on_click callback; not used here.
    """
    # print first so the message is shown before build_package() is called
    print('Building package')
    package.build_package()
    print('\n\nPackage Information:')
    print('validation results: {}'.format(package.validation_results))
    print('submission_package_uuid: {}'.format(package.submission_package_uuid))
    print('created date: {}'.format(package.create_date))
    print('expiration date: {}'.format(package.expiration_date))

button.on_click(on_button_clicked)




In [None]:
from NDATools import Submission

# create the submission object from the built package's UUID and the
# file paths stored on the package object
package_id = package.submission_package_uuid
full_file_path = package.full_file_path
submission = Submission.Submission(package_id, full_file_path, config=config)


# show a button; the submission function is called only when it is clicked
button = widgets.Button(description='Submit package data!')
display(button)

def on_button_clicked(b):
    """Click handler for the 'Submit package data!' button.

    Prints a status line, calls submission.submit(), and prints the
    submission ID when the submission object has one afterwards.

    b: the argument passed by the button's on_click callback; not used here.
    """
    # print first so the message is shown before submit() is called
    print('Submitting data.')
    submission.submit()
    if submission.submission_id:
        print('\nSubmission ID: {}'.format(str(submission.submission_id)))


button.on_click(on_button_clicked)




In [None]:
def on_button_clicked(b):
    """Click handler for the 'Upload associated files!' button.

    Calls submission.submission_upload when there are associated files, then
    prints a completion message unless the submission status is 'Uploading'.

    b: the argument passed by the button's on_click callback; not used here.
    """
    has_associated_files = bool(associated_files)
    if has_associated_files:
        print('Uploading files.')
        submission.submission_upload(hide_progress=False)

    still_uploading = submission.status == 'Uploading'
    if not still_uploading:
        completion_message = '\nYou have successfully completed uploading files for submission {}!'.format(
            submission.submission_id)
        print(completion_message)

# show a button; the upload function is called only when it is clicked
button = widgets.Button(description='Upload associated files!')
display(button)
button.on_click(on_button_clicked)
 
    

<html>
    <div style = "display: inline-block; width=150px; height=75px;">
        <h2>Downloading Data Using nda-tools</h2>
        <ul><li>As of now, a user must create a download package using the browser interface</li>
           <li>nda-tools allows users to directly download all files in the package</li>
           <li>It provides the ability to download a subset of the data, i.e., a specific number of records of associated files from image03</li>
           <li>Accepts s3 links to download in various forms, explained below</li></ul>
    </div>
    
</html>

In [None]:
from NDATools import Configuration, Download
import os

# create a configuration object using the settings.cfg file included in NDATools package
# (looked up as ~/.NDATools/settings.cfg in the current user's home directory)
config = Configuration.ClientConfiguration(os.path.join(os.path.expanduser('~'), '.NDATools/settings.cfg'))


# create a download object by passing in the directory where files will be saved
# (~/AWS_downloads). The variable is named `download_dir` rather than `dir` so the
# built-in dir() stays usable for the rest of the kernel session.
download_dir = os.path.join(os.path.expanduser('~'), 'AWS_downloads')
download = Download.Download(download_dir, config, verbose=True)


<html>
    <div style = "display: inline-block; width=150px; height=75px;">
        <h2>There are several ways to obtain S3 links:</h2>
        <ul><li>pass in the "text" parameter to indicate all your s3 links are in a .txt file. Then pass in the location of that text file</li>
           <li>pass in the "package" parameter, followed by the package ID.</li>
           <li>pass in "datastructure" to indicate all s3 links from the specified data structure should be downloaded, then enter the path to your data structure text file.</li>
           <li>lastly, pass in the "paths" parameter to indicate all s3 links are to be downloaded, and then enter the list of s3 links.</li></ul>
    </div>
    
</html>

In [None]:
from ipywidgets import widgets
from IPython.display import display

#114980  (NOTE(review): presumably an example package ID - confirm)

button = widgets.Button(description='Submit!')

def on_button_clicked(b):
    """Click handler for the 'Submit!' button.

    Goes through every text field in the tab widget and, for each field that
    is not empty, calls download.get_links with a link type chosen from a
    keyword in that field's placeholder text and the entered value wrapped
    in a one-element list.

    b: the argument passed by the button's on_click callback; not used here.
    """
    for v in tab.children:
        # compare by value: `is not ""` tests object identity, which is not a
        # reliable emptiness check and raises a SyntaxWarning on newer Pythons
        if v.value != "":
            if "package" in v.placeholder:
                download.get_links('package',[v.value])
            if "text" in v.placeholder:
                download.get_links('text',[v.value])
            if "data" in v.placeholder:
                download.get_links('datastructure',[v.value])
            if "s3" in v.placeholder:
                download.get_links('path',[v.value])

# one text field per tab; the placeholder text is what the handler inspects
tab_contents = ['package ID', 'text file', 'data structure file', 's3 links']
children = [widgets.Text(value = '', placeholder='Enter a {}'.format(name)) for name in tab_contents]
tab = widgets.Tab()
tab.children = children

# title each tab after the kind of input it accepts
for index, title in enumerate(tab_contents):
    tab.set_title(index, title)

button.on_click(on_button_clicked)
display(tab,button)



In [None]:
# Print the download object's path_list attribute.
# NOTE(review): presumably the S3 links gathered by download.get_links in the cell above - confirm
print(download.path_list)

In [None]:
# Generates temporary federated tokens to access NDA buckets in AWS.
# NOTE: these credentials expire in 24 hours. Users will need to refresh tokens
# if the download exceeds this time.
# CAUTION: the handler prints the keys and session token into the cell output,
# so clear the output before saving or sharing this notebook.

def on_button_clicked(b):
    """Click handler for the 'Get Tokens!' button.

    Calls download.get_tokens() and prints the access key, secret key and
    session token read from the download object.

    b: the argument passed by the button's on_click callback; not used here.
    """
    download.get_tokens()
    credential_report = (
        'ACCESS KEY:', download.access_key,
        '\n\nSECRET KEY:', download.secret_key,
        '\n\nSESSION TOKEN:', download.session,
    )
    print(*credential_report)

button = widgets.Button(description='Get Tokens!')
button.on_click(on_button_clicked)

display(button)

In [None]:
# The start_workers function creates a fixed number of threads to begin downloading
# your s3 links in parallel for efficiency.
#
# resume and prev_directory indicate to the program if it should search previously
# downloaded files before initiating the download for each s3 link.
# They are used if a download is restarted.

def on_button_clicked(b):
    """Click handler for the 'Start Download!' button.

    Calls download.start_workers with resume=False and prev_directory=None.

    b: the argument passed by the button's on_click callback; not used here.
    """
    download.start_workers(
        resume=False,
        prev_directory=None,
    )

button = widgets.Button(description='Start Download!')
button.on_click(on_button_clicked)
display(button)
