# Solutions to Agave Files Exercises

To get started, we need to generate a set of Agave developer client keys (OAuth credentials). Generating OAuth clients uses HTTP Basic Authentication (https://tools.ietf.org/html/rfc2617) with your TACC username and password.

In [2]:
# import the requests library
import requests

# import getpass to prompt for a password
from getpass import getpass

In [3]:
# root URL of the Agave API; every endpoint used below is built on top of it
base_url = "https://api.tacc.utexas.edu"

In [4]:
# TACC credentials: edit the username for your own account. The password is
# read interactively so it is never stored in the notebook.
username = 'jstubbs'
password_prompt = 'Hello {}. Please enter your TACC password: '.format(username)
password = getpass(prompt=password_prompt)

Hello jstubbs. Please enter your TACC password: ········


In [5]:
# Client name, key and secret generated in the previous module.
# The key and secret are OAuth credentials, so they are prompted for at run time
# instead of being hardcoded where they would be saved (and shared) with the notebook.
client_name = 'cic_institute'
key = getpass(prompt='Please enter the client key for {}: '.format(client_name))
secret = getpass(prompt='Please enter the client secret for {}: '.format(client_name))

In [6]:
# request a fresh access/refresh token pair using the OAuth password grant;
# the client key and secret are passed as the HTTP Basic auth pair
token_url = '{}/token'.format(base_url)
data = {
    'username': username,
    'password': password,
    'grant_type': 'password',
    'scope': 'PRODUCTION',
}
rsp = requests.post(token_url, data=data, auth=(key, secret))
rsp.status_code

200

In [7]:
# decode the token response once and pull out both tokens
token_info = rsp.json()
access_token = token_info['access_token']
refresh_token = token_info['refresh_token']

In [8]:
# id of the storage system for our class
system_id = 'cic.storage'

# every request below authenticates by sending the access token as a Bearer token
bearer_value = 'Bearer {}'.format(access_token)
headers = {'Authorization': bearer_value}

In [9]:
# list our home directory (its path is simply our username) with the
# 'listings' endpoint of the files service
listing_url = '{}/files/v2/listings/system/{}/{}'.format(base_url, system_id, username)
rsp = requests.get(listing_url, headers=headers)
rsp.status_code

200

In [10]:
# the 'result' field of the response body holds the directory entries
listing_body = rsp.json()
listing_body['result']

[{'_links': {'history': {'href': 'https://api.tacc.utexas.edu/files/v2/history/system/cic.storage//home/jstubbs'},
   'metadata': {'href': 'https://api.tacc.utexas.edu/meta/v2/data?q=%7B%22associationIds%22%3A%227129190038640988647-242ac113-0001-002%22%7D'},
   'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs'},
   'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
  'format': 'folder',
  'lastModified': '2017-07-20T18:28:02.000-05:00',
  'length': 29,
  'mimeType': 'text/directory',
  'name': '.',
  'path': '/home/jstubbs',
  'permissions': 'ALL',
  'system': 'cic.storage',
  'type': 'dir'},
 {'_links': {'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs/foo'},
   'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
  'format': 'raw',
  'lastModified': '2017-07-20T13:28:30.000-05:00',
  'length': 5,
  'mimeType': 'application/octet-stream',
  'name': 'f

In [11]:
# Create a directory called 'test123' inside our home directory by sending a PUT
# request with an 'mkdir' action payload to the files service.
# Note that this goes to the 'media' endpoint, not the 'listings' endpoint.
media_url = '{}/files/v2/media/system/{}/{}'.format(base_url, system_id, username)
data = dict(action='mkdir', path='test123')
rsp = requests.put(url=media_url, data=data, headers=headers)
rsp.status_code

201

In [12]:
# inspect the 'result' field of the mkdir response
mkdir_body = rsp.json()
mkdir_body['result']

{'_links': {'history': {'href': 'https://api.tacc.utexas.edu/files/v2/history/system/cic.storage//home/jstubbs/test123'},
  'profile': {'href': 'https://api.tacc.utexas.edu/profiles/v2/jstubbs'},
  'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs/test123'},
  'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
 'internalUsername': None,
 'lastModified': '2017-07-22T22:48:55.972-05:00',
 'name': 'test123',
 'nativeFormat': 'dir',
 'owner': 'jstubbs',
 'path': 'jstubbs/test123',
 'source': None,
 'status': 'TRANSFORMING_COMPLETED',
 'systemId': 'cic.storage',
 'uuid': '2733614603201605145-242ac113-0001-002'}

In [13]:
# list the home directory a second time to confirm the new directory shows up
listing_url = '{}/files/v2/listings/system/{}/{}'.format(base_url, system_id, username)
rsp = requests.get(listing_url, headers=headers)
rsp.status_code

200

In [14]:
# display the directory entries returned in the 'result' field
listing_body = rsp.json()
listing_body['result']

[{'_links': {'history': {'href': 'https://api.tacc.utexas.edu/files/v2/history/system/cic.storage//home/jstubbs'},
   'metadata': {'href': 'https://api.tacc.utexas.edu/meta/v2/data?q=%7B%22associationIds%22%3A%227129190038640988647-242ac113-0001-002%22%7D'},
   'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs'},
   'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
  'format': 'folder',
  'lastModified': '2017-07-22T22:48:55.000-05:00',
  'length': 44,
  'mimeType': 'text/directory',
  'name': '.',
  'path': '/home/jstubbs',
  'permissions': 'ALL',
  'system': 'cic.storage',
  'type': 'dir'},
 {'_links': {'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs/foo'},
   'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
  'format': 'raw',
  'lastModified': '2017-07-20T13:28:30.000-05:00',
  'length': 5,
  'mimeType': 'application/octet-stream',
  'name': 'f

In [15]:
# the new directory can also be listed directly by appending its name to the path
test_dir_url = '{}/files/v2/listings/system/{}/{}/test123'.format(base_url, system_id, username)
rsp = requests.get(test_dir_url, headers=headers)
rsp.json()['result']

[{'_links': {'history': {'href': 'https://api.tacc.utexas.edu/files/v2/history/system/cic.storage//home/jstubbs/test123'},
   'metadata': {'href': 'https://api.tacc.utexas.edu/meta/v2/data?q=%7B%22associationIds%22%3A%222733614603201605145-242ac113-0001-002%22%7D'},
   'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs/test123'},
   'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
  'format': 'folder',
  'lastModified': '2017-07-22T22:48:55.000-05:00',
  'length': 6,
  'mimeType': 'text/directory',
  'name': '.',
  'path': '/home/jstubbs/test123',
  'permissions': 'ALL',
  'system': 'cic.storage',
  'type': 'dir'}]

In [16]:
# First, create a small local file called foo.txt that we will upload to our test directory.
# The with-block closes the file even if the write fails, and plain "w" is enough
# because the file is only written here, never read back.
with open("foo.txt", "w") as f:
    f.write("This is a test. Test 123")

In [17]:
# list the working directory to check that foo.txt was created locally:
! ls -l

total 116
-rw-r--r-- 1 root root  4564 Jul 22 23:48 agave_files.ipynb
-rw-r--r-- 1 root root  5871 Jul 22 19:35 Application_Programming_Interfaces_Intro.ipynb
-rw-r--r-- 1 root root    24 Jul 22 23:49 foo.txt
-rw-r--r-- 1 root root  9144 Jul 22 23:04 Intro_Agave_OAuth.ipynb
-rw-r--r-- 1 root root  6345 Jul 22 22:23 Intro_Authentication_in_HTTP.ipynb
-rw-r--r-- 1 root root 19769 Jul 22 23:48 Solutions_Agave_Files.ipynb
-rw-r--r-- 1 root root 10014 Jul 22 23:24 Solutions_Agave_OAuth.ipynb
-rw-r--r-- 1 root root 26140 Jul 22 21:38 Solutions_to_api_auth.ipynb
-rw-r--r-- 1 root root 14478 Jul 22 22:02 Solutions_to_api_into.ipynb


In [18]:
# Upload foo.txt to the test123 directory with a multipart POST to the media endpoint.
# The file is opened in a with-block so the handle is closed as soon as the
# request has been sent, instead of being left open.
upload_url = '{}/files/v2/media/system/{}/{}/test123'.format(base_url, system_id, username)
with open('foo.txt', 'rb') as upload_file:
    rsp = requests.post(upload_url,
                        files={'fileToUpload': upload_file},
                        headers=headers)
rsp.json()['result']

{'_links': {'history': {'href': 'https://api.tacc.utexas.edu/files/v2/history/system/cic.storage//home/jstubbs/test123/foo.txt'},
  'notification': [],
  'profile': {'href': 'https://api.tacc.utexas.edu/profiles/v2/jstubbs'},
  'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs/test123/foo.txt'},
  'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
 'internalUsername': None,
 'lastModified': '2017-07-22T22:49:56.905-05:00',
 'name': 'foo.txt',
 'nativeFormat': 'raw',
 'owner': 'jstubbs',
 'path': 'jstubbs/test123/foo.txt',
 'source': 'http://129.114.97.130/foo.txt',
 'status': 'STAGING_QUEUED',
 'systemId': 'cic.storage',
 'uuid': '115445489232965145-242ac113-0001-002'}

Note that the upload was QUEUED (the `status` field is `STAGING_QUEUED`); in other words, our file won't be there instantly. Agave collects the data in the file and queues the transfer to the remote system. Usually, this transfer happens pretty quickly, but on days when Agave is doing a large number of transfers, it can sometimes take a while.

Let's check to see if our file is there.

In [19]:
# list the test123 directory again to see whether the uploaded foo.txt has arrived
test_dir_url = '{}/files/v2/listings/system/{}/{}/test123'.format(base_url, system_id, username)
rsp = requests.get(test_dir_url, headers=headers)
rsp.json()['result']

[{'_links': {'history': {'href': 'https://api.tacc.utexas.edu/files/v2/history/system/cic.storage//home/jstubbs/test123'},
   'metadata': {'href': 'https://api.tacc.utexas.edu/meta/v2/data?q=%7B%22associationIds%22%3A%222733614603201605145-242ac113-0001-002%22%7D'},
   'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs/test123'},
   'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
  'format': 'folder',
  'lastModified': '2017-07-22T22:49:57.000-05:00',
  'length': 21,
  'mimeType': 'text/directory',
  'name': '.',
  'path': '/home/jstubbs/test123',
  'permissions': 'ALL',
  'system': 'cic.storage',
  'type': 'dir'},
 {'_links': {'self': {'href': 'https://api.tacc.utexas.edu/files/v2/media/system/cic.storage//home/jstubbs/test123/foo.txt'},
   'system': {'href': 'https://api.tacc.utexas.edu/systems/v2/cic.storage'}},
  'format': 'raw',
  'lastModified': '2017-07-22T22:49:57.000-05:00',
  'length': 24,
  'mimeType': 'te

In [20]:
# finally, let's download our file again in a new directory called temp. We'll make that directory first:
! mkdir temp123

In [21]:
# list the working directory to confirm that temp123 was created
! ls -l 

total 124
-rw-r--r-- 1 root root  4564 Jul 22 23:48 agave_files.ipynb
-rw-r--r-- 1 root root  5871 Jul 22 19:35 Application_Programming_Interfaces_Intro.ipynb
-rw-r--r-- 1 root root    24 Jul 22 23:49 foo.txt
-rw-r--r-- 1 root root  9144 Jul 22 23:04 Intro_Agave_OAuth.ipynb
-rw-r--r-- 1 root root  6345 Jul 22 22:23 Intro_Authentication_in_HTTP.ipynb
-rw-r--r-- 1 root root 20494 Jul 22 23:50 Solutions_Agave_Files.ipynb
-rw-r--r-- 1 root root 10014 Jul 22 23:24 Solutions_Agave_OAuth.ipynb
-rw-r--r-- 1 root root 26140 Jul 22 21:38 Solutions_to_api_auth.ipynb
-rw-r--r-- 1 root root 14478 Jul 22 22:02 Solutions_to_api_into.ipynb
drwxr-xr-x 2 root root  4096 Jul 22 23:50 temp123


In [22]:
# Download the file with a GET request to the media endpoint.
# The body arrives as raw bytes, so we are responsible for writing it to disk.
download_url = '{}/files/v2/media/system/{}/{}/test123/foo.txt'.format(base_url, system_id, username)
# stream=True defers fetching the body so iter_content() reads it in chunks
# rather than loading the whole file into memory first
rsp = requests.get(download_url, headers=headers, stream=True)
try:
    # stop on an HTTP error instead of saving the error response as foo.txt;
    # the local file is only opened (and truncated) once the request succeeded
    rsp.raise_for_status()
    with open('temp123/foo.txt', 'wb') as f:
        for block in rsp.iter_content(1024):
            if block:
                f.write(block)
finally:
    # release the connection even if the download or the write fails part-way
    rsp.close()

In [23]:
# list the download directory to confirm that foo.txt was written
! ls -l temp123/

total 4
-rw-r--r-- 1 root root 24 Jul 22 23:51 foo.txt


In [25]:
# print the downloaded file so its contents can be compared with what we uploaded
! cat temp123/foo.txt

This is a test. Test 123