# Folders

Objectives:   
 * Using the h5pyd Folder class to get information about folder and domain objects from server
 * Create sub-folders programmatically
 * Programmatically get a domain's ACLs
 * Recursively list all items under a Folder
    
Note: h5py doesn't have an equivalent class, since users can use the standard Python methods for working with POSIX filesystems.  The folder structure supported by HSDS, on the other hand, is not generally accessible other than through the HSDS REST API, so h5pyd provides an easy means of interacting with server folder objects.

In [1]:
import os
from datetime import datetime
import h5pyd # this is a HSDS only tutorial

In [2]:
#
# Get folder for the HDFLabTutorial 
#
def getMyFolder():
    """Return the path of the current user's HDFLabTutorial folder.

    The user name is read from the JUPYTERHUB_USER environment variable.
    Every entry under '/home/' whose name is a prefix of the user name is
    opened, and the first one whose owner equals the user name is taken as
    the home folder (e.g. folder "/home/bob/" for username "bob@acme.com").
    The "HDFLabTutorial/" sub-folder is created if it is not already present.

    Returns:
        The tutorial folder path, "<home folder>HDFLabTutorial/".

    Raises:
        ValueError: if JUPYTERHUB_USER is not set, or if no folder under
            '/home/' is owned by the user.
        IOError: if opening the tutorial folder fails with an errno other
            than 404, or if creating it fails.
    """
    DIR_NAME = "HDFLabTutorial/"

    # os.getenv returns None when the variable is missing; fail with a clear
    # message instead of an AttributeError on username.startswith below.
    username = os.getenv("JUPYTERHUB_USER")
    if not username:
        raise ValueError("JUPYTERHUB_USER environment variable is not set")

    myfolder = None
    home_dir = h5pyd.Folder('/home/')  # folder object listing all home folders
    try:
        for name in home_dir:
            # only consider folders whose name matches the start of the username
            if not username.startswith(name):
                continue
            path = '/home/' + name + '/'
            candidate = h5pyd.Folder(path)
            try:
                if candidate.owner == username:
                    myfolder = path
            finally:
                candidate.close()
            if myfolder:
                break
    finally:
        # release the handle even if one of the lookups above raised
        home_dir.close()

    if not myfolder:
        raise ValueError(f"unable to find home folder for {username}")

    # create a workshop subfolder if not already present
    myfolder += DIR_NAME
    try:
        # opening is only an existence check, so close the handle right away
        h5pyd.Folder(myfolder).close()
    except IOError as ioe:
        if ioe.errno != 404:
            raise  # unexpected error
        # not present - create it now
        h5pyd.Folder(myfolder, mode='x').close()
        print("created folder:", myfolder)

    return myfolder

In [3]:
# Look up your HDFLabTutorial folder path (getMyFolder creates the folder if needed)
home_path = getMyFolder()
home_path  # this is the folder path where you have permission to write to

'/home/jreadey/HDFLabTutorial/'

In [4]:
# Construct a folder object by passing a valid path
# Use mode='x' since we'll be modifying the contents of the folder
home_folder = h5pyd.Folder(home_path, mode='x')
home_folder # text representation will be the same as the folder path

/home/jreadey/HDFLabTutorial/

In [5]:
# The built-in len() gives the number of objects in the folder.
# If you get zero, try running some of the other tutorials first!
len(home_folder)

5

In [6]:
# Iterating over a folder object yields its items one at a time;
# printing each one shows the item's name
for item in home_folder:
    print(item)

03.h5
04.h5
05.h5
06.h5
07.h5


In [7]:
# Create a "12/" sub-folder for this tutorial under the home path.
# base_folder is left open on purpose: the following cells query it.
folder_base = home_path + "12/"
base_folder = h5pyd.Folder(folder_base, mode='x')
print("created folder:", folder_base)

created folder: /home/jreadey/HDFLabTutorial/12/


In [8]:
# Create some items in this folder: two sub-folders and one domain.
# Folder paths must end with a '/'; any other name is created as a domain.
domain_names = ("folder1/", "folder2/", "file1")
for name in domain_names:
    path = folder_base + name
    if name.endswith('/'):
        # the handle is only needed to create the folder, so close it right away
        h5pyd.Folder(path, mode='w').close()
        print("created folder:", path)
    else:
        # likewise close the new (empty) domain instead of leaking the handle
        h5pyd.File(path, mode='w').close()
        print('created domain:', path)


created folder: /home/jreadey/HDFLabTutorial/12/folder1/
created folder: /home/jreadey/HDFLabTutorial/12/folder2/
created domain: /home/jreadey/HDFLabTutorial/12/file1


In [9]:
# the folder should now contain len(domain_names) items
len(base_folder)

3

In [10]:
# Walk the folder's contents and report whether each entry is a domain or a
# sub-folder; an entry whose dict has a 'root' key is reported as a domain.
for name in base_folder:
    item = base_folder[name]  # indexing by name returns a dict
    print(f"{name} is domain" if 'root' in item else f"{name} is a folder")

file1 is domain
folder1 is a folder
folder2 is a folder


In [11]:
# We can get the ACLs (access control lists) for a folder object;
# the result supports len(), giving the number of ACL entries
acls = base_folder.getACLs()
print(f"got {len(acls)} ACLs")

got 2 ACLs


In [12]:
# Each ACL can be indexed with these keys: the user name it applies to,
# followed by one entry per permission
acl_keys = ('userName', 'create', 'read', 'update', 'delete', 'readACL', 'updateACL')
for acl in acls:
    for k in acl_keys:
        print(f"{k}: {acl[k]}")
    print('-----')  # separator line between ACLs
    

userName: default
create: False
read: True
update: False
delete: False
readACL: False
updateACL: False
-----
userName: jreadey@hdfgroup.org
create: True
read: True
update: True
delete: True
readACL: True
updateACL: True
-----


In [13]:
# use this function to recursively list folder/domains
def visitDomains(domain, depth=1):
    """Recursively print and count the folders/domains under a folder path.

    One line (owner, class, last-modified time, path) is printed for the
    folder itself and one for each item it contains; sub-folders are then
    visited recursively.

    NOTE: a sub-folder is printed and counted twice - once as an item of its
    parent and once as the header line of its own listing.

    Args:
        domain: folder path, with or without a trailing '/'.
        depth: number of folder levels to visit. 0 visits nothing; a negative
            value never reaches the zero stop condition, so the whole tree
            is traversed.

    Returns:
        The number of lines printed (0 when depth is 0 or when the folder
        could not be opened because of a 403/404/410 error).

    Raises:
        IOError: for any errno other than 403, 404 or 410.
    """
    if depth == 0:
        return 0

    count = 0
    # endswith (rather than domain[-1]) also copes with an empty string
    if domain.endswith('/'):
        domain = domain[:-1]  # strip off trailing slash

    d = None
    try:
        d = h5pyd.Folder(domain + '/')
        dir_class = "domain"
        display_name = domain
        if d.is_folder:
            dir_class = "folder"
            display_name += '/'

        owner = d.owner
        if owner is None:
            owner = ""
        if d.modified is None:
            timestamp = ""
        else:
            timestamp = datetime.fromtimestamp(int(d.modified))

        print(f"{owner:35} {dir_class:8} {timestamp} {display_name}")
        count += 1

        for name in d:
            item = d[name]
            owner = item["owner"]
            if owner is None:
                # same guard as for the header line: None cannot be
                # formatted with a width specifier
                owner = ""
            full_path = domain + '/' + name

            dir_class = item["class"]
            if item["lastModified"] is None:
                timestamp = ""
            else:
                timestamp = datetime.fromtimestamp(int(item["lastModified"]))

            print(f"{owner:35} {dir_class:8} {timestamp} {full_path}")
            count += 1

            if dir_class == "folder":
                # recurse for folders
                count += visitDomains(full_path, depth=(depth - 1))

    except IOError as oe:
        if oe.errno in (403, 404, 410):
            # TBD: recently created domains may not be immediately visible to
            # the service. Once the flush operation is implemented, this
            # should no longer be an issue for h5pyd apps.
            #
            # Also, ignore domains for which we don't have permissions (403)
            pass
        else:
            print("error getting domain:", domain)
            raise
    finally:
        # always release the folder handle, including on error paths
        if d is not None:
            d.close()

    return count


In [14]:
# Visit everything under the tutorial folder (home_path).
# depth=-1 never hits the depth == 0 stop condition, so all levels are visited.
# Returns the number of lines printed.
visitDomains(home_path, depth=-1)

jreadey@hdfgroup.org                folder   2021-06-06 20:02:23 /home/jreadey/HDFLabTutorial/
jreadey@hdfgroup.org                domain   2022-01-23 19:14:39 /home/jreadey/HDFLabTutorial/03.h5
jreadey@hdfgroup.org                domain   2022-02-14 23:01:56 /home/jreadey/HDFLabTutorial/04.h5
jreadey@hdfgroup.org                domain   2022-01-21 00:45:16 /home/jreadey/HDFLabTutorial/05.h5
jreadey@hdfgroup.org                domain   2022-01-26 20:11:20 /home/jreadey/HDFLabTutorial/06.h5
jreadey@hdfgroup.org                domain   2022-01-21 01:00:13 /home/jreadey/HDFLabTutorial/07.h5
jreadey@hdfgroup.org                folder   2022-02-14 23:03:56 /home/jreadey/HDFLabTutorial/12
jreadey@hdfgroup.org                folder   2022-02-14 23:03:56 /home/jreadey/HDFLabTutorial/12/
jreadey@hdfgroup.org                domain   2022-02-14 23:04:05 /home/jreadey/HDFLabTutorial/12/file1
jreadey@hdfgroup.org                folder   2022-02-14 23:04:04 /home/jreadey/HDFLabTutorial/12/folder1
jr

13