## Directory structure

In [2]:
%ls
# Does the present directory contain the minimal working example directory 2019-11-21?
# NOTE(review): the cells below create, read from and write to 2019-06-01/data and
# 2019-06-01/out, not 2019-11-21/ — confirm which directory this check is meant for.

2019-06-01/
2019-06-01-testing-downloads.ipynb
2019-06-01-working-example-usrc-manning.ipynb
2019-11-14-revisions-to-metadata-import.ipynb
2019-11-21/
2019-11-21-minimal-working-example.ipynb
2019-11-25/
2019-11-25-working-example.ipynb


In [3]:
!mkdir 2019-06-01/data 2019-06-01/out
# Do the source and output subdirectories exist?

mkdir: 2019-06-01/data: File exists
mkdir: 2019-06-01/out: File exists


In [4]:
# Where are the images and metadata tag files?
data = "2019-06-01/data"
# Where should we put the renamed files and metadata catalog?
out = "2019-06-01/out"

## Function definitions

In [19]:
%run -i rdai
# We run the rdai script with -i, i.e. inside this notebook's own namespace
# rather than an empty one, to obtain its function definitions for later cells.

## Creating the metadata catalog

In [14]:
get_fixed_seq()
# Have we generated a fixed sequence for uuids?
# The global variable fixed_seq needs to be defined prior to calling mint_uuid.
# NOTE(review): get_fixed_seq and mint_uuid are not defined in this notebook;
# presumably they come from the rdai script — verify.
# NOTE(review): the return value is not assigned here, so this call presumably
# sets the global fixed_seq as a side effect — confirm in rdai.

In [22]:
normalized_catalog = get_normalized_catalog(data)
# We generate a normalized metadata catalog from the data directory.

catalog = unnormalize_catalog(normalized_catalog)
# We flatten the normalized catalog.
# Each file in the data directory "has its own entry" in this catalog.
# We'll eventually ignore non-image files.

write_timestamped_catalog(catalog, out)
# We write this version of the metadata catalog to the output directory.

RDAI: Tag EXIF:ImageUniqueID=c7884b78202611ea9288985aebdcd794 already exists in file 2019-06-01/data/nara_id_24407615/Manning-1914_0001.JPG.
RDAI: Tag EXIF:ImageUniqueID=c7acc57a202611ea9288985aebdcd794 already exists in file 2019-06-01/data/nara_id_24407615/Manning-1914_0002.JPG.
RDAI: Tag EXIF:ImageUniqueID=c7cf7be2202611ea9288985aebdcd794 already exists in file 2019-06-01/data/nara_id_24407615/Manning-1914_0003.JPG.
RDAI: Tag EXIF:ImageUniqueID=c7f0c522202611ea9288985aebdcd794 already exists in file 2019-06-01/data/nara_id_24407615/Manning-1914_0004.JPG.
RDAI: Tag EXIF:ImageUniqueID=c8127848202611ea9288985aebdcd794 already exists in file 2019-06-01/data/nara_id_24407615/Manning-1914_0005.JPG.
RDAI: Tag EXIF:ImageUniqueID=c8350de8202611ea9288985aebdcd794 already exists in file 2019-06-01/data/nara_id_24407615/Manning-1914_0006.JPG.
RDAI: Tag EXIF:ImageUniqueID=c8569280202611ea9288985aebdcd794 already exists in file 2019-06-01/data/nara_id_24407615/Manning-1914_0007.JPG.
RDAI: Tag EXI

## Bundling the images to send to the RDA

In [32]:
# Read in the most recent version of the metadata catalog from the out directory.
catalog = read_timestamped_catalog(out)

# Build the list of catalog entries that are image files,
# i.e. those whose media type starts with "image".
elementary_family = [
    entry for entry in catalog
    if entry['media_type'].startswith("image")
]

In [40]:
import os
# We'll perform some file renames between the data directory and the out directory.

In [41]:
# Move every image in the catalog into the output directory,
# naming each moved file after its uuid.
for member in elementary_family:
    os.rename(
        src=member['file_path'],
        dst=os.path.join(out, member['uuid']),
    )

In [42]:
# Conversely, move every image in the catalog from the output directory
# (where it is named after its uuid) back to its file_path in the data directory.
for member in elementary_family:
    os.rename(
        src=os.path.join(out, member['uuid']),
        dst=member['file_path'],
    )