# iRODS for TU Delft TNW
This notebook was used to prepare experimental imaging data, collected by Hozanna Miro in March 2018, for Gary Steele and Aurèle Adam.
This notebook was run from a Windows 7 laptop using Anaconda to install Python 3.

## Set up
The code works through the `source` directory, looking for image files. When it finds one, it looks for a `.txt` file of the same name. The `.txt` file contains the metadata.

The iRODS metadata is written to the text file named by `metadata`, which can then be run as an iCommands script. `dest_iRODS` is the name of the iRODS collection where the files with metadata are stored. It is used solely to create the iCommands. The files can be copied from `source` to `dest_iRODS` using Cyberduck, for example.

To save time while debugging, the creation of the summary file (`list_types_on_disk`) and of the metadata script (`make_metadata`) can be switched on and off independently.

In [None]:
import os

# Root directory that is scanned for data files - in this case a USB-drive.
source = 'Y:/Hozanna'
# iRODS destination collection - only used when generating the iCommands.
dest_iRODS = '/tempZone/home/susanb'
# Text file that the generated iCommands are written to.
metadata = 'D:/metadata.txt'

# Switches: write the per-extension summary file / write the metadata script.
list_types_on_disk, make_metadata = True, True

# Terminator appended to every line written to the output files.
line_end = ' \n'

## Metadata
This section parses the metadata written to text files automatically by the imaging set-up.

The text files look something like this:

`[SemImageFile]`

`InstructName=S-1234`

`DataNumber=y-1x0`

`SampleName=`

`ImageName=images.jpg`

`Directory=D:\TU Delft\2016-05-31_wafer3\`

`Date=05/31/2016`

`Time=11:11:11`

`DataSize=2560x1920`

`PixelSize=1.1234567`

`AcceleratingVoltage=1234 Volt`

`Magnification=12345`

`LensMode=High`

`ScreenMode=Full Screen`

`Condition=Vacc=25kV   Mag=x12.0k   WD=2.1mm`

`DataDisplayCombine=1`

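Each `name=value` line is converted to an `imeta add` iCommand. For example, `AcceleratingVoltage=1234 Volt` becomes `imeta add -d '<data object>' 'AcceleratingVoltage' '1234' 'Volt'`.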

In [None]:
def write_metadata(text_file, data_object):
    """Convert one metadata text file into ``imeta add`` commands.

    Reads `text_file` line by line. For every ``name=value`` line that can be
    interpreted, an ``imeta add -d`` command for `data_object` is written to
    the module-level ``metadata_file``. Lines that cannot be interpreted are
    reported in the module-level ``problems_file``. Lines without ``=``
    (section headers, blank lines) and attributes with an empty value are
    skipped silently.

    Parameters:
        text_file: path of the text file that holds the metadata.
        data_object: name of the data object; inserted verbatim into every
            generated command.
    """
    # Attributes whose values are free text: spaces allowed, never a unit.
    free_text_names = ("Directory", "Media", "ScreenMode", "ImageName", "SampleName")

    # Read everything first so the handle is closed before any output is written.
    with open(text_file, "r") as text_handle:
        lines = [line.rstrip('\n') for line in text_handle]

    for line in lines:
        if "=" not in line:
            continue

        attname, _, attvalue = line.partition("=")

        if attname.strip() == "Condition":
            # Assume the value has the form: Vacc=??kV   Mag=x250   WD=???mm
            # Matching on the attribute name (not on the substring "Condition")
            # keeps values that merely mention the word out of this branch.
            for field in attvalue.split():
                key, separator, setting = field.partition("=")
                if not separator:
                    continue
                if key in ("Vacc", "WD"):
                    # The last two characters are taken to be the unit (kV, mm).
                    metadata_file.write(
                        _imeta_add_command(data_object, key, setting[:-2], setting[-2:]))
                elif key == "Mag":
                    metadata_file.write(_imeta_add_command(data_object, key, setting))
                # Any other setting on the Condition line is ignored.
        elif line.count("=") == 1:
            if attvalue == "":
                continue
            if line.count(" ") == 0 or attname in free_text_names:
                # Plain string value, no unit.
                metadata_file.write(_imeta_add_command(data_object, attname, attvalue))
                continue
            value_and_unit = attvalue.split(" ")
            if line.count(" ") == 1 and len(value_and_unit) == 2:
                # Exactly "value unit".
                metadata_file.write(
                    _imeta_add_command(data_object, attname, value_and_unit[0], value_and_unit[1]))
            else:
                problems_file.write("Could not interpret metadata line '"+line+"' in file "+text_file+line_end)
        else:
            problems_file.write("Could not interpret metadata line '"+line+"' in file "+text_file+line_end)


def _imeta_add_command(data_object, attname, attvalue, attunit=""):
    """Return one ``imeta add -d`` command line, terminated with ``line_end``."""
    # Every argument is wrapped in single quotes, so a quote inside a value or
    # unit would end the argument early. They are simply removed.
    # SEB could do better than just removing quotes in attribute value
    attvalue = attvalue.replace("'", "")
    attunit = attunit.replace("'", "")
    return "imeta add -d '"+data_object+"' '"+attname+"' '"+attvalue+"' '"+attunit+"'"+line_end

## Search through files looking for metadata
This section searches a (sub-)directory recursively, looking for images of type:
-  .jpg or .JPG
-  .tif or .TIF
-  .bmp or .BMP
-  .jpeg or .JPEG

When it finds one, it calls the code above to create the metadata script, assuming that the `.txt` file containing the metadata is in the same directory as the image file.

In [None]:
def exploretree(dir, formats):
    """Walk `dir` recursively and generate metadata commands for image files.

    The extension of every regular file found is appended to `formats`.
    When the module-level ``make_metadata`` flag is set and a file is an image
    (.jpg, .jpeg, .tif or .bmp, in any letter case), ``write_metadata`` is
    called with the ``.txt`` file of the same base name in the same directory.
    Anything that cannot be processed is reported in the module-level
    ``problems_file``.

    Parameters:
        dir: directory to search; sub-directories are searched recursively.
        formats: list that collects the extension of every file seen
            (modified in place).
    """
    # SEB search for files with these extensions only. This should be extended
    image_extensions = (".jpg", ".jpeg", ".tif", ".bmp")

    try:
        # Sorted so that the generated files come out in a reproducible order.
        names = sorted(os.listdir(dir))
    except OSError as err:
        # An unreadable directory is reported and skipped, not fatal to the run.
        problems_file.write("could not list directory "+dir+": "+str(err)+line_end)
        return

    for name in names:
        pathname = dir+"/"+name
        # SEB if path name is too long, Windows cannot handle it. Skip this path. Could do better
        if len(pathname) > 156:
            problems_file.write("path name too long "+pathname+line_end)
            continue

        if os.path.isdir(pathname):
            exploretree(pathname, formats)
        elif os.path.isfile(pathname):
            filename, extension = os.path.splitext(name)
            formats.append(extension)
            if extension.lower() not in image_extensions:
                continue
            if not make_metadata:
                continue
            # SEB only search for metadata in txt files in the same directory. This should be changed
            text_file = dir+"/"+filename+".txt"
            if os.path.exists(text_file):
                # Same path relative to `source`, but below the iRODS collection.
                rodsname = dest_iRODS+pathname[len(source):]
                write_metadata(text_file, rodsname)
            else:
                problems_file.write("no .txt file available to extract metadata for "+pathname+line_end)
        else:
            problems_file.write("neither collection nor file "+pathname+line_end)

## Main section
This section opens files, calls the code above and closes files again.

In [None]:
# metadata_file and problems_file are module-level names on purpose:
# write_metadata and exploretree write to them directly.
formats = []

problems_file = open("problems_file.txt", "w")
try:
    if make_metadata:
        metadata_file = open(metadata, "w")
    try:
        exploretree(source, formats)
    finally:
        # Close the script even when the traversal fails part-way.
        if make_metadata:
            metadata_file.close()
finally:
    problems_file.close()

# Number of files found per extension, in order of first appearance.
d = {}
for extension in formats:
    d[extension] = d.get(extension, 0) + 1

if list_types_on_disk:
    with open("types_on_disk.txt", "w") as types_on_disk:
        types_on_disk.write(str(d))

# Show everything that could not be processed.
with open("problems_file.txt", 'r') as fin:
    print(fin.read())